refactor: flatten Microsoft skills from nested to flat directory structure

Rewrote sync_microsoft_skills.py (v4) to use each SKILL.md's frontmatter
'name' field as the flat directory name under skills/, replacing the nested
skills/official/microsoft/<lang>/<category>/<service>/ hierarchy.

This fixes CI failures: the indexing, validation, and catalog scripts expect
every skill at skills/<id>/SKILL.md (depth 1), which the nested layout did not satisfy.

Changes:
- Rewrite scripts/sync_microsoft_skills.py for flat output with collision detection
- Update scripts/tests/inspect_microsoft_repo.py for flat name mapping
- Update scripts/tests/test_comprehensive_coverage.py for name uniqueness checks
- Delete skills/official/ nested directory
- Add 129 Microsoft skills as flat directories (e.g. skills/azure-mgmt-botservice-dotnet/)
- Move attribution files to docs/ (LICENSE-MICROSOFT, microsoft-skills-attribution.json)
- Rebuild skills_index.json, CATALOG.md, README.md (845 total skills)
This commit is contained in:
Ahmed Rehan
2026-02-12 00:07:15 +05:00
parent e06454dafd
commit e7ae616385
142 changed files with 5683 additions and 6097 deletions

View File

@@ -1,161 +1,454 @@
const fs = require('fs');
const path = require('path');
const fs = require("fs");
const path = require("path");
const {
listSkillIdsRecursive,
readSkill,
tokenize,
unique,
} = require('../lib/skill-utils');
} = require("../lib/skill-utils");
const ROOT = path.resolve(__dirname, '..');
const SKILLS_DIR = path.join(ROOT, 'skills');
const ROOT = path.resolve(__dirname, "..");
const SKILLS_DIR = path.join(ROOT, "skills");
/**
 * Words that carry no signal when deriving trigger tokens.
 *
 * buildTriggers drops any token found in this set, so generic filler and
 * marketing terms do not end up as triggers. Each word is listed exactly
 * once; membership is all that matters for a Set.
 */
const STOPWORDS = new Set([
  // Articles, prepositions, pronouns and auxiliaries.
  "a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "from", "has", "have", "in", "into",
  "is", "it", "its", "of", "on", "or", "our", "out", "over", "that", "the", "their", "they", "this",
  "to", "use", "when", "with", "you", "your", "will", "can", "if", "not", "only", "also", "more",
  // Generic descriptors commonly found in skill descriptions.
  "best", "practice", "practices", "expert", "specialist", "focused", "focus", "master", "modern",
  "advanced", "comprehensive", "production", "production-ready", "ready", "build", "create", "deliver",
  "design", "implement", "implementation", "strategy", "strategies", "patterns", "pattern", "workflow",
  "workflows", "guide", "template", "templates", "tool", "tools", "project", "projects", "support",
  "manage", "management", "system", "systems", "services", "service", "across", "end", "end-to-end",
  "using", "based", "ensure", "help", "needs", "need", "focuses", "handles", "builds", "make",
]);
/**
 * Id segments that are too generic to be useful as tags.
 *
 * deriveTags consults this set only when a skill declares no tags of its own
 * and tags are derived by splitting the skill id on "-". Each word is listed
 * exactly once.
 */
const TAG_STOPWORDS = new Set([
  "pro", "expert", "patterns", "pattern", "workflow", "workflows", "templates", "template", "toolkit",
  "tools", "tool", "project", "projects", "guide", "management", "engineer", "architect", "developer",
  "specialist", "assistant", "analysis", "review", "reviewer", "automation", "orchestration", "scaffold",
  "scaffolding", "implementation", "strategy", "context", "feature", "features", "smart",
  "system", "systems", "design", "development", "test", "testing",
]);
/**
 * Category rules: each entry pairs a category `name` with the `keywords`
 * associated with it.
 *
 * NOTE(review): the consumer (detectCategory) is only partially visible here;
 * presumably rules are evaluated in array order, so the order is kept as-is.
 * Every rule has a single `name`, a single `keywords` key, and no repeated
 * keyword within a rule.
 */
const CATEGORY_RULES = [
  {
    name: "security",
    keywords: [
      "security", "sast", "compliance", "privacy", "threat", "vulnerability", "owasp", "pci", "gdpr",
      "secrets", "risk", "malware", "forensics", "attack", "incident", "auth", "mtls", "zero", "trust",
    ],
  },
  {
    name: "infrastructure",
    keywords: [
      "kubernetes", "k8s", "helm", "terraform", "cloud", "network", "devops", "gitops", "prometheus",
      "grafana", "observability", "monitoring", "logging", "tracing", "deployment", "istio", "linkerd",
      "service", "mesh", "slo", "sre", "oncall", "incident", "pipeline", "cicd", "ci", "cd", "kafka",
    ],
  },
  {
    name: "data-ai",
    keywords: [
      "data", "database", "db", "sql", "postgres", "mysql", "analytics", "etl", "warehouse", "dbt",
      "ml", "ai", "llm", "rag", "vector", "embedding", "spark", "airflow", "cdc", "pipeline",
    ],
  },
  {
    name: "development",
    keywords: [
      "python", "javascript", "typescript", "java", "golang", "go", "rust", "csharp", "dotnet", "php",
      "ruby", "node", "react", "frontend", "backend", "mobile", "ios", "android", "flutter", "fastapi",
      "django", "nextjs", "vue", "api",
    ],
  },
  {
    name: "architecture",
    keywords: [
      "architecture", "c4", "microservices", "event", "cqrs", "saga", "domain", "ddd", "patterns",
      "decision", "adr",
    ],
  },
  {
    name: "testing",
    keywords: ["testing", "tdd", "unit", "e2e", "qa", "test"],
  },
  {
    name: "business",
    keywords: [
      "business", "market", "sales", "finance", "startup", "legal", "hr", "product", "customer", "seo",
      "marketing", "kpi", "contract", "employment",
    ],
  },
  {
    name: "workflow",
    keywords: [
      "workflow", "orchestration", "conductor", "automation", "process", "collaboration",
    ],
  },
];
/**
 * Bundle definitions keyed by bundle name.
 *
 * buildBundles iterates these entries, lower-cases each rule's `keywords`,
 * and adds a skill to the bundle when any keyword appears in that skill's
 * token set. `description` is free text describing the bundle.
 */
const BUNDLE_RULES = {
  "core-dev": {
    description:
      "Core development skills across languages, frameworks, and backend/frontend fundamentals.",
    keywords: [
      "python", "javascript", "typescript", "go", "golang", "rust", "java", "node", "frontend", "backend",
      "react", "fastapi", "django", "nextjs", "api", "mobile", "ios", "android", "flutter", "php", "ruby",
    ],
  },
  "security-core": {
    description: "Security, privacy, and compliance essentials.",
    keywords: [
      "security", "sast", "compliance", "threat", "risk", "privacy", "secrets", "owasp", "gdpr", "pci",
      "vulnerability", "auth",
    ],
  },
  "k8s-core": {
    description: "Kubernetes and service mesh essentials.",
    keywords: ["kubernetes", "k8s", "helm", "istio", "linkerd", "service", "mesh"],
  },
  "data-core": {
    description: "Data engineering and analytics foundations.",
    keywords: [
      "data", "database", "sql", "dbt", "airflow", "spark", "analytics", "etl", "warehouse", "postgres",
      "mysql", "kafka",
    ],
  },
  "ops-core": {
    description: "Operations, observability, and delivery pipelines.",
    keywords: [
      "observability", "monitoring", "logging", "tracing", "prometheus", "grafana", "devops", "gitops",
      "deployment", "cicd", "pipeline", "slo", "sre", "incident",
    ],
  },
};
/**
 * Hand-picked skill ids offered as the "common" set.
 *
 * buildBundles filters this list down to the ids that actually exist in the
 * catalog, so each id must appear only once here to avoid duplicate entries
 * in the resulting `common` array.
 */
const CURATED_COMMON = [
  "bash-pro",
  "python-pro",
  "javascript-pro",
  "typescript-pro",
  "golang-pro",
  "rust-pro",
  "java-pro",
  "frontend-developer",
  "backend-architect",
  "nodejs-backend-patterns",
  "fastapi-pro",
  "api-design-principles",
  "sql-pro",
  "database-architect",
  "kubernetes-architect",
  "terraform-specialist",
  "observability-engineer",
  "security-auditor",
  "sast-configuration",
  "gitops-workflow",
];
/**
 * Lower-cases every token, passes the result through `unique`
 * (from ../lib/skill-utils; presumably de-duplicates) and drops falsy entries
 * such as empty strings.
 *
 * @param {string[]} tokens - Raw tokens; each must support `toLowerCase()`.
 * @returns {string[]} The normalized token list.
 */
function normalizeTokens(tokens) {
  const lowered = tokens.map((token) => token.toLowerCase());
  return unique(lowered).filter(Boolean);
}
function deriveTags(skill) {
let tags = Array.isArray(skill.tags) ? skill.tags : [];
tags = tags.map(tag => tag.toLowerCase()).filter(Boolean);
tags = tags.map((tag) => tag.toLowerCase()).filter(Boolean);
if (!tags.length) {
tags = skill.id
.split('-')
.map(tag => tag.toLowerCase())
.filter(tag => tag && !TAG_STOPWORDS.has(tag));
.split("-")
.map((tag) => tag.toLowerCase())
.filter((tag) => tag && !TAG_STOPWORDS.has(tag));
}
return normalizeTokens(tags);
@@ -177,17 +470,18 @@ function detectCategory(skill, tags) {
}
}
return 'general';
return "general";
}
/**
 * Builds the trigger list for a skill.
 *
 * The skill's name and description are tokenized; tokens shorter than two
 * characters or present in STOPWORDS are discarded. The given tags come
 * first, followed by the remaining tokens, and the combined list is passed
 * through `unique` and capped at 12 entries.
 *
 * @param {{name: string, description: string}} skill - Skill record.
 * @param {string[]} tags - Tags already derived for the skill.
 * @returns {string[]} At most 12 trigger strings.
 */
function buildTriggers(skill, tags) {
  const text = `${skill.name} ${skill.description}`;
  const tokens = tokenize(text).filter(
    (token) => token.length >= 2 && !STOPWORDS.has(token),
  );
  return unique([...tags, ...tokens]).slice(0, 12);
}
function buildAliases(skills) {
const existingIds = new Set(skills.map(skill => skill.id));
const existingIds = new Set(skills.map((skill) => skill.id));
const aliases = {};
const used = new Set();
@@ -200,7 +494,7 @@ function buildAliases(skills) {
}
}
const tokens = skill.id.split('-').filter(Boolean);
const tokens = skill.id.split("-").filter(Boolean);
if (skill.id.length < 28 || tokens.length < 4) continue;
const deduped = [];
@@ -211,10 +505,11 @@ function buildAliases(skills) {
deduped.push(token);
}
const aliasTokens = deduped.length > 3
? [deduped[0], deduped[1], deduped[deduped.length - 1]]
: deduped;
const alias = unique(aliasTokens).join('-');
const aliasTokens =
deduped.length > 3
? [deduped[0], deduped[1], deduped[deduped.length - 1]]
: deduped;
const alias = unique(aliasTokens).join("-");
if (!alias || alias === skill.id) continue;
if (existingIds.has(alias) || used.has(alias)) continue;
@@ -241,11 +536,11 @@ function buildBundles(skills) {
for (const [bundleName, rule] of Object.entries(BUNDLE_RULES)) {
const bundleSkills = [];
const keywords = rule.keywords.map(keyword => keyword.toLowerCase());
const keywords = rule.keywords.map((keyword) => keyword.toLowerCase());
for (const skill of skills) {
const tokenSet = skillTokens.get(skill.id) || new Set();
if (keywords.some(keyword => tokenSet.has(keyword))) {
if (keywords.some((keyword) => tokenSet.has(keyword))) {
bundleSkills.push(skill.id);
}
}
@@ -256,49 +551,58 @@ function buildBundles(skills) {
};
}
const common = CURATED_COMMON.filter(skillId => skillTokens.has(skillId));
const common = CURATED_COMMON.filter((skillId) => skillTokens.has(skillId));
return { bundles, common };
}
/**
 * Shortens a string to at most `limit` characters.
 *
 * Falsy values yield "". Strings that already fit are returned unchanged.
 * Longer strings are cut and suffixed with "..." so the result is exactly
 * `limit` characters long. When `limit` is below 3 there is no room for the
 * ellipsis, so the string is simply cut to `limit` characters; without this
 * guard `slice(0, limit - 3)` would receive a negative end index and return
 * a result longer than `limit`.
 *
 * @param {string} value - Text to shorten (may be null/undefined/empty).
 * @param {number} limit - Maximum length of the result.
 * @returns {string} The possibly truncated text.
 */
function truncate(value, limit) {
  if (!value) return "";
  if (value.length <= limit) return value;
  if (limit < 3) return value.slice(0, Math.max(limit, 0));
  return `${value.slice(0, limit - 3)}...`;
}
/**
 * Renders the catalog as a Markdown document.
 *
 * Output layout: a title, the generation timestamp, the total skill count,
 * then one section per category (sorted with the default Array sort). Each
 * section holds a four-column table (Skill, Description, Tags, Triggers)
 * with the skills in catalog order. Descriptions are truncated to 160
 * characters and any "|" is escaped so it cannot break the table row.
 *
 * @param {{generatedAt: string, total: number, skills: Array<{id: string,
 *   category: string, description: string, tags: string[],
 *   triggers: string[]}>}} catalog - Catalog data to render.
 * @returns {string} Markdown text, lines joined with "\n".
 */
function renderCatalogMarkdown(catalog) {
  const lines = [
    "# Skill Catalog",
    "",
    `Generated at: ${catalog.generatedAt}`,
    "",
    `Total skills: ${catalog.total}`,
    "",
  ];

  // Group skills once rather than re-filtering the full list per category.
  const byCategory = new Map();
  for (const skill of catalog.skills) {
    const bucket = byCategory.get(skill.category);
    if (bucket) {
      bucket.push(skill);
    } else {
      byCategory.set(skill.category, [skill]);
    }
  }

  const categories = Array.from(byCategory.keys()).sort();
  for (const category of categories) {
    const grouped = byCategory.get(category);
    lines.push(`## ${category} (${grouped.length})`);
    lines.push("");
    lines.push("| Skill | Description | Tags | Triggers |");
    lines.push("| --- | --- | --- | --- |");

    for (const skill of grouped) {
      // Escape pipes after truncation so table cells stay intact.
      const description = truncate(skill.description, 160).replace(
        /\|/g,
        "\\|",
      );
      const tags = skill.tags.join(", ");
      const triggers = skill.triggers.join(", ");
      lines.push(
        `| \`${skill.id}\` | ${description} | ${tags} | ${triggers} |`,
      );
    }

    lines.push("");
  }

  return lines.join("\n");
}
function buildCatalog() {
const skillRelPaths = listSkillIdsRecursive(SKILLS_DIR);
const skills = skillRelPaths.map(relPath => readSkill(SKILLS_DIR, relPath));
const skills = skillRelPaths.map((relPath) => readSkill(SKILLS_DIR, relPath));
const catalogSkills = [];
for (const skill of skills) {
@@ -318,26 +622,32 @@ function buildCatalog() {
}
const catalog = {
generatedAt: process.env.SOURCE_DATE_EPOCH
? new Date(process.env.SOURCE_DATE_EPOCH * 1000).toISOString()
: (process.env.CI ? '2026-02-08T00:00:00.000Z' : new Date().toISOString()),
generatedAt: process.env.SOURCE_DATE_EPOCH
? new Date(process.env.SOURCE_DATE_EPOCH * 1000).toISOString()
: "2026-02-08T00:00:00.000Z",
total: catalogSkills.length,
skills: catalogSkills.sort((a, b) => (a.id < b.id ? -1 : a.id > b.id ? 1 : 0)),
skills: catalogSkills.sort((a, b) =>
a.id < b.id ? -1 : a.id > b.id ? 1 : 0,
),
};
const aliases = buildAliases(catalog.skills);
const bundleData = buildBundles(catalog.skills);
const catalogPath = path.join(ROOT, 'data', 'catalog.json');
const catalogMarkdownPath = path.join(ROOT, 'CATALOG.md');
const bundlesPath = path.join(ROOT, 'data', 'bundles.json');
const aliasesPath = path.join(ROOT, 'data', 'aliases.json');
const catalogPath = path.join(ROOT, "data", "catalog.json");
const catalogMarkdownPath = path.join(ROOT, "CATALOG.md");
const bundlesPath = path.join(ROOT, "data", "bundles.json");
const aliasesPath = path.join(ROOT, "data", "aliases.json");
fs.writeFileSync(catalogPath, JSON.stringify(catalog, null, 2));
fs.writeFileSync(catalogMarkdownPath, renderCatalogMarkdown(catalog));
fs.writeFileSync(
bundlesPath,
JSON.stringify({ generatedAt: catalog.generatedAt, ...bundleData }, null, 2),
JSON.stringify(
{ generatedAt: catalog.generatedAt, ...bundleData },
null,
2,
),
);
fs.writeFileSync(
aliasesPath,