refactor: flatten Microsoft skills from nested to flat directory structure

Rewrote sync_microsoft_skills.py (v4) to use each SKILL.md's frontmatter
'name' field as the flat directory name under skills/, replacing the nested
skills/official/microsoft/<lang>/<category>/<service>/ hierarchy.

This fixes CI failures caused by the indexing, validation, and catalog
scripts expecting skills/<id>/SKILL.md (depth 1).

Changes:
- Rewrite scripts/sync_microsoft_skills.py for flat output with collision detection
- Update scripts/tests/inspect_microsoft_repo.py for flat name mapping
- Update scripts/tests/test_comprehensive_coverage.py for name uniqueness checks
- Delete skills/official/ nested directory
- Add 129 Microsoft skills as flat directories (e.g. skills/azure-mgmt-botservice-dotnet/)
- Move attribution files to docs/ (LICENSE-MICROSOFT, microsoft-skills-attribution.json)
- Rebuild skills_index.json, CATALOG.md, README.md (845 total skills)
This commit is contained in:
Ahmed Rehan
2026-02-12 00:07:15 +05:00
parent e06454dafd
commit e7ae616385
142 changed files with 5683 additions and 6097 deletions

View File

@@ -1,149 +1,98 @@
#!/usr/bin/env python3
"""
Debug script to inspect Microsoft Skills repository structure - v2
Handles all skill locations including plugins
Inspect Microsoft Skills Repository Structure
Shows the repository layout, skill locations, and what flat names would be generated.
"""
import re
import subprocess
import tempfile
from pathlib import Path
MS_REPO = "https://github.com/microsoft/skills.git"
def extract_skill_name(skill_md_path: Path) -> str | None:
"""Extract the 'name' field from SKILL.md YAML frontmatter."""
try:
content = skill_md_path.read_text(encoding="utf-8")
except Exception:
return None
fm_match = re.search(r"^---\s*\n(.*?)\n---", content, re.DOTALL)
if not fm_match:
return None
for line in fm_match.group(1).splitlines():
match = re.match(r"^name:\s*(.+)$", line)
if match:
value = match.group(1).strip().strip("\"'")
if value:
return value
return None
def inspect_repo():
    """Inspect the Microsoft skills repository structure.

    Shallow-clones ``MS_REPO`` into a temporary directory, finds every
    ``SKILL.md`` beneath it, and prints the flat directory name each skill
    maps to: its frontmatter ``name`` or, when that is missing, ``ms-``
    followed by the skill directory's relative path parts after the first,
    joined with ``-``. Names shared by more than one source directory are
    reported as collisions.

    Raises:
        subprocess.CalledProcessError: If ``git clone`` exits non-zero
            (the clone is run with ``check=True``).
    """
    print("🔍 Inspecting Microsoft Skills Repository Structure")
    print("=" * 60)

    with tempfile.TemporaryDirectory() as temp_dir:
        temp_path = Path(temp_dir)

        print("\n1⃣ Cloning repository...")
        subprocess.run(
            ["git", "clone", "--depth", "1", MS_REPO, str(temp_path)],
            check=True,
            capture_output=True,
        )

        # Find all SKILL.md files
        all_skill_mds = list(temp_path.rglob("SKILL.md"))
        print(f"\n2⃣ Total SKILL.md files found: {len(all_skill_mds)}")

        # Show flat name mapping
        print("\n3⃣ Flat Name Mapping (frontmatter 'name' → directory name):")
        print("-" * 60)

        # effective flat name -> every source directory that maps to it
        names_seen: dict[str, list[str]] = {}

        # Sort by string path so the listing order is stable between runs.
        for skill_md in sorted(all_skill_mds, key=str):
            try:
                rel = skill_md.parent.relative_to(temp_path)
            except ValueError:
                rel = skill_md.parent

            name = extract_skill_name(skill_md)
            if name:
                effective_name = display_name = name
            else:
                # Fallback is built once and reused for both the printed
                # line and the collision bookkeeping so they cannot drift.
                effective_name = f"ms-{'-'.join(rel.parts[1:])}"
                display_name = f"(no name → {effective_name})"

            print(f" {rel} → {display_name}")
            names_seen.setdefault(effective_name, []).append(str(rel))

        # Collision check: a name is a collision when more than one source
        # directory maps to it.
        collisions = {
            n: paths for n, paths in names_seen.items() if len(paths) > 1
        }
        if collisions:
            print(f"\n4️⃣ ⚠️ Name Collisions Detected ({len(collisions)}):")
            for collided_name, paths in collisions.items():
                print(f" '{collided_name}':")
                for p in paths:
                    print(f" - {p}")
        else:
            print(
                f"\n4⃣ ✅ No name collisions — all {len(names_seen)} names are unique!")

    print("\n✨ Inspection complete!")
if __name__ == "__main__":
    try:
        inspect_repo()
    except Exception as e:
        # Top-level boundary: report the failure once with its traceback,
        # then exit non-zero so a calling shell or CI job sees the failure.
        print(f"\n❌ Error: {e}")
        import traceback

        traceback.print_exc()
        raise SystemExit(1) from e