refactor: flatten Microsoft skills from nested to flat directory structure
Rewrote sync_microsoft_skills.py (v4) to use each SKILL.md's frontmatter 'name' field as the flat directory name under skills/, replacing the nested skills/official/microsoft/<lang>/<category>/<service>/ hierarchy. This fixes CI failures caused by the indexing, validation, and catalog scripts expecting skills/<id>/SKILL.md (depth 1).

Changes:
- Rewrite scripts/sync_microsoft_skills.py for flat output with collision detection
- Update scripts/tests/inspect_microsoft_repo.py for flat name mapping
- Update scripts/tests/test_comprehensive_coverage.py for name uniqueness checks
- Delete skills/official/ nested directory
- Add 129 Microsoft skills as flat directories (e.g. skills/azure-mgmt-botservice-dotnet/)
- Move attribution files to docs/ (LICENSE-MICROSOFT, microsoft-skills-attribution.json)
- Rebuild skills_index.json, CATALOG.md, README.md (845 total skills)
This commit is contained in:
@@ -1,149 +1,98 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
Inspect Microsoft Skills Repository Structure

Debug script that shows the repository layout, skill locations (including
plugins), and what flat names would be generated.
"""
|
||||
|
||||
import re
|
||||
import subprocess
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
MS_REPO = "https://github.com/microsoft/skills.git"
|
||||
|
||||
|
||||
def extract_skill_name(skill_md_path: Path) -> str | None:
|
||||
"""Extract the 'name' field from SKILL.md YAML frontmatter."""
|
||||
try:
|
||||
content = skill_md_path.read_text(encoding="utf-8")
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
fm_match = re.search(r"^---\s*\n(.*?)\n---", content, re.DOTALL)
|
||||
if not fm_match:
|
||||
return None
|
||||
|
||||
for line in fm_match.group(1).splitlines():
|
||||
match = re.match(r"^name:\s*(.+)$", line)
|
||||
if match:
|
||||
value = match.group(1).strip().strip("\"'")
|
||||
if value:
|
||||
return value
|
||||
return None
|
||||
|
||||
|
||||
def inspect_repo():
    """Inspect the Microsoft skills repository structure.

    Shallow-clones ``MS_REPO`` into a temporary directory, finds every
    ``SKILL.md`` file in the clone, and prints the flat directory name each
    one maps to: the frontmatter ``name`` when present, otherwise a fallback
    of ``ms-`` plus the skill's relative path parts (first part dropped)
    joined with ``-``. Finally reports any names claimed by more than one
    skill directory.

    Raises:
        subprocess.CalledProcessError: If ``git clone`` exits with a non-zero
            status (``check=True``).
    """
    print("🔍 Inspecting Microsoft Skills Repository Structure")
    print("=" * 60)

    with tempfile.TemporaryDirectory() as temp_dir:
        temp_path = Path(temp_dir)

        print("\n1️⃣ Cloning repository...")
        subprocess.run(
            ["git", "clone", "--depth", "1", MS_REPO, str(temp_path)],
            check=True,
            capture_output=True,
        )

        # Find all SKILL.md files
        all_skill_mds = list(temp_path.rglob("SKILL.md"))
        print(f"\n2️⃣ Total SKILL.md files found: {len(all_skill_mds)}")

        # Show flat name mapping
        print("\n3️⃣ Flat Name Mapping (frontmatter 'name' → directory name):")
        print("-" * 60)

        # effective flat name -> every relative skill directory that maps to it
        names_seen: dict[str, list[str]] = {}

        # Sort by path string so the report order is stable between runs.
        for skill_md in sorted(all_skill_mds, key=str):
            try:
                rel = skill_md.parent.relative_to(temp_path)
            except ValueError:
                rel = skill_md.parent

            name = extract_skill_name(skill_md)
            # Fallback used when the frontmatter has no usable name.
            fallback_name = f"ms-{'-'.join(rel.parts[1:])}"
            display_name = name if name else f"(no name → {fallback_name})"

            print(f" {rel} → {display_name}")

            effective_name = name if name else fallback_name
            names_seen.setdefault(effective_name, []).append(str(rel))

        # Collision check
        collisions = {
            n: paths for n, paths in names_seen.items() if len(paths) > 1
        }
        if collisions:
            print(f"\n4️⃣ ⚠️ Name Collisions Detected ({len(collisions)}):")
            for name, paths in collisions.items():
                print(f" '{name}':")
                for p in paths:
                    print(f" - {p}")
        else:
            print(
                f"\n4️⃣ ✅ No name collisions — all {len(names_seen)} names are unique!"
            )

    print("\n✨ Inspection complete!")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run the inspection and report any failure with a
    # short message followed by the full traceback (printed once).
    try:
        inspect_repo()
    except Exception as e:
        print(f"\n❌ Error: {e}")
        import traceback

        traceback.print_exc()
        # Exit non-zero so a calling shell or CI job can detect the failure.
        raise SystemExit(1) from None
|
||||
|
||||
Reference in New Issue
Block a user