feat: implement Phase 2 Automation & CI (validate_skills, generate_index, ci.yml) [skip ci]

This commit is contained in:
sck_0
2026-01-25 19:19:51 +01:00
parent 1557826c5d
commit 4fe8a1e6a4
4 changed files with 1210 additions and 324 deletions

34
.github/workflows/ci.yml vendored Normal file
View File

@@ -0,0 +1,34 @@
# Skills Registry CI — validates skill metadata, regenerates the index and
# README, then fails if the generated artifacts were not committed.
# (Reconstructed with proper YAML indentation, which the diff view had stripped.)
name: Skills Registry CI

on:
  push:
    branches: ["main", "feat/*"]
  pull_request:
    branches: ["main"]

jobs:
  validate-and-build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      # Strict mode promotes warnings to failures (see validate_skills.py).
      - name: 🔍 Validate Skills (Strict Mode)
        run: |
          python3 scripts/validate_skills.py --strict

      - name: 🏗️ Generate Index
        run: |
          python3 scripts/generate_index.py

      - name: 📝 Update README
        run: |
          python3 scripts/update_readme.py

      # Fails the build when generate_index/update_readme produced output
      # that differs from what was committed.
      - name: 🚨 Check for Uncommitted Drift
        run: |
          git diff --exit-code || (echo "❌ Detected uncommitted changes in README.md or skills_index.json. Please run scripts locally and commit." && exit 1)

View File

@@ -2,69 +2,90 @@ import os
import json
import re
def parse_frontmatter(content):
    """
    Parse a flat YAML-ish frontmatter block at the start of *content*.

    Uses a regex plus line splitting (consistent with validate_skills.py)
    instead of a YAML library, so only simple ``key: value`` pairs are
    recognized; surrounding single/double quotes are stripped from values.

    Returns a dict of parsed keys, or an empty dict when no leading
    ``--- ... ---`` block is present.
    """
    match = re.search(r'^---\s*\n(.*?)\n---', content, re.DOTALL)
    if match is None:
        return {}
    parsed = {}
    for raw_line in match.group(1).split('\n'):
        key, sep, value = raw_line.partition(':')
        if sep:
            parsed[key.strip()] = value.strip().strip('"').strip("'")
    return parsed
def generate_index(skills_dir, output_file):
    """
    Walk *skills_dir*, collect metadata from every SKILL.md, and write a
    JSON index to *output_file*.

    Each entry carries id/path/category/name/description/risk/source;
    frontmatter values (via parse_frontmatter) override the defaults, and a
    legacy fallback extracts the first non-header paragraph as the
    description when the frontmatter has none.

    Returns the sorted list of skill dicts (also written to disk).

    NOTE(review): reconstructed from a fused side-by-side diff view; this is
    the "new column" implementation of that diff.
    """
    print(f"🏗️ Generating index from: {skills_dir}")
    skills = []
    for root, dirs, files in os.walk(skills_dir):
        # Skip .disabled or hidden directories
        dirs[:] = [d for d in dirs if not d.startswith('.')]
        if "SKILL.md" in files:
            skill_path = os.path.join(root, "SKILL.md")
            dir_name = os.path.basename(root)
            parent_dir = os.path.basename(os.path.dirname(root))
            # Default values; frontmatter (if any) overrides these below.
            skill_info = {
                "id": dir_name,
                "path": os.path.relpath(root, os.path.dirname(skills_dir)),
                "category": parent_dir if parent_dir != "skills" else "uncategorized",
                "name": dir_name.replace("-", " ").title(),
                "description": "",
                "risk": "unknown",
                "source": "unknown"
            }
            try:
                with open(skill_path, 'r', encoding='utf-8') as f:
                    content = f.read()
            except Exception as e:
                # Best-effort: report and keep indexing the other skills.
                print(f"⚠️ Error reading {skill_path}: {e}")
                continue
            # Parse Metadata
            metadata = parse_frontmatter(content)
            # Merge Metadata
            if "name" in metadata:
                skill_info["name"] = metadata["name"]
            if "description" in metadata:
                skill_info["description"] = metadata["description"]
            if "risk" in metadata:
                skill_info["risk"] = metadata["risk"]
            if "source" in metadata:
                skill_info["source"] = metadata["source"]
            # Fallback for description if missing in frontmatter (legacy support)
            if not skill_info["description"]:
                body = content
                fm_match = re.search(r'^---\s*\n(.*?)\n---', content, re.DOTALL)
                if fm_match:
                    body = content[fm_match.end():].strip()
                # Simple extraction of first non-header paragraph
                lines = body.split('\n')
                desc_lines = []
                for line in lines:
                    if line.startswith('#') or not line.strip():
                        # Stop once a paragraph has been collected; skip
                        # leading headers/blank lines before it.
                        if desc_lines:
                            break
                        continue
                    desc_lines.append(line.strip())
                if desc_lines:
                    skill_info["description"] = " ".join(desc_lines)[:250].strip()
            skills.append(skill_info)
    # Stable, case-insensitive ordering by display name.
    skills.sort(key=lambda x: x["name"].lower())
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(skills, f, indent=2)
    print(f"✅ Generated rich index with {len(skills)} skills at: {output_file}")
    return skills
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -1,52 +1,124 @@
import os
import re
import argparse
import sys
def parse_frontmatter(content):
    """
    Simple frontmatter parser using regex to avoid external dependencies.

    Parses only flat ``key: value`` pairs from a leading ``--- ... ---``
    block, stripping surrounding quotes from values.

    Returns a dict of key-values, or None when no frontmatter block is
    present (callers treat None as missing/malformed frontmatter).
    """
    fm_match = re.search(r'^---\s*\n(.*?)\n---', content, re.DOTALL)
    if not fm_match:
        return None
    fm_text = fm_match.group(1)
    metadata = {}
    for line in fm_text.split('\n'):
        if ':' in line:
            key, val = line.split(':', 1)
            metadata[key.strip()] = val.strip().strip('"').strip("'")
    return metadata
def validate_skills(skills_dir, strict_mode=False):
    """
    Validate every SKILL.md under *skills_dir*.

    Checks: frontmatter presence, required 'name'/'description' keys,
    'risk' label (must be one of none/safe/critical/offensive), 'source'
    attribution, a '## When to Use' section, and — for risk=offensive — an
    'AUTHORIZED USE ONLY' security disclaimer.

    In strict_mode (CI) the softer checks are promoted to errors, and any
    remaining warnings also fail the run.

    Returns True when validation passes, False otherwise.

    NOTE(review): reconstructed from a fused side-by-side diff view; this is
    the "new column" implementation of that diff.
    """
    print(f"🔍 Validating skills in: {skills_dir}")
    print(f"⚙️ Mode: {'STRICT (CI)' if strict_mode else 'Standard (Dev)'}")
    errors = []
    warnings = []
    skill_count = 0
    # Pre-compiled regex
    security_disclaimer_pattern = re.compile(r"AUTHORIZED USE ONLY", re.IGNORECASE)
    trigger_section_pattern = re.compile(r"^##\s+When to Use", re.MULTILINE | re.IGNORECASE)
    valid_risk_levels = ["none", "safe", "critical", "offensive"]
    for root, dirs, files in os.walk(skills_dir):
        # Skip .disabled or hidden directories
        dirs[:] = [d for d in dirs if not d.startswith('.')]
        if "SKILL.md" in files:
            skill_count += 1
            skill_path = os.path.join(root, "SKILL.md")
            rel_path = os.path.relpath(skill_path, skills_dir)
            try:
                with open(skill_path, 'r', encoding='utf-8') as f:
                    content = f.read()
            except Exception as e:
                errors.append(f"{rel_path}: Unreadable file - {str(e)}")
                continue
            # 1. Frontmatter Check
            metadata = parse_frontmatter(content)
            if not metadata:
                errors.append(f"{rel_path}: Missing or malformed YAML frontmatter")
                continue  # Cannot proceed without metadata
            # 2. Metadata Schema Checks
            if "name" not in metadata:
                errors.append(f"{rel_path}: Missing 'name' in frontmatter")
            elif metadata["name"] != os.path.basename(root):
                warnings.append(f"⚠️ {rel_path}: Name '{metadata['name']}' does not match folder name '{os.path.basename(root)}'")
            if "description" not in metadata:
                errors.append(f"{rel_path}: Missing 'description' in frontmatter")
            # Risk Validation (Quality Bar)
            if "risk" not in metadata:
                msg = f"⚠️ {rel_path}: Missing 'risk' label (defaulting to 'unknown')"
                if strict_mode:
                    errors.append(msg.replace("⚠️", ""))
                else:
                    warnings.append(msg)
            elif metadata["risk"] not in valid_risk_levels:
                errors.append(f"{rel_path}: Invalid risk level '{metadata['risk']}'. Must be one of {valid_risk_levels}")
            # Source Validation
            if "source" not in metadata:
                msg = f"⚠️ {rel_path}: Missing 'source' attribution"
                if strict_mode:
                    errors.append(msg.replace("⚠️", ""))
                else:
                    warnings.append(msg)
            # 3. Content Checks (Triggers)
            if not trigger_section_pattern.search(content):
                msg = f"⚠️ {rel_path}: Missing '## When to Use' section"
                if strict_mode:
                    errors.append(msg.replace("⚠️", ""))
                else:
                    warnings.append(msg)
            # 4. Security Guardrails
            if metadata.get("risk") == "offensive":
                if not security_disclaimer_pattern.search(content):
                    errors.append(f"🚨 {rel_path}: OFFENSIVE SKILL MISSING SECURITY DISCLAIMER! (Must contain 'AUTHORIZED USE ONLY')")
    # Reporting
    print(f"\n📊 Checked {skill_count} skills.")
    if warnings:
        print(f"\n⚠️ Found {len(warnings)} Warnings:")
        for w in warnings:
            print(w)
    if errors:
        print(f"\n❌ Found {len(errors)} Critical Errors:")
        for e in errors:
            print(e)
        return False
    if strict_mode and warnings:
        print("\n❌ STRICT MODE: Failed due to warnings.")
        return False
    print("\n✨ All skills passed validation!")
    return True
if __name__ == "__main__":
    # CLI entry point: --strict promotes warnings to failures (used by CI).
    parser = argparse.ArgumentParser(description="Validate Antigravity Skills")
    parser.add_argument("--strict", action="store_true", help="Fail on warnings (for CI)")
    args = parser.parse_args()
    # Repo root is one level above this scripts/ directory.
    base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    skills_path = os.path.join(base_dir, "skills")
    success = validate_skills(skills_path, strict_mode=args.strict)
    if not success:
        sys.exit(1)

File diff suppressed because it is too large Load Diff