feat: integrate last30days and daily-news-report skills

2026-01-26 19:05:37 +01:00
parent d2569f2107
commit c7f7f23bd7
45 changed files with 7632 additions and 0 deletions
--- a/skills/last30days/scripts/lib/schema.py
+++ b/skills/last30days/scripts/lib/schema.py
@@ -0,0 +1,336 @@
+"""Data schemas for last30days skill."""
+
+from dataclasses import dataclass, field, asdict
+from typing import Any, Dict, List, Optional
+from datetime import datetime, timezone
+
+
+@dataclass
+class Engagement:
+    """Engagement metrics."""
+    # Reddit fields
+    score: Optional[int] = None
+    num_comments: Optional[int] = None
+    upvote_ratio: Optional[float] = None
+
+    # X fields
+    likes: Optional[int] = None
+    reposts: Optional[int] = None
+    replies: Optional[int] = None
+    quotes: Optional[int] = None
+
+    def to_dict(self) -> Dict[str, Any]:
+        d = {}
+        if self.score is not None:
+            d['score'] = self.score
+        if self.num_comments is not None:
+            d['num_comments'] = self.num_comments
+        if self.upvote_ratio is not None:
+            d['upvote_ratio'] = self.upvote_ratio
+        if self.likes is not None:
+            d['likes'] = self.likes
+        if self.reposts is not None:
+            d['reposts'] = self.reposts
+        if self.replies is not None:
+            d['replies'] = self.replies
+        if self.quotes is not None:
+            d['quotes'] = self.quotes
+        return d if d else None
+
+
+@dataclass
+class Comment:
+    """Reddit comment."""
+    score: int
+    date: Optional[str]
+    author: str
+    excerpt: str
+    url: str
+
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            'score': self.score,
+            'date': self.date,
+            'author': self.author,
+            'excerpt': self.excerpt,
+            'url': self.url,
+        }
+
+
+@dataclass
+class SubScores:
+    """Component scores."""
+    relevance: int = 0
+    recency: int = 0
+    engagement: int = 0
+
+    def to_dict(self) -> Dict[str, int]:
+        return {
+            'relevance': self.relevance,
+            'recency': self.recency,
+            'engagement': self.engagement,
+        }
+
+
+@dataclass
+class RedditItem:
+    """Normalized Reddit item."""
+    id: str
+    title: str
+    url: str
+    subreddit: str
+    date: Optional[str] = None
+    date_confidence: str = "low"
+    engagement: Optional[Engagement] = None
+    top_comments: List[Comment] = field(default_factory=list)
+    comment_insights: List[str] = field(default_factory=list)
+    relevance: float = 0.5
+    why_relevant: str = ""
+    subs: SubScores = field(default_factory=SubScores)
+    score: int = 0
+
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            'id': self.id,
+            'title': self.title,
+            'url': self.url,
+            'subreddit': self.subreddit,
+            'date': self.date,
+            'date_confidence': self.date_confidence,
+            'engagement': self.engagement.to_dict() if self.engagement else None,
+            'top_comments': [c.to_dict() for c in self.top_comments],
+            'comment_insights': self.comment_insights,
+            'relevance': self.relevance,
+            'why_relevant': self.why_relevant,
+            'subs': self.subs.to_dict(),
+            'score': self.score,
+        }
+
+
+@dataclass
+class XItem:
+    """Normalized X item."""
+    id: str
+    text: str
+    url: str
+    author_handle: str
+    date: Optional[str] = None
+    date_confidence: str = "low"
+    engagement: Optional[Engagement] = None
+    relevance: float = 0.5
+    why_relevant: str = ""
+    subs: SubScores = field(default_factory=SubScores)
+    score: int = 0
+
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            'id': self.id,
+            'text': self.text,
+            'url': self.url,
+            'author_handle': self.author_handle,
+            'date': self.date,
+            'date_confidence': self.date_confidence,
+            'engagement': self.engagement.to_dict() if self.engagement else None,
+            'relevance': self.relevance,
+            'why_relevant': self.why_relevant,
+            'subs': self.subs.to_dict(),
+            'score': self.score,
+        }
+
+
+@dataclass
+class WebSearchItem:
+    """Normalized web search item (no engagement metrics)."""
+    id: str
+    title: str
+    url: str
+    source_domain: str  # e.g., "medium.com", "github.com"
+    snippet: str
+    date: Optional[str] = None
+    date_confidence: str = "low"
+    relevance: float = 0.5
+    why_relevant: str = ""
+    subs: SubScores = field(default_factory=SubScores)
+    score: int = 0
+
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            'id': self.id,
+            'title': self.title,
+            'url': self.url,
+            'source_domain': self.source_domain,
+            'snippet': self.snippet,
+            'date': self.date,
+            'date_confidence': self.date_confidence,
+            'relevance': self.relevance,
+            'why_relevant': self.why_relevant,
+            'subs': self.subs.to_dict(),
+            'score': self.score,
+        }
+
+
+@dataclass
+class Report:
+    """Full research report."""
+    topic: str
+    range_from: str
+    range_to: str
+    generated_at: str
+    mode: str  # 'reddit-only', 'x-only', 'both', 'web-only', etc.
+    openai_model_used: Optional[str] = None
+    xai_model_used: Optional[str] = None
+    reddit: List[RedditItem] = field(default_factory=list)
+    x: List[XItem] = field(default_factory=list)
+    web: List[WebSearchItem] = field(default_factory=list)
+    best_practices: List[str] = field(default_factory=list)
+    prompt_pack: List[str] = field(default_factory=list)
+    context_snippet_md: str = ""
+    # Status tracking
+    reddit_error: Optional[str] = None
+    x_error: Optional[str] = None
+    web_error: Optional[str] = None
+    # Cache info
+    from_cache: bool = False
+    cache_age_hours: Optional[float] = None
+
+    def to_dict(self) -> Dict[str, Any]:
+        d = {
+            'topic': self.topic,
+            'range': {
+                'from': self.range_from,
+                'to': self.range_to,
+            },
+            'generated_at': self.generated_at,
+            'mode': self.mode,
+            'openai_model_used': self.openai_model_used,
+            'xai_model_used': self.xai_model_used,
+            'reddit': [r.to_dict() for r in self.reddit],
+            'x': [x.to_dict() for x in self.x],
+            'web': [w.to_dict() for w in self.web],
+            'best_practices': self.best_practices,
+            'prompt_pack': self.prompt_pack,
+            'context_snippet_md': self.context_snippet_md,
+        }
+        if self.reddit_error:
+            d['reddit_error'] = self.reddit_error
+        if self.x_error:
+            d['x_error'] = self.x_error
+        if self.web_error:
+            d['web_error'] = self.web_error
+        if self.from_cache:
+            d['from_cache'] = self.from_cache
+        if self.cache_age_hours is not None:
+            d['cache_age_hours'] = self.cache_age_hours
+        return d
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "Report":
+        """Create Report from serialized dict (handles cache format)."""
+        # Handle range field conversion
+        range_data = data.get('range', {})
+        range_from = range_data.get('from', data.get('range_from', ''))
+        range_to = range_data.get('to', data.get('range_to', ''))
+
+        # Reconstruct Reddit items
+        reddit_items = []
+        for r in data.get('reddit', []):
+            eng = None
+            if r.get('engagement'):
+                eng = Engagement(**r['engagement'])
+            comments = [Comment(**c) for c in r.get('top_comments', [])]
+            subs = SubScores(**r.get('subs', {})) if r.get('subs') else SubScores()
+            reddit_items.append(RedditItem(
+                id=r['id'],
+                title=r['title'],
+                url=r['url'],
+                subreddit=r['subreddit'],
+                date=r.get('date'),
+                date_confidence=r.get('date_confidence', 'low'),
+                engagement=eng,
+                top_comments=comments,
+                comment_insights=r.get('comment_insights', []),
+                relevance=r.get('relevance', 0.5),
+                why_relevant=r.get('why_relevant', ''),
+                subs=subs,
+                score=r.get('score', 0),
+            ))
+
+        # Reconstruct X items
+        x_items = []
+        for x in data.get('x', []):
+            eng = None
+            if x.get('engagement'):
+                eng = Engagement(**x['engagement'])
+            subs = SubScores(**x.get('subs', {})) if x.get('subs') else SubScores()
+            x_items.append(XItem(
+                id=x['id'],
+                text=x['text'],
+                url=x['url'],
+                author_handle=x['author_handle'],
+                date=x.get('date'),
+                date_confidence=x.get('date_confidence', 'low'),
+                engagement=eng,
+                relevance=x.get('relevance', 0.5),
+                why_relevant=x.get('why_relevant', ''),
+                subs=subs,
+                score=x.get('score', 0),
+            ))
+
+        # Reconstruct Web items
+        web_items = []
+        for w in data.get('web', []):
+            subs = SubScores(**w.get('subs', {})) if w.get('subs') else SubScores()
+            web_items.append(WebSearchItem(
+                id=w['id'],
+                title=w['title'],
+                url=w['url'],
+                source_domain=w.get('source_domain', ''),
+                snippet=w.get('snippet', ''),
+                date=w.get('date'),
+                date_confidence=w.get('date_confidence', 'low'),
+                relevance=w.get('relevance', 0.5),
+                why_relevant=w.get('why_relevant', ''),
+                subs=subs,
+                score=w.get('score', 0),
+            ))
+
+        return cls(
+            topic=data['topic'],
+            range_from=range_from,
+            range_to=range_to,
+            generated_at=data['generated_at'],
+            mode=data['mode'],
+            openai_model_used=data.get('openai_model_used'),
+            xai_model_used=data.get('xai_model_used'),
+            reddit=reddit_items,
+            x=x_items,
+            web=web_items,
+            best_practices=data.get('best_practices', []),
+            prompt_pack=data.get('prompt_pack', []),
+            context_snippet_md=data.get('context_snippet_md', ''),
+            reddit_error=data.get('reddit_error'),
+            x_error=data.get('x_error'),
+            web_error=data.get('web_error'),
+            from_cache=data.get('from_cache', False),
+            cache_age_hours=data.get('cache_age_hours'),
+        )
+
+
+def create_report(
+    topic: str,
+    from_date: str,
+    to_date: str,
+    mode: str,
+    openai_model: Optional[str] = None,
+    xai_model: Optional[str] = None,
+) -> Report:
+    """Create a new report with metadata."""
+    return Report(
+        topic=topic,
+        range_from=from_date,
+        range_to=to_date,
+        generated_at=datetime.now(timezone.utc).isoformat(),
+        mode=mode,
+        openai_model_used=openai_model,
+        xai_model_used=xai_model,
+    )