feat: integrate last30days and daily-news-report skills
This commit is contained in:
521
skills/last30days/scripts/last30days.py
Normal file
521
skills/last30days/scripts/last30days.py
Normal file
@@ -0,0 +1,521 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
last30days - Research a topic from the last 30 days on Reddit + X.
|
||||
|
||||
Usage:
|
||||
python3 last30days.py <topic> [options]
|
||||
|
||||
Options:
|
||||
--mock Use fixtures instead of real API calls
|
||||
--emit=MODE Output mode: compact|json|md|context|path (default: compact)
|
||||
--sources=MODE Source selection: auto|reddit|x|both (default: auto)
|
||||
--quick Faster research with fewer sources (8-12 each)
|
||||
--deep Comprehensive research with more sources (50-70 Reddit, 40-60 X)
|
||||
--debug Enable verbose debug logging
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
# Add lib to path
|
||||
SCRIPT_DIR = Path(__file__).parent.resolve()
|
||||
sys.path.insert(0, str(SCRIPT_DIR))
|
||||
|
||||
from lib import (
|
||||
dates,
|
||||
dedupe,
|
||||
env,
|
||||
http,
|
||||
models,
|
||||
normalize,
|
||||
openai_reddit,
|
||||
reddit_enrich,
|
||||
render,
|
||||
schema,
|
||||
score,
|
||||
ui,
|
||||
websearch,
|
||||
xai_x,
|
||||
)
|
||||
|
||||
|
||||
def load_fixture(name: str) -> dict:
    """Load a JSON fixture file.

    Fixtures are looked up in the ``fixtures`` directory that sits beside
    the directory containing this script.

    Args:
        name: File name of the fixture inside the fixtures directory.

    Returns:
        The parsed JSON content, or an empty dict when no file with that
        name exists.
    """
    fixture_path = SCRIPT_DIR.parent / "fixtures" / name
    if not fixture_path.exists():
        return {}

    # JSON is UTF-8 by specification; pin the encoding instead of relying on
    # the platform's locale default, which is not UTF-8 everywhere.
    with open(fixture_path, encoding="utf-8") as f:
        return json.load(f)
|
||||
|
||||
|
||||
def _search_reddit(
    topic: str,
    config: dict,
    selected_models: dict,
    from_date: str,
    to_date: str,
    depth: str,
    mock: bool,
) -> tuple:
    """Search Reddit via OpenAI (runs in thread).

    Args:
        topic: Topic to search for.
        config: Configuration mapping; ``config["OPENAI_API_KEY"]`` is read
            when ``mock`` is false.
        selected_models: Model mapping; ``selected_models["openai"]`` is read
            when ``mock`` is false.
        from_date: Start of the date range, passed through to the search.
        to_date: End of the date range, passed through to the search.
        depth: Search depth, passed through to the search.
        mock: When true, load ``openai_sample.json`` from the fixtures
            instead of calling the API.

    Returns:
        Tuple of (reddit_items, raw_openai, error). ``error`` is ``None``
        unless the primary search raised, in which case ``raw_openai`` is
        ``{"error": <message>}``.
    """
    raw_openai = None
    reddit_error = None

    if mock:
        raw_openai = load_fixture("openai_sample.json")
    else:
        try:
            raw_openai = openai_reddit.search_reddit(
                config["OPENAI_API_KEY"],
                selected_models["openai"],
                topic,
                from_date,
                to_date,
                depth=depth,
            )
        except http.HTTPError as e:
            raw_openai = {"error": str(e)}
            reddit_error = f"API error: {e}"
        except Exception as e:
            # Any other failure (including a missing config/model key) is
            # reported to the caller rather than raised out of the thread.
            raw_openai = {"error": str(e)}
            reddit_error = f"{type(e).__name__}: {e}"

    # Parse response
    reddit_items = openai_reddit.parse_reddit_response(raw_openai or {})

    # Quick retry with simpler query if few results
    if len(reddit_items) < 5 and not mock and not reddit_error:
        core = openai_reddit._extract_core_subject(topic)
        if core.lower() != topic.lower():
            try:
                retry_raw = openai_reddit.search_reddit(
                    config["OPENAI_API_KEY"],
                    selected_models["openai"],
                    core,
                    from_date,
                    to_date,
                    depth=depth,
                )
                retry_items = openai_reddit.parse_reddit_response(retry_raw)

                # Add items not already found (by URL). The URL set is kept
                # up to date while merging so that a URL repeated inside the
                # retry batch is only added once.
                seen_urls = {item.get("url") for item in reddit_items}
                for item in retry_items:
                    url = item.get("url")
                    if url in seen_urls:
                        continue
                    reddit_items.append(item)
                    if url is not None:
                        seen_urls.add(url)
            except Exception:
                # The retry is a best-effort bonus: if it fails, keep the
                # results of the primary search and report no error.
                pass

    return reddit_items, raw_openai, reddit_error
|
||||
|
||||
|
||||
def _search_x(
    topic: str,
    config: dict,
    selected_models: dict,
    from_date: str,
    to_date: str,
    depth: str,
    mock: bool,
) -> tuple:
    """Query X through xAI; intended to be run on a worker thread.

    In mock mode the ``xai_sample.json`` fixture stands in for the API
    response. Otherwise ``xai_x.search_x`` is called, and any exception it
    raises is captured and reported instead of propagating.

    Returns:
        Tuple of (x_items, raw_xai, error), where ``error`` is ``None``
        when the search did not raise.
    """
    failure = None

    if mock:
        response = load_fixture("xai_sample.json")
    else:
        try:
            response = xai_x.search_x(
                config["XAI_API_KEY"],
                selected_models["xai"],
                topic,
                from_date,
                to_date,
                depth=depth,
            )
        except http.HTTPError as exc:
            response = {"error": str(exc)}
            failure = f"API error: {exc}"
        except Exception as exc:
            response = {"error": str(exc)}
            failure = f"{type(exc).__name__}: {exc}"

    # An empty/None response is parsed as an empty mapping.
    posts = xai_x.parse_x_response(response or {})
    return posts, response, failure
|
||||
|
||||
|
||||
def run_research(
    topic: str,
    sources: str,
    config: dict,
    selected_models: dict,
    from_date: str,
    to_date: str,
    depth: str = "default",
    mock: bool = False,
    progress: ui.ProgressDisplay = None,
) -> tuple:
    """Execute the research pipeline for one topic.

    Reddit and X searches are submitted to a two-worker thread pool, their
    results are collected, and every Reddit item is then enriched one at a
    time. When ``progress`` is given, each phase is reported through it.

    Returns:
        Tuple of (reddit_items, x_items, web_needed, raw_openai, raw_xai,
        raw_reddit_enriched, reddit_error, x_error)

    Note: web_needed is True when WebSearch should be performed by Claude.
    The script outputs a marker and Claude handles WebSearch in its session.
    """
    reddit_items, x_items = [], []
    raw_openai = raw_xai = None
    raw_reddit_enriched = []
    reddit_error = x_error = None

    # WebSearch is requested for every source mode that includes "web".
    web_needed = sources in ("all", "web", "reddit-web", "x-web")

    # Web-only: nothing to fetch here, Claude performs the whole search.
    if sources == "web":
        if progress:
            progress.start_web_only()
            progress.end_web_only()
        return (
            reddit_items,
            x_items,
            True,
            raw_openai,
            raw_xai,
            raw_reddit_enriched,
            reddit_error,
            x_error,
        )

    want_reddit = sources in ("both", "reddit", "all", "reddit-web")
    want_x = sources in ("both", "x", "all", "x-web")
    search_args = (topic, config, selected_models, from_date, to_date, depth, mock)

    reddit_job = None
    x_job = None

    with ThreadPoolExecutor(max_workers=2) as pool:
        # Kick off both searches before waiting on either of them.
        if want_reddit:
            if progress:
                progress.start_reddit()
            reddit_job = pool.submit(_search_reddit, *search_args)

        if want_x:
            if progress:
                progress.start_x()
            x_job = pool.submit(_search_x, *search_args)

        # Gather the Reddit outcome.
        if reddit_job is not None:
            try:
                reddit_items, raw_openai, reddit_error = reddit_job.result()
                if reddit_error and progress:
                    progress.show_error(f"Reddit error: {reddit_error}")
            except Exception as exc:
                reddit_error = f"{type(exc).__name__}: {exc}"
                if progress:
                    progress.show_error(f"Reddit error: {exc}")
            if progress:
                progress.end_reddit(len(reddit_items))

        # Gather the X outcome.
        if x_job is not None:
            try:
                x_items, raw_xai, x_error = x_job.result()
                if x_error and progress:
                    progress.show_error(f"X error: {x_error}")
            except Exception as exc:
                x_error = f"{type(exc).__name__}: {exc}"
                if progress:
                    progress.show_error(f"X error: {exc}")
            if progress:
                progress.end_x(len(x_items))

    # Enrichment runs sequentially; a failure on one item leaves that item
    # unenriched and does not stop the loop.
    if reddit_items:
        total = len(reddit_items)
        if progress:
            progress.start_reddit_enrich(1, total)

        for idx, entry in enumerate(reddit_items):
            if progress and idx > 0:
                progress.update_reddit_enrich(idx + 1, total)

            try:
                if mock:
                    sample_thread = load_fixture("reddit_thread_sample.json")
                    enriched = reddit_enrich.enrich_reddit_item(entry, sample_thread)
                else:
                    enriched = reddit_enrich.enrich_reddit_item(entry)
                reddit_items[idx] = enriched
            except Exception as exc:
                if progress:
                    progress.show_error(f"Enrich failed for {entry.get('url', 'unknown')}: {exc}")

            raw_reddit_enriched.append(reddit_items[idx])

        if progress:
            progress.end_reddit_enrich()

    return (
        reddit_items,
        x_items,
        web_needed,
        raw_openai,
        raw_xai,
        raw_reddit_enriched,
        reddit_error,
        x_error,
    )
|
||||
|
||||
|
||||
def main():
    """Command-line entry point.

    Parses the arguments, resolves which sources to query, runs the
    research, post-processes the items (normalize, date-filter, score,
    sort, dedupe), writes the outputs and prints the result in the
    requested emit mode. Exits with status 1 on invalid usage or when the
    requested sources fail validation.
    """
    cli = argparse.ArgumentParser(
        description="Research a topic from the last 30 days on Reddit + X"
    )
    cli.add_argument("topic", nargs="?", help="Topic to research")
    cli.add_argument("--mock", action="store_true", help="Use fixtures")
    cli.add_argument(
        "--emit",
        choices=["compact", "json", "md", "context", "path"],
        default="compact",
        help="Output mode",
    )
    cli.add_argument(
        "--sources",
        choices=["auto", "reddit", "x", "both"],
        default="auto",
        help="Source selection",
    )
    # Remaining options are plain on/off switches.
    for flag, description in (
        ("--quick", "Faster research with fewer sources (8-12 each)"),
        ("--deep", "Comprehensive research with more sources (50-70 Reddit, 40-60 X)"),
        ("--debug", "Enable verbose debug logging"),
        ("--include-web", "Include general web search alongside Reddit/X (lower weighted)"),
    ):
        cli.add_argument(flag, action="store_true", help=description)

    args = cli.parse_args()

    if args.debug:
        os.environ["LAST30DAYS_DEBUG"] = "1"
        # Also flip the flag on the already-imported http module.
        from lib import http as http_module
        http_module.DEBUG = True

    # --quick and --deep are mutually exclusive.
    if args.quick and args.deep:
        print("Error: Cannot use both --quick and --deep", file=sys.stderr)
        sys.exit(1)
    depth = "quick" if args.quick else ("deep" if args.deep else "default")

    if not args.topic:
        print("Error: Please provide a topic to research.", file=sys.stderr)
        print("Usage: python3 last30days.py <topic> [options]", file=sys.stderr)
        sys.exit(1)

    config = env.get_config()
    available = env.get_available_sources(config)

    if args.mock:
        # Fixtures need no API keys, so "auto" simply means both sources.
        sources = "both" if args.sources == "auto" else args.sources
    else:
        sources, error = env.validate_sources(args.sources, available, args.include_web)
        if error:
            if "WebSearch fallback" not in error:
                print(f"Error: {error}", file=sys.stderr)
                sys.exit(1)
            # A WebSearch-fallback message is only a notice; carry on.
            print(f"Note: {error}", file=sys.stderr)

    from_date, to_date = dates.get_date_range(30)

    # Missing keys drive the promo message shown before research starts.
    missing_keys = env.get_missing_keys(config)

    progress = ui.ProgressDisplay(args.topic, show_banner=True)
    if missing_keys != 'none':
        progress.show_promo(missing_keys)

    if args.mock:
        openai_catalog = load_fixture("models_openai_sample.json").get("data", [])
        xai_catalog = load_fixture("models_xai_sample.json").get("data", [])
        mock_config = {
            "OPENAI_API_KEY": "mock",
            "XAI_API_KEY": "mock",
            **config,
        }
        selected_models = models.get_models(mock_config, openai_catalog, xai_catalog)
    else:
        selected_models = models.get_models(config)

    # Only three source names get a different report label; every other
    # value ("all", "both", "reddit-web", "x-web", ...) is used as-is.
    mode_labels = {"reddit": "reddit-only", "x": "x-only", "web": "web-only"}
    mode = mode_labels.get(sources, sources)

    (
        reddit_items,
        x_items,
        web_needed,
        raw_openai,
        raw_xai,
        raw_reddit_enriched,
        reddit_error,
        x_error,
    ) = run_research(
        args.topic,
        sources,
        config,
        selected_models,
        from_date,
        to_date,
        depth,
        args.mock,
        progress,
    )

    progress.start_processing()

    reddit_stage = normalize.normalize_reddit_items(reddit_items, from_date, to_date)
    x_stage = normalize.normalize_x_items(x_items, from_date, to_date)

    # Hard date filter: the safety net that drops items whose verified date
    # is outside the range, even if the prompts let old content through.
    reddit_stage = normalize.filter_by_date_range(reddit_stage, from_date, to_date)
    x_stage = normalize.filter_by_date_range(x_stage, from_date, to_date)

    reddit_stage = score.score_reddit_items(reddit_stage)
    x_stage = score.score_x_items(x_stage)

    reddit_stage = score.sort_items(reddit_stage)
    x_stage = score.sort_items(x_stage)

    deduped_reddit = dedupe.dedupe_reddit(reddit_stage)
    deduped_x = dedupe.dedupe_x(x_stage)

    progress.end_processing()

    report = schema.create_report(
        args.topic,
        from_date,
        to_date,
        mode,
        selected_models.get("openai"),
        selected_models.get("xai"),
    )
    report.reddit = deduped_reddit
    report.x = deduped_x
    report.reddit_error = reddit_error
    report.x_error = x_error

    report.context_snippet_md = render.render_context_snippet(report)

    render.write_outputs(report, raw_openai, raw_xai, raw_reddit_enriched)

    if sources == "web":
        progress.show_web_only_complete()
    else:
        progress.show_complete(len(deduped_reddit), len(deduped_x))

    output_result(report, args.emit, web_needed, args.topic, from_date, to_date, missing_keys)
|
||||
|
||||
|
||||
def output_result(
    report: schema.Report,
    emit_mode: str,
    web_needed: bool = False,
    topic: str = "",
    from_date: str = "",
    to_date: str = "",
    missing_keys: str = "none",
):
    """Print the report in the requested emit mode.

    An unrecognized ``emit_mode`` prints no report. When ``web_needed`` is
    true, a WebSearch instruction banner is printed afterwards.
    """
    # Each producer is only evaluated for the mode that was asked for.
    producers = {
        "compact": lambda: render.render_compact(report, missing_keys=missing_keys),
        "json": lambda: json.dumps(report.to_dict(), indent=2),
        "md": lambda: render.render_full_report(report),
        "context": lambda: report.context_snippet_md,
        "path": lambda: render.get_context_path(),
    }
    produce = producers.get(emit_mode)
    if produce is not None:
        print(produce())

    if not web_needed:
        return

    rule = "=" * 60
    banner_lines = (
        "\n" + rule,
        "### WEBSEARCH REQUIRED ###",
        rule,
        f"Topic: {topic}",
        f"Date range: {from_date} to {to_date}",
        "",
        "Claude: Use your WebSearch tool to find 8-15 relevant web pages.",
        "EXCLUDE: reddit.com, x.com, twitter.com (already covered above)",
        "INCLUDE: blogs, docs, news, tutorials from the last 30 days",
        "",
        "After searching, synthesize WebSearch results WITH the Reddit/X",
        "results above. WebSearch items should rank LOWER than comparable",
        "Reddit/X items (they lack engagement metrics).",
        rule,
    )
    for line in banner_lines:
        print(line)
|
||||
|
||||
|
||||
# Run the CLI only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user