feat: integrate last30days and daily-news-report skills
This commit is contained in:
111
skills/last30days/tests/test_dedupe.py
Normal file
111
skills/last30days/tests/test_dedupe.py
Normal file
@@ -0,0 +1,111 @@
|
||||
"""Tests for dedupe module."""
|
||||
|
||||
import sys
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
|
||||
# Add lib to path
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent / "scripts"))
|
||||
|
||||
from lib import dedupe, schema
|
||||
|
||||
|
||||
class TestNormalizeText(unittest.TestCase):
|
||||
def test_lowercase(self):
|
||||
result = dedupe.normalize_text("HELLO World")
|
||||
self.assertEqual(result, "hello world")
|
||||
|
||||
def test_removes_punctuation(self):
|
||||
result = dedupe.normalize_text("Hello, World!")
|
||||
# Punctuation replaced with space, then whitespace collapsed
|
||||
self.assertEqual(result, "hello world")
|
||||
|
||||
def test_collapses_whitespace(self):
|
||||
result = dedupe.normalize_text("hello world")
|
||||
self.assertEqual(result, "hello world")
|
||||
|
||||
|
||||
class TestGetNgrams(unittest.TestCase):
|
||||
def test_short_text(self):
|
||||
result = dedupe.get_ngrams("ab", n=3)
|
||||
self.assertEqual(result, {"ab"})
|
||||
|
||||
def test_normal_text(self):
|
||||
result = dedupe.get_ngrams("hello", n=3)
|
||||
self.assertIn("hel", result)
|
||||
self.assertIn("ell", result)
|
||||
self.assertIn("llo", result)
|
||||
|
||||
|
||||
class TestJaccardSimilarity(unittest.TestCase):
|
||||
def test_identical_sets(self):
|
||||
set1 = {"a", "b", "c"}
|
||||
result = dedupe.jaccard_similarity(set1, set1)
|
||||
self.assertEqual(result, 1.0)
|
||||
|
||||
def test_disjoint_sets(self):
|
||||
set1 = {"a", "b", "c"}
|
||||
set2 = {"d", "e", "f"}
|
||||
result = dedupe.jaccard_similarity(set1, set2)
|
||||
self.assertEqual(result, 0.0)
|
||||
|
||||
def test_partial_overlap(self):
|
||||
set1 = {"a", "b", "c"}
|
||||
set2 = {"b", "c", "d"}
|
||||
result = dedupe.jaccard_similarity(set1, set2)
|
||||
self.assertEqual(result, 0.5) # 2 overlap / 4 union
|
||||
|
||||
def test_empty_sets(self):
|
||||
result = dedupe.jaccard_similarity(set(), set())
|
||||
self.assertEqual(result, 0.0)
|
||||
|
||||
|
||||
class TestFindDuplicates(unittest.TestCase):
|
||||
def test_no_duplicates(self):
|
||||
items = [
|
||||
schema.RedditItem(id="R1", title="Completely different topic A", url="", subreddit=""),
|
||||
schema.RedditItem(id="R2", title="Another unrelated subject B", url="", subreddit=""),
|
||||
]
|
||||
result = dedupe.find_duplicates(items)
|
||||
self.assertEqual(result, [])
|
||||
|
||||
def test_finds_duplicates(self):
|
||||
items = [
|
||||
schema.RedditItem(id="R1", title="Best practices for Claude Code skills", url="", subreddit=""),
|
||||
schema.RedditItem(id="R2", title="Best practices for Claude Code skills guide", url="", subreddit=""),
|
||||
]
|
||||
result = dedupe.find_duplicates(items, threshold=0.7)
|
||||
self.assertEqual(len(result), 1)
|
||||
self.assertEqual(result[0], (0, 1))
|
||||
|
||||
|
||||
class TestDedupeItems(unittest.TestCase):
|
||||
def test_keeps_higher_scored(self):
|
||||
items = [
|
||||
schema.RedditItem(id="R1", title="Best practices for skills", url="", subreddit="", score=90),
|
||||
schema.RedditItem(id="R2", title="Best practices for skills guide", url="", subreddit="", score=50),
|
||||
]
|
||||
result = dedupe.dedupe_items(items, threshold=0.6)
|
||||
self.assertEqual(len(result), 1)
|
||||
self.assertEqual(result[0].id, "R1")
|
||||
|
||||
def test_keeps_all_unique(self):
|
||||
items = [
|
||||
schema.RedditItem(id="R1", title="Topic about apples", url="", subreddit="", score=90),
|
||||
schema.RedditItem(id="R2", title="Discussion of oranges", url="", subreddit="", score=50),
|
||||
]
|
||||
result = dedupe.dedupe_items(items)
|
||||
self.assertEqual(len(result), 2)
|
||||
|
||||
def test_empty_list(self):
|
||||
result = dedupe.dedupe_items([])
|
||||
self.assertEqual(result, [])
|
||||
|
||||
def test_single_item(self):
|
||||
items = [schema.RedditItem(id="R1", title="Test", url="", subreddit="")]
|
||||
result = dedupe.dedupe_items(items)
|
||||
self.assertEqual(len(result), 1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
Reference in New Issue
Block a user