diff --git a/tests/async/test_content_filter.py b/tests/async/test_content_filter.py
new file mode 100644
index 00000000..a873c414
--- /dev/null
+++ b/tests/async/test_content_filter.py
@@ -0,0 +1,175 @@
+import os, sys
+import pytest
+from bs4 import BeautifulSoup
+from typing import List
+
+# Add the parent of this file's directory to sys.path (dirname applied twice to __file__)
+parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+sys.path.append(parent_dir)
+
+from crawl4ai.content_filter_strategy import BM25ContentFilter
+
+@pytest.fixture
+def basic_html():  # NOTE(review): literal holds no tags though a test looks up <head> in it - confirm upstream
+    return """
+
+
+ Short but important section header description.
+
+
+ Long paragraph with sufficient words to meet the minimum threshold. This paragraph continues with more text to ensure we have enough content for proper testing. We need to make sure this has enough words to pass our filters and be considered valid content for extraction purposes.
+
+
+
+ """
+
+@pytest.fixture
+def no_meta_html():  # Page text used by the no-metadata test; NOTE(review): literal holds no tags - confirm upstream
+ return """
+
+
+ First paragraph that should be used as fallback for query when no meta tags exist. This text needs to be long enough to serve as a meaningful fallback for our content extraction process.
+
+
+ """
+
+class TestBM25ContentFilter:
+    def test_basic_extraction(self, basic_html):
+        """Filtering the basic page yields the long paragraph and no navigation text."""
+        extracted = BM25ContentFilter().filter_content(basic_html)
+        assert extracted, "Should extract content"
+        assert len(extracted) >= 1, "Should extract at least one content block"
+        # Lower-case once so both substring checks ignore case.
+        flattened = ' '.join(extracted).lower()
+        assert "long paragraph" in flattened
+        assert "navigation" not in flattened
+
+    def test_user_query_override(self, basic_html):
+        """An explicit user_query is what extract_page_query returns for the page."""
+        expected_query = "specific test query"
+        query_filter = BM25ContentFilter(user_query=expected_query)
+
+        # Parse the fixture and hand the filter whatever find('head') returns.
+        head_tag = BeautifulSoup(basic_html, 'lxml').find('head')
+        actual_query = query_filter.extract_page_query(head_tag)
+
+        assert actual_query == expected_query
+        assert "Test description" not in actual_query
+
+    def test_header_extraction(self, wiki_html):
+        """Section and title headers should survive filtering despite their length."""
+        # NOTE(review): no `wiki_html` fixture is visible in this part of the file - confirm it exists.
+        blocks = BM25ContentFilter().filter_content(wiki_html)
+
+        lowered = ' '.join(blocks).lower()
+        assert "section 1" in lowered, "Should include section header"
+        assert "article title" in lowered, "Should include main title"
+
+    def test_no_metadata_fallback(self, no_meta_html):
+        """Content is still returned, including the first paragraph, without metadata."""
+        blocks = BM25ContentFilter().filter_content(no_meta_html)
+        assert blocks, "Should extract content even without metadata"
+
+        merged = ' '.join(blocks)
+        assert "First paragraph" in merged, "Should use first paragraph content"
+
+    def test_empty_input(self):
+        """Both the empty string and None must produce an empty result list."""
+        empty_filter = BM25ContentFilter()
+        for blank in ("", None):
+            assert empty_filter.filter_content(blank) == []
+
+ def test_malformed_html(self):
+ """Test handling of malformed HTML"""
+ malformed_html = "