refactor(crawler): improve HTML handling and cleanup codebase
- Add HTML attribute preservation in GoogleSearchCrawler
- Fix lxml import references in utils.py
- Remove unused ssl_certificate.json
- Clean up imports and code organization in hub.py
- Update test case formatting and remove unused image search test

BREAKING CHANGE: Removed the ssl_certificate.json file, which might affect existing certificate validations
This commit is contained in:
@@ -17,12 +17,16 @@ async def google_example():
|
||||
crawler = crawler_cls()
|
||||
|
||||
# Text search
|
||||
text_results = await crawler.run(query="apple inc", search_type="text", schema_cache_path="/Users/unclecode/.crawl4ai")
|
||||
print(json.loads(text_results))
|
||||
text_results = await crawler.run(
|
||||
query="apple inc",
|
||||
search_type="text",
|
||||
schema_cache_path="/Users/unclecode/.crawl4ai"
|
||||
)
|
||||
print(json.dumps(json.loads(text_results), indent=4))
|
||||
|
||||
# Image search
|
||||
image_results = await crawler.run(query="apple inc", search_type="image")
|
||||
print(image_results)
|
||||
# image_results = await crawler.run(query="apple inc", search_type="image")
|
||||
# print(image_results)
|
||||
|
||||
if __name__ == "__main__":
|
||||
import asyncio
|
||||
|
||||
Reference in New Issue
Block a user