Compare commits
1 Commits
fix/linkPr
...
fix/serial
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a0c5f0f79a |
@@ -598,7 +598,7 @@ class BrowserConfig:
|
|||||||
"chrome_channel": self.chrome_channel,
|
"chrome_channel": self.chrome_channel,
|
||||||
"channel": self.channel,
|
"channel": self.channel,
|
||||||
"proxy": self.proxy,
|
"proxy": self.proxy,
|
||||||
"proxy_config": self.proxy_config,
|
"proxy_config": self.proxy_config.to_dict() if self.proxy_config else None,
|
||||||
"viewport_width": self.viewport_width,
|
"viewport_width": self.viewport_width,
|
||||||
"viewport_height": self.viewport_height,
|
"viewport_height": self.viewport_height,
|
||||||
"accept_downloads": self.accept_downloads,
|
"accept_downloads": self.accept_downloads,
|
||||||
|
|||||||
@@ -336,40 +336,8 @@ class LinkPreview:
|
|||||||
|
|
||||||
updated_internal.append(updated_link)
|
updated_internal.append(updated_link)
|
||||||
else:
|
else:
|
||||||
# # Keep original link unchanged
|
# Keep original link unchanged
|
||||||
# updated_internal.append(link)
|
updated_internal.append(link)
|
||||||
|
|
||||||
# Head extraction failed - calculate fallback scores
|
|
||||||
# Use URL-based scoring if query provided
|
|
||||||
contextual_score = None
|
|
||||||
if config.link_preview_config and config.link_preview_config.query:
|
|
||||||
# Calculate URL-based relevance score as fallback
|
|
||||||
contextual_score = self.seeder._calculate_url_relevance_score(
|
|
||||||
config.link_preview_config.query,
|
|
||||||
link.href
|
|
||||||
)
|
|
||||||
|
|
||||||
# Create updated link with fallback scoring
|
|
||||||
updated_link = Link(
|
|
||||||
href=link.href,
|
|
||||||
text=link.text,
|
|
||||||
title=link.title,
|
|
||||||
base_domain=link.base_domain,
|
|
||||||
head_data=None, # No head data available
|
|
||||||
head_extraction_status="failed",
|
|
||||||
intrinsic_score=getattr(link, 'intrinsic_score', None),
|
|
||||||
contextual_score=contextual_score
|
|
||||||
)
|
|
||||||
|
|
||||||
# Calculate total score even without head data
|
|
||||||
updated_link.total_score = calculate_total_score(
|
|
||||||
intrinsic_score=updated_link.intrinsic_score,
|
|
||||||
contextual_score=updated_link.contextual_score,
|
|
||||||
score_links_enabled=getattr(config, 'score_links', False),
|
|
||||||
query_provided=bool(config.link_preview_config and config.link_preview_config.query)
|
|
||||||
)
|
|
||||||
|
|
||||||
updated_internal.append(updated_link)
|
|
||||||
|
|
||||||
# Update external links
|
# Update external links
|
||||||
updated_external = []
|
updated_external = []
|
||||||
@@ -406,40 +374,8 @@ class LinkPreview:
|
|||||||
|
|
||||||
updated_external.append(updated_link)
|
updated_external.append(updated_link)
|
||||||
else:
|
else:
|
||||||
# # Keep original link unchanged
|
# Keep original link unchanged
|
||||||
# updated_external.append(link)
|
updated_external.append(link)
|
||||||
|
|
||||||
# Head extraction failed - calculate fallback scores
|
|
||||||
# Use URL-based scoring if query provided
|
|
||||||
contextual_score = None
|
|
||||||
if config.link_preview_config and config.link_preview_config.query:
|
|
||||||
# Calculate URL-based relevance score as fallback
|
|
||||||
contextual_score = self.seeder._calculate_url_relevance_score(
|
|
||||||
config.link_preview_config.query,
|
|
||||||
link.href
|
|
||||||
)
|
|
||||||
|
|
||||||
# Create updated link with fallback scoring
|
|
||||||
updated_link = Link(
|
|
||||||
href=link.href,
|
|
||||||
text=link.text,
|
|
||||||
title=link.title,
|
|
||||||
base_domain=link.base_domain,
|
|
||||||
head_data=None, # No head data available
|
|
||||||
head_extraction_status="failed",
|
|
||||||
intrinsic_score=getattr(link, 'intrinsic_score', None),
|
|
||||||
contextual_score=contextual_score
|
|
||||||
)
|
|
||||||
|
|
||||||
# Calculate total score even without head data
|
|
||||||
updated_link.total_score = calculate_total_score(
|
|
||||||
intrinsic_score=updated_link.intrinsic_score,
|
|
||||||
contextual_score=updated_link.contextual_score,
|
|
||||||
score_links_enabled=getattr(config, 'score_links', False),
|
|
||||||
query_provided=bool(config.link_preview_config and config.link_preview_config.query)
|
|
||||||
)
|
|
||||||
|
|
||||||
updated_external.append(updated_link)
|
|
||||||
|
|
||||||
# Sort links by relevance score if available
|
# Sort links by relevance score if available
|
||||||
if any(hasattr(link, 'head_data') and link.head_data and 'relevance_score' in link.head_data
|
if any(hasattr(link, 'head_data') and link.head_data and 'relevance_score' in link.head_data
|
||||||
|
|||||||
Reference in New Issue
Block a user