#1105: fix(metadata): optimize article metadata extraction using XPath for improved performance

This commit is contained in:
Ahmed-Tawfik94
2025-05-19 13:48:02 +08:00
parent faa98eefbc
commit 137ac014fb

View File

@@ -1456,12 +1456,13 @@ def extract_metadata_using_lxml(html, doc=None):
content = tag.get("content", "").strip() content = tag.get("content", "").strip()
if property_name and content: if property_name and content:
metadata[property_name] = content metadata[property_name] = content
# getting the article Values # Article metadata - using starts-with() for performance
metadata.update({ article_tags = head.xpath('.//meta[starts-with(@property, "article:")]')
tag['property'].strip():tag["content"].strip() for tag in article_tags:
for tag in head.find_all("meta", attrs={"property": re.compile(r"^article:")}) property_name = tag.get("property", "").strip()
if tag.has_attr('property') and tag.has_attr('content') content = tag.get("content", "").strip()
}) if property_name and content:
metadata[property_name] = content
return metadata return metadata