diff --git a/crawl4ai/utils.py b/crawl4ai/utils.py index ebf15f24..64d4b210 100644 --- a/crawl4ai/utils.py +++ b/crawl4ai/utils.py @@ -1456,12 +1456,13 @@ def extract_metadata_using_lxml(html, doc=None): content = tag.get("content", "").strip() if property_name and content: metadata[property_name] = content - # getting the article Values - metadata.update({ - tag['property'].strip():tag["content"].strip() - for tag in head.find_all("meta", attrs={"property": re.compile(r"^article:")}) - if tag.has_attr('property') and tag.has_attr('content') - }) + # Article metadata - using starts-with() for performance + article_tags = head.xpath('.//meta[starts-with(@property, "article:")]') + for tag in article_tags: + property_name = tag.get("property", "").strip() + content = tag.get("content", "").strip() + if property_name and content: + metadata[property_name] = content return metadata