From faa98eefbc4f3f87f8751bbb5c534cba4f8507c1 Mon Sep 17 00:00:00 2001 From: Ahmed-Tawfik94 Date: Mon, 19 May 2025 11:35:13 +0800 Subject: [PATCH] #1105 got fixed (metadata now matches with meta property article:*) --- crawl4ai/utils.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/crawl4ai/utils.py b/crawl4ai/utils.py index bfa8ce9d..ebf15f24 100644 --- a/crawl4ai/utils.py +++ b/crawl4ai/utils.py @@ -1456,6 +1456,12 @@ def extract_metadata_using_lxml(html, doc=None): content = tag.get("content", "").strip() if property_name and content: metadata[property_name] = content + # getting the article Values + metadata.update({ + tag['property'].strip():tag["content"].strip() + for tag in head.find_all("meta", attrs={"property": re.compile(r"^article:")}) + if tag.has_attr('property') and tag.has_attr('content') + }) return metadata @@ -1531,7 +1537,12 @@ def extract_metadata(html, soup=None): content = tag.get("content", "").strip() if property_name and content: metadata[property_name] = content - + # getting the article Values + metadata.update({ + tag['property'].strip():tag["content"].strip() + for tag in head.find_all("meta", attrs={"property": re.compile(r"^article:")}) + if tag.has_attr('property') and tag.has_attr('content') + }) return metadata