From 36429a63ded80920e37d4925be33bd0d5582fda0 Mon Sep 17 00:00:00 2001
From: ntohidi <nasrin@kidocode.com>
Date: Tue, 8 Jul 2025 12:54:33 +0200
Subject: [PATCH] fix: Rewrite article metadata extraction in extract_metadata
 as an explicit loop that skips empty values; tidy comments. ref #1105

---
 crawl4ai/utils.py | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/crawl4ai/utils.py b/crawl4ai/utils.py
index e029a004..8735dee0 100644
--- a/crawl4ai/utils.py
+++ b/crawl4ai/utils.py
@@ -1547,7 +1547,8 @@ def extract_metadata_using_lxml(html, doc=None):
         content = tag.get("content", "").strip()
         if property_name and content:
             metadata[property_name] = content
-   # Article metadata - using starts-with() for performance
+
+    # Article metadata
     article_tags = head.xpath('.//meta[starts-with(@property, "article:")]')
     for tag in article_tags:
         property_name = tag.get("property", "").strip()
@@ -1629,12 +1630,15 @@ def extract_metadata(html, soup=None):
         content = tag.get("content", "").strip()
         if property_name and content:
             metadata[property_name] = content
-        # getting the article Values
-    metadata.update({
-        tag['property'].strip():tag["content"].strip()
-        for tag in head.find_all("meta", attrs={"property": re.compile(r"^article:")})
-          if tag.has_attr('property') and tag.has_attr('content')
-    })
+
+    # Article metadata
+    article_tags = head.find_all("meta", attrs={"property": re.compile(r"^article:")})
+    for tag in article_tags:
+        property_name = tag.get("property", "").strip()
+        content = tag.get("content", "").strip()
+        if property_name and content:
+            metadata[property_name] = content
+
     return metadata