From 6d3444ba17437062a75ccb99b316021c34e08209 Mon Sep 17 00:00:00 2001
From: lizhuxiong <panda_xlz@163.com>
Date: Tue, 5 Aug 2025 16:18:34 +0800
Subject: [PATCH 1/2] When obtaining cleaned_html, remove "script" tags
 separately so that their tail text is preserved.

---
 crawl4ai/content_scraping_strategy.py | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/crawl4ai/content_scraping_strategy.py b/crawl4ai/content_scraping_strategy.py
index 3751d52f..480d9ffe 100644
--- a/crawl4ai/content_scraping_strategy.py
+++ b/crawl4ai/content_scraping_strategy.py
@@ -1668,7 +1668,26 @@ class LXMLWebScrapingStrategy(WebScrapingStrategy):
                 content_element = body
 
             # Remove script and style tags
-            for tag in ["script", "style", "link", "meta", "noscript"]:
+            # Handle script separately
+            for element in body.xpath(f".//script"):
+                parent = element.getparent()
+                if parent is not None:
+                    tail = element.tail  # Get the tail text
+                    if tail:
+                        prev = element.getprevious()  # Get the previous sibling node
+                        if prev is not None:
+                            if prev.tail:
+                                prev.tail += tail 
+                            else:
+                                prev.tail = tail
+                        else:
+                            if parent.text:
+                                parent.text += tail
+                            else:
+                                parent.text = tail
+                    parent.remove(element)  # Delete the element
+
+            for tag in ["style", "link", "meta", "noscript"]:
                 for element in body.xpath(f".//{tag}"):
                     if element.getparent() is not None:
                         element.getparent().remove(element)

From 660d7011b9005cae20df1f4f4ee42bfc8a2908b1 Mon Sep 17 00:00:00 2001
From: lizhuxiong <panda_xlz@163.com>
Date: Tue, 5 Aug 2025 16:27:03 +0800
Subject: [PATCH 2/2] When obtaining cleaned_html, remove the style, link,
 meta and noscript tags before handling "script" tags separately.

---
 crawl4ai/content_scraping_strategy.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/crawl4ai/content_scraping_strategy.py b/crawl4ai/content_scraping_strategy.py
index 480d9ffe..04d06b4f 100644
--- a/crawl4ai/content_scraping_strategy.py
+++ b/crawl4ai/content_scraping_strategy.py
@@ -1668,6 +1668,11 @@ class LXMLWebScrapingStrategy(WebScrapingStrategy):
                 content_element = body
 
             # Remove script and style tags
+            for tag in ["style", "link", "meta", "noscript"]:
+                for element in body.xpath(f".//{tag}"):
+                    if element.getparent() is not None:
+                        element.getparent().remove(element)
+                        
             # Handle script separately
             for element in body.xpath(f".//script"):
                 parent = element.getparent()
@@ -1687,10 +1692,6 @@ class LXMLWebScrapingStrategy(WebScrapingStrategy):
                                 parent.text = tail
                     parent.remove(element)  # Delete the element
 
-            for tag in ["style", "link", "meta", "noscript"]:
-                for element in body.xpath(f".//{tag}"):
-                    if element.getparent() is not None:
-                        element.getparent().remove(element)
 
             # Handle social media and domain exclusions
             kwargs["exclude_domains"] = set(kwargs.get("exclude_domains", []))