# ==== File: build_dummy_site.py ====
import os
import random
import argparse
from pathlib import Path
from urllib.parse import quote
# --- Configuration ---
NUM_CATEGORIES = 3
NUM_SUBCATEGORIES_PER_CAT = 2 # Results in NUM_CATEGORIES * NUM_SUBCATEGORIES_PER_CAT total L2 categories
NUM_PRODUCTS_PER_SUBCAT = 5 # Products listed on L3 pages
MAX_DEPTH_TARGET = 5 # Explicitly set target depth
# --- Helper Functions ---
def generate_lorem(words=20):
"""Generates simple placeholder text."""
lorem_words = ["lorem", "ipsum", "dolor", "sit", "amet", "consectetur",
"adipiscing", "elit", "sed", "do", "eiusmod", "tempor",
"incididunt", "ut", "labore", "et", "dolore", "magna", "aliqua"]
return " ".join(random.choice(lorem_words) for _ in range(words)).capitalize() + "."
def create_html_page(filepath: Path, title: str, body_content: str, breadcrumbs: list = [], head_extras: str = ""):
"""Creates an HTML file with basic structure and inline CSS."""
os.makedirs(filepath.parent, exist_ok=True)
# Generate breadcrumb HTML using the 'link' provided in the breadcrumbs list
breadcrumb_html = ""
if breadcrumbs:
links_html = " » ".join(f'{bc["name"]}' for bc in breadcrumbs)
breadcrumb_html = f""
# Basic CSS for structure identification (kept the same)
css = """
"""
html_content = f"""
{title} - FakeShop
{head_extras}
{css}
{breadcrumb_html}
{title}
{body_content}
"""
with open(filepath, "w", encoding="utf-8") as f:
f.write(html_content)
# Keep print statement concise for clarity
# print(f"Created: {filepath}")
def generate_site(base_dir: Path, site_name: str = "FakeShop", base_path: str = ""):
"""Generates the dummy website structure."""
base_dir.mkdir(parents=True, exist_ok=True)
# --- Clean and prepare the base path for URL construction ---
# Ensure it starts with '/' if not empty, and remove any trailing '/'
if base_path:
full_base_path = "/" + base_path.strip('/')
else:
full_base_path = "" # Represents the root
print(f"Using base path for links: '{full_base_path}'")
# --- Level 0: Homepage ---
home_body = "
Welcome to FakeShop!
Your one-stop shop for imaginary items.
Categories:
\n
"
# Define the *actual* link path for the homepage breadcrumb
home_link_path = f"{full_base_path}/index.html"
breadcrumbs_home = [{"name": "Home", "link": home_link_path}] # Base breadcrumb
# Links *within* the page content should remain relative
for i in range(NUM_CATEGORIES):
cat_name = f"Category-{i+1}"
cat_folder_name = quote(cat_name.lower().replace(" ", "-"))
# This path is relative to the current directory (index.html)
cat_relative_page_path = f"{cat_folder_name}/index.html"
home_body += f'
"
# Pass the updated breadcrumbs list
create_html_page(cat_dir / "index.html", cat_name, cat_body, breadcrumbs_home) # Parent breadcrumb needed here
for j in range(NUM_SUBCATEGORIES_PER_CAT):
subcat_name = f"{cat_name}-Sub-{j+1}"
subcat_folder_name = quote(subcat_name.lower().replace(" ", "-"))
subcat_dir = cat_dir / subcat_folder_name
# Absolute path for the breadcrumb link
subcat_link_path = f"{full_base_path}/{cat_folder_name}/{subcat_folder_name}/index.html"
# Update breadcrumbs list for this level
breadcrumbs_subcat = breadcrumbs_cat + [{"name": subcat_name, "link": subcat_link_path}]
# --- Level 2: Sub-Category Page (Product List) ---
subcat_body = f"
Explore products in {subcat_name}. {generate_lorem(12)}
Products:
\n
"
for k in range(NUM_PRODUCTS_PER_SUBCAT):
prod_id = f"P{i+1}{j+1}{k+1:03d}" # e.g., P11001
prod_name = f"{subcat_name} Product {k+1} ({prod_id})"
# Filename relative to the subcategory page
prod_filename = f"product_{prod_id}.html"
# Absolute path for the breadcrumb link
prod_link_path = f"{full_base_path}/{cat_folder_name}/{subcat_folder_name}/{prod_filename}"
# Preview on list page (link remains relative)
subcat_body += f"""
"
# Update breadcrumbs list for this level
breadcrumbs_reviews = breadcrumbs_prod + [{"name": "Reviews", "link": reviews_link_path}]
# Pass the updated breadcrumbs list
create_html_page(subcat_dir / reviews_filename, f"{prod_name} - Reviews", reviews_body, breadcrumbs_prod) # Parent breadcrumb needed here
subcat_body += "
" # Close product-list ul
# Pass the correct breadcrumbs list for the subcategory index page
create_html_page(subcat_dir / "index.html", subcat_name, subcat_body, breadcrumbs_cat) # Parent breadcrumb needed here
# --- Main Execution ---
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Generate a dummy multi-level retail website.")
parser.add_argument(
"-o", "--output-dir",
type=str,
default="dummy_retail_site",
help="Directory to generate the website in."
)
parser.add_argument(
"-n", "--site-name",
type=str,
default="FakeShop",
help="Name of the fake shop."
)
parser.add_argument(
"-b", "--base-path",
type=str,
default="",
help="Base path for hosting the site (e.g., 'samples/deepcrawl'). Leave empty if hosted at the root."
)
# Optional: Add more args to configure counts if needed
args = parser.parse_args()
output_directory = Path(args.output_dir)
site_name = args.site_name
base_path = args.base_path
print(f"Generating dummy site '{site_name}' in '{output_directory}'...")
# Pass the base_path to the generation function
generate_site(output_directory, site_name, base_path)
print(f"\nCreated {sum(1 for _ in output_directory.rglob('*.html'))} HTML pages.")
print("Dummy site generation complete.")
print(f"To serve locally (example): python -m http.server --directory {output_directory} 8000")
if base_path:
print(f"Access the site at: http://localhost:8000/{base_path.strip('/')}/index.html")
else:
print(f"Access the site at: http://localhost:8000/index.html")