# ==== File: build_dummy_site.py ====
"""Generate a small static, multi-level dummy retail website.

The site consists of a homepage, category pages and sub-category pages that
list products.  Breadcrumb links are absolute (prefixed with the hosting base
path); links inside page bodies are relative to the page that contains them.
"""

import os
import random
import argparse
from html import escape
from pathlib import Path
from typing import Optional
from urllib.parse import quote

# --- Configuration ---
NUM_CATEGORIES = 3
NUM_SUBCATEGORIES_PER_CAT = 2  # Results in NUM_CATEGORIES * NUM_SUBCATEGORIES_PER_CAT total L2 categories
NUM_PRODUCTS_PER_SUBCAT = 5  # Products listed on each sub-category page
MAX_DEPTH_TARGET = 5  # Explicitly set target depth

# Vocabulary used by generate_lorem(); built once instead of on every call.
_LOREM_WORDS = (
    "lorem", "ipsum", "dolor", "sit", "amet", "consectetur", "adipiscing",
    "elit", "sed", "do", "eiusmod", "tempor", "incididunt", "ut", "labore",
    "et", "dolore", "magna", "aliqua",
)

# NOTE(review): the original stylesheet was not recoverable from the reviewed
# source (its markup had been stripped).  This is a minimal stand-in; only the
# "product-list" class name is attested by the original comments.
_PAGE_CSS = """<style>
        body { font-family: sans-serif; margin: 2em; }
        .breadcrumbs { margin-bottom: 1em; font-size: 0.9em; }
        .product-list li { margin: 0.3em 0; }
    </style>"""


# --- Helper Functions ---
def generate_lorem(words=20):
    """Return ``words`` random placeholder words as one capitalized sentence.

    The result always ends with a period; ``words=0`` yields just ``"."``.
    """
    sentence = " ".join(random.choice(_LOREM_WORDS) for _ in range(words))
    return sentence.capitalize() + "."


def _normalize_base_path(base_path: str) -> str:
    """Return ``base_path`` as ``"/a/b"`` (no trailing slash) or ``""`` for the root.

    A value made only of slashes (e.g. ``"/"``) is treated as the root;
    otherwise links would start with ``//`` and be read by browsers as
    protocol-relative URLs.
    """
    trimmed = (base_path or "").strip("/")
    return f"/{trimmed}" if trimmed else ""


def create_html_page(filepath: Path, title: str, body_content: str,
                     breadcrumbs: Optional[list] = None, head_extras: str = "",
                     site_name: str = "FakeShop"):
    """Write a complete HTML document to ``filepath``.

    Args:
        filepath: Destination file; missing parent directories are created.
        title: Page title, used in ``<title>`` and the page heading (escaped).
        body_content: Raw HTML inserted verbatim below the heading.
        breadcrumbs: Optional list of ``{"name": ..., "link": ...}`` dicts
            rendered as a breadcrumb trail; ``None`` or empty renders nothing.
        head_extras: Raw HTML inserted verbatim into ``<head>``.
        site_name: Shop name appended to the ``<title>`` (escaped).
    """
    os.makedirs(filepath.parent, exist_ok=True)

    # Generate breadcrumb HTML using the 'link' provided in the breadcrumbs list
    breadcrumb_html = ""
    if breadcrumbs:
        links_html = " » ".join(
            f'<a href="{escape(bc["link"], quote=True)}">{escape(bc["name"])}</a>'
            for bc in breadcrumbs
        )
        breadcrumb_html = f'<nav class="breadcrumbs">{links_html}</nav>'

    safe_title = escape(title)
    html_content = f"""<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>{safe_title} - {escape(site_name)}</title>
    {head_extras}
    {_PAGE_CSS}
</head>
<body>
    <div class="container">
        {breadcrumb_html}
        <h1>{safe_title}</h1>
        {body_content}
    </div>
</body>
</html>
"""
    with open(filepath, "w", encoding="utf-8") as f:
        f.write(html_content)


def generate_site(base_dir: Path, site_name: str = "FakeShop", base_path: str = ""):
    """Generate the dummy website structure under ``base_dir``.

    Args:
        base_dir: Output directory (created if missing).
        site_name: Shop name shown on the homepage and in page titles.
        base_path: URL prefix the site will be hosted under (e.g.
            ``"samples/deepcrawl"``); used only for absolute breadcrumb links.
    """
    base_dir.mkdir(parents=True, exist_ok=True)

    # --- Clean and prepare the base path for URL construction ---
    full_base_path = _normalize_base_path(base_path)
    print(f"Using base path for links: '{full_base_path}'")

    safe_site = escape(site_name)

    # Root breadcrumb shared by every page below the homepage.
    # NOTE(review): the label "Home" is reconstructed; the original definition
    # of this list was missing from the reviewed source.
    breadcrumbs_home = [{"name": "Home", "link": f"{full_base_path}/index.html"}]

    # Pre-compute (display name, folder name) for each category so the
    # homepage and the category loop agree on the same paths.
    categories = []
    for i in range(NUM_CATEGORIES):
        cat_name = f"Category-{i+1}"
        categories.append((cat_name, quote(cat_name.lower().replace(" ", "-"))))

    # --- Level 0: Homepage ---
    # Links inside page bodies are relative so they work under any base path.
    home_items = "".join(
        f'<li><a class="category-link" href="{folder}/index.html">{escape(name)}</a></li>\n'
        for name, folder in categories
    )
    home_body = (
        f"<h2>Welcome to {safe_site}!</h2>"
        "<p>Your one-stop shop for imaginary items.</p>"
        "<h3>Categories:</h3>"
        f"<ul>\n{home_items}</ul>\n"
    )
    # No breadcrumbs *on* the homepage itself
    create_html_page(base_dir / "index.html", "Homepage", home_body, [],
                     site_name=site_name)

    # --- Levels 1-2 ---
    # TODO(review): the file header mentions levels 1-5, but generation of
    # pages below the sub-category level was not present in the reviewed
    # source and is therefore not reproduced here.
    for cat_name, cat_folder_name in categories:
        cat_dir = base_dir / cat_folder_name
        # This is the *absolute* path for the breadcrumb link
        cat_link_path = f"{full_base_path}/{cat_folder_name}/index.html"
        breadcrumbs_cat = breadcrumbs_home + [{"name": cat_name, "link": cat_link_path}]

        subcategories = []
        for j in range(NUM_SUBCATEGORIES_PER_CAT):
            subcat_name = f"{cat_name}-Sub-{j+1}"
            subcategories.append(
                (subcat_name, quote(subcat_name.lower().replace(" ", "-")))
            )

        # --- Level 1: Category Page ---
        subcat_items = "".join(
            f'<li><a class="subcategory-link" href="{folder}/index.html">{escape(name)}</a></li>\n'
            for name, folder in subcategories
        )
        cat_body = (
            f"<p>{generate_lorem(15)} for {escape(cat_name)}.</p>"
            "<h3>Sub-Categories:</h3>"
            f"<ul>\n{subcat_items}</ul>\n"
        )
        # A page shows only its ancestors in the trail, hence the parent list.
        create_html_page(cat_dir / "index.html", cat_name, cat_body,
                         breadcrumbs_home, site_name=site_name)

        for subcat_name, subcat_folder_name in subcategories:
            subcat_dir = cat_dir / subcat_folder_name

            # --- Level 2: Sub-Category Page (Product List) ---
            # NOTE(review): product naming is reconstructed; items are plain
            # text because no product pages are generated (see TODO above).
            product_items = "".join(
                f'<li class="product-item">{escape(subcat_name)} Product {k+1}</li>\n'
                for k in range(NUM_PRODUCTS_PER_SUBCAT)
            )
            subcat_body = (
                f"<p>Explore products in {escape(subcat_name)}. {generate_lorem(12)}</p>"
                "<h3>Products:</h3>"
                f'<ul class="product-list">\n{product_items}</ul>\n'
            )
            # Parent breadcrumbs (Home » Category) for the sub-category page.
            create_html_page(subcat_dir / "index.html", subcat_name, subcat_body,
                             breadcrumbs_cat, site_name=site_name)


def main(argv=None):
    """Parse command-line arguments and generate the site.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(description="Generate a dummy multi-level retail website.")
    parser.add_argument(
        "-o", "--output-dir",
        type=str,
        default="dummy_retail_site",
        help="Directory to generate the website in."
    )
    parser.add_argument(
        "-n", "--site-name",
        type=str,
        default="FakeShop",
        help="Name of the fake shop."
    )
    parser.add_argument(
        "-b", "--base-path",
        type=str,
        default="",
        help="Base path for hosting the site (e.g., 'samples/deepcrawl'). Leave empty if hosted at the root."
    )
    # Optional: Add more args to configure counts if needed
    args = parser.parse_args(argv)

    output_directory = Path(args.output_dir)
    site_name = args.site_name
    base_path = args.base_path

    print(f"Generating dummy site '{site_name}' in '{output_directory}'...")
    # Pass the base_path to the generation function
    generate_site(output_directory, site_name, base_path)

    print(f"\nCreated {sum(1 for _ in output_directory.rglob('*.html'))} HTML pages.")
    print("Dummy site generation complete.")
    print(f"To serve locally (example): python -m http.server --directory {output_directory} 8000")
    # The normalized path is "" for the root, so one format covers both cases
    # and a slash-only base path no longer yields "//index.html".
    print(f"Access the site at: http://localhost:8000{_normalize_base_path(base_path)}/index.html")


# --- Main Execution ---
if __name__ == "__main__":
    main()