diff --git a/.gitignore b/.gitignore index 6cc1e13..21ee503 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ .venv # This should be generated on deploy -static/feed.rss \ No newline at end of file +static/feed.rss +static/sitemap.xml \ No newline at end of file diff --git a/CLAUDE.md b/CLAUDE.md index 5cb136c..f64ce3d 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -32,6 +32,12 @@ python scripts/generate_rss.py ``` Run this after adding/updating comics to regenerate `static/feed.rss`. +**Generate sitemap:** +```bash +python scripts/generate_sitemap.py +``` +Run this after adding/updating comics to regenerate `static/sitemap.xml` for search engines. + ## Architecture ### Data Layer: comics_data.py @@ -134,6 +140,7 @@ Global context variables injected into all templates: - `static/images/icons/`: Navigation icons (first.png, previous.png, next.png, latest.png) used when `USE_ICON_NAV` is True - `static/images/`: Header images and other site graphics - `static/feed.rss`: Generated RSS feed (run `scripts/generate_rss.py`) +- `static/sitemap.xml`: Generated sitemap (run `scripts/generate_sitemap.py`) ## Important Implementation Details diff --git a/app.py b/app.py index 99d585b..bdac6e8 100644 --- a/app.py +++ b/app.py @@ -232,6 +232,13 @@ def api_comic(comic_id): return jsonify(comic) +@app.route('/sitemap.xml') +def sitemap(): + """Serve the static sitemap.xml file""" + from flask import send_from_directory + return send_from_directory('static', 'sitemap.xml', mimetype='application/xml') + + @app.errorhandler(404) def page_not_found(e): """404 error handler""" diff --git a/scripts/generate_sitemap.py b/scripts/generate_sitemap.py new file mode 100644 index 0000000..341c6dd --- /dev/null +++ b/scripts/generate_sitemap.py @@ -0,0 +1,88 @@ +#!/usr/bin/env python3 +# Sunday Comics - Sitemap generator +# Copyright (c) 2025 Tomasita Cabrera +# Licensed under the MIT License - see LICENSE file for details + +""" +Script to generate a sitemap.xml file for the comic +""" 
import sys
import os
from xml.etree.ElementTree import Element, SubElement, tostring
from xml.dom import minidom

# Add parent directory to path so we can import comics_data
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from comics_data import COMICS, SITE_URL


def _add_url(urlset, loc, changefreq, priority, lastmod=None):
    """Append one <url> entry to *urlset* and return it.

    Children are emitted in the order loc, lastmod, changefreq, priority;
    <lastmod> is omitted when *lastmod* is None.
    """
    url = SubElement(urlset, 'url')
    SubElement(url, 'loc').text = loc
    if lastmod is not None:
        SubElement(url, 'lastmod').text = lastmod
    SubElement(url, 'changefreq').text = changefreq
    SubElement(url, 'priority').text = priority
    return url


def _build_urlset(comics, site_url):
    """Build the <urlset> element tree for *comics* rooted at *site_url*."""
    # Tolerate a configured SITE_URL that ends with '/', which would otherwise
    # produce '//' in every <loc>.
    base = site_url.rstrip('/')
    urlset = Element('urlset', xmlns='http://www.sitemaps.org/schemas/sitemap/0.9')

    # Homepage and archive are only listed when there is at least one comic,
    # because their <lastmod> is taken from the last comic in the list.
    # NOTE(review): assumes comics are ordered oldest-to-newest and that
    # 'date' is already in a sitemap-compatible format - confirm in comics_data.
    if comics:
        latest_date = comics[-1]['date']
        _add_url(urlset, f'{base}/', 'weekly', '1.0', lastmod=latest_date)
        _add_url(urlset, f'{base}/archive', 'weekly', '0.9', lastmod=latest_date)

    # The about page is always listed and carries no <lastmod>.
    _add_url(urlset, f'{base}/about', 'monthly', '0.7')

    # One entry per individual comic page.
    for comic in comics:
        _add_url(
            urlset,
            f"{base}/comic/{comic['number']}",
            'never',
            '0.8',
            lastmod=comic['date'],
        )

    return urlset


def _serialize(urlset):
    """Return *urlset* as pretty-printed XML text without blank lines."""
    pretty = minidom.parseString(tostring(urlset)).toprettyxml(indent=' ')
    # Dropping whitespace-only lines also drops the trailing newline that
    # toprettyxml() emits, so the result does not end with '\n'.
    return '\n'.join(line for line in pretty.split('\n') if line.strip())


def generate_sitemap():
    """Generate sitemap.xml content from the COMICS data.

    Returns:
        The sitemap document as a string: the homepage and archive (only when
        COMICS is non-empty), the about page, and one URL per comic.
    """
    return _serialize(_build_urlset(COMICS, SITE_URL))


def main():
    """Generate the sitemap and save it as static/sitemap.xml.

    The static directory is resolved relative to this script's parent
    directory and is created if it does not exist yet.
    """
    # Get path to static folder
    script_dir = os.path.dirname(os.path.abspath(__file__))
    parent_dir = os.path.dirname(script_dir)
    static_dir = os.path.join(parent_dir, 'static')

    # Create static directory if it doesn't exist
    os.makedirs(static_dir, exist_ok=True)

    # Build the tree once so the reported total reflects what is written
    urlset = _build_urlset(COMICS, SITE_URL)
    sitemap_content = _serialize(urlset)

    # Save to file
    sitemap_file = os.path.join(static_dir, 'sitemap.xml')
    with open(sitemap_file, 'w', encoding='utf-8') as f:
        f.write(sitemap_content)

    print(f"Sitemap generated: {sitemap_file}")
    # Count the <url> children actually emitted: homepage and archive are
    # skipped when COMICS is empty, so a fixed "+ 3" would over-report.
    print(f"Total URLs: {len(urlset)}")


if __name__ == '__main__':
    main()