# sunday/app.py

# Sunday Comics - A simple webcomic platform
# Copyright (c) 2025 Tomasita Cabrera
# Licensed under the MIT License - see LICENSE file for details

import os
from datetime import datetime
from flask import Flask, render_template, abort, jsonify, request
from comics_data import (
    COMICS, COMIC_NAME, COPYRIGHT_NAME, SITE_URL, FULL_WIDTH_DEFAULT, PLAIN_DEFAULT, LOGO_IMAGE, LOGO_MODE,
    HEADER_IMAGE, FOOTER_IMAGE, BANNER_IMAGE, COMPACT_FOOTER, ARCHIVE_FULL_WIDTH, SECTIONS_ENABLED,
    USE_COMIC_NAV_ICONS, USE_HEADER_NAV_ICONS, USE_FOOTER_SOCIAL_ICONS, NEWSLETTER_ENABLED,
    SOCIAL_INSTAGRAM, SOCIAL_YOUTUBE, SOCIAL_EMAIL, API_SPEC_LINK, EMBED_ENABLED, PERMALINK_ENABLED
)
import markdown

# Application instance that every route and hook below registers against
app = Flask(__name__)

# Secret key comes from the environment when set.
# NOTE(review): the fallback is a placeholder value - set SECRET_KEY in real deployments.
app.config['SECRET_KEY'] = os.getenv('SECRET_KEY', 'your-secret-key')


@app.after_request
def add_ai_blocking_headers(response):
    """Tag every outgoing response with an X-Robots-Tag that opts out of AI use"""
    robots_directive = 'noai, noimageai'
    response.headers['X-Robots-Tag'] = robots_directive
    return response


@app.context_processor
def inject_global_settings():
    """Expose the site-wide settings as variables in every rendered template"""
    return dict(
        comic_name=COMIC_NAME,
        # Fall back to the comic name when no copyright name is configured
        copyright_name=COPYRIGHT_NAME or COMIC_NAME,
        current_year=datetime.now().year,
        site_url=SITE_URL,
        logo_image=LOGO_IMAGE,
        logo_mode=LOGO_MODE,
        header_image=HEADER_IMAGE,
        footer_image=FOOTER_IMAGE,
        banner_image=BANNER_IMAGE,
        compact_footer=COMPACT_FOOTER,
        archive_full_width=ARCHIVE_FULL_WIDTH,
        sections_enabled=SECTIONS_ENABLED,
        use_comic_nav_icons=USE_COMIC_NAV_ICONS,
        use_header_nav_icons=USE_HEADER_NAV_ICONS,
        use_footer_social_icons=USE_FOOTER_SOCIAL_ICONS,
        newsletter_enabled=NEWSLETTER_ENABLED,
        social_instagram=SOCIAL_INSTAGRAM,
        social_youtube=SOCIAL_YOUTUBE,
        social_email=SOCIAL_EMAIL,
        api_spec_link=API_SPEC_LINK,
        embed_enabled=EMBED_ENABLED,
        permalink_enabled=PERMALINK_ENABLED,
    )


def is_full_width(comic):
    """Resolve the full-width flag: a per-comic 'full_width' value wins over the global default"""
    has_override = 'full_width' in comic
    return comic['full_width'] if has_override else FULL_WIDTH_DEFAULT


def is_plain(comic):
    """Resolve the plain-mode flag: a per-comic 'plain' value wins over the global default"""
    # No per-comic override present: defer to the site-wide setting
    if 'plain' not in comic:
        return PLAIN_DEFAULT
    return comic['plain']


def format_comic_date(date_str):
    """Format date string (YYYY-MM-DD) to 'Day name, Month name day, year'

    Args:
        date_str: Date in 'YYYY-MM-DD' form.

    Returns:
        The formatted date (for example 'Sunday, January 5, 2025'), or
        date_str unchanged when it cannot be parsed or formatted.
    """
    try:
        date_obj = datetime.strptime(date_str, '%Y-%m-%d')
        # Insert the day number directly: a no-leading-zero day directive
        # (%-d / %#d) is not portable across platforms
        return date_obj.strftime(f'%A, %B {date_obj.day}, %Y')
    except (TypeError, ValueError):
        # TypeError: input is not a string (e.g. None)
        # ValueError: input does not match the expected format
        # Anything else (KeyboardInterrupt, SystemExit, ...) must propagate
        return date_str


def get_author_note_from_file(filename):
    """Read a markdown author note and return it rendered as HTML

    Args:
        filename: A bare filename (resolved inside content/author_notes/)
                  or a path containing a separator (resolved relative to content/)

    Returns:
        The rendered HTML string, or None when the file does not exist
    """
    app_dir = os.path.dirname(__file__)
    contains_separator = any(sep in filename for sep in ('/', '\\'))

    if contains_separator:
        # Caller supplied a path: resolve it relative to content/
        note_path = os.path.join(app_dir, 'content', filename)
    else:
        # Bare filename: it lives in the author_notes directory
        note_path = os.path.join(app_dir, 'content', 'author_notes', filename)

    try:
        with open(note_path, encoding='utf-8') as note_file:
            raw_note = note_file.read()
        return markdown.markdown(raw_note)
    except FileNotFoundError:
        return None


def enrich_comic(comic):
    """Return a copy of the comic with display-ready fields added

    Adds 'full_width', 'plain', 'formatted_date' and 'author_note_is_html'.
    When the comic names a markdown note file that loads successfully, the
    rendered HTML replaces 'author_note'. Returns None when comic is None.
    """
    if comic is None:
        return None

    enriched = comic.copy()
    enriched.update(
        full_width=is_full_width(comic),
        plain=is_plain(comic),
        formatted_date=format_comic_date(comic['date']),
    )

    # Only attempt to load a markdown note when one is explicitly specified
    note_source = comic.get('author_note_md')
    rendered_note = get_author_note_from_file(note_source) if note_source else None

    if rendered_note:
        enriched['author_note'] = rendered_note
        enriched['author_note_is_html'] = True
    else:
        # No file specified, or it could not be loaded: any 'author_note'
        # already in the comic data is left untouched as plain text
        enriched['author_note_is_html'] = False

    return enriched


def get_comic_by_number(number):
    """Look up the comic whose 'number' equals the given value, enriched; None if absent"""
    found = next((entry for entry in COMICS if entry['number'] == number), None)
    if found is None:
        return None
    return enrich_comic(found)


def get_latest_comic():
    """Return the last entry of COMICS, enriched, or None when there are no comics"""
    if not COMICS:
        return None
    newest = COMICS[-1]
    return enrich_comic(newest)


@app.route('/')
def index():
    """Home page - renders the latest comic, or an empty state when there is none"""
    latest = get_latest_comic()
    # With no comic to show, the template receives comic=None and a zero count
    comic_count = len(COMICS) if latest else 0
    return render_template(
        'index.html',
        title='Latest Comic',
        comic=latest or None,
        total_comics=comic_count,
    )


@app.route('/comic/<int:comic_id>')
def comic(comic_id):
    """Page for a single comic, looked up by number; 404 when it does not exist"""
    requested = get_comic_by_number(comic_id)
    if not requested:
        abort(404)
    # Fall back to the "#X" format when the comic has no title key
    # (matching client-side behavior)
    fallback_title = f"#{comic_id}"
    return render_template(
        'comic.html',
        title=requested.get('title', fallback_title),
        comic=requested,
        total_comics=len(COMICS),
    )


@app.route('/embed/<int:comic_id>')
def embed(comic_id):
    """Minimal comic view meant for iframes; 404 when embedding is off or the comic is missing"""
    # The lookup is skipped entirely when embedding is disabled
    requested = get_comic_by_number(comic_id) if EMBED_ENABLED else None
    if not requested:
        abort(404)
    # Fall back to the "#X" format when the comic has no title key
    fallback_title = f"#{comic_id}"
    return render_template(
        'embed.html',
        title=requested.get('title', fallback_title),
        comic=requested,
    )


def group_comics_by_section(comics_list):
    """Split comics into consecutive sections. Returns list of (section_title, comics) tuples

    A comic carrying a 'section' key opens a new group titled with that value;
    comics without the key join the group opened most recently. Comics that
    appear before any 'section' key are collected under a None title. When
    sections are disabled, everything is returned as one untitled group.
    """
    if not SECTIONS_ENABLED:
        return [(None, comics_list)]

    grouped = []
    for entry in comics_list:
        if 'section' in entry:
            # This comic opens a new section and is its first member
            grouped.append((entry['section'], [entry]))
        elif grouped:
            # Continue whichever section was opened last
            grouped[-1][1].append(entry)
        else:
            # Leading comics with no section yet form an untitled group
            grouped.append((None, [entry]))

    return grouped


@app.route('/archive')
def archive():
    """Archive page showing all comics, newest first, grouped by section

    A comic's 'section' key marks where a section starts, and COMICS is kept
    oldest-to-newest (the latest comic is the last entry). Grouping therefore
    has to run on the chronological list: grouping an already-reversed list
    would attach each comic to the section that follows it instead of the
    one it belongs to. The grouped result is then flipped so both the
    sections and the comics inside them are shown newest first.
    """
    chronological = [enrich_comic(comic) for comic in COMICS]

    # Group in publication order, then reverse sections and their contents.
    # With sections disabled this yields a single untitled, newest-first group.
    sections = [
        (section_title, section_comics[::-1])
        for section_title, section_comics
        in reversed(group_comics_by_section(chronological))
    ]

    return render_template('archive.html', title='Archive',
                         sections=sections)


@app.route('/about')
def about():
    """About page - rendered from content/about.md"""
    about_path = os.path.join(os.path.dirname(__file__), 'content', 'about.md')
    try:
        with open(about_path, encoding='utf-8') as about_file:
            raw_markdown = about_file.read()
        html_content = markdown.markdown(raw_markdown)
    except FileNotFoundError:
        # Missing file: show a placeholder message instead of failing
        html_content = '<p>About content not found.</p>'
    return render_template('page.html', title='About', content=html_content)


@app.route('/terms')
def terms():
    """Terms of Service page

    Reads content/terms.md, renders it as a Jinja template so the
    copyright_name and social_email placeholders are substituted, then
    converts the resulting markdown to HTML. A placeholder message is shown
    when the file is missing.
    """
    from jinja2 import Template
    # Read and render the markdown file with template variables
    terms_path = os.path.join(os.path.dirname(__file__), 'content', 'terms.md')
    try:
        with open(terms_path, 'r', encoding='utf-8') as f:
            content = f.read()
        # First render as Jinja template to substitute variables
        template = Template(content)
        rendered_content = template.render(
            # Same fallback the global template context uses, so the terms
            # never show an empty name when COPYRIGHT_NAME is unset
            copyright_name=COPYRIGHT_NAME if COPYRIGHT_NAME else COMIC_NAME,
            social_email=SOCIAL_EMAIL if SOCIAL_EMAIL else '[Contact Email]'
        )
        # Then convert markdown to HTML
        html_content = markdown.markdown(rendered_content)
    except FileNotFoundError:
        html_content = '<p>Terms of Service content not found.</p>'
    return render_template('page.html', title='Terms of Service', content=html_content)


@app.route('/api/comics')
def api_comics():
    """API endpoint - JSON array of every comic, enriched, in COMICS order"""
    payload = list(map(enrich_comic, COMICS))
    return jsonify(payload)


@app.route('/api/comics/<int:comic_id>')
def api_comic(comic_id):
    """API endpoint - JSON for one comic, or a JSON error with status 404"""
    found = get_comic_by_number(comic_id)
    if found:
        return jsonify(found)
    return jsonify({'error': 'Comic not found'}), 404


@app.route('/sitemap.xml')
def sitemap():
    """Send sitemap.xml from the static directory as application/xml"""
    from flask import send_from_directory
    static_dir, sitemap_name = 'static', 'sitemap.xml'
    return send_from_directory(static_dir, sitemap_name, mimetype='application/xml')


@app.route('/robots.txt')
def robots():
    """Generate robots.txt dynamically with correct SITE_URL

    The file allows general crawling except for /api/, advertises the
    sitemap, and denies the whole site to a list of AI crawlers.

    The /api/ rule is kept inside the "User-agent: *" group and listed before
    "Allow: /". Parsers that treat a blank line as the end of a group would
    otherwise drop a rule placed after the blank line, and parsers that apply
    the first matching rule would otherwise let "Allow: /" win for /api/ paths.
    """
    from flask import Response

    # Crawlers that are denied access to the entire site
    ai_crawlers = (
        'GPTBot',
        'ChatGPT-User',
        'CCBot',
        'anthropic-ai',
        'Claude-Web',
        'Google-Extended',
        'PerplexityBot',
        'Omgilibot',
        'Diffbot',
        'Bytespider',
        'FacebookBot',
        'ImagesiftBot',
        'cohere-ai',
    )
    # One blank-line-separated group per crawler
    ai_blocks = '\n\n'.join(
        f'User-agent: {agent}\nDisallow: /' for agent in ai_crawlers
    )

    robots_txt = f"""# Sunday Comics - Robots.txt
# Content protected by copyright. AI training prohibited.
# See terms: {SITE_URL}/terms

# Disallow API endpoints from indexing
User-agent: *
Disallow: /api/
Allow: /

# Sitemap location
Sitemap: {SITE_URL}/sitemap.xml

# Block AI crawlers and scrapers
{ai_blocks}
"""
    return Response(robots_txt, mimetype='text/plain')


@app.route('/tdmrep.json')
def tdm_reservation():
    """TDM (Text and Data Mining) reservation document pointing at the site terms"""
    reservation = {
        "reservation": 1,
        "policy": f"{SITE_URL}/terms",
    }
    return jsonify({"tdm": reservation})


@app.errorhandler(404)
def page_not_found(e):
    """404 handler - JSON body for /api/ paths, HTML page for everything else"""
    is_api_request = request.path.startswith('/api/')
    if not is_api_request:
        # Regular pages get the HTML error template
        return render_template('404.html', title='Page Not Found'), 404
    return jsonify({'error': 'Not found'}), 404


if __name__ == '__main__':
    # Direct-run entry point: PORT and DEBUG are read from the environment
    listen_port = int(os.environ.get('PORT', 3000))
    # DEBUG counts as enabled only for these case-insensitive spellings
    truthy_flags = ('true', '1', 't')
    debug_enabled = os.environ.get('DEBUG', 'False').lower() in truthy_flags
    app.run(debug=debug_enabled, port=listen_port)