# sunday/app.py

# Sunday Comics - A simple webcomic platform
# Copyright (c) 2025 Tomasita Cabrera
# Licensed under the MIT License - see LICENSE file for details

import os
import logging
from datetime import datetime
from flask import Flask, render_template, abort, jsonify, request
from comics_data import (
    COMICS, COMIC_NAME, COPYRIGHT_NAME, SITE_URL, FULL_WIDTH_DEFAULT, PLAIN_DEFAULT, LOGO_IMAGE, LOGO_MODE,
    HEADER_IMAGE, FOOTER_IMAGE, BANNER_IMAGE, COMPACT_FOOTER, ARCHIVE_FULL_WIDTH, SECTIONS_ENABLED,
    USE_COMIC_NAV_ICONS, USE_HEADER_NAV_ICONS, USE_FOOTER_SOCIAL_ICONS, USE_SHARE_ICONS, NEWSLETTER_ENABLED,
    SOCIAL_INSTAGRAM, SOCIAL_YOUTUBE, SOCIAL_EMAIL, API_SPEC_LINK, EMBED_ENABLED, PERMALINK_ENABLED
)
import markdown

# Configure logging
logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)

app = Flask(__name__)

# Configuration
# SECRET_KEY should be supplied through the environment. The placeholder
# fallback is kept so the app still starts without any setup, but it is a
# publicly known value, so warn whenever it is the one actually in use.
_DEFAULT_SECRET_KEY = 'your-secret-key'
app.config['SECRET_KEY'] = os.environ.get('SECRET_KEY', _DEFAULT_SECRET_KEY)
if app.config['SECRET_KEY'] == _DEFAULT_SECRET_KEY:
    logger.warning(
        "SECRET_KEY is not set; falling back to an insecure placeholder value. "
        "Set the SECRET_KEY environment variable in production."
    )


@app.after_request
def add_ai_blocking_headers(response):
    """Stamp each outgoing response with an AI-scraping opt-out header.

    Runs as an after-request hook: it receives the response, assigns the
    ``X-Robots-Tag`` header the value ``noai, noimageai`` and returns the
    same response object.
    """
    robots_directives = ('noai', 'noimageai')
    response.headers['X-Robots-Tag'] = ', '.join(robots_directives)
    return response


@app.context_processor
def inject_global_settings():
    """Expose the site-wide settings to every template.

    Returns:
        A dict of template variables built from the ``comics_data`` constants,
        plus ``current_year``. ``copyright_name`` falls back to the comic name
        when no explicit copyright name is configured.
    """
    settings = dict(
        comic_name=COMIC_NAME,
        copyright_name=COPYRIGHT_NAME or COMIC_NAME,
        current_year=datetime.now().year,
        site_url=SITE_URL,
        logo_image=LOGO_IMAGE,
        logo_mode=LOGO_MODE,
        header_image=HEADER_IMAGE,
        footer_image=FOOTER_IMAGE,
        banner_image=BANNER_IMAGE,
        compact_footer=COMPACT_FOOTER,
        archive_full_width=ARCHIVE_FULL_WIDTH,
        sections_enabled=SECTIONS_ENABLED,
        use_comic_nav_icons=USE_COMIC_NAV_ICONS,
        use_header_nav_icons=USE_HEADER_NAV_ICONS,
        use_footer_social_icons=USE_FOOTER_SOCIAL_ICONS,
        use_share_icons=USE_SHARE_ICONS,
        newsletter_enabled=NEWSLETTER_ENABLED,
        social_instagram=SOCIAL_INSTAGRAM,
        social_youtube=SOCIAL_YOUTUBE,
        social_email=SOCIAL_EMAIL,
        api_spec_link=API_SPEC_LINK,
        embed_enabled=EMBED_ENABLED,
        permalink_enabled=PERMALINK_ENABLED,
    )
    return settings


def is_full_width(comic):
    """Resolve the full-width setting for a comic.

    A ``full_width`` key on the comic wins (whatever its value); comics
    without that key inherit ``FULL_WIDTH_DEFAULT``.
    """
    return comic['full_width'] if 'full_width' in comic else FULL_WIDTH_DEFAULT


def is_plain(comic):
    """Resolve the plain-mode setting for a comic.

    Comics that do not carry a ``plain`` key inherit ``PLAIN_DEFAULT``;
    otherwise the comic's own value is returned as-is.
    """
    if 'plain' not in comic:
        # Nothing set on the comic itself: use the site-wide default.
        return PLAIN_DEFAULT
    return comic['plain']


def format_comic_date(date_str):
    """Format a ``YYYY-MM-DD`` date string as 'Weekday, Month D, YYYY'.

    Args:
        date_str: Date text in ``YYYY-MM-DD`` form, e.g. ``'2025-01-05'``.

    Returns:
        The formatted date, e.g. ``'Sunday, January 5, 2025'``. If
        ``date_str`` cannot be parsed (malformed text or not a string) it is
        returned unchanged, so one bad entry does not break the page.
    """
    try:
        date_obj = datetime.strptime(date_str, '%Y-%m-%d')
        # date_obj.day is an int, so there is no leading zero to strip and no
        # need for the platform-specific %-d / %#d strftime flags.
        return f"{date_obj:%A, %B} {date_obj.day}, {date_obj:%Y}"
    except (ValueError, TypeError):
        # ValueError: text that is not a valid date; TypeError: non-string
        # input such as None. Anything else is a real bug and should surface.
        return date_str


def get_author_note_from_file(filename):
    """Render an author note stored as a markdown file.

    Args:
        filename: A bare filename, resolved inside ``content/author_notes/``,
                  or a name containing a path separator (``/`` or ``\\``),
                  resolved relative to ``content/``.

    Returns:
        The note converted to HTML, or ``None`` when the file does not exist.
    """
    content_root = os.path.join(os.path.dirname(__file__), 'content')
    has_separator = '/' in filename or '\\' in filename

    if has_separator:
        # Caller supplied its own sub-path under content/
        note_path = os.path.join(content_root, filename)
    else:
        # Bare filename: use the conventional author_notes directory
        note_path = os.path.join(content_root, 'author_notes', filename)

    try:
        with open(note_path, 'r', encoding='utf-8') as note_file:
            raw_markdown = note_file.read()
        return markdown.markdown(raw_markdown)
    except FileNotFoundError:
        return None


def enrich_comic(comic):
    """Return a copy of ``comic`` with computed display properties added.

    The input dict, and any lists stored in it, are left untouched: the
    ``filename`` and ``alt_text`` lists are copied before being normalised so
    that padding them never modifies the shared comic data.

    Keys added or overwritten on the copy:
        full_width, plain: resolved against the global defaults.
        formatted_date: human-readable form of ``comic['date']``.
        filenames: list of image filenames (empty if none is set).
        is_multi_image: True when ``filename`` was given as a list.
        alt_texts: list of alt texts, exactly as long as ``filenames``.
        filename, alt_text: first entry of the lists above, when non-empty.
        author_note, author_note_is_html: the rendered markdown note and
            True when ``author_note_md`` names a file that could be loaded;
            otherwise ``author_note`` is left as-is and the flag is False.

    Args:
        comic: Raw comic dict (must contain ``date``), or ``None``.

    Returns:
        The enriched copy, or ``None`` if ``comic`` is ``None``.
    """
    if comic is None:
        return None

    enriched = comic.copy()
    enriched['full_width'] = is_full_width(comic)
    enriched['plain'] = is_plain(comic)
    enriched['formatted_date'] = format_comic_date(comic['date'])

    # Normalize filename to a list for multi-image support. Copy list input
    # so the enriched comic never shares a mutable list with the source data.
    raw_filename = comic.get('filename')
    is_multi_image = isinstance(raw_filename, list)
    if is_multi_image:
        filenames = list(raw_filename)
    else:
        filenames = [comic['filename']] if 'filename' in comic else []
    image_count = len(filenames)

    # Normalize alt_text to a list matching filenames
    raw_alt_text = comic.get('alt_text')
    if isinstance(raw_alt_text, list):
        # Copy: this list is padded below, and padding the original in place
        # would permanently alter the source data (and hide this warning on
        # every later call).
        alt_texts = list(raw_alt_text)
        if len(alt_texts) != image_count:
            logger.warning(
                "Comic #%s: alt_text list length (%d) "
                "doesn't match filenames length (%d). "
                "Tip: Use a single string for alt_text to apply the same text to all images, "
                "or provide a list matching the number of images.",
                comic['number'], len(alt_texts), image_count,
            )
    else:
        # A single alt_text string applies to all images (intentional and valid)
        alt_texts = [comic.get('alt_text', '')] * image_count

    # Pad with empty strings if too short; drop unused extras if too long
    shortfall = image_count - len(alt_texts)
    if shortfall > 0:
        alt_texts.extend([''] * shortfall)
    else:
        del alt_texts[image_count:]

    enriched['filenames'] = filenames
    enriched['is_multi_image'] = is_multi_image
    enriched['alt_texts'] = alt_texts

    # Keep scalar filename and alt_text for backward compatibility (first image)
    if filenames:
        enriched['filename'] = filenames[0]
    if alt_texts:
        enriched['alt_text'] = alt_texts[0]

    # Prefer an explicitly specified markdown author note file. When it is
    # missing (or empty), fall back to the plain-text note in the comic data.
    note_file = comic.get('author_note_md')
    markdown_note = get_author_note_from_file(note_file) if note_file else None
    if markdown_note:
        enriched['author_note'] = markdown_note
    enriched['author_note_is_html'] = bool(markdown_note)

    return enriched


def get_comic_by_number(number):
    """Look up a comic by its ``number`` field.

    Returns:
        The enriched form of the first comic in ``COMICS`` whose number
        matches, or ``None`` if no comic has that number.
    """
    match = next((entry for entry in COMICS if entry['number'] == number), None)
    if match is None:
        return None
    return enrich_comic(match)


def get_latest_comic():
    """Return the enriched last entry of ``COMICS``, or ``None`` if it is empty."""
    if not COMICS:
        return None
    return enrich_comic(COMICS[-1])


@app.route('/')
def index():
    """Home page: render ``index.html`` with the latest comic.

    When there is no comic to show, the template receives ``comic=None`` and
    ``total_comics=0``.
    """
    latest = get_latest_comic()
    comic_count = len(COMICS) if latest else 0
    return render_template(
        'index.html',
        title='Latest Comic',
        comic=latest if latest else None,
        total_comics=comic_count,
    )


@app.route('/comic/<int:comic_id>')
def comic(comic_id):
    """Render the page for a single comic, or respond 404 if it is unknown."""
    entry = get_comic_by_number(comic_id)
    if not entry:
        abort(404)

    # Page title: the comic's own title when it has one, else "#<number>"
    # (matching client-side behavior)
    fallback_title = f"#{comic_id}"
    return render_template(
        'comic.html',
        title=entry.get('title', fallback_title),
        comic=entry,
        total_comics=len(COMICS),
    )


@app.route('/embed/<int:comic_id>')
def embed(comic_id):
    """Minimal, iframe-friendly view of a single comic.

    Responds 404 when embedding is disabled or the comic does not exist.
    """
    # Skip the lookup entirely when the feature is switched off
    entry = get_comic_by_number(comic_id) if EMBED_ENABLED else None
    if not entry:
        abort(404)

    # Page title: the comic's own title when it has one, else "#<number>"
    fallback_title = f"#{comic_id}"
    return render_template(
        'embed.html',
        title=entry.get('title', fallback_title),
        comic=entry,
    )


def group_comics_by_section(comics_list):
    """Split a list of comics into consecutive section groups.

    A comic carrying a ``section`` key opens a new group titled with that
    value; comics without the key join the group that is currently open.
    Comics appearing before the first ``section`` marker are collected under
    a ``None`` title.

    Returns:
        A list of ``(section_title, comics)`` tuples in input order. When
        sections are disabled, a single ``(None, comics_list)`` tuple.
    """
    if not SECTIONS_ENABLED:
        return [(None, comics_list)]

    groups = []
    for entry in comics_list:
        if 'section' in entry:
            # This comic is a section marker: open a fresh group with it
            groups.append((entry['section'], [entry]))
        elif groups:
            # Continue whichever group is currently open
            groups[-1][1].append(entry)
        else:
            # Leading comics with no section marker yet
            groups.append((None, [entry]))
    return groups


@app.route('/archive')
def archive():
    """Archive page: server-render the first batch of comics, newest first.

    Only the first ``initial_batch`` comics are rendered here; the template
    also receives ``total_comics`` and ``initial_batch``.
    """
    # Initial batch size for server-side rendering
    initial_batch = 24

    # Newest first. Slice BEFORE enriching: enrichment can read author-note
    # files from disk, so doing it for comics that are about to be discarded
    # is wasted work on every request.
    newest_first = list(reversed(COMICS))[:initial_batch]
    initial_comics = [enrich_comic(entry) for entry in newest_first]

    # Group by section if enabled
    sections = group_comics_by_section(initial_comics)

    return render_template('archive.html', title='Archive',
                         sections=sections,
                         total_comics=len(COMICS),
                         initial_batch=initial_batch)


@app.route('/about')
def about():
    """About page: render ``content/about.md`` as HTML inside ``page.html``.

    Shows a short placeholder paragraph when the markdown file is missing.
    """
    source_path = os.path.join(os.path.dirname(__file__), 'content', 'about.md')
    try:
        with open(source_path, 'r', encoding='utf-8') as about_file:
            body_html = markdown.markdown(about_file.read())
    except FileNotFoundError:
        body_html = '<p>About content not found.</p>'
    return render_template('page.html', title='About', content=body_html)


@app.route('/terms')
def terms():
    """Terms of Service page.

    ``content/terms.md`` is first rendered as a Jinja template, with
    ``copyright_name`` and ``social_email`` available as variables, and the
    result is then converted from markdown to HTML. A short placeholder
    paragraph is shown when the file is missing.
    """
    from jinja2 import Template
    # Read and render the markdown file with template variables
    terms_path = os.path.join(os.path.dirname(__file__), 'content', 'terms.md')
    try:
        with open(terms_path, 'r', encoding='utf-8') as f:
            content = f.read()
        # First render as Jinja template to substitute variables
        template = Template(content)
        rendered_content = template.render(
            # Same fallback as the global template context: without it the
            # terms would show a blank name when COPYRIGHT_NAME is unset.
            copyright_name=COPYRIGHT_NAME if COPYRIGHT_NAME else COMIC_NAME,
            social_email=SOCIAL_EMAIL if SOCIAL_EMAIL else '[Contact Email]'
        )
        # Then convert markdown to HTML
        html_content = markdown.markdown(rendered_content)
    except FileNotFoundError:
        html_content = '<p>Terms of Service content not found.</p>'
    return render_template('page.html', title='Terms of Service', content=html_content)


@app.route('/api/comics')
def api_comics():
    """API endpoint: every comic, enriched, as a JSON array in ``COMICS`` order."""
    enriched_comics = list(map(enrich_comic, COMICS))
    return jsonify(enriched_comics)


@app.route('/api/comics/<int:comic_id>')
def api_comic(comic_id):
    """API endpoint: one enriched comic as JSON, or a JSON 404 error body."""
    found = get_comic_by_number(comic_id)
    if found:
        return jsonify(found)
    return jsonify({'error': 'Comic not found'}), 404


@app.route('/api/archive')
def api_archive():
    """API endpoint: one page of the archive, newest first, grouped by section.

    Query parameters:
        page: 1-based page number (default 1; values below 1 are treated as 1).
        per_page: page size (default 24; clamped to the range 1-100).

    Returns:
        JSON with ``sections`` (a list of ``{'section_title', 'comics'}``
        objects covering the requested page), ``page``, ``per_page``,
        ``total_comics`` and ``has_more``.
    """
    page = request.args.get('page', 1, type=int)
    per_page = request.args.get('per_page', 24, type=int)

    # A page below 1 would produce negative slice bounds: an empty page that
    # still reports has_more=True. Treat it as the first page instead.
    page = max(page, 1)
    # Limit per_page to reasonable values
    per_page = min(max(per_page, 1), 100)

    # Newest first. Grouping only inspects each comic's 'section' key, which
    # enrichment copies through unchanged, so group the raw comics and enrich
    # just the ones that end up on the requested page.
    newest_first = list(reversed(COMICS))
    sections = group_comics_by_section(newest_first)

    # Calculate pagination
    total_comics = len(newest_first)
    start_idx = (page - 1) * per_page
    end_idx = start_idx + per_page

    # Handle section-aware pagination
    result_sections = []
    current_idx = 0

    for section_title, section_comics in sections:
        section_start = current_idx
        section_end = section_start + len(section_comics)

        # Check if this section overlaps with our requested page
        if section_end > start_idx and section_start < end_idx:
            # Translate the page window into offsets within this section
            comics_start = max(0, start_idx - section_start)
            comics_end = min(len(section_comics), end_idx - section_start)

            page_comics = section_comics[comics_start:comics_end]

            if page_comics:
                result_sections.append({
                    'section_title': section_title,
                    'comics': [enrich_comic(entry) for entry in page_comics]
                })

        current_idx = section_end

        # Stop if we've gone past the requested range
        if current_idx >= end_idx:
            break

    return jsonify({
        'sections': result_sections,
        'page': page,
        'per_page': per_page,
        'total_comics': total_comics,
        'has_more': end_idx < total_comics
    })


@app.route('/sitemap.xml')
def sitemap():
    """Serve ``sitemap.xml`` from the ``static`` directory as ``application/xml``."""
    from flask import send_from_directory

    static_dir = 'static'
    sitemap_name = 'sitemap.xml'
    return send_from_directory(static_dir, sitemap_name, mimetype='application/xml')


@app.route('/robots.txt')
def robots():
    """Generate robots.txt dynamically with the configured SITE_URL.

    Layout of the generated file:
      * a ``User-agent: *`` group that disallows ``/api/`` and allows the rest;
      * the sitemap location;
      * one ``Disallow: /`` group per known AI crawler / scraper.

    Returns:
        A ``text/plain`` response containing the robots.txt body.
    """
    from flask import Response

    # Crawlers that are blocked from the whole site
    ai_user_agents = (
        'GPTBot',
        'ChatGPT-User',
        'CCBot',
        'anthropic-ai',
        'Claude-Web',
        'Google-Extended',
        'PerplexityBot',
        'Omgilibot',
        'Diffbot',
        'Bytespider',
        'FacebookBot',
        'ImagesiftBot',
        'cohere-ai',
    )

    # The /api/ rule must sit inside the "User-agent: *" group, with no blank
    # line or Sitemap record in between: parsers that split records on blank
    # lines would otherwise ignore a rule that has no User-agent of its own.
    # It is listed before "Allow: /" so first-match parsers honour it too.
    header = f"""# Sunday Comics - Robots.txt
# Content protected by copyright. AI training prohibited.
# See terms: {SITE_URL}/terms

# Disallow API endpoints from indexing
User-agent: *
Disallow: /api/
Allow: /

# Sitemap location
Sitemap: {SITE_URL}/sitemap.xml

# Block AI crawlers and scrapers
"""
    ai_rules = '\n\n'.join(
        f'User-agent: {agent}\nDisallow: /' for agent in ai_user_agents
    )
    robots_txt = f'{header}{ai_rules}\n'
    return Response(robots_txt, mimetype='text/plain')


@app.route('/tdmrep.json')
def tdm_reservation():
    """TDM (Text and Data Mining) reservation: signals AI training prohibition.

    Returns a JSON object whose ``tdm`` entry holds ``reservation: 1`` and a
    ``policy`` URL pointing at the site's terms page.
    """
    # NOTE(review): the W3C TDMRep spec appears to expect this file under
    # /.well-known/tdmrep.json with a different schema; confirm whether this
    # path and shape are what consumers actually read.
    policy_url = f"{SITE_URL}/terms"
    reservation = {
        "reservation": 1,
        "policy": policy_url
    }
    return jsonify({"tdm": reservation})


@app.errorhandler(404)
def page_not_found(e):
    """404 handler: JSON body for ``/api/`` paths, the HTML 404 page otherwise."""
    is_api_request = request.path.startswith('/api/')
    if not is_api_request:
        # Regular pages get the rendered HTML error page
        return render_template('404.html', title='Page Not Found'), 404
    # API clients get a JSON error body
    return jsonify({'error': 'Not found'}), 404


if __name__ == '__main__':
    # Run the development server when executed directly.
    # PORT picks the listening port (default 3000); DEBUG enables debug mode
    # when set to 'true', '1' or 't' (case-insensitive).
    debug_setting = os.environ.get('DEBUG', 'False').lower()
    app.run(
        debug=debug_setting in ('true', '1', 't'),
        port=int(os.environ.get('PORT', 3000)),
    )