Files
sunday/app.py
2025-11-18 13:22:56 +10:00

507 lines
16 KiB
Python

# Sunday Comics - A simple webcomic platform
# Copyright (c) 2025 Tomasita Cabrera
# Licensed under the MIT License - see LICENSE file for details
import os
import logging
from datetime import datetime
from flask import Flask, render_template, abort, jsonify, request
from comics_data import (
COMICS, COMIC_NAME, COPYRIGHT_NAME, SITE_URL, CDN_URL, FULL_WIDTH_DEFAULT, PLAIN_DEFAULT, LOGO_IMAGE, LOGO_MODE,
HEADER_IMAGE, FOOTER_IMAGE, BANNER_IMAGE, COMPACT_FOOTER, ARCHIVE_FULL_WIDTH, SECTIONS_ENABLED,
USE_COMIC_NAV_ICONS, USE_HEADER_NAV_ICONS, USE_FOOTER_SOCIAL_ICONS, USE_SHARE_ICONS,
NEWSLETTER_ENABLED, NEWSLETTER_HTML,
SOCIAL_LINKS, API_SPEC_LINK, EMBED_ENABLED, PERMALINK_ENABLED
)
import markdown
from version import __version__
# Configure logging: the root logger is set to emit WARNING and above only
logging.basicConfig(level=logging.WARNING)
# Module-level logger, named after this module
logger = logging.getLogger(__name__)
# The Flask application object that every route below is registered on
app = Flask(__name__)
# Configuration
# NOTE(review): when the SECRET_KEY environment variable is unset this falls
# back to the literal placeholder 'your-secret-key' - confirm that real
# deployments always provide SECRET_KEY.
app.config['SECRET_KEY'] = os.environ.get('SECRET_KEY', 'your-secret-key')
@app.after_request
def add_ai_blocking_headers(response):
    """Tag every outgoing response with an opt-out signal for AI crawlers.

    Args:
        response: The response object about to be sent.

    Returns:
        The same response, with the ``X-Robots-Tag`` header set.
    """
    robots_tag_value = 'noai, noimageai'
    response.headers['X-Robots-Tag'] = robots_tag_value
    return response
@app.template_filter('cdn_static')
def cdn_static(filename):
    """Resolve a static asset path to the URL templates should link to.

    Args:
        filename: Path to the static file (e.g., 'css/style.css')

    Returns:
        ``{CDN_URL}/static/{filename}`` when CDN_URL is set, otherwise the
        URL Flask builds for the local ``static`` endpoint.
    """
    from flask import url_for

    # No CDN configured: serve from this app's own static endpoint.
    if not CDN_URL:
        return url_for('static', filename=filename)
    return f"{CDN_URL}/static/{filename}"
@app.context_processor
def inject_global_settings():
    """Expose site-wide settings to every rendered template.

    Returns:
        A dict of template variables. ``copyright_name`` falls back to the
        comic name when COPYRIGHT_NAME is empty, and ``current_year`` is read
        from the local clock on each call.
    """
    return dict(
        comic_name=COMIC_NAME,
        copyright_name=COPYRIGHT_NAME or COMIC_NAME,
        current_year=datetime.now().year,
        site_url=SITE_URL,
        cdn_url=CDN_URL,
        logo_image=LOGO_IMAGE,
        logo_mode=LOGO_MODE,
        header_image=HEADER_IMAGE,
        footer_image=FOOTER_IMAGE,
        banner_image=BANNER_IMAGE,
        compact_footer=COMPACT_FOOTER,
        archive_full_width=ARCHIVE_FULL_WIDTH,
        sections_enabled=SECTIONS_ENABLED,
        use_comic_nav_icons=USE_COMIC_NAV_ICONS,
        use_header_nav_icons=USE_HEADER_NAV_ICONS,
        use_footer_social_icons=USE_FOOTER_SOCIAL_ICONS,
        use_share_icons=USE_SHARE_ICONS,
        newsletter_enabled=NEWSLETTER_ENABLED,
        newsletter_html=NEWSLETTER_HTML,
        social_links=SOCIAL_LINKS,
        api_spec_link=API_SPEC_LINK,
        embed_enabled=EMBED_ENABLED,
        permalink_enabled=PERMALINK_ENABLED,
        version=__version__,
    )
def is_full_width(comic):
    """Resolve the full-width setting for a comic.

    Args:
        comic: Comic dict; may carry its own 'full_width' value.

    Returns:
        The comic's own 'full_width' value when the key is present,
        otherwise the global FULL_WIDTH_DEFAULT.
    """
    # A per-comic value always wins over the site-wide default.
    return comic['full_width'] if 'full_width' in comic else FULL_WIDTH_DEFAULT
def is_plain(comic):
    """Resolve the plain-mode setting for a comic.

    Args:
        comic: Comic dict; may carry its own 'plain' value.

    Returns:
        The comic's own 'plain' value when the key is present, otherwise the
        global PLAIN_DEFAULT.
    """
    # A per-comic value always wins over the site-wide default.
    return comic['plain'] if 'plain' in comic else PLAIN_DEFAULT
def format_comic_date(date_str):
    """Format a date string (YYYY-MM-DD) as 'Day name, Month name day, year'.

    Args:
        date_str: Date in ``YYYY-MM-DD`` form, e.g. '2025-01-05'.

    Returns:
        The formatted date, e.g. 'Sunday, January 5, 2025'. If ``date_str``
        is not a string or does not parse as a valid ``YYYY-MM-DD`` date, it
        is returned unchanged.
    """
    try:
        date_obj = datetime.strptime(date_str, '%Y-%m-%d')
    except (TypeError, ValueError):
        # TypeError: input is not a string; ValueError: wrong format or an
        # impossible calendar date. Anything else is a real bug and should
        # propagate instead of being silently swallowed.
        return date_str
    # date_obj.day is an int, so there is no leading zero to strip and no
    # need for the platform-specific %-d / %#d strftime flags.
    return f"{date_obj:%A, %B} {date_obj.day}, {date_obj:%Y}"
def get_author_note_from_file(filename):
    """Render an author note stored as a markdown file under content/.

    Args:
        filename: Either a bare filename (looked up in content/author_notes/)
            or a value containing a path separator, which is treated as a
            path relative to content/.

    Returns:
        The file's markdown converted to HTML, or None if the file does not
        exist.
    """
    content_root = os.path.join(os.path.dirname(__file__), 'content')
    has_separator = '/' in filename or '\\' in filename
    if has_separator:
        # Caller supplied a path: resolve it directly under content/.
        note_path = os.path.join(content_root, filename)
    else:
        # Bare filename: it lives in the author_notes directory.
        note_path = os.path.join(content_root, 'author_notes', filename)

    try:
        with open(note_path, 'r', encoding='utf-8') as note_file:
            raw_markdown = note_file.read()
    except FileNotFoundError:
        return None
    return markdown.markdown(raw_markdown)
def enrich_comic(comic):
    """Return a copy of a comic entry with computed, template-ready fields.

    The input dict is never modified. List values are copied before they are
    normalized, so padding the alt texts cannot leak back into the source
    data (which would also hide the length-mismatch warning on later calls).

    Keys added or normalized on the returned copy:
        full_width, plain: resolved via is_full_width / is_plain.
        formatted_date: human-readable form of comic['date'].
        filenames: list of image filenames (empty if there is no 'filename').
        is_multi_image: True when 'filename' was given as a list.
        alt_texts: list of alt texts, exactly as long as ``filenames``.
        filename, alt_text: first image / first alt text, kept for backward
            compatibility (only overwritten when the lists are non-empty).
        author_note_is_html: True when ``author_note`` was replaced with HTML
            rendered from the markdown file named by 'author_note_md'.

    Args:
        comic: Comic dict (must contain 'date'), or None.

    Returns:
        The enriched copy, or None if ``comic`` is None.
    """
    if comic is None:
        return None

    enriched = comic.copy()
    enriched['full_width'] = is_full_width(comic)
    enriched['plain'] = is_plain(comic)
    enriched['formatted_date'] = format_comic_date(comic['date'])

    # Normalize filename to a list for multi-image support. The list is
    # copied so the enriched dict never shares it with the source comic.
    raw_filename = comic.get('filename')
    if isinstance(raw_filename, list):
        filenames = list(raw_filename)
        enriched['is_multi_image'] = True
    else:
        filenames = [comic['filename']] if 'filename' in comic else []
        enriched['is_multi_image'] = False
    enriched['filenames'] = filenames
    image_count = len(filenames)

    # Normalize alt_text to a list matching filenames.
    raw_alt_text = comic.get('alt_text')
    if isinstance(raw_alt_text, list):
        # Copy first: the padding below must not mutate the source comic.
        alt_texts = list(raw_alt_text)
        if len(alt_texts) != image_count:
            logger.warning(
                "Comic #%s: alt_text list length (%d) "
                "doesn't match filenames length (%d). "
                "Tip: Use a single string for alt_text to apply the same text to all images, "
                "or provide a list matching the number of images.",
                comic.get('number', '?'), len(alt_texts), image_count,
            )
        # Trim extras (they would never be used) and pad any shortfall with
        # empty strings; a negative multiplier yields an empty list.
        alt_texts = alt_texts[:image_count]
        alt_texts += [''] * (image_count - len(alt_texts))
    else:
        # A single alt_text string applies to all images (intentional and valid).
        alt_texts = [comic.get('alt_text', '')] * image_count
    enriched['alt_texts'] = alt_texts

    # Keep singular filename / alt_text for backward compatibility (first image).
    if filenames:
        enriched['filename'] = filenames[0]
    if alt_texts:
        enriched['alt_text'] = alt_texts[0]

    # A markdown author note replaces the plain-text one only when a file is
    # named, found, and renders to non-empty HTML; otherwise any plain-text
    # 'author_note' from the comic data is left as is.
    enriched['author_note_is_html'] = False
    note_file = comic.get('author_note_md')
    if note_file:
        markdown_note = get_author_note_from_file(note_file)
        if markdown_note:
            enriched['author_note'] = markdown_note
            enriched['author_note_is_html'] = True

    return enriched
def get_comic_by_number(number):
    """Look up a comic by its 'number' field.

    Args:
        number: The comic number to search for.

    Returns:
        The enriched first comic in COMICS whose 'number' equals ``number``,
        or None when there is no such comic.
    """
    match = next((entry for entry in COMICS if entry['number'] == number), None)
    if match is None:
        return None
    return enrich_comic(match)
def get_latest_comic():
    """Return the enriched last entry of COMICS, or None if COMICS is empty."""
    if not COMICS:
        return None
    return enrich_comic(COMICS[-1])
@app.route('/')
def index():
    """Home page - renders index.html with the latest comic.

    When there is no latest comic the template receives ``comic=None`` and
    ``total_comics=0``.
    """
    latest = get_latest_comic()
    comic_count = len(COMICS) if latest else 0
    return render_template(
        'index.html',
        title='Latest Comic',
        comic=latest or None,
        total_comics=comic_count,
    )
@app.route('/comic/<int:comic_id>')
def comic(comic_id):
    """Render the page for a single comic, or abort with 404 if it is unknown.

    Args:
        comic_id: Comic number taken from the URL.
    """
    entry = get_comic_by_number(comic_id)
    if not entry:
        abort(404)

    # Untitled comics are labelled "#<number>" (matching client-side behavior).
    fallback_title = f"#{comic_id}"
    return render_template(
        'comic.html',
        title=entry.get('title', fallback_title),
        comic=entry,
        total_comics=len(COMICS),
    )
@app.route('/embed/<int:comic_id>')
def embed(comic_id):
    """Render the minimal, iframe-friendly view of a single comic.

    Aborts with 404 when embedding is disabled (EMBED_ENABLED is falsy) or
    when no comic has the requested number.

    Args:
        comic_id: Comic number taken from the URL.
    """
    # The lookup is skipped entirely when embedding is switched off.
    entry = get_comic_by_number(comic_id) if EMBED_ENABLED else None
    if not entry:
        abort(404)

    # Untitled comics are labelled "#<number>".
    fallback_title = f"#{comic_id}"
    return render_template(
        'embed.html',
        title=entry.get('title', fallback_title),
        comic=entry,
    )
def group_comics_by_section(comics_list):
    """Split a list of comics into consecutive (section_title, comics) groups.

    A comic carrying a 'section' key opens a new group titled with that
    value; the comics that follow it in ``comics_list`` join the group until
    the next comic with a 'section' key. Comics listed before the first such
    marker form a group whose title is None.

    NOTE(review): grouping follows the order of ``comics_list`` as given.
    archive() and api_comics() pass newest-first lists - confirm the section
    markers in the comic data are placed with that ordering in mind.

    Args:
        comics_list: Comics in the order they should be grouped.

    Returns:
        A list of (section_title, comics) tuples. When SECTIONS_ENABLED is
        falsy this is a single (None, comics_list) tuple.
    """
    if not SECTIONS_ENABLED:
        return [(None, comics_list)]

    grouped = []
    title, members = None, []
    for entry in comics_list:
        if 'section' not in entry:
            # Continues whatever group is currently open.
            members.append(entry)
            continue
        # Section marker: close the open group (if it has any comics) and
        # start a new one headed by this comic.
        if members:
            grouped.append((title, members))
        title, members = entry['section'], [entry]

    # Flush the final group.
    if members:
        grouped.append((title, members))
    return grouped
@app.route('/archive')
def archive():
    """Archive page - server-renders the first batch of comics, newest first.

    Only the comics that are actually rendered are enriched. Enrichment can
    read author-note files from disk, so doing it for every comic on each
    request was wasted work.
    """
    # Initial batch size for server-side rendering
    initial_batch = 24

    # Newest first, cut down to the first batch *before* enriching.
    newest_first = list(reversed(COMICS))[:initial_batch]
    initial_comics = [enrich_comic(entry) for entry in newest_first]

    # Group by section if enabled
    sections = group_comics_by_section(initial_comics)

    return render_template('archive.html', title='Archive',
                           sections=sections,
                           total_comics=len(COMICS),
                           initial_batch=initial_batch)
@app.route('/about')
def about():
    """About page - renders content/about.md as HTML inside page.html.

    If the markdown file is missing, a short placeholder paragraph is shown
    instead.
    """
    about_path = os.path.join(os.path.dirname(__file__), 'content', 'about.md')
    try:
        with open(about_path, 'r', encoding='utf-8') as source:
            raw_markdown = source.read()
    except FileNotFoundError:
        html_content = '<p>About content not found.</p>'
    else:
        html_content = markdown.markdown(raw_markdown)
    return render_template('page.html', title='About', content=html_content)
@app.route('/terms')
def terms():
    """Terms of Service page.

    content/terms.md is first rendered as a Jinja template with the variables
    ``copyright_name`` and ``social_email``, then converted from markdown to
    HTML and shown inside page.html. If the file is missing, a short
    placeholder paragraph is shown instead.
    """
    from jinja2 import Template
    import comics_data

    # SOCIAL_EMAIL is not among the names imported from comics_data at the
    # top of this file, so referencing it directly raised NameError whenever
    # terms.md existed. Look it up defensively instead; a missing or empty
    # value falls back to the placeholder below.
    social_email = getattr(comics_data, 'SOCIAL_EMAIL', None)

    terms_path = os.path.join(os.path.dirname(__file__), 'content', 'terms.md')
    try:
        with open(terms_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except FileNotFoundError:
        html_content = '<p>Terms of Service content not found.</p>'
    else:
        # First render as a Jinja template to substitute variables ...
        rendered_content = Template(content).render(
            # Same fallback as the global template context: use the comic
            # name when no separate copyright name is configured.
            copyright_name=COPYRIGHT_NAME or COMIC_NAME,
            social_email=social_email or '[Contact Email]',
        )
        # ... then convert markdown to HTML.
        html_content = markdown.markdown(rendered_content)
    return render_template('page.html', title='Terms of Service', content=html_content)
@app.route('/api/comics')
def api_comics():
    """API endpoint - returns comics as JSON, optionally paginated by section.

    Query parameters:
        page: 1-based page number. Defaults to 1; values below 1 are treated
            as 1.
        per_page: Page size. Defaults to 24 and is clamped to 1..100.
        group_by_section: 'true', '1' or 'yes' (case-insensitive) requests
            the sectioned response shape.

    Returns:
        With none of the parameters given: a plain JSON array of all enriched
        comics in COMICS order (backward compatible).
        Otherwise: an object with ``sections`` (a list of
        ``{section_title, comics}`` covering the requested page, newest
        first), ``page``, ``per_page``, ``total_comics`` and ``has_more``.
    """
    # Check for pagination parameters
    page = request.args.get('page', type=int)
    per_page = request.args.get('per_page', type=int)
    group_by_section = request.args.get('group_by_section', 'false').lower() in ('true', '1', 'yes')

    # If no pagination requested, return simple array (backward compatible)
    if page is None and per_page is None and not group_by_section:
        return jsonify([enrich_comic(comic) for comic in COMICS])

    # Pagination requested. Clamp page to >= 1: a negative page used to give
    # negative slice bounds, i.e. an empty page that still claimed has_more.
    page = max(page or 1, 1)
    # Limit per_page to reasonable values
    per_page = min(max(per_page or 24, 1), 100)

    # Reverse order to show newest first
    all_comics = [enrich_comic(comic) for comic in reversed(COMICS)]

    # Group by section if enabled globally or requested via parameter
    sections = group_comics_by_section(all_comics) if (SECTIONS_ENABLED or group_by_section) else [(None, all_comics)]

    # Calculate pagination as a half-open window [start_idx, end_idx) over
    # the flat newest-first list.
    total_comics = len(all_comics)
    start_idx = (page - 1) * per_page
    end_idx = start_idx + per_page

    # Section-aware pagination: walk the sections while tracking each one's
    # position in the flat list, keeping the part that overlaps the window.
    result_sections = []
    current_idx = 0
    for section_title, section_comics in sections:
        section_start = current_idx
        section_end = current_idx + len(section_comics)

        # Check if this section overlaps with our requested page
        if section_end > start_idx and section_start < end_idx:
            # Translate the window into indexes local to this section
            comics_start = max(0, start_idx - section_start)
            comics_end = min(len(section_comics), end_idx - section_start)
            paginated_comics = section_comics[comics_start:comics_end]
            if paginated_comics:
                result_sections.append({
                    'section_title': section_title,
                    'comics': paginated_comics
                })

        current_idx = section_end
        # Stop if we've gone past the requested range
        if current_idx >= end_idx:
            break

    return jsonify({
        'sections': result_sections,
        'page': page,
        'per_page': per_page,
        'total_comics': total_comics,
        'has_more': end_idx < total_comics
    })
@app.route('/api/comics/<int:comic_id>')
def api_comic(comic_id):
    """API endpoint - returns one comic as JSON.

    Args:
        comic_id: Comic number taken from the URL.

    Returns:
        The enriched comic as JSON, or a JSON error body with status 404 when
        no comic has that number.
    """
    found = get_comic_by_number(comic_id)
    if found:
        return jsonify(found)
    return jsonify({'error': 'Comic not found'}), 404
@app.route('/sitemap.xml')
def sitemap():
    """Serve sitemap.xml from the 'static' directory as application/xml."""
    from flask import send_from_directory

    static_dir = 'static'
    sitemap_name = 'sitemap.xml'
    return send_from_directory(static_dir, sitemap_name, mimetype='application/xml')
@app.route('/robots.txt')
def robots():
    """Generate robots.txt dynamically so its links use the configured SITE_URL.

    The file allows general crawling, advertises the sitemap, disallows the
    /api/ endpoints, and then disallows the whole site for each listed
    AI crawler user agent.

    Returns:
        A text/plain response containing the robots.txt body.
    """
    from flask import Response

    # One "User-agent / Disallow: /" rule pair is emitted per agent, in order.
    blocked_agents = (
        'GPTBot',
        'ChatGPT-User',
        'CCBot',
        'anthropic-ai',
        'Claude-Web',
        'Google-Extended',
        'PerplexityBot',
        'Omgilibot',
        'Diffbot',
        'Bytespider',
        'FacebookBot',
        'ImagesiftBot',
        'cohere-ai',
    )
    preamble = (
        "# Sunday Comics - Robots.txt\n"
        "# Content protected by copyright. AI training prohibited.\n"
        f"# See terms: {SITE_URL}/terms\n"
        "User-agent: *\n"
        "Allow: /\n"
        "# Sitemap location\n"
        f"Sitemap: {SITE_URL}/sitemap.xml\n"
        "# Disallow API endpoints from indexing\n"
        "Disallow: /api/\n"
        "# Block AI crawlers and scrapers\n"
    )
    agent_rules = ''.join(
        f"User-agent: {agent}\nDisallow: /\n" for agent in blocked_agents
    )
    return Response(preamble + agent_rules, mimetype='text/plain')
@app.route('/tdmrep.json')
def tdm_reservation():
    """TDM (Text and Data Mining) reservation - signals AI training prohibition.

    Returns:
        JSON with a ``tdm`` object holding ``reservation`` set to 1 and a
        ``policy`` URL pointing at this site's /terms page.
    """
    policy_url = f"{SITE_URL}/terms"
    payload = {"tdm": {"reservation": 1, "policy": policy_url}}
    return jsonify(payload)
@app.errorhandler(404)
def page_not_found(e):
    """404 error handler - JSON for API paths, the HTML 404 page otherwise.

    Args:
        e: The error passed in by Flask (unused).

    Returns:
        A (body, 404) tuple.
    """
    is_api_request = request.path.startswith('/api/')
    if not is_api_request:
        # Regular pages get the rendered HTML error page.
        return render_template('404.html', title='Page Not Found'), 404
    return jsonify({'error': 'Not found'}), 404
if __name__ == '__main__':
    # Script entry point: start Flask's built-in server.
    # PORT selects the port (default 3000); DEBUG turns on debug mode when it
    # is 'true', '1' or 't' (case-insensitive).
    run_options = {
        'port': int(os.environ.get('PORT', 3000)),
        'debug': os.environ.get('DEBUG', 'False').lower() in ('true', '1', 't'),
    }
    app.run(debug=run_options['debug'], port=run_options['port'])