482 lines
15 KiB
Python
482 lines
15 KiB
Python
# Sunday Comics - A simple webcomic platform
|
|
# Copyright (c) 2025 Tomasita Cabrera
|
|
# Licensed under the MIT License - see LICENSE file for details
|
|
|
|
import os
|
|
import logging
|
|
from datetime import datetime
|
|
from flask import Flask, render_template, abort, jsonify, request
|
|
from comics_data import (
|
|
COMICS, COMIC_NAME, COPYRIGHT_NAME, SITE_URL, FULL_WIDTH_DEFAULT, PLAIN_DEFAULT, LOGO_IMAGE, LOGO_MODE,
|
|
HEADER_IMAGE, FOOTER_IMAGE, BANNER_IMAGE, COMPACT_FOOTER, ARCHIVE_FULL_WIDTH, SECTIONS_ENABLED,
|
|
USE_COMIC_NAV_ICONS, USE_HEADER_NAV_ICONS, USE_FOOTER_SOCIAL_ICONS, USE_SHARE_ICONS, NEWSLETTER_ENABLED,
|
|
SOCIAL_INSTAGRAM, SOCIAL_YOUTUBE, SOCIAL_EMAIL, API_SPEC_LINK, EMBED_ENABLED, PERMALINK_ENABLED
|
|
)
|
|
import markdown
|
|
|
|
# Logging: the root logger is configured to emit WARNING and above.
logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)

# The Flask application object that every route below is registered on.
app = Flask(__name__)

# Configuration
# NOTE(review): when SECRET_KEY is not set in the environment, a fixed
# placeholder value is used -- set the variable for any real deployment.
app.config.update(SECRET_KEY=os.getenv('SECRET_KEY', 'your-secret-key'))
|
|
|
|
|
|
@app.after_request
def add_ai_blocking_headers(response):
    """Set an ``X-Robots-Tag`` header that discourages AI scraping.

    Registered with ``app.after_request``.

    Args:
        response: The outgoing response object.

    Returns:
        The same response object, with the header set to ``noai, noimageai``.
    """
    outgoing_headers = response.headers
    outgoing_headers['X-Robots-Tag'] = 'noai, noimageai'
    return response
|
|
|
|
|
|
@app.context_processor
def inject_global_settings():
    """Expose the site-wide settings to every template.

    Returns:
        dict: Template variable names mapped to the values imported from
        ``comics_data``. ``copyright_name`` falls back to the comic name when
        no copyright name is configured, and ``current_year`` is taken from
        the current local date.
    """
    return dict(
        comic_name=COMIC_NAME,
        copyright_name=COPYRIGHT_NAME or COMIC_NAME,
        current_year=datetime.now().year,
        site_url=SITE_URL,
        logo_image=LOGO_IMAGE,
        logo_mode=LOGO_MODE,
        header_image=HEADER_IMAGE,
        footer_image=FOOTER_IMAGE,
        banner_image=BANNER_IMAGE,
        compact_footer=COMPACT_FOOTER,
        archive_full_width=ARCHIVE_FULL_WIDTH,
        sections_enabled=SECTIONS_ENABLED,
        use_comic_nav_icons=USE_COMIC_NAV_ICONS,
        use_header_nav_icons=USE_HEADER_NAV_ICONS,
        use_footer_social_icons=USE_FOOTER_SOCIAL_ICONS,
        use_share_icons=USE_SHARE_ICONS,
        newsletter_enabled=NEWSLETTER_ENABLED,
        social_instagram=SOCIAL_INSTAGRAM,
        social_youtube=SOCIAL_YOUTUBE,
        social_email=SOCIAL_EMAIL,
        api_spec_link=API_SPEC_LINK,
        embed_enabled=EMBED_ENABLED,
        permalink_enabled=PERMALINK_ENABLED,
    )
|
|
|
|
|
|
def is_full_width(comic):
    """Return whether a comic should be rendered full width.

    Args:
        comic: Comic dict; an explicit ``'full_width'`` entry wins.

    Returns:
        The comic's own ``'full_width'`` value when present, otherwise the
        global ``FULL_WIDTH_DEFAULT``.
    """
    # A per-comic override takes precedence over the site-wide default.
    return comic['full_width'] if 'full_width' in comic else FULL_WIDTH_DEFAULT
|
|
|
|
|
|
def is_plain(comic):
    """Return whether a comic should be rendered in plain mode.

    Args:
        comic: Comic dict; an explicit ``'plain'`` entry wins.

    Returns:
        The comic's own ``'plain'`` value when present, otherwise the global
        ``PLAIN_DEFAULT``.
    """
    # A per-comic override takes precedence over the site-wide default.
    return comic['plain'] if 'plain' in comic else PLAIN_DEFAULT
|
|
|
|
|
|
def format_comic_date(date_str):
    """Format a ``YYYY-MM-DD`` date string as 'Day name, Month name day, year'.

    Args:
        date_str: Date in ``YYYY-MM-DD`` form.

    Returns:
        The formatted date (for example ``'Sunday, January 5, 2025'``). If the
        input cannot be parsed or formatted, it is returned unchanged.
    """
    try:
        date_obj = datetime.strptime(date_str, '%Y-%m-%d')
        # The day number is inserted as a plain integer so it has no leading
        # zero on any platform (strftime's no-padding flags are not portable).
        return date_obj.strftime(f'%A, %B {date_obj.day}, %Y')
    except (TypeError, ValueError):
        # TypeError: non-string input (e.g. None); ValueError: wrong format.
        # Only these are swallowed, so KeyboardInterrupt/SystemExit propagate.
        return date_str
|
|
|
|
|
|
def get_author_note_from_file(filename):
    """Read an author note from a markdown file and render it to HTML.

    Args:
        filename: Either a bare filename (looked up in content/author_notes/)
            or, when it contains a path separator, a path relative to
            content/.

    Returns:
        The rendered HTML string, or None when the file does not exist.
    """
    content_dir = os.path.join(os.path.dirname(__file__), 'content')

    # A separator ('/' or '\\') means the caller gave a path relative to
    # content/; otherwise the file lives in the author_notes subdirectory.
    has_separator = '/' in filename or '\\' in filename
    if has_separator:
        note_path = os.path.join(content_dir, filename)
    else:
        note_path = os.path.join(content_dir, 'author_notes', filename)

    try:
        with open(note_path, 'r', encoding='utf-8') as note_file:
            return markdown.markdown(note_file.read())
    except FileNotFoundError:
        return None
|
|
|
|
|
|
def enrich_comic(comic):
    """Return a copy of a comic dict with computed display properties added.

    The input dict and the lists it holds are never modified: ``filename`` and
    ``alt_text`` lists are copied before being padded or trimmed, so repeated
    calls do not alter the shared comic data.

    Args:
        comic: Comic dict (must contain ``'date'``), or None.

    Returns:
        None when ``comic`` is None; otherwise a new dict holding the original
        entries plus:

        * ``full_width`` / ``plain`` -- resolved display flags.
        * ``formatted_date`` -- human-readable form of ``comic['date']``.
        * ``filenames`` / ``is_multi_image`` -- ``filename`` normalized to a
          list, and whether it was given as a list.
        * ``alt_texts`` -- one alt text per filename.
        * ``filename`` / ``alt_text`` -- first entry of each list (left as in
          the input when the corresponding list is empty).
        * ``author_note_is_html`` -- True when ``author_note`` was replaced by
          HTML rendered from the ``author_note_md`` file.
    """
    if comic is None:
        return None

    enriched = comic.copy()
    enriched['full_width'] = is_full_width(comic)
    enriched['plain'] = is_plain(comic)
    enriched['formatted_date'] = format_comic_date(comic['date'])

    # Normalize filename to a list for multi-image support. The list is copied
    # so the enriched dict never shares a mutable list with the source data.
    raw_filename = comic.get('filename')
    if isinstance(raw_filename, list):
        enriched['filenames'] = list(raw_filename)
        enriched['is_multi_image'] = True
    else:
        enriched['filenames'] = [comic['filename']] if 'filename' in comic else []
        enriched['is_multi_image'] = False
    image_count = len(enriched['filenames'])

    # Normalize alt_text to a list matching filenames.
    raw_alt_text = comic.get('alt_text')
    if isinstance(raw_alt_text, list):
        # Copy first: padding below must not append to the caller's list.
        alt_texts = list(raw_alt_text)

        # Warn if the alt_text list doesn't match the filenames length.
        if len(alt_texts) != image_count:
            logger.warning(
                f"Comic #{comic['number']}: alt_text list length ({len(alt_texts)}) "
                f"doesn't match filenames length ({image_count}). "
                f"Tip: Use a single string for alt_text to apply the same text to all images, "
                f"or provide a list matching the number of images."
            )

        # Pad with empty strings if too short, trim if too long.
        alt_texts = (alt_texts + [''] * image_count)[:image_count]
    else:
        # A single alt_text string applies to all images (intentional and valid).
        alt_texts = [comic.get('alt_text', '')] * image_count
    enriched['alt_texts'] = alt_texts

    # Keep singular filename and alt_text for backward compatibility (first image).
    if enriched['filenames']:
        enriched['filename'] = enriched['filenames'][0]
    if enriched['alt_texts']:
        enriched['alt_text'] = enriched['alt_texts'][0]

    # Prefer an explicitly specified markdown author note file. If none is
    # specified, or the file is missing/empty, the plain-text author_note from
    # the comic data (if any) is left in place.
    note_file = comic.get('author_note_md')
    markdown_note = get_author_note_from_file(note_file) if note_file else None
    if markdown_note:
        enriched['author_note'] = markdown_note
        enriched['author_note_is_html'] = True
    else:
        enriched['author_note_is_html'] = False

    return enriched
|
|
|
|
|
|
def get_comic_by_number(number):
    """Look up a comic by its ``'number'`` value.

    Args:
        number: Comic number to search for in ``COMICS``.

    Returns:
        The enriched comic for the first entry whose number matches, or None
        when there is no such entry.
    """
    match = next((entry for entry in COMICS if entry['number'] == number), None)
    if match is None:
        return None
    return enrich_comic(match)
|
|
|
|
|
|
def get_latest_comic():
    """Return the enriched last entry of ``COMICS``, or None when it is empty."""
    if not COMICS:
        return None
    return enrich_comic(COMICS[-1])
|
|
|
|
|
|
@app.route('/')
def index():
    """Home page: render ``index.html`` with the latest comic.

    When there is no latest comic, the template receives ``comic=None`` and
    ``total_comics=0``.
    """
    latest = get_latest_comic()
    shown = latest if latest else None
    count = len(COMICS) if latest else 0
    return render_template(
        'index.html',
        title='Latest Comic',
        comic=shown,
        total_comics=count,
    )
|
|
|
|
|
|
@app.route('/comic/<int:comic_id>')
def comic(comic_id):
    """Render the page for one comic, or abort with 404 if it does not exist.

    Args:
        comic_id: Comic number taken from the URL.
    """
    entry = get_comic_by_number(comic_id)
    if not entry:
        abort(404)

    # Page title is the comic's own title when it has one, else "#<number>"
    # (matching client-side behavior).
    page_title = entry['title'] if 'title' in entry else f"#{comic_id}"
    return render_template(
        'comic.html',
        title=page_title,
        comic=entry,
        total_comics=len(COMICS),
    )
|
|
|
|
|
|
@app.route('/embed/<int:comic_id>')
def embed(comic_id):
    """Render the minimal, iframe-friendly view of one comic.

    Aborts with 404 when embedding is disabled (``EMBED_ENABLED`` is falsy) or
    when the comic does not exist.

    Args:
        comic_id: Comic number taken from the URL.
    """
    if not EMBED_ENABLED:
        abort(404)

    entry = get_comic_by_number(comic_id)
    if not entry:
        abort(404)

    # Page title is the comic's own title when it has one, else "#<number>".
    page_title = entry['title'] if 'title' in entry else f"#{comic_id}"
    return render_template('embed.html', title=page_title, comic=entry)
|
|
|
|
|
|
def group_comics_by_section(comics_list):
    """Split a list of comics into consecutive (section_title, comics) groups.

    A comic that has a ``'section'`` key opens a new group titled with that
    value; comics without the key join the group currently being built.
    Comics seen before any ``'section'`` key are grouped under the title None.

    Args:
        comics_list: Comics in the order they should be displayed.

    Returns:
        list: ``(section_title, comics)`` tuples. When ``SECTIONS_ENABLED`` is
        falsy this is the single tuple ``(None, comics_list)``.
    """
    if not SECTIONS_ENABLED:
        return [(None, comics_list)]

    grouped = []
    title = None
    bucket = []

    for entry in comics_list:
        if 'section' not in entry:
            # Continues whatever group is currently open.
            bucket.append(entry)
            continue

        # This comic opens a new group: flush the previous one if non-empty.
        if bucket:
            grouped.append((title, bucket))
        title = entry['section']
        bucket = [entry]

    # Flush the trailing group.
    if bucket:
        grouped.append((title, bucket))

    return grouped
|
|
|
|
|
|
@app.route('/archive')
def archive():
    """Archive page: render the first batch of comics, newest first.

    Only the first ``initial_batch`` comics are rendered here; the batch size
    and total count are passed to the template as well.
    """
    # Initial batch size for server-side rendering
    initial_batch = 24

    # Newest first: enrich every comic in reverse order, then keep the head.
    newest_first = list(map(enrich_comic, reversed(COMICS)))
    first_batch = newest_first[:initial_batch]

    return render_template(
        'archive.html',
        title='Archive',
        sections=group_comics_by_section(first_batch),
        total_comics=len(COMICS),
        initial_batch=initial_batch,
    )
|
|
|
|
|
|
@app.route('/about')
def about():
    """About page: render ``content/about.md`` as HTML inside ``page.html``.

    A placeholder paragraph is shown when the markdown file does not exist.
    """
    source_path = os.path.join(os.path.dirname(__file__), 'content', 'about.md')
    try:
        with open(source_path, 'r', encoding='utf-8') as source:
            html_content = markdown.markdown(source.read())
    except FileNotFoundError:
        html_content = '<p>About content not found.</p>'
    return render_template('page.html', title='About', content=html_content)
|
|
|
|
|
|
@app.route('/terms')
def terms():
    """Terms of Service page: render ``content/terms.md`` inside ``page.html``.

    The markdown file is first rendered as a Jinja template with two
    variables, then converted from markdown to HTML:

    * ``copyright_name`` -- ``COPYRIGHT_NAME``, falling back to ``COMIC_NAME``
      when it is empty (the same rule the global template context uses).
    * ``social_email`` -- ``SOCIAL_EMAIL``, or the literal ``[Contact Email]``
      when it is empty.

    A placeholder paragraph is shown when the markdown file does not exist.
    """
    from jinja2 import Template

    terms_path = os.path.join(os.path.dirname(__file__), 'content', 'terms.md')
    try:
        with open(terms_path, 'r', encoding='utf-8') as f:
            content = f.read()
        # First render as Jinja template to substitute variables
        rendered_content = Template(content).render(
            # Fall back to the comic name so the terms never show a blank
            # owner when no separate copyright name is configured.
            copyright_name=COPYRIGHT_NAME if COPYRIGHT_NAME else COMIC_NAME,
            social_email=SOCIAL_EMAIL if SOCIAL_EMAIL else '[Contact Email]',
        )
        # Then convert markdown to HTML
        html_content = markdown.markdown(rendered_content)
    except FileNotFoundError:
        html_content = '<p>Terms of Service content not found.</p>'
    return render_template('page.html', title='Terms of Service', content=html_content)
|
|
|
|
|
|
@app.route('/api/comics')
def api_comics():
    """API endpoint: return every comic, enriched, as a JSON array."""
    enriched_comics = list(map(enrich_comic, COMICS))
    return jsonify(enriched_comics)
|
|
|
|
|
|
@app.route('/api/comics/<int:comic_id>')
def api_comic(comic_id):
    """API endpoint: return one comic as JSON.

    Args:
        comic_id: Comic number taken from the URL.

    Returns:
        The enriched comic as JSON, or a JSON error body with status 404 when
        the comic does not exist.
    """
    entry = get_comic_by_number(comic_id)
    if entry:
        return jsonify(entry)
    return jsonify({'error': 'Comic not found'}), 404
|
|
|
|
|
|
@app.route('/api/archive')
def api_archive():
    """API endpoint: return one page of the archive, newest comics first.

    Query parameters:
        page: 1-based page number (default 1; values below 1 are treated as 1).
        per_page: Page size (default 24; clamped to the range 1-100).

    Returns:
        JSON object with:

        * ``sections`` -- list of ``{'section_title', 'comics'}`` objects
          holding the comics on this page, split by section.
        * ``page`` / ``per_page`` -- the effective (clamped) values.
        * ``total_comics`` -- number of comics overall.
        * ``has_more`` -- whether comics remain after this page.
    """
    page = request.args.get('page', 1, type=int)
    per_page = request.args.get('per_page', 24, type=int)

    # Limit both values to a reasonable range. Without the lower bound on
    # page, page <= 0 yields negative offsets: an empty result that still
    # reports has_more.
    page = max(page, 1)
    per_page = min(max(per_page, 1), 100)

    # Reverse order to show newest first
    all_comics = [enrich_comic(comic) for comic in reversed(COMICS)]

    # Group by section if enabled
    sections = group_comics_by_section(all_comics)

    # Half-open range [start_idx, end_idx) of overall comic positions wanted.
    total_comics = len(all_comics)
    start_idx = (page - 1) * per_page
    end_idx = start_idx + per_page

    # Walk the sections, tracking each one's overall position, and keep the
    # part of every section that falls inside the requested range.
    result_sections = []
    section_start = 0
    for section_title, section_comics in sections:
        section_end = section_start + len(section_comics)

        # Does this section overlap the requested page?
        if section_end > start_idx and section_start < end_idx:
            # Translate the overall range into indices local to this section.
            first = max(0, start_idx - section_start)
            last = min(len(section_comics), end_idx - section_start)
            page_comics = section_comics[first:last]
            if page_comics:
                result_sections.append({
                    'section_title': section_title,
                    'comics': page_comics,
                })

        section_start = section_end

        # Stop once we've gone past the requested range
        if section_start >= end_idx:
            break

    return jsonify({
        'sections': result_sections,
        'page': page,
        'per_page': per_page,
        'total_comics': total_comics,
        'has_more': end_idx < total_comics,
    })
|
|
|
|
|
|
@app.route('/sitemap.xml')
def sitemap():
    """Serve ``sitemap.xml`` from the ``static`` directory as XML."""
    from flask import send_from_directory

    sitemap_response = send_from_directory(
        'static',
        'sitemap.xml',
        mimetype='application/xml',
    )
    return sitemap_response
|
|
|
|
|
|
@app.route('/robots.txt')
def robots():
    """Generate robots.txt dynamically, embedding the configured SITE_URL.

    The file allows general crawling, points at the sitemap, disallows
    ``/api/``, and then disallows the whole site for each listed AI
    crawler user agent.

    Returns:
        A ``text/plain`` response containing the robots.txt body.
    """
    from flask import Response

    # User agents that are denied access to the entire site, in output order.
    blocked_agents = (
        'GPTBot',
        'ChatGPT-User',
        'CCBot',
        'anthropic-ai',
        'Claude-Web',
        'Google-Extended',
        'PerplexityBot',
        'Omgilibot',
        'Diffbot',
        'Bytespider',
        'FacebookBot',
        'ImagesiftBot',
        'cohere-ai',
    )

    preamble_lines = [
        "# Sunday Comics - Robots.txt",
        "# Content protected by copyright. AI training prohibited.",
        f"# See terms: {SITE_URL}/terms",
        "",
        "User-agent: *",
        "Allow: /",
        "",
        "# Sitemap location",
        f"Sitemap: {SITE_URL}/sitemap.xml",
        "",
        "# Disallow API endpoints from indexing",
        "Disallow: /api/",
        "",
        "# Block AI crawlers and scrapers",
    ]
    preamble = "\n".join(preamble_lines) + "\n"

    # One "User-agent / Disallow" stanza per agent, separated by blank lines.
    stanzas = "\n".join(
        f"User-agent: {agent}\nDisallow: /\n" for agent in blocked_agents
    )

    return Response(preamble + stanzas, mimetype='text/plain')
|
|
|
|
|
|
@app.route('/tdmrep.json')
def tdm_reservation():
    """TDM (Text and Data Mining) reservation - signals AI training prohibition.

    Returns:
        JSON object with a ``tdm`` entry holding ``reservation: 1`` and a
        ``policy`` URL pointing at the site's terms page.
    """
    reservation = {
        "reservation": 1,
        "policy": f"{SITE_URL}/terms",
    }
    return jsonify({"tdm": reservation})
|
|
|
|
|
|
@app.errorhandler(404)
def page_not_found(e):
    """Handle 404 errors with JSON for API paths and HTML for everything else.

    Args:
        e: The error passed in by Flask (unused).

    Returns:
        A ``(body, 404)`` tuple.
    """
    is_api_request = request.path.startswith('/api/')
    if is_api_request:
        body = jsonify({'error': 'Not found'})
    else:
        body = render_template('404.html', title='Page Not Found')
    return body, 404
|
|
|
|
|
|
if __name__ == '__main__':
    # Development entry point. PORT (default 3000) and DEBUG (default off;
    # enabled by 'true', '1' or 't', case-insensitive) come from the environment.
    listen_port = int(os.environ.get('PORT', 3000))
    debug_setting = os.environ.get('DEBUG', 'False').lower()
    app.run(debug=debug_setting in ('true', '1', 't'), port=listen_port)