anti-AI measures

mi
2025-11-15 15:43:32 +10:00
parent 1dac042d25
commit 14415dfcd2
5 changed files with 322 additions and 1 deletion

app.py

@@ -217,6 +217,28 @@ def about():
    return render_template('page.html', title='About', content=html_content)

@app.route('/terms')
def terms():
    """Terms of Service page"""
    from jinja2 import Template
    # Read and render the markdown file with template variables
    terms_path = os.path.join(os.path.dirname(__file__), 'content', 'terms.md')
    try:
        with open(terms_path, 'r', encoding='utf-8') as f:
            content = f.read()
        # First render as a Jinja template to substitute variables
        template = Template(content)
        rendered_content = template.render(
            copyright_name=COPYRIGHT_NAME,
            social_email=SOCIAL_EMAIL if SOCIAL_EMAIL else '[Contact Email]'
        )
        # Then convert markdown to HTML
        html_content = markdown.markdown(rendered_content)
    except FileNotFoundError:
        html_content = '<p>Terms of Service content not found.</p>'
    return render_template('page.html', title='Terms of Service', content=html_content)

@app.route('/api/comics')
def api_comics():
    """API endpoint - returns all comics as JSON"""
@@ -244,6 +266,9 @@ def robots():
"""Generate robots.txt dynamically with correct SITE_URL"""
from flask import Response
robots_txt = f"""# Sunday Comics - Robots.txt
# Content protected by copyright. AI training prohibited.
# See terms: {SITE_URL}/terms
User-agent: *
Allow: /
@@ -252,6 +277,46 @@ Sitemap: {SITE_URL}/sitemap.xml
# Disallow API endpoints from indexing
Disallow: /api/

# Block AI crawlers and scrapers
User-agent: GPTBot
Disallow: /

User-agent: ChatGPT-User
Disallow: /

User-agent: CCBot
Disallow: /

User-agent: anthropic-ai
Disallow: /

User-agent: Claude-Web
Disallow: /

User-agent: Google-Extended
Disallow: /

User-agent: PerplexityBot
Disallow: /

User-agent: Omgilibot
Disallow: /

User-agent: Diffbot
Disallow: /

User-agent: Bytespider
Disallow: /

User-agent: FacebookBot
Disallow: /

User-agent: ImagesiftBot
Disallow: /

User-agent: cohere-ai
Disallow: /
"""
    return Response(robots_txt, mimetype='text/plain')
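The robots() view builds the file as an f-string, so {SITE_URL} is substituted at request time, and serves it as text/plain; each listed AI crawler gets its own User-agent group with a blanket Disallow: /. A hedged check with Flask's test client, assuming the view is routed at /robots.txt (the decorator sits outside this hunk) and that the Flask instance is importable as app from app.py:

# Assumption: the route path is /robots.txt and `app` is the Flask instance in app.py.
from app import app

with app.test_client() as client:
    resp = client.get('/robots.txt')
    body = resp.get_data(as_text=True)
    assert resp.status_code == 200
    assert resp.mimetype == 'text/plain'
    # A few of the blocked crawlers, each expected to appear with a full Disallow.
    for bot in ('GPTBot', 'CCBot', 'Bytespider'):
        assert f'User-agent: {bot}' in body
    assert 'Disallow: /' in body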