Compare commits

...

2 Commits

Author  SHA1        Message                        Date
mi      61aa0aaba7  :lightning: lazy load archive  2025-11-15 20:01:06 +10:00
mi      bbd8e0a96d  :lightning: comics cache       2025-11-15 19:37:52 +10:00
8 changed files with 509 additions and 11 deletions

.gitignore (vendored, 3 changes)

@@ -4,3 +4,6 @@
# This should be generated on deploy
static/feed.rss
static/sitemap.xml
# Comic data cache
data/comics/.comics_cache.pkl

(documentation file, name not shown)

@@ -44,12 +44,35 @@ python scripts/generate_sitemap.py
```
Run this after adding/updating comics to regenerate `static/sitemap.xml` for search engines.
**Publish comics (rebuild cache + RSS + sitemap):**
```bash
python scripts/publish_comic.py
```
Convenience script that rebuilds the cache and regenerates all static files in one command.
**Rebuild comics cache:**
```bash
python scripts/rebuild_cache.py
```
Forces a rebuild of the comics cache from the YAML files. Normally not needed, since the cache auto-invalidates when a YAML file changes.
## Architecture
### Data Layer: YAML Files in data/comics/
Comics are stored as individual YAML files in the `data/comics/` directory. The `data_loader.py` module automatically loads all `.yaml` files (except `TEMPLATE.yaml` and `README.yaml`), sorts them by comic number, and builds the `COMICS` list.
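A minimal sketch of that selection rule (illustrative only; the real implementation is in the `data_loader.py` diff further down):

```python
from pathlib import Path

# Illustrative only: load every .yaml file except the two documented exclusions
SKIP = {'TEMPLATE.yaml', 'README.yaml'}
yaml_files = [p for p in Path('data/comics').glob('*.yaml') if p.name not in SKIP]
```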
**Caching:** The data loader uses automatic caching to speed up subsequent loads:
- First load: Parses all YAML files, saves to `data/comics/.comics_cache.pkl`
- Subsequent loads: Reads from cache (~100x faster)
- Auto-invalidation: Cache rebuilds automatically when any YAML file is modified
- Cache can be disabled via an environment variable: `DISABLE_COMIC_CACHE=true` (see the sketch below)
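A one-off reload that bypasses the cache might look like this (a sketch using the two switches described above):

```python
import os
from data_loader import load_comics_from_yaml

# Either flip the environment variable (checked inside the loader)...
os.environ['DISABLE_COMIC_CACHE'] = 'true'
comics = load_comics_from_yaml()

# ...or pass use_cache=False directly to force a fresh YAML parse
comics = load_comics_from_yaml(use_cache=False)
```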
**Performance** with caching (1000 comics):
- Initial load: ~2-3 seconds (builds cache)
- Subsequent loads: ~0.01 seconds (uses cache)
- Scripts (RSS, sitemap): All share the same cache file on disk
**File structure:**
- `data/comics/001.yaml` - Comic #1
- `data/comics/002.yaml` - Comic #2
@@ -183,7 +206,7 @@ Global context variables injected into all templates:
## Important Implementation Details
1. **Comic loading**: The `data_loader.py` module scans `data/comics/` for `.yaml` files, loads them, validates required fields, and sorts by comic number. TEMPLATE.yaml and README.yaml are automatically ignored.
1. **Comic loading**: The `data_loader.py` module scans `data/comics/` for `.yaml` files, loads them, validates required fields, and sorts by comic number. TEMPLATE.yaml and README.yaml are automatically ignored. Results are cached to `.comics_cache.pkl` for performance.
2. **Comic ordering**: The order of the COMICS list, determined by the `number` field in each YAML file, sets the comic sequence. The last item is the "latest" comic (see the one-liner below).
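A hypothetical illustration of that invariant (`COMICS` is the list built by `data_loader.py`):

```python
# COMICS is sorted ascending by number, so the newest comic is last
latest = COMICS[-1]
```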

app.py (71 changes)

@@ -262,14 +262,22 @@ def group_comics_by_section(comics_list):
@app.route('/archive')
def archive():
"""Archive page showing all comics"""
# Initial batch size for server-side rendering
initial_batch = 24
# Reverse order to show newest first
comics = [enrich_comic(comic) for comic in reversed(COMICS)]
all_comics = [enrich_comic(comic) for comic in reversed(COMICS)]
# Only take the first batch for initial render
initial_comics = all_comics[:initial_batch]
# Group by section if enabled
sections = group_comics_by_section(comics)
sections = group_comics_by_section(initial_comics)
return render_template('archive.html', title='Archive',
sections=sections)
sections=sections,
total_comics=len(COMICS),
initial_batch=initial_batch)
@app.route('/about')
@@ -323,6 +331,63 @@ def api_comic(comic_id):
return jsonify(comic)
@app.route('/api/archive')
def api_archive():
"""API endpoint - returns paginated archive data"""
page = request.args.get('page', 1, type=int)
per_page = request.args.get('per_page', 24, type=int)
# Limit per_page to reasonable values
per_page = min(max(per_page, 1), 100)
# Reverse order to show newest first
all_comics = [enrich_comic(comic) for comic in reversed(COMICS)]
# Group by section if enabled
sections = group_comics_by_section(all_comics)
# Calculate pagination
total_comics = len(all_comics)
start_idx = (page - 1) * per_page
end_idx = start_idx + per_page
# Handle section-aware pagination
result_sections = []
current_idx = 0
for section_title, section_comics in sections:
section_start = current_idx
section_end = current_idx + len(section_comics)
# Check if this section overlaps with our requested page
if section_end > start_idx and section_start < end_idx:
# Calculate which comics from this section to include
comics_start = max(0, start_idx - section_start)
comics_end = min(len(section_comics), end_idx - section_start)
paginated_comics = section_comics[comics_start:comics_end]
if paginated_comics:
result_sections.append({
'section_title': section_title,
'comics': paginated_comics
})
current_idx = section_end
# Stop if we've gone past the requested range
if current_idx >= end_idx:
break
return jsonify({
'sections': result_sections,
'page': page,
'per_page': per_page,
'total_comics': total_comics,
'has_more': end_idx < total_comics
})
@app.route('/sitemap.xml')
def sitemap():
"""Serve the static sitemap.xml file"""

data_loader.py

@@ -1,25 +1,28 @@
"""
Comic data loader for YAML-based comic management.
Comic data loader for YAML-based comic management with caching.
This module scans the data/comics/ directory for .yaml files,
loads each comic's configuration, and builds the COMICS list.
Caching is used to speed up subsequent loads.
"""
import os
import pickle
import yaml
from pathlib import Path
def load_comics_from_yaml(comics_dir='data/comics'):
def load_comics_from_yaml(comics_dir='data/comics', use_cache=True):
"""
Load all comic data from YAML files in the specified directory.
Load all comic data from YAML files with optional caching.
Args:
comics_dir: Path to directory containing comic YAML files
use_cache: Whether to use cache (set to False to force reload)
Returns:
List of comic dictionaries, sorted by comic number
"""
comics = []
comics_path = Path(comics_dir)
if not comics_path.exists():
@@ -27,6 +30,13 @@ def load_comics_from_yaml(comics_dir='data/comics'):
comics_path.mkdir(parents=True, exist_ok=True)
return []
# Cache file location
cache_file = comics_path / '.comics_cache.pkl'
# Check if caching is disabled via environment variable
if os.getenv('DISABLE_COMIC_CACHE') == 'true':
use_cache = False
# Find all .yaml and .yml files
yaml_files = list(comics_path.glob('*.yaml')) + list(comics_path.glob('*.yml'))
@@ -37,6 +47,28 @@ def load_comics_from_yaml(comics_dir='data/comics'):
print(f"Warning: No YAML files found in '{comics_dir}'")
return []
# Check if we can use cache
if use_cache and cache_file.exists():
cache_mtime = cache_file.stat().st_mtime
# Get the newest YAML file modification time
newest_yaml_mtime = max(f.stat().st_mtime for f in yaml_files)
# If cache is newer than all YAML files, use it
if cache_mtime >= newest_yaml_mtime:
try:
with open(cache_file, 'rb') as f:
comics = pickle.load(f)
print(f"Loaded {len(comics)} comics from cache")
return comics
except Exception as e:
print(f"Warning: Failed to load cache: {e}")
# Fall through to reload from YAML
# Load from YAML files (cache miss or disabled)
print(f"Loading {len(yaml_files)} comic files from YAML...")
comics = []
for yaml_file in yaml_files:
try:
with open(yaml_file, 'r', encoding='utf-8') as f:
@@ -74,9 +106,35 @@ def load_comics_from_yaml(comics_dir='data/comics'):
# Sort by comic number
comics.sort(key=lambda c: c['number'])
# Save to cache
if use_cache:
try:
with open(cache_file, 'wb') as f:
pickle.dump(comics, f)
print(f"Saved {len(comics)} comics to cache")
except Exception as e:
print(f"Warning: Failed to save cache: {e}")
return comics
def clear_cache(comics_dir='data/comics'):
"""
Clear the comics cache file.
Args:
comics_dir: Path to directory containing comic YAML files
"""
cache_file = Path(comics_dir) / '.comics_cache.pkl'
if cache_file.exists():
cache_file.unlink()
print("Cache cleared")
return True
else:
print("No cache file found")
return False
def validate_comics(comics):
"""
Validate the loaded comics for common issues.

scripts/publish_comic.py (new file, 84 lines)

@@ -0,0 +1,84 @@
#!/usr/bin/env python3
# Sunday Comics - Publish script
# Copyright (c) 2025 Tomasita Cabrera
# Licensed under the MIT License - see LICENSE file for details
"""
Convenience script to rebuild cache and regenerate all static files.
Run this after adding or updating comics.
"""
import sys
import os
import subprocess
# Add parent directory to path so we can import data_loader
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from data_loader import load_comics_from_yaml, clear_cache
def run_script(script_name, description):
"""Run a script and handle errors"""
script_dir = os.path.dirname(os.path.abspath(__file__))
script_path = os.path.join(script_dir, script_name)
print(f"{description}...")
result = subprocess.run(
[sys.executable, script_path],
capture_output=True,
text=True
)
if result.returncode == 0:
# Print only the summary line (last non-empty line)
output_lines = [line for line in result.stdout.strip().split('\n') if line.strip()]
if output_lines:
print(f"{output_lines[-1]}")
else:
print(f" ✗ Failed!")
if result.stderr:
print(f" Error: {result.stderr}")
return False
return True
def main():
"""Rebuild cache and regenerate all static files"""
print("=" * 60)
print("Publishing Comics")
print("=" * 60)
print()
# Step 1: Rebuild cache
print("1. Rebuilding comics cache...")
clear_cache()
# Load with cache enabled - since we just cleared it, this will reload from YAML
# and automatically save the cache
comics = load_comics_from_yaml(use_cache=True)
if not comics:
print(" ✗ No comics found!")
sys.exit(1)
print(f" ✓ Cached {len(comics)} comics")
print()
# Step 2: Generate RSS feed
success = run_script('generate_rss.py', '2. Generating RSS feed')
if not success:
sys.exit(1)
print()
# Step 3: Generate sitemap
success = run_script('generate_sitemap.py', '3. Generating sitemap')
if not success:
sys.exit(1)
print()
print("=" * 60)
print("✓ All static files updated successfully!")
print("=" * 60)
if __name__ == '__main__':
main()

scripts/rebuild_cache.py (new file, 38 lines)

@@ -0,0 +1,38 @@
#!/usr/bin/env python3
# Sunday Comics - Cache rebuild script
# Copyright (c) 2025 Tomasita Cabrera
# Licensed under the MIT License - see LICENSE file for details
"""
Script to rebuild the comics cache from YAML files.
Useful for forcing a fresh cache build.
"""
import sys
import os
# Add parent directory to path so we can import data_loader
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from data_loader import load_comics_from_yaml, clear_cache
def main():
"""Rebuild the comics cache"""
print("Clearing existing cache...")
clear_cache()
print()
print("Rebuilding cache from YAML files...")
# Load with cache enabled - since we just cleared it, this will reload from YAML
# and automatically save the cache
comics = load_comics_from_yaml(use_cache=True)
print()
if comics:
print(f"✓ Cache rebuilt successfully with {len(comics)} comics")
else:
print("✗ No comics found to cache")
sys.exit(1)
if __name__ == '__main__':
main()

static/js/archive-lazy-load.js (new file, 221 lines)

@@ -0,0 +1,221 @@
/**
* Sunday Comics - Archive Lazy Loading
* Implements infinite scroll for the archive page
*/
(function() {
'use strict';
let currentPage = 1;
let isLoading = false;
let hasMore = true;
const perPage = 24;
// Get elements
const archiveContent = document.querySelector('.archive-content');
if (!archiveContent) return; // Not on archive page
    const totalComics = parseInt(archiveContent.dataset.totalComics || '0', 10);
    const initialBatch = parseInt(archiveContent.dataset.initialBatch || '24', 10);
// Calculate if there are more comics to load
hasMore = totalComics > initialBatch;
// Create loading indicator
const loadingIndicator = document.createElement('div');
loadingIndicator.className = 'archive-loading';
loadingIndicator.innerHTML = '<p>Loading more comics...</p>';
loadingIndicator.style.display = 'none';
loadingIndicator.style.textAlign = 'center';
loadingIndicator.style.padding = '2rem';
archiveContent.parentNode.insertBefore(loadingIndicator, archiveContent.nextSibling);
/**
* Load more comics from the API
*/
async function loadMoreComics() {
if (isLoading || !hasMore) return;
isLoading = true;
loadingIndicator.style.display = 'block';
try {
currentPage++;
const response = await fetch(`/api/archive?page=${currentPage}&per_page=${perPage}`);
if (!response.ok) {
throw new Error(`HTTP error! status: ${response.status}`);
}
const data = await response.json();
// Add new comics to the DOM
appendComics(data.sections);
// Update state
hasMore = data.has_more;
if (!hasMore) {
loadingIndicator.innerHTML = '<p>End of archive</p>';
setTimeout(() => {
loadingIndicator.style.display = 'none';
}, 2000);
}
        } catch (error) {
            console.error('Error loading more comics:', error);
            // Roll back so the failed page is retried on the next scroll,
            // instead of being silently skipped
            currentPage--;
            loadingIndicator.innerHTML = '<p>Error loading comics. Please try again.</p>';
            setTimeout(() => {
                loadingIndicator.style.display = 'none';
                isLoading = false;
            }, 3000);
            return;
}
isLoading = false;
loadingIndicator.style.display = 'none';
}
/**
* Append comics to the archive
* @param {Array} sections - Array of section objects with title and comics
*/
function appendComics(sections) {
const archiveFullWidth = document.querySelector('.archive-content-fullwidth') !== null;
const sectionsEnabled = document.querySelector('.section-header') !== null;
sections.forEach(section => {
const sectionTitle = section.section_title;
const comics = section.comics;
// Check if we need to create a new section or append to existing
let targetGrid;
if (sectionsEnabled && sectionTitle) {
                // Check if the section already exists; findSectionByTitle
                // returns that section's grid directly
                const existingGrid = findSectionByTitle(sectionTitle);
                if (existingGrid) {
                    // Append to the existing section's grid
                    targetGrid = existingGrid;
} else {
// Create new section
const sectionHeader = document.createElement('div');
sectionHeader.className = 'section-header';
sectionHeader.innerHTML = `<h2>${sectionTitle}</h2>`;
archiveContent.appendChild(sectionHeader);
targetGrid = document.createElement('div');
targetGrid.className = 'archive-grid' + (archiveFullWidth ? ' archive-grid-fullwidth' : '');
archiveContent.appendChild(targetGrid);
}
} else {
// No sections or no title - use the last grid or create one
targetGrid = archiveContent.querySelector('.archive-grid:last-of-type');
if (!targetGrid) {
targetGrid = document.createElement('div');
targetGrid.className = 'archive-grid' + (archiveFullWidth ? ' archive-grid-fullwidth' : '');
archiveContent.appendChild(targetGrid);
}
}
// Add each comic to the grid
comics.forEach(comic => {
const item = createArchiveItem(comic, archiveFullWidth);
targetGrid.appendChild(item);
});
});
}
/**
     * Find the grid of an existing section by its title
     * @param {string} title - Section title to find
     * @returns {Element|null} - The section's grid element, or null
*/
function findSectionByTitle(title) {
const sectionHeaders = archiveContent.querySelectorAll('.section-header h2');
for (const header of sectionHeaders) {
if (header.textContent.trim() === title) {
// Return the grid following this header
let nextEl = header.parentElement.nextElementSibling;
while (nextEl && !nextEl.classList.contains('archive-grid')) {
nextEl = nextEl.nextElementSibling;
}
                return nextEl || null;
}
}
return null;
}
/**
* Create an archive item element
* @param {Object} comic - Comic data
* @param {boolean} fullWidth - Whether using full width layout
* @returns {Element} - The archive item element
*/
function createArchiveItem(comic, fullWidth) {
const item = document.createElement('div');
item.className = 'archive-item' + (fullWidth ? ' archive-item-fullwidth' : '');
const link = document.createElement('a');
link.href = `/comic/${comic.number}`;
const img = document.createElement('img');
img.src = `/static/images/thumbs/${comic.filename}`;
img.alt = comic.title || `#${comic.number}`;
img.loading = 'lazy';
img.onerror = function() {
this.onerror = null;
this.src = '/static/images/thumbs/default.jpg';
};
const info = document.createElement('div');
info.className = 'archive-info';
if (!fullWidth) {
const title = document.createElement('h3');
title.textContent = `#${comic.number}${comic.title ? ': ' + comic.title : ''}`;
info.appendChild(title);
}
const date = document.createElement('p');
date.className = 'archive-date';
date.textContent = comic.date;
info.appendChild(date);
link.appendChild(img);
link.appendChild(info);
item.appendChild(link);
return item;
}
/**
* Check if user has scrolled near the bottom
*/
function checkScrollPosition() {
if (isLoading || !hasMore) return;
const scrollTop = window.pageYOffset || document.documentElement.scrollTop;
const windowHeight = window.innerHeight;
const documentHeight = document.documentElement.scrollHeight;
// Trigger when user is within 1000px of the bottom
if (scrollTop + windowHeight >= documentHeight - 1000) {
loadMoreComics();
}
}
// Set up scroll listener
let scrollTimeout;
window.addEventListener('scroll', function() {
if (scrollTimeout) {
clearTimeout(scrollTimeout);
}
scrollTimeout = setTimeout(checkScrollPosition, 100);
});
// Check initial scroll position (in case page is short)
setTimeout(checkScrollPosition, 500);
})();

templates/archive.html

@@ -7,10 +7,12 @@
<div class="page-header{% if archive_full_width %} page-header-fullwidth{% endif %}">
<h1>Comic Archive</h1>
<p>Browse all {% set total = namespace(count=0) %}{% for section_title, section_comics in sections %}{% set total.count = total.count + section_comics|length %}{% endfor %}{{ total.count }} comics</p>
<p>Browse all {{ total_comics }} comics</p>
</div>
<section class="archive-content{% if archive_full_width %} archive-content-fullwidth{% endif %}">
<section class="archive-content{% if archive_full_width %} archive-content-fullwidth{% endif %}"
data-total-comics="{{ total_comics }}"
data-initial-batch="{{ initial_batch }}">
{% for section_title, section_comics in sections %}
{% if section_title and sections_enabled %}
<div class="section-header">
@@ -43,3 +45,7 @@
<div class="container"> {# Reopen container for footer #}
{% endif %}
{% endblock %}
{% block extra_js %}
<script src="{{ url_for('static', filename='js/archive-lazy-load.js') }}"></script>
{% endblock %}