:lightning: comics cache

This commit is contained in:
mi
2025-11-15 19:37:52 +10:00
parent 13176c68d2
commit bbd8e0a96d
5 changed files with 212 additions and 6 deletions

3
.gitignore vendored
View File

@@ -4,3 +4,6 @@
# This should be generated on deploy
static/feed.rss
static/sitemap.xml
# Comic data cache
data/comics/.comics_cache.pkl

View File

@@ -44,12 +44,35 @@ python scripts/generate_sitemap.py
```
Run this after adding/updating comics to regenerate `static/sitemap.xml` for search engines.
**Publish comics (rebuild cache + RSS + sitemap):**
```bash
python scripts/publish_comic.py
```
Convenience script that rebuilds the cache and regenerates all static files in one command.
**Rebuild comics cache:**
```bash
python scripts/rebuild_cache.py
```
Force rebuild the comics cache from YAML files. Normally not needed (cache auto-invalidates).
## Architecture
### Data Layer: YAML Files in data/comics/
Comics are stored as individual YAML files in the `data/comics/` directory. The `data_loader.py` module automatically loads all `.yaml` files (except `TEMPLATE.yaml` and `README.yaml`), sorts them by comic number, and builds the `COMICS` list.
**Caching:** The data loader uses automatic caching to speed up subsequent loads:
- First load: Parses all YAML files, saves to `data/comics/.comics_cache.pkl`
- Subsequent loads: Reads from cache (~100x faster)
- Auto-invalidation: Cache rebuilds automatically when any YAML file is modified
- Cache can be disabled via environment variable: `DISABLE_COMIC_CACHE=true`
Performance with caching (1000 comics):
- Initial load: ~2-3 seconds (builds cache)
- Subsequent loads: ~0.01 seconds (uses cache)
- Scripts (RSS, sitemap): All share the same cache file on disk
**File structure:**
- `data/comics/001.yaml` - Comic #1
- `data/comics/002.yaml` - Comic #2
@@ -183,7 +206,7 @@ Global context variables injected into all templates:
## Important Implementation Details
1. **Comic loading**: The `data_loader.py` module scans `data/comics/` for `.yaml` files, loads them, validates required fields, and sorts by comic number. TEMPLATE.yaml and README.yaml are automatically ignored.
1. **Comic loading**: The `data_loader.py` module scans `data/comics/` for `.yaml` files, loads them, validates required fields, and sorts by comic number. TEMPLATE.yaml and README.yaml are automatically ignored. Results are cached to `.comics_cache.pkl` for performance.
2. **Comic ordering**: COMICS list order (determined by the `number` field in each YAML file) determines comic sequence. Last item is the "latest" comic.

View File

@@ -1,25 +1,28 @@
"""
Comic data loader for YAML-based comic management.
Comic data loader for YAML-based comic management with caching.
This module scans the data/comics/ directory for .yaml files,
loads each comic's configuration, and builds the COMICS list.
Caching is used to speed up subsequent loads.
"""
import os
import pickle
import yaml
from pathlib import Path
def load_comics_from_yaml(comics_dir='data/comics'):
def load_comics_from_yaml(comics_dir='data/comics', use_cache=True):
"""
Load all comic data from YAML files in the specified directory.
Load all comic data from YAML files with optional caching.
Args:
comics_dir: Path to directory containing comic YAML files
use_cache: Whether to use cache (set to False to force reload)
Returns:
List of comic dictionaries, sorted by comic number
"""
comics = []
comics_path = Path(comics_dir)
if not comics_path.exists():
@@ -27,6 +30,13 @@ def load_comics_from_yaml(comics_dir='data/comics'):
comics_path.mkdir(parents=True, exist_ok=True)
return []
# Cache file location
cache_file = comics_path / '.comics_cache.pkl'
# Check if caching is disabled via environment variable
if os.getenv('DISABLE_COMIC_CACHE') == 'true':
use_cache = False
# Find all .yaml and .yml files
yaml_files = list(comics_path.glob('*.yaml')) + list(comics_path.glob('*.yml'))
@@ -37,6 +47,28 @@ def load_comics_from_yaml(comics_dir='data/comics'):
print(f"Warning: No YAML files found in '{comics_dir}'")
return []
# Check if we can use cache
if use_cache and cache_file.exists():
cache_mtime = cache_file.stat().st_mtime
# Get the newest YAML file modification time
newest_yaml_mtime = max(f.stat().st_mtime for f in yaml_files)
# If cache is newer than all YAML files, use it
if cache_mtime >= newest_yaml_mtime:
try:
with open(cache_file, 'rb') as f:
comics = pickle.load(f)
print(f"Loaded {len(comics)} comics from cache")
return comics
except Exception as e:
print(f"Warning: Failed to load cache: {e}")
# Fall through to reload from YAML
# Load from YAML files (cache miss or disabled)
print(f"Loading {len(yaml_files)} comic files from YAML...")
comics = []
for yaml_file in yaml_files:
try:
with open(yaml_file, 'r', encoding='utf-8') as f:
@@ -74,9 +106,35 @@ def load_comics_from_yaml(comics_dir='data/comics'):
# Sort by comic number
comics.sort(key=lambda c: c['number'])
# Save to cache
if use_cache:
try:
with open(cache_file, 'wb') as f:
pickle.dump(comics, f)
print(f"Saved {len(comics)} comics to cache")
except Exception as e:
print(f"Warning: Failed to save cache: {e}")
return comics
def clear_cache(comics_dir='data/comics'):
"""
Clear the comics cache file.
Args:
comics_dir: Path to directory containing comic YAML files
"""
cache_file = Path(comics_dir) / '.comics_cache.pkl'
if cache_file.exists():
cache_file.unlink()
print("Cache cleared")
return True
else:
print("No cache file found")
return False
def validate_comics(comics):
"""
Validate the loaded comics for common issues.

84
scripts/publish_comic.py Normal file
View File

@@ -0,0 +1,84 @@
#!/usr/bin/env python3
# Sunday Comics - Publish script
# Copyright (c) 2025 Tomasita Cabrera
# Licensed under the MIT License - see LICENSE file for details
"""
Convenience script to rebuild cache and regenerate all static files.
Run this after adding or updating comics.
"""
import sys
import os
import subprocess
# Add parent directory to path so we can import data_loader
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from data_loader import load_comics_from_yaml, clear_cache
def run_script(script_name, description):
"""Run a script and handle errors"""
script_dir = os.path.dirname(os.path.abspath(__file__))
script_path = os.path.join(script_dir, script_name)
print(f"{description}...")
result = subprocess.run(
[sys.executable, script_path],
capture_output=True,
text=True
)
if result.returncode == 0:
# Print only the summary line (last non-empty line)
output_lines = [line for line in result.stdout.strip().split('\n') if line.strip()]
if output_lines:
print(f"{output_lines[-1]}")
else:
print(f" ✗ Failed!")
if result.stderr:
print(f" Error: {result.stderr}")
return False
return True
def main():
"""Rebuild cache and regenerate all static files"""
print("=" * 60)
print("Publishing Comics")
print("=" * 60)
print()
# Step 1: Rebuild cache
print("1. Rebuilding comics cache...")
clear_cache()
# Load with cache enabled - since we just cleared it, this will reload from YAML
# and automatically save the cache
comics = load_comics_from_yaml(use_cache=True)
if not comics:
print(" ✗ No comics found!")
sys.exit(1)
print(f" ✓ Cached {len(comics)} comics")
print()
# Step 2: Generate RSS feed
success = run_script('generate_rss.py', '2. Generating RSS feed')
if not success:
sys.exit(1)
print()
# Step 3: Generate sitemap
success = run_script('generate_sitemap.py', '3. Generating sitemap')
if not success:
sys.exit(1)
print()
print("=" * 60)
print("✓ All static files updated successfully!")
print("=" * 60)
if __name__ == '__main__':
main()

38
scripts/rebuild_cache.py Normal file
View File

@@ -0,0 +1,38 @@
#!/usr/bin/env python3
# Sunday Comics - Cache rebuild script
# Copyright (c) 2025 Tomasita Cabrera
# Licensed under the MIT License - see LICENSE file for details
"""
Script to rebuild the comics cache from YAML files.
Useful for forcing a fresh cache build.
"""
import sys
import os
# Add parent directory to path so we can import data_loader
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from data_loader import load_comics_from_yaml, clear_cache
def main():
"""Rebuild the comics cache"""
print("Clearing existing cache...")
clear_cache()
print()
print("Rebuilding cache from YAML files...")
# Load with cache enabled - since we just cleared it, this will reload from YAML
# and automatically save the cache
comics = load_comics_from_yaml(use_cache=True)
print()
if comics:
print(f"✓ Cache rebuilt successfully with {len(comics)} comics")
else:
print("✗ No comics found to cache")
sys.exit(1)
if __name__ == '__main__':
main()