Files
sunday/data_loader.py
2025-11-15 19:37:52 +10:00

180 lines
5.5 KiB
Python

"""
Comic data loader for YAML-based comic management with caching.
This module scans the data/comics/ directory for .yaml and .yml files,
loads each comic's configuration, and builds the COMICS list.
Caching is used to speed up subsequent loads.
"""
import os
import pickle
import yaml
from pathlib import Path
# Fields every comic YAML file must define; checked in this order.
_REQUIRED_COMIC_FIELDS = ('number', 'filename', 'date', 'alt_text')


def _comic_files_signature(yaml_files):
    """Return a {file name: (mtime_ns, size)} snapshot of the given YAML paths."""
    signature = {}
    for yaml_file in yaml_files:
        info = yaml_file.stat()
        signature[yaml_file.name] = (info.st_mtime_ns, info.st_size)
    return signature


def _read_comics_cache(cache_file, signature):
    """Return the cached comic list, or None if the cache is unreadable or stale."""
    try:
        with open(cache_file, 'rb') as f:
            # NOTE(security): pickle.load can execute arbitrary code. This is
            # only acceptable while the comics directory is writable solely by
            # trusted users; do not point this loader at untrusted directories.
            payload = pickle.load(f)
    except Exception as e:
        # Deliberately broad: a corrupt or incompatible pickle can raise many
        # unrelated exception types, and any of them just means "cache miss".
        print(f"Warning: Failed to load cache: {e}")
        return None

    # Caches written in the older bare-list format, or for a different set of
    # files, are treated as stale so the caller rebuilds them.
    if not isinstance(payload, dict) or payload.get('signature') != signature:
        return None
    comics = payload.get('comics')
    if not isinstance(comics, list):
        return None
    return comics


def _load_comic_file(yaml_file):
    """Parse one comic YAML file; return its mapping, or None if it must be skipped."""
    try:
        with open(yaml_file, 'r', encoding='utf-8') as f:
            comic_data = yaml.safe_load(f)
    except yaml.YAMLError as e:
        print(f"Error parsing '{yaml_file.name}': {e}")
        return None
    except Exception as e:
        # Best-effort per file: one unreadable file must not abort the load.
        print(f"Error loading '{yaml_file.name}': {e}")
        return None

    if comic_data is None:
        print(f"Warning: '{yaml_file.name}' is empty, skipping")
        return None
    if not isinstance(comic_data, dict):
        # A top-level list or scalar cannot carry the required fields.
        print(f"Warning: '{yaml_file.name}' is not a YAML mapping, skipping")
        return None
    for field in _REQUIRED_COMIC_FIELDS:
        if field not in comic_data:
            print(f"Warning: '{yaml_file.name}' missing required '{field}' field, skipping")
            return None
    return comic_data


def load_comics_from_yaml(comics_dir='data/comics', use_cache=True):
    """
    Load all comic data from YAML files with optional caching.

    Every *.yaml / *.yml file in comics_dir is read, except files whose stem
    is TEMPLATE or README (case-insensitive). Files that are empty, are not a
    mapping, fail to parse, or lack one of the required fields ('number',
    'filename', 'date', 'alt_text') are skipped with a printed message.

    The cache is a pickle stored as '.comics_cache.pkl' inside comics_dir.
    It records the name, modification time and size of every YAML file it was
    built from, and is reused only while that snapshot matches the directory
    exactly, so edited, added, removed or renamed files all trigger a reload.
    Setting the environment variable DISABLE_COMIC_CACHE to 'true' disables
    the cache regardless of use_cache.

    Args:
        comics_dir: Path to directory containing comic YAML files
        use_cache: Whether to use cache (set to False to force reload)

    Returns:
        List of comic dictionaries, sorted by comic number. An empty list is
        returned when the directory is missing (it is created) or contains
        no comic YAML files.
    """
    comics_path = Path(comics_dir)
    if not comics_path.exists():
        print(f"Warning: Comics directory '{comics_dir}' does not exist. Creating it...")
        comics_path.mkdir(parents=True, exist_ok=True)
        return []

    # Cache file location
    cache_file = comics_path / '.comics_cache.pkl'

    # Check if caching is disabled via environment variable
    if os.getenv('DISABLE_COMIC_CACHE') == 'true':
        use_cache = False

    # Collect comic files, leaving out template and README files. Sorting
    # makes the processing order (and the warnings) deterministic.
    yaml_files = sorted(
        f
        for pattern in ('*.yaml', '*.yml')
        for f in comics_path.glob(pattern)
        if f.stem.upper() not in ('TEMPLATE', 'README')
    )
    if not yaml_files:
        print(f"Warning: No YAML files found in '{comics_dir}'")
        return []

    # Snapshot the files before reading them: if one changes while we load,
    # the stored snapshot is already out of date and the next call reloads.
    signature = None
    if use_cache:
        try:
            signature = _comic_files_signature(yaml_files)
        except OSError as e:
            print(f"Warning: Could not inspect comic files, skipping cache: {e}")
            use_cache = False

    if use_cache and cache_file.exists():
        cached = _read_comics_cache(cache_file, signature)
        if cached is not None:
            print(f"Loaded {len(cached)} comics from cache")
            return cached

    # Load from YAML files (cache miss or disabled)
    print(f"Loading {len(yaml_files)} comic files from YAML...")
    comics = []
    for yaml_file in yaml_files:
        comic_data = _load_comic_file(yaml_file)
        if comic_data is not None:
            comics.append(comic_data)

    # Sort by comic number
    comics.sort(key=lambda c: c['number'])

    # Save to cache; failing to write it is not fatal.
    if use_cache:
        try:
            with open(cache_file, 'wb') as f:
                pickle.dump({'signature': signature, 'comics': comics}, f)
            print(f"Saved {len(comics)} comics to cache")
        except Exception as e:
            print(f"Warning: Failed to save cache: {e}")
    return comics
def clear_cache(comics_dir='data/comics'):
    """
    Clear the comics cache file.

    Removes '.comics_cache.pkl' from comics_dir and prints what happened.

    Args:
        comics_dir: Path to directory containing comic YAML files

    Returns:
        True if a cache file was removed, False if there was none
    """
    cache_file = Path(comics_dir) / '.comics_cache.pkl'
    # Attempt the removal directly instead of checking exists() first, so a
    # concurrent removal between the check and the unlink cannot raise.
    try:
        cache_file.unlink()
    except FileNotFoundError:
        print("No cache file found")
        return False
    print("Cache cleared")
    return True
def validate_comics(comics):
    """
    Validate the loaded comics for common issues.

    Duplicate comic numbers fail validation. Gaps in the numbering are only
    reported with an informational message and do not fail validation.

    Args:
        comics: List of comic dictionaries, each with a 'number' key;
            the list may be in any order

    Returns:
        True if validation passes (an empty list always passes),
        False if any comic number appears more than once
    """
    if not comics:
        return True

    numbers = [c['number'] for c in comics]

    # Check for duplicate comic numbers in a single pass
    seen = set()
    duplicates = set()
    for number in numbers:
        if number in seen:
            duplicates.add(number)
        seen.add(number)
    if duplicates:
        print(f"Warning: Duplicate comic numbers found: {duplicates}")
        return False

    # Check for gaps in comic numbering (optional warning). Neighbours are
    # compared in sorted order so gaps are found even for unsorted input.
    ordered = sorted(numbers)
    for lower, upper in zip(ordered, ordered[1:]):
        if upper - lower > 1:
            print(f"Info: Gap in comic numbering between {lower} and {upper}")
    return True
if __name__ == '__main__':
    # Manual smoke test: load the default comics directory, validate the
    # result, and list every comic that was found.
    print("Loading comics from data/comics/...")
    comics = load_comics_from_yaml()
    print(f"Loaded {len(comics)} comics")

    if not validate_comics(comics):
        print("Validation failed!")
    else:
        print("Validation passed!")
        for entry in comics:
            # Comics without a title are shown by their number instead.
            label = entry.get('title', f"#{entry['number']}")
            print(f" - Comic {entry['number']}: {label} ({entry['date']})")