""" Comic data loader for YAML-based comic management with caching. This module scans the data/comics/ directory for .yaml files, loads each comic's configuration, and builds the COMICS list. Caching is used to speed up subsequent loads. """ import os import pickle import yaml from pathlib import Path def load_comics_from_yaml(comics_dir='data/comics', use_cache=True): """ Load all comic data from YAML files with optional caching. Args: comics_dir: Path to directory containing comic YAML files use_cache: Whether to use cache (set to False to force reload) Returns: List of comic dictionaries, sorted by comic number """ comics_path = Path(comics_dir) if not comics_path.exists(): print(f"Warning: Comics directory '{comics_dir}' does not exist. Creating it...") comics_path.mkdir(parents=True, exist_ok=True) return [] # Cache file location cache_file = comics_path / '.comics_cache.pkl' # Check if caching is disabled via environment variable if os.getenv('DISABLE_COMIC_CACHE') == 'true': use_cache = False # Find all .yaml and .yml files yaml_files = list(comics_path.glob('*.yaml')) + list(comics_path.glob('*.yml')) # Filter out template and README files yaml_files = [f for f in yaml_files if f.stem.upper() not in ('TEMPLATE', 'README')] if not yaml_files: print(f"Warning: No YAML files found in '{comics_dir}'") return [] # Check if we can use cache if use_cache and cache_file.exists(): cache_mtime = cache_file.stat().st_mtime # Get the newest YAML file modification time newest_yaml_mtime = max(f.stat().st_mtime for f in yaml_files) # If cache is newer than all YAML files, use it if cache_mtime >= newest_yaml_mtime: try: with open(cache_file, 'rb') as f: comics = pickle.load(f) print(f"Loaded {len(comics)} comics from cache") return comics except Exception as e: print(f"Warning: Failed to load cache: {e}") # Fall through to reload from YAML # Load from YAML files (cache miss or disabled) print(f"Loading {len(yaml_files)} comic files from YAML...") comics = [] for yaml_file in yaml_files: try: with open(yaml_file, 'r', encoding='utf-8') as f: comic_data = yaml.safe_load(f) if comic_data is None: print(f"Warning: '{yaml_file.name}' is empty, skipping") continue if 'number' not in comic_data: print(f"Warning: '{yaml_file.name}' missing required 'number' field, skipping") continue if 'filename' not in comic_data: print(f"Warning: '{yaml_file.name}' missing required 'filename' field, skipping") continue if 'date' not in comic_data: print(f"Warning: '{yaml_file.name}' missing required 'date' field, skipping") continue if 'alt_text' not in comic_data: print(f"Warning: '{yaml_file.name}' missing required 'alt_text' field, skipping") continue comics.append(comic_data) except yaml.YAMLError as e: print(f"Error parsing '{yaml_file.name}': {e}") continue except Exception as e: print(f"Error loading '{yaml_file.name}': {e}") continue # Sort by comic number comics.sort(key=lambda c: c['number']) # Save to cache if use_cache: try: with open(cache_file, 'wb') as f: pickle.dump(comics, f) print(f"Saved {len(comics)} comics to cache") except Exception as e: print(f"Warning: Failed to save cache: {e}") return comics def clear_cache(comics_dir='data/comics'): """ Clear the comics cache file. Args: comics_dir: Path to directory containing comic YAML files """ cache_file = Path(comics_dir) / '.comics_cache.pkl' if cache_file.exists(): cache_file.unlink() print("Cache cleared") return True else: print("No cache file found") return False def validate_comics(comics): """ Validate the loaded comics for common issues. Args: comics: List of comic dictionaries Returns: True if validation passes, False otherwise """ if not comics: return True numbers = [c['number'] for c in comics] # Check for duplicate comic numbers if len(numbers) != len(set(numbers)): duplicates = [n for n in numbers if numbers.count(n) > 1] print(f"Warning: Duplicate comic numbers found: {set(duplicates)}") return False # Check for gaps in comic numbering (optional warning) for i in range(len(comics) - 1): if comics[i+1]['number'] - comics[i]['number'] > 1: print(f"Info: Gap in comic numbering between {comics[i]['number']} and {comics[i+1]['number']}") return True if __name__ == '__main__': # Test the loader print("Loading comics from data/comics/...") comics = load_comics_from_yaml() print(f"Loaded {len(comics)} comics") if validate_comics(comics): print("Validation passed!") for comic in comics: title = comic.get('title', f"#{comic['number']}") print(f" - Comic {comic['number']}: {title} ({comic['date']})") else: print("Validation failed!")