""" VocabListGenerator — Manifest cleaner ---------------------------------------- Removes entries from vocab_manifest.json whose output files no longer exist on disk. Reads output_dir and manifest filename from batch.yaml (falls back to sensible defaults if batch.yaml is missing). Usage: python prune_manifest.py """ import os import sys import yaml from manifest_manager import prune_missing_files, print_manifest def main() -> None: # ── Resolve paths from batch.yaml (or defaults) ────────────────────────── script_dir = os.path.dirname(os.path.abspath(__file__)) batch_yaml = os.path.join(script_dir, "batch.yaml") output_dir = "output" manifest_file = "vocab_manifest.json" if os.path.isfile(batch_yaml): try: with open(batch_yaml, "r", encoding="utf-8") as f: cfg = yaml.safe_load(f) or {} settings = cfg.get("settings", {}) output_dir = settings.get("output_dir", output_dir) manifest_file = settings.get("manifest_filename", manifest_file) except yaml.YAMLError as e: print(f" WARNING: Could not parse batch.yaml ({e}), using defaults.") output_dir = os.path.join(script_dir, output_dir) manifest_path = os.path.join(output_dir, manifest_file) # ── Run prune ───────────────────────────────────────────────────────────── print("=" * 50) print(" VocabListGenerator — Manifest Cleaner") print("=" * 50) print(f" Manifest : {manifest_path}") print(f" Output : {output_dir}") print() if not os.path.isfile(manifest_path): print(" No manifest file found — nothing to do.") sys.exit(0) removed = prune_missing_files(manifest_path, output_dir) if removed == 0: print(" Manifest is already clean — no stale entries found.") else: print() print_manifest(manifest_path) if __name__ == "__main__": main()