65 lines
2.1 KiB
Python
65 lines
2.1 KiB
Python
"""
VocabListGenerator — Manifest cleaner
----------------------------------------
Removes entries from vocab_manifest.json whose output files no longer
exist on disk. Reads output_dir and manifest filename from batch.yaml
(falls back to sensible defaults if batch.yaml is missing).

Usage:
    python prune_manifest.py
"""
|
|
|
|
import os
|
|
import sys
|
|
|
|
import yaml
|
|
|
|
from manifest_manager import prune_missing_files, print_manifest
|
|
|
|
|
|
def main() -> None:
    """Prune stale entries from the vocab manifest and print the result.

    Paths are resolved relative to the directory containing this script.
    ``output_dir`` and ``manifest_filename`` are read from the ``settings``
    mapping of ``batch.yaml`` when that file exists; otherwise the defaults
    ``output`` and ``vocab_manifest.json`` are used. The defaults are also
    kept when ``batch.yaml`` cannot be parsed, when it does not contain a
    mapping, when ``settings`` is missing or not a mapping, or when an
    individual setting is null.

    Raises:
        SystemExit: with exit code 0 when the manifest file does not exist.
    """
    # ── Resolve paths from batch.yaml (or defaults) ──────────────────────────
    script_dir = os.path.dirname(os.path.abspath(__file__))

    batch_yaml = os.path.join(script_dir, "batch.yaml")
    output_dir = "output"
    manifest_file = "vocab_manifest.json"

    if os.path.isfile(batch_yaml):
        try:
            with open(batch_yaml, "r", encoding="utf-8") as f:
                cfg = yaml.safe_load(f)
        except yaml.YAMLError as e:
            print(f" WARNING: Could not parse batch.yaml ({e}), using defaults.")
        else:
            # safe_load may return None (empty file) or a non-mapping
            # (list/scalar); "settings:" with no value yields None as well.
            # Only read settings when both levels are real mappings.
            settings = cfg.get("settings") if isinstance(cfg, dict) else None
            if isinstance(settings, dict):
                configured_dir = settings.get("output_dir")
                if configured_dir is not None:
                    output_dir = configured_dir
                configured_name = settings.get("manifest_filename")
                if configured_name is not None:
                    manifest_file = configured_name

    output_dir = os.path.join(script_dir, output_dir)
    manifest_path = os.path.join(output_dir, manifest_file)

    # ── Run prune ─────────────────────────────────────────────────────────────
    print("=" * 50)
    print(" VocabListGenerator — Manifest Cleaner")
    print("=" * 50)
    print(f" Manifest : {manifest_path}")
    print(f" Output : {output_dir}")
    print()

    if not os.path.isfile(manifest_path):
        print(" No manifest file found — nothing to do.")
        sys.exit(0)

    # presumably returns the number of entries removed — verify against
    # manifest_manager.prune_missing_files
    removed = prune_missing_files(manifest_path, output_dir)

    if removed == 0:
        print(" Manifest is already clean — no stale entries found.")
    else:
        print()

    # NOTE(review): indentation was lost in the reviewed copy; this call is
    # assumed to run whether or not entries were removed — confirm.
    print_manifest(manifest_path)
|
|
|
|
|
|
# Run the cleaner only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
|