224 lines
7.6 KiB
Python
224 lines
7.6 KiB
Python
"""
|
|
Manifest manager for VocabListGenerator
|
|
-----------------------------------------
|
|
Maintains vocab_manifest.json — the index file the app fetches to discover
|
|
all available vocabulary lists, their metadata, and download info.
|
|
|
|
Manifest entry schema
|
|
---------------------
|
|
{
|
|
"id": "verbs_beginners", // filename stem (no .json)
|
|
"name": "Verbs for Beginners (DE-PT)",
|
|
"description": "...",
|
|
"filename": "verbs_beginners.json", // file the app downloads
|
|
"language_ids": [15, 7], // [lang_first_id, lang_second_id]
|
|
"category": "Verbs for beginners",
|
|
"item_count": 104,
|
|
"level": "A1",
|
|
"emoji": "🏃",
|
|
"version": 1,
|
|
"size_bytes": 45312,
|
|
"checksum_sha256": "A1B2C3...",
|
|
"created_at": "2026-02-18T20:53:54Z", // first generation
|
|
"updated_at": "2026-02-18T21:10:00Z" // last re-generation
|
|
}
|
|
"""
|
|
|
|
import hashlib
|
|
import json
|
|
import os
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
|
|
# Schema version written to the "manifest_version" field of a freshly
# created manifest skeleton.
MANIFEST_VERSION = "1.0"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Public API
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def prune_missing_files(manifest_path: str, output_dir: str) -> int:
    """
    Drop manifest entries whose vocab file is gone from *output_dir*.

    Every entry's ``filename`` is resolved against *output_dir*; an entry
    that does not point at a regular file is discarded and reported on
    stdout. The manifest is rewritten (with a refreshed ``updated_at``)
    only when at least one entry was discarded.

    Returns the number of discarded entries.
    """
    manifest = _load_manifest(manifest_path)
    base_dir = Path(output_dir)

    kept = []
    pruned_count = 0
    for item in manifest["lists"]:
        if (base_dir / item["filename"]).is_file():
            kept.append(item)
            continue
        pruned_count += 1
        print(f" [manifest] Pruned missing file: {item['filename']} (id={item['id']})")

    # Nothing stale: leave the manifest file untouched.
    if not pruned_count:
        return pruned_count

    manifest["lists"] = kept
    manifest["updated_at"] = _utc_now()
    _save_manifest(manifest_path, manifest)

    noun_suffix = "y" if pruned_count == 1 else "ies"
    print(f" [manifest] Pruned {pruned_count} stale entr{noun_suffix} → {manifest_path}")
    return pruned_count
|
|
|
|
|
|
def update_manifest(
    manifest_path: str,
    vocab_file_path: str,
    lang_first_id: int,
    lang_second_id: int,
    category: str,
    item_count: int,
    name: str = "",
    description: str = "",
    emoji: str = "",
    level: str = "",
) -> None:
    """
    Compute size + checksum of *vocab_file_path*, then upsert an entry in the
    manifest at *manifest_path*. Creates the manifest if it does not exist yet.

    The entry ``id`` is the vocab file's stem. If an entry with the same
    ``id`` already exists it is updated in-place: ``created_at`` and
    ``version`` are preserved, ``updated_at`` is refreshed, and an empty
    *name*, *description*, *level* or *emoji* keeps the stored value.

    Entries whose file no longer exists next to the vocab file are dropped
    from the manifest and reported on stdout. The list is kept sorted by
    ``id`` so the written file is stable between runs.

    Args:
        manifest_path: Path of the manifest JSON file to create or update.
        vocab_file_path: Path of the generated vocab file to register.
        lang_first_id: First element of the entry's ``language_ids``.
        lang_second_id: Second element of the entry's ``language_ids``.
        category: Value stored in the entry's ``category`` field.
        item_count: Value stored in the entry's ``item_count`` field.
        name: Display name; a new entry falls back to the entry id.
        description: Free-text description.
        emoji: Emoji stored with the entry.
        level: Level label stored with the entry (e.g. ``"A1"``).

    Returns:
        None. If *vocab_file_path* is not an existing file, a warning is
        printed and the manifest is left untouched.
    """
    vocab_path = Path(vocab_file_path)
    if not vocab_path.is_file():
        print(f" [manifest] WARNING: vocab file not found: {vocab_file_path}")
        return

    entry_id = vocab_path.stem    # e.g. "verbs_beginners"
    filename = vocab_path.name    # e.g. "verbs_beginners.json"

    size_bytes = _file_size(vocab_path)
    checksum_sha256 = _sha256(vocab_path)
    now_iso = _utc_now()

    # Load manifest; drop any entries whose files have since been deleted.
    # Each dropped entry is reported, matching prune_missing_files, so that
    # entries never disappear from the index silently.
    vocab_dir = vocab_path.parent
    manifest = _load_manifest(manifest_path)
    surviving = []
    for candidate in manifest["lists"]:
        if (vocab_dir / candidate["filename"]).is_file():
            surviving.append(candidate)
        else:
            print(
                f" [manifest] Pruned missing file: {candidate['filename']} "
                f"(id={candidate.get('id')})"
            )
    manifest["lists"] = surviving

    # Find existing entry (if any)
    existing = _find_entry(manifest["lists"], entry_id)

    if existing is None:
        # Brand-new entry
        entry: Dict[str, Any] = {
            "id": entry_id,
            "name": name or entry_id,
            "description": description,
            "filename": filename,
            "language_ids": [lang_first_id, lang_second_id],
            "category": category,
            "item_count": item_count,
            "level": level,
            "emoji": emoji,
            "version": 1,
            "size_bytes": size_bytes,
            "checksum_sha256": checksum_sha256,
            "created_at": now_iso,
            "updated_at": now_iso,
        }
        manifest["lists"].append(entry)
        print(f" [manifest] Added new entry: {entry_id}")
    else:
        # Update mutable fields; keep created_at and version
        existing["name"] = name or existing.get("name", entry_id)
        existing["description"] = description or existing.get("description", "")
        existing["filename"] = filename
        existing["language_ids"] = [lang_first_id, lang_second_id]
        existing["category"] = category
        existing["item_count"] = item_count
        existing["level"] = level or existing.get("level", "")
        existing["emoji"] = emoji or existing.get("emoji", "")
        existing.setdefault("version", 1)  # preserve existing version if already set
        existing["size_bytes"] = size_bytes
        existing["checksum_sha256"] = checksum_sha256
        existing["updated_at"] = now_iso
        print(f" [manifest] Updated existing entry: {entry_id}")

    # Sort list alphabetically by id for stable output. Entries without an
    # id sort first instead of raising, mirroring the tolerant lookup used
    # to find the existing entry.
    manifest["lists"].sort(key=lambda e: e.get("id") or "")
    manifest["updated_at"] = now_iso

    _save_manifest(manifest_path, manifest)
    print(f" [manifest] Saved → {manifest_path}")
|
|
|
|
|
|
def print_manifest(manifest_path: str) -> None:
    """
    Write a human-readable overview of the manifest to stdout.

    Prints a header line with the number of lists, a separator, and then
    for each entry its id, name, category, item count, language ids, size
    in bytes and last-update timestamp, followed by a blank line.
    """
    data = _load_manifest(manifest_path)
    entries = data.get("lists", [])

    print(f"\nManifest: {manifest_path} ({len(entries)} lists)")
    print("-" * 60)

    for item in entries:
        langs = ", ".join(map(str, item.get("language_ids", [])))
        summary = "".join(
            (
                f" [{item['id']}] {item['name']}\n",
                f" category={item['category']} ",
                f"items={item['item_count']} ",
                f"langs=[{langs}] ",
                f"size={item['size_bytes']} B\n",
                f" updated={item['updated_at']}",
            )
        )
        print(summary)

    print()
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Private helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _load_manifest(path: str) -> Dict[str, Any]:
    """
    Load the manifest at *path*, or return a fresh skeleton.

    A fresh skeleton (current ``MANIFEST_VERSION``, ``updated_at`` set to
    now, empty ``lists``) is returned when the file does not exist, cannot
    be read or decoded, or does not contain a JSON object at its root.

    When the file holds a JSON object whose ``"lists"`` value is missing or
    is not a list, that value is reset to an empty list so callers can
    always index ``manifest["lists"]``. Every fallback prints a warning.

    Args:
        path: Location of the manifest JSON file.

    Returns:
        A manifest dict that is guaranteed to have a list under ``"lists"``.
    """
    if os.path.isfile(path):
        try:
            with open(path, "r", encoding="utf-8") as f:
                data = json.load(f)
        except (ValueError, OSError) as e:
            # ValueError covers both json.JSONDecodeError and the
            # UnicodeDecodeError raised for a file that is not valid UTF-8.
            print(f" [manifest] WARNING: could not read manifest ({e}), starting fresh.")
        else:
            if isinstance(data, dict):
                if not isinstance(data.get("lists"), list):
                    print(" [manifest] WARNING: manifest has no valid 'lists' array, resetting it.")
                    data["lists"] = []
                return data
            print(
                f" [manifest] WARNING: manifest root is {type(data).__name__}, "
                "not a JSON object; starting fresh."
            )

    return {
        "manifest_version": MANIFEST_VERSION,
        "updated_at": _utc_now(),
        "lists": [],
    }
|
|
|
|
|
|
def _save_manifest(path: str, manifest: Dict[str, Any]) -> None:
    """
    Write *manifest* to *path* as indented UTF-8 JSON, atomically.

    The JSON is first written to a sibling ``<path>.tmp`` file and then
    moved over *path* with ``os.replace``. If serialization or writing
    fails, the previous manifest at *path* is left intact instead of being
    truncated, and the temporary file is removed.

    Args:
        path: Destination of the manifest file.
        manifest: JSON-serializable manifest dict.

    Raises:
        OSError: If the temporary file cannot be written or moved.
        TypeError: If *manifest* contains a value ``json`` cannot serialize.
    """
    tmp_path = f"{path}.tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(manifest, f, indent=2, ensure_ascii=False)
        os.replace(tmp_path, path)
    finally:
        # After a successful replace the temp file no longer exists, so this
        # only cleans up after a failed write.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
|
|
|
|
|
|
def _find_entry(
    lists: List[Dict[str, Any]], entry_id: str
) -> Optional[Dict[str, Any]]:
    """
    Return the first entry in *lists* whose ``"id"`` equals *entry_id*.

    Entries without an ``"id"`` key never match. Returns ``None`` when no
    entry matches. The matching dict itself is returned, not a copy.
    """
    matches = (candidate for candidate in lists if candidate.get("id") == entry_id)
    return next(matches, None)
|
|
|
|
|
|
def _file_size(path: Path) -> int:
    """Return the size in bytes that ``stat`` reports for *path*."""
    stat_result = path.stat()
    return stat_result.st_size
|
|
|
|
|
|
def _sha256(path: Path) -> str:
    """
    Return the upper-case hexadecimal SHA-256 digest of the file at *path*.

    The file is read in 64 KiB pieces, so it is never held in memory whole.
    """
    digest = hashlib.sha256()
    with open(path, "rb") as stream:
        while True:
            piece = stream.read(65536)
            if not piece:
                break
            digest.update(piece)
    return digest.hexdigest().upper()
|
|
|
|
|
|
def _utc_now() -> str:
    """Return the current UTC time formatted as ``YYYY-MM-DDTHH:MM:SSZ``."""
    moment = datetime.now(tz=timezone.utc)
    return moment.strftime("%Y-%m-%dT%H:%M:%SZ")
|