welcome gitea
This commit is contained in:
223
manifest_manager.py
Normal file
223
manifest_manager.py
Normal file
@@ -0,0 +1,223 @@
|
||||
"""
|
||||
Manifest manager for VocabListGenerator
-----------------------------------------
Maintains vocab_manifest.json — the index file the app fetches to discover
all available vocabulary lists, their metadata, and download info.

Manifest entry schema
---------------------
{
  "id": "verbs_beginners",                  // filename stem (no .json)
  "name": "Verbs for Beginners (DE-PT)",
  "description": "...",
  "filename": "verbs_beginners.json",       // file the app downloads
  "language_ids": [15, 7],                  // [lang_first_id, lang_second_id]
  "category": "Verbs for beginners",
  "item_count": 104,
  "level": "A1",
  "emoji": "🏃",
  "version": 1,
  "size_bytes": 45312,
  "checksum_sha256": "A1B2C3...",
  "created_at": "2026-02-18T20:53:54Z",     // first generation
  "updated_at": "2026-02-18T21:10:00Z"      // last re-generation
}
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
|
||||
# Value written to the "manifest_version" field when a fresh manifest skeleton is created.
MANIFEST_VERSION = "1.0"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public API
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def prune_missing_files(manifest_path: str, output_dir: str) -> int:
    """
    Remove entries from the manifest whose vocab file no longer exists in
    *output_dir*. Saves the manifest only when at least one entry is removed.

    Entries that carry no usable ``filename`` are treated as stale as well,
    since there is no file they could refer to.

    Args:
        manifest_path: Path of the manifest JSON file to load and rewrite.
        output_dir: Directory in which each entry's ``filename`` is looked up.

    Returns the number of entries that were pruned.
    """
    manifest = _load_manifest(manifest_path)
    output_path = Path(output_dir)
    # A manifest without a "lists" key is handled like an empty one.
    entries = manifest.get("lists") or []

    surviving = []
    for entry in entries:
        filename = entry.get("filename")
        has_file = (
            isinstance(filename, str)
            and bool(filename)
            and (output_path / filename).is_file()
        )
        if has_file:
            surviving.append(entry)
        else:
            print(f" [manifest] Pruned missing file: {filename} (id={entry.get('id')})")

    removed = len(entries) - len(surviving)
    if removed:
        manifest["lists"] = surviving
        manifest["updated_at"] = _utc_now()
        _save_manifest(manifest_path, manifest)
        print(f" [manifest] Pruned {removed} stale entr{'y' if removed == 1 else 'ies'} → {manifest_path}")

    return removed
|
||||
|
||||
|
||||
def update_manifest(
    manifest_path: str,
    vocab_file_path: str,
    lang_first_id: int,
    lang_second_id: int,
    category: str,
    item_count: int,
    name: str = "",
    description: str = "",
    emoji: str = "",
    level: str = "",
) -> None:
    """
    Compute size + checksum of *vocab_file_path*, then upsert an entry in the
    manifest at *manifest_path*. Creates the manifest if it does not exist yet.

    If an entry with the same ``id`` already exists it is updated in-place
    (created_at is preserved, updated_at is refreshed).

    Before the upsert, entries whose file is no longer present next to
    *vocab_file_path* are dropped from the manifest.

    Args:
        manifest_path: Path of the manifest JSON file to update.
        vocab_file_path: Path of the vocab file; its stem becomes the entry id.
        lang_first_id: First element of the entry's ``language_ids``.
        lang_second_id: Second element of the entry's ``language_ids``.
        category: Stored under ``category``.
        item_count: Stored under ``item_count``.
        name: Display name; falls back to the existing name, then the id.
        description: Description; an empty value keeps the existing one.
        emoji: Emoji; an empty value keeps the existing one.
        level: Level; an empty value keeps the existing one.

    Returns:
        None. If the vocab file does not exist a warning is printed and the
        manifest is left untouched.
    """
    vocab_path = Path(vocab_file_path)
    if not vocab_path.is_file():
        print(f" [manifest] WARNING: vocab file not found: {vocab_file_path}")
        return

    entry_id = vocab_path.stem   # e.g. "verbs_beginners"
    filename = vocab_path.name   # e.g. "verbs_beginners.json"

    size_bytes = _file_size(vocab_path)
    checksum_sha256 = _sha256(vocab_path)
    now_iso = _utc_now()

    # Load manifest; drop any entries whose files have since been deleted.
    # Entries without a usable "filename" cannot point at a file and are
    # dropped too instead of raising KeyError.
    vocab_dir = vocab_path.parent
    manifest = _load_manifest(manifest_path)
    kept: List[Dict[str, Any]] = []
    for candidate in manifest.get("lists") or []:
        candidate_file = candidate.get("filename")
        if (
            isinstance(candidate_file, str)
            and candidate_file
            and (vocab_dir / candidate_file).is_file()
        ):
            kept.append(candidate)
    manifest["lists"] = kept

    # Find existing entry (if any)
    existing = _find_entry(manifest["lists"], entry_id)

    if existing is None:
        # Brand-new entry
        entry: Dict[str, Any] = {
            "id": entry_id,
            "name": name or entry_id,
            "description": description,
            "filename": filename,
            "language_ids": [lang_first_id, lang_second_id],
            "category": category,
            "item_count": item_count,
            "level": level,
            "emoji": emoji,
            "version": 1,
            "size_bytes": size_bytes,
            "checksum_sha256": checksum_sha256,
            "created_at": now_iso,
            "updated_at": now_iso,
        }
        manifest["lists"].append(entry)
        print(f" [manifest] Added new entry: {entry_id}")
    else:
        # Update mutable fields; keep created_at and version
        existing["name"] = name or existing.get("name", entry_id)
        existing["description"] = description or existing.get("description", "")
        existing["filename"] = filename
        existing["language_ids"] = [lang_first_id, lang_second_id]
        existing["category"] = category
        existing["item_count"] = item_count
        existing["level"] = level or existing.get("level", "")
        existing["emoji"] = emoji or existing.get("emoji", "")
        existing.setdefault("version", 1)  # preserve existing version if already set
        # Backfill created_at for entries that predate the field; never overwrite it.
        existing.setdefault("created_at", now_iso)
        existing["size_bytes"] = size_bytes
        existing["checksum_sha256"] = checksum_sha256
        existing["updated_at"] = now_iso
        print(f" [manifest] Updated existing entry: {entry_id}")

    # Sort list alphabetically by id for stable output; entries with no id
    # sort first rather than aborting the save.
    manifest["lists"].sort(key=lambda e: str(e.get("id", "")))
    manifest["updated_at"] = now_iso

    _save_manifest(manifest_path, manifest)
    print(f" [manifest] Saved → {manifest_path}")
|
||||
|
||||
|
||||
def print_manifest(manifest_path: str) -> None:
    """
    Pretty-print a summary of the manifest to stdout.

    Prints a header with the manifest path and list count, then one block per
    entry (id, name, category, item count, language ids, size, updated_at).
    Fields missing from an entry are shown as ``?`` so that a partially
    filled entry does not abort the whole summary.

    Args:
        manifest_path: Path of the manifest JSON file to summarise.
    """
    manifest = _load_manifest(manifest_path)
    lists = manifest.get("lists", [])
    print(f"\nManifest: {manifest_path} ({len(lists)} lists)")
    print("-" * 60)
    for entry in lists:
        lang_ids = ", ".join(str(i) for i in entry.get("language_ids", []))
        print(
            f" [{entry.get('id', '?')}] {entry.get('name', '?')}\n"
            f" category={entry.get('category', '?')} "
            f"items={entry.get('item_count', '?')} "
            f"langs=[{lang_ids}] "
            f"size={entry.get('size_bytes', '?')} B\n"
            f" updated={entry.get('updated_at', '?')}"
        )
    print()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Private helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _load_manifest(path: str) -> Dict[str, Any]:
    """
    Load existing manifest or return a fresh skeleton.

    A fresh skeleton (current manifest version, current UTC timestamp, empty
    ``lists``) is returned when *path* is not a file, when the file cannot be
    read or decoded, or when its JSON is not an object with a list under
    ``lists``. A manifest object that merely lacks ``lists`` is kept and
    given an empty list, so callers can always index ``manifest["lists"]``.

    Args:
        path: Path of the manifest JSON file.

    Returns:
        The manifest as a dict that always contains a ``lists`` list.
    """
    if os.path.isfile(path):
        try:
            with open(path, "r", encoding="utf-8") as f:
                data = json.load(f)
        except (ValueError, OSError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            print(f" [manifest] WARNING: could not read manifest ({e}), starting fresh.")
        else:
            if isinstance(data, dict):
                if "lists" not in data:
                    data["lists"] = []
                if isinstance(data["lists"], list):
                    return data
            print(" [manifest] WARNING: manifest has unexpected structure, starting fresh.")

    return {
        "manifest_version": MANIFEST_VERSION,
        "updated_at": _utc_now(),
        "lists": [],
    }
|
||||
|
||||
|
||||
def _save_manifest(path: str, manifest: Dict[str, Any]) -> None:
    """
    Write *manifest* to *path* as indented UTF-8 JSON.

    The JSON is first written to a sibling ``<path>.tmp`` file and then moved
    over *path* with ``os.replace``. If serialisation or writing fails, the
    previous manifest is left intact instead of being truncated, and the
    temporary file is removed.

    Args:
        path: Destination path of the manifest file.
        manifest: Manifest data to serialise.

    Raises:
        OSError: If the file cannot be written or replaced.
        TypeError: If *manifest* contains values ``json`` cannot serialise.
    """
    tmp_path = f"{path}.tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(manifest, f, indent=2, ensure_ascii=False)
        os.replace(tmp_path, path)
    except Exception:
        # Best-effort cleanup of the partial temp file; the original error
        # is what the caller needs to see.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise
|
||||
|
||||
|
||||
def _find_entry(
    lists: List[Dict[str, Any]], entry_id: str
) -> Optional[Dict[str, Any]]:
    """
    Return the first entry in *lists* whose ``id`` equals *entry_id*.

    The entry object itself is returned (not a copy), so callers can update
    it in place. Returns ``None`` when no entry matches.
    """
    return next((entry for entry in lists if entry.get("id") == entry_id), None)
|
||||
|
||||
|
||||
def _file_size(path: Path) -> int:
    """Return the size of the file at *path* in bytes, as reported by ``stat``."""
    return path.stat().st_size
|
||||
|
||||
|
||||
def _sha256(path: Path) -> str:
    """
    Return the SHA-256 digest of the file at *path* as an upper-case hex string.

    The file is read in fixed-size chunks so it is never loaded into memory
    in one piece.
    """
    chunk_size = 65536
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        # iter() with a sentinel stops at the first empty read (end of file).
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest().upper()
|
||||
|
||||
|
||||
def _utc_now() -> str:
    """Return the current UTC time formatted as ``YYYY-MM-DDTHH:MM:SSZ``."""
    return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
||||
Reference in New Issue
Block a user