welcome gitea

This commit is contained in:
jonasgaudian
2026-02-19 17:18:23 +01:00
commit eabe2e2969
717 changed files with 654575 additions and 0 deletions

223
manifest_manager.py Normal file
View File

@@ -0,0 +1,223 @@
"""
Manifest manager for VocabListGenerator
-----------------------------------------
Maintains vocab_manifest.json — the index file the app fetches to discover
all available vocabulary lists, their metadata, and download info.
Manifest entry schema
---------------------
{
"id": "verbs_beginners", // filename stem (no .json)
"name": "Verbs for Beginners (DE-PT)",
"description": "...",
"filename": "verbs_beginners.json", // file the app downloads
"language_ids": [15, 7], // [lang_first_id, lang_second_id]
"category": "Verbs for beginners",
"item_count": 104,
"level": "A1",
"emoji": "🏃",
"version": 1,
"size_bytes": 45312,
"checksum_sha256": "A1B2C3...",
"created_at": "2026-02-18T20:53:54Z", // first generation
"updated_at": "2026-02-18T21:10:00Z" // last re-generation
}
"""
import hashlib
import json
import os
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional
# Value written to the "manifest_version" field when a fresh manifest
# skeleton is created (see _load_manifest).
MANIFEST_VERSION = "1.0"
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
def prune_missing_files(manifest_path: str, output_dir: str) -> int:
    """
    Remove entries from the manifest whose vocab file no longer exists in
    *output_dir*.

    The manifest is rewritten (and its ``updated_at`` refreshed) only when at
    least one entry was removed; otherwise the file on disk is left untouched.

    Args:
        manifest_path: Path of the manifest JSON file to load and, if needed,
            save back.
        output_dir: Directory against which each entry's ``filename`` is
            resolved.

    Returns:
        The number of entries that were pruned.
    """
    manifest = _load_manifest(manifest_path)
    output_path = Path(output_dir)
    # Tolerate a manifest without a "lists" key, as print_manifest does.
    entries = manifest.get("lists", [])

    surviving = []
    for entry in entries:
        if (output_path / entry["filename"]).is_file():
            surviving.append(entry)
        else:
            print(f" [manifest] Pruned missing file: {entry['filename']} (id={entry['id']})")

    removed = len(entries) - len(surviving)
    if removed:
        manifest["lists"] = surviving
        manifest["updated_at"] = _utc_now()
        _save_manifest(manifest_path, manifest)
        # A separator is required here: without it the path is glued onto
        # the word "entries" in the output.
        noun = "entry" if removed == 1 else "entries"
        print(f" [manifest] Pruned {removed} stale {noun} from {manifest_path}")
    return removed
def update_manifest(
    manifest_path: str,
    vocab_file_path: str,
    lang_first_id: int,
    lang_second_id: int,
    category: str,
    item_count: int,
    name: str = "",
    description: str = "",
    emoji: str = "",
    level: str = "",
) -> None:
    """
    Compute size + checksum of *vocab_file_path*, then upsert an entry in the
    manifest at *manifest_path*. Creates the manifest if it does not exist yet.

    If an entry with the same ``id`` already exists it is updated in-place
    (``created_at`` and ``version`` are preserved, ``updated_at`` is
    refreshed). Entries whose file is no longer present next to the vocab
    file are dropped before the upsert. If the vocab file itself is missing,
    a warning is printed and the manifest is not touched.

    Args:
        manifest_path: Path of the manifest JSON file.
        vocab_file_path: Path of the vocabulary file; its stem becomes the
            entry ``id`` and its basename the entry ``filename``.
        lang_first_id: First element of the entry's ``language_ids``.
        lang_second_id: Second element of the entry's ``language_ids``.
        category: Stored as the entry's ``category``.
        item_count: Stored as the entry's ``item_count``.
        name: Display name; when empty, the existing name (or the id) is kept.
        description: Description; when empty, the existing one is kept.
        emoji: Emoji; when empty, the existing one is kept.
        level: Level label; when empty, the existing one is kept.
    """
    vocab_path = Path(vocab_file_path)
    if not vocab_path.is_file():
        print(f" [manifest] WARNING: vocab file not found: {vocab_file_path}")
        return

    entry_id = vocab_path.stem  # e.g. "verbs_beginners"
    filename = vocab_path.name  # e.g. "verbs_beginners.json"
    size_bytes = _file_size(vocab_path)
    checksum_sha256 = _sha256(vocab_path)
    now_iso = _utc_now()

    # Load manifest; drop any entries whose files have since been deleted.
    # Filenames are resolved relative to the directory of the vocab file.
    vocab_dir = vocab_path.parent
    manifest = _load_manifest(manifest_path)
    manifest["lists"] = [
        e for e in manifest.get("lists", [])
        if (vocab_dir / e["filename"]).is_file()
    ]

    # Find existing entry (if any)
    existing = _find_entry(manifest["lists"], entry_id)
    if existing is None:
        # Brand-new entry
        entry: Dict[str, Any] = {
            "id": entry_id,
            "name": name or entry_id,
            "description": description,
            "filename": filename,
            "language_ids": [lang_first_id, lang_second_id],
            "category": category,
            "item_count": item_count,
            "level": level,
            "emoji": emoji,
            "version": 1,
            "size_bytes": size_bytes,
            "checksum_sha256": checksum_sha256,
            "created_at": now_iso,
            "updated_at": now_iso,
        }
        manifest["lists"].append(entry)
        print(f" [manifest] Added new entry: {entry_id}")
    else:
        # Update mutable fields; keep created_at and version. Optional text
        # fields fall back to the stored value when the caller passes "".
        existing["name"] = name or existing.get("name", entry_id)
        existing["description"] = description or existing.get("description", "")
        existing["filename"] = filename
        existing["language_ids"] = [lang_first_id, lang_second_id]
        existing["category"] = category
        existing["item_count"] = item_count
        existing["level"] = level or existing.get("level", "")
        existing["emoji"] = emoji or existing.get("emoji", "")
        existing.setdefault("version", 1)  # preserve existing version if already set
        # Backfill created_at for entries that lack it so every entry
        # matches the documented schema; an existing value is never changed.
        existing.setdefault("created_at", now_iso)
        existing["size_bytes"] = size_bytes
        existing["checksum_sha256"] = checksum_sha256
        existing["updated_at"] = now_iso
        print(f" [manifest] Updated existing entry: {entry_id}")

    # Sort list alphabetically by id for stable output
    manifest["lists"].sort(key=lambda e: e["id"])
    manifest["updated_at"] = now_iso
    _save_manifest(manifest_path, manifest)
    print(f" [manifest] Saved → {manifest_path}")
def print_manifest(manifest_path: str) -> None:
    """Write a human-readable summary of every manifest entry to stdout."""
    entries = _load_manifest(manifest_path).get("lists", [])

    print(f"\nManifest: {manifest_path} ({len(entries)} lists)")
    print("-" * 60)
    for item in entries:
        langs = ", ".join(map(str, item.get("language_ids", [])))
        # Three output lines per entry: title, statistics, timestamp.
        title_line = f" [{item['id']}] {item['name']}"
        stats_line = (
            f" category={item['category']} "
            f"items={item['item_count']} "
            f"langs=[{langs}] "
            f"size={item['size_bytes']} B"
        )
        stamp_line = f" updated={item['updated_at']}"
        print("\n".join((title_line, stats_line, stamp_line)))
    print()
# ---------------------------------------------------------------------------
# Private helpers
# ---------------------------------------------------------------------------
def _load_manifest(path: str) -> Dict[str, Any]:
    """
    Load the manifest at *path*, or return a fresh skeleton.

    A fresh skeleton (empty ``lists``) is returned when the file does not
    exist, cannot be read or decoded, or does not contain a JSON object with
    a list under ``"lists"``. A JSON object that merely lacks ``"lists"`` is
    kept and given an empty list, so callers can always index
    ``manifest["lists"]``.
    """
    if os.path.isfile(path):
        try:
            with open(path, "r", encoding="utf-8") as f:
                data = json.load(f)
        # ValueError covers json.JSONDecodeError and also UnicodeDecodeError
        # (file is not valid UTF-8), which would otherwise escape.
        except (ValueError, OSError) as e:
            print(f" [manifest] WARNING: could not read manifest ({e}), starting fresh.")
        else:
            if isinstance(data, dict):
                lists = data.setdefault("lists", [])
                if isinstance(lists, list):
                    return data
            print(" [manifest] WARNING: manifest has unexpected structure, starting fresh.")
    return {
        "manifest_version": MANIFEST_VERSION,
        "updated_at": _utc_now(),
        "lists": [],
    }
def _save_manifest(path: str, manifest: Dict[str, Any]) -> None:
    """
    Serialize *manifest* as indented UTF-8 JSON and store it at *path*.

    The JSON is first written to a sibling ``<path>.tmp`` file and then moved
    over *path* with ``os.replace``. If serialization or writing fails, the
    previous manifest is left intact rather than truncated, and the exception
    propagates to the caller.
    """
    tmp_path = f"{path}.tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(manifest, f, indent=2, ensure_ascii=False)
        os.replace(tmp_path, path)
    finally:
        # After a successful replace the temp file is already gone; after a
        # failure it is a leftover. Either way a failed cleanup must not mask
        # the outcome of the write itself.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
def _find_entry(
    lists: List[Dict[str, Any]], entry_id: str
) -> Optional[Dict[str, Any]]:
    """Return the first entry in *lists* whose ``id`` equals *entry_id*, else None."""
    matches = (candidate for candidate in lists if candidate.get("id") == entry_id)
    return next(matches, None)
def _file_size(path: Path) -> int:
    """Return the size in bytes of the file at *path*."""
    return os.path.getsize(path)
def _sha256(path: Path) -> str:
    """Return the SHA-256 digest of the file at *path* as uppercase hex."""
    digest = hashlib.sha256()
    with path.open("rb") as stream:
        # Read in 64 KiB pieces so large files are never held in memory whole.
        while True:
            piece = stream.read(65536)
            if not piece:
                break
            digest.update(piece)
    return digest.hexdigest().upper()
def _utc_now() -> str:
    """Return the current UTC time formatted as ``YYYY-MM-DDTHH:MM:SSZ``."""
    moment = datetime.now(tz=timezone.utc)
    return f"{moment:%Y-%m-%dT%H:%M:%SZ}"