Files
Wictionary-Data-Parser/scripts/03_update_manifest.py
2026-02-13 00:10:40 +01:00

108 lines
4.1 KiB
Python

import json
import os
import hashlib
import sys
import pathlib
import re
import argparse
from typing import Dict, Any, Set
# ======================================================================
# --- DEFAULT CONFIGURATION ---
# ======================================================================
# Anchor paths on this script's directory. ``__file__`` is undefined in some
# contexts (e.g. an interactive session), in which case the current working
# directory stands in for the script directory.
try:
    SCRIPT_DIR = pathlib.Path(__file__).parent
except NameError:
    SCRIPT_DIR = pathlib.Path.cwd()

# The project root is one level above the script directory; assets and the
# manifest live in its "outputs" folder by default.
ROOT_DIR = SCRIPT_DIR.parent
DEFAULT_OUTPUTS_DIR = ROOT_DIR / "outputs"
# ======================================================================
def calculate_sha256(filepath: pathlib.Path, block_size=65536) -> str | None:
sha256 = hashlib.sha256()
try:
with open(filepath, 'rb') as f:
for block in iter(lambda: f.read(block_size), b''):
sha256.update(block)
except IOError as e:
print(f" ERROR: Could not read file '{filepath.name}': {e}")
return None
return sha256.hexdigest().upper()
def guess_properties_from_base(base_name: str) -> Dict[str, str]:
    """Derive manifest properties from an asset's base file name.

    A name starting with ``dictionary_`` followed by letters yields a language
    code made of the first two or three of those letters. The pattern is only
    anchored at the start, so trailing text is ignored and a longer word
    contributes its first three letters (``dictionary_english`` -> ``eng``).

    Returns:
        A dict with the keys ``id``, ``name`` and ``lang_code``. When the name
        does not match, ``id`` is the base name itself and ``lang_code`` is
        the placeholder ``"xx"``.
    """
    found = re.match(r"dictionary_([a-zA-Z]{2,3})", base_name)
    if found is None:
        # Unrecognised naming scheme: fall back to the raw base name.
        return {"id": base_name, "name": f"Dictionary ({base_name})", "lang_code": "xx"}

    code = found.group(1)
    return {
        "id": f"{code}_dict",
        "name": f"Dictionary ({code.upper()})",
        "lang_code": code,
    }
def create_new_dict_entry(base_name: str, asset_files: list[pathlib.Path]) -> Dict[str, Any]:
    """Build a fresh manifest entry for the assets that share *base_name*.

    The entry's ``id`` and ``name`` come from ``guess_properties_from_base``;
    ``description`` and ``version`` are fixed placeholder values.

    Args:
        base_name: Common file stem of the assets.
        asset_files: Paths of the asset files to record.

    Returns:
        The new entry. Each readable file contributes one record with its
        file name, size in bytes and SHA-256 checksum; a file whose checksum
        could not be computed is left out.
    """
    guessed = guess_properties_from_base(base_name)
    asset_records = []

    for asset_path in asset_files:
        print(f" -> Adding new asset: '{asset_path.name}'")
        checksum = calculate_sha256(asset_path)
        if not checksum:
            # Unreadable file: calculate_sha256 already reported the error.
            continue
        asset_records.append({
            "filename": asset_path.name,
            "size_bytes": asset_path.stat().st_size,
            "checksum_sha256": checksum,
        })

    return {
        "id": guessed["id"],
        "name": guessed["name"],
        "description": "Auto-generated",
        "version": "1.0.0",
        "assets": asset_records,
    }
def update_manifest(outputs_dir: pathlib.Path):
    """Synchronise ``manifest.json`` in *outputs_dir* with the assets found there.

    Every ``*.db`` and ``*.zstdict`` file directly inside *outputs_dir* is
    examined. A file already listed as an asset in the manifest gets its size
    and checksum refreshed; files not yet listed are grouped by file stem and
    each group becomes a new manifest entry. The manifest is then rewritten
    (it is created when it does not exist yet).

    Args:
        outputs_dir: Directory holding the asset files and ``manifest.json``.

    Raises:
        SystemExit: With exit code 1 when *outputs_dir* does not exist or the
            existing manifest cannot be read or has an unexpected shape.
    """
    manifest_path = outputs_dir / 'manifest.json'
    if not outputs_dir.exists():
        print(f"Error: Outputs directory does not exist: {outputs_dir}")
        sys.exit(1)

    manifest_data = {"files": []}
    if manifest_path.exists():
        try:
            with open(manifest_path, 'r', encoding='utf-8') as f:
                manifest_data = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers both malformed JSON and undecodable bytes.
            print(f"Error reading manifest: {e}")
            sys.exit(1)
        # Validate the shape up front so a malformed manifest produces a clear
        # message instead of a traceback further down.
        if not isinstance(manifest_data, dict):
            print("Error reading manifest: top-level JSON value is not an object")
            sys.exit(1)
        manifest_data.setdefault('files', [])
        if not isinstance(manifest_data['files'], list):
            print("Error reading manifest: 'files' is not a list")
            sys.exit(1)

    print(f"Scanning {outputs_dir} for assets...")
    # Index the existing asset records by file name. The records are the same
    # dict objects that live inside manifest_data, so updating them through
    # this map updates the manifest itself.
    assets_map = {
        asset['filename']: asset
        for entry in manifest_data['files']
        for asset in entry.get('assets', [])
    }

    # Glob order depends on the filesystem; sorting keeps the order of newly
    # added entries (and therefore the written manifest) reproducible.
    discovered = sorted(outputs_dir.glob('*.db')) + sorted(outputs_dir.glob('*.zstdict'))

    new_files, updated_count = [], 0
    for fpath in discovered:
        fname = fpath.name
        asset = assets_map.get(fname)
        if asset is None:
            new_files.append(fpath)
            continue

        print(f"Updating: {fname}")
        checksum = calculate_sha256(fpath)
        if checksum is None:
            # The file could not be read. Keep the previous size and checksum
            # rather than writing a null checksum next to a fresh size.
            print(f" WARNING: Keeping previous record for '{fname}'")
            continue
        asset['size_bytes'] = os.path.getsize(fpath)
        asset['checksum_sha256'] = checksum
        updated_count += 1

    # Files that share a stem (e.g. "x.db" and "x.zstdict") form one entry.
    grouped = {}
    for fpath in new_files:
        grouped.setdefault(fpath.stem, []).append(fpath)

    added_count = 0
    for base, files in grouped.items():
        print(f"Creating new entry for: {base}")
        entry = create_new_dict_entry(base, files)
        if not entry.get('assets'):
            # None of the files could be hashed. An asset-less entry would be
            # useless, and the same files would add another one on each run.
            print(f" WARNING: No readable assets for '{base}', entry skipped")
            continue
        # NOTE(review): the generated id is not checked against the ids that
        # are already in the manifest - confirm whether ids must be unique.
        manifest_data['files'].append(entry)
        added_count += 1

    with open(manifest_path, 'w', encoding='utf-8') as f:
        json.dump(manifest_data, f, indent=2, ensure_ascii=False)
    print(f"\nComplete. Updated {updated_count} assets, added {added_count} new entries.")
def main():
    """Parse the command line and update the manifest in the chosen directory.

    The only option is ``--outputs-dir``, which defaults to
    ``DEFAULT_OUTPUTS_DIR``.
    """
    cli = argparse.ArgumentParser(description="Update manifest.json with .db and .zstdict files.")
    cli.add_argument(
        "--outputs-dir",
        type=pathlib.Path,
        default=DEFAULT_OUTPUTS_DIR,
        help="Directory containing assets and manifest.json.",
    )
    options = cli.parse_args()
    update_manifest(options.outputs_dir)
# Run the command-line interface only when the file is executed as a script,
# so importing the module does not trigger a manifest update.
if __name__ == "__main__":
    main()