Files
Wictionary-Data-Parser/tests/debug_german_compression.py
2026-02-13 00:10:40 +01:00

65 lines
2.2 KiB
Python

#!/usr/bin/env python3
"""
Debug German Verb Compression
=============================
Debug script to understand what's happening with German verb compression.
"""
import json
import sys
import pathlib
# Add parent directory to path for imports
sys.path.append(str(pathlib.Path(__file__).parent.parent))
from scripts.InflectionProcessor import InflectionProcessor
from scripts.lang_config import GERMAN_VERB_CONFIG
# Location of the German verb sample, resolved relative to the repository
# root (the parent of the directory containing this script).
samples_dir = pathlib.Path(__file__).parent.parent / "samples"
german_data_path = samples_dir / "german" / "laufen.json"


def describe_forms(forms):
    """Return the report lines describing a processed ``forms`` value.

    The report depends on the runtime type of *forms*:

    * ``None``  -> a single "Forms are None" line.
    * ``dict``  -> a header line followed by one ``key: value`` line per item.
    * ``list``  -> a header line, the length, and (only when the list is
      non-empty) the type and value of the first item.
    * anything else -> a single line naming the unexpected type.

    Args:
        forms: The value stored under the ``"forms"`` key of a processed entry.

    Returns:
        A list of strings, one per line to print.
    """
    if forms is None:
        return ["Forms are None"]

    if isinstance(forms, dict):
        lines = ["Forms are a dictionary:"]
        lines.extend(f" {key}: {value}" for key, value in forms.items())
        return lines

    if isinstance(forms, list):
        lines = ["Forms are a list:", f" Length: {len(forms)}"]
        # Only look at the first item when there is one; indexing an empty
        # list here used to raise IndexError before the emptiness check ran.
        if forms:
            lines.append(f" First item type: {type(forms[0])}")
            lines.append(f" First item: {forms[0]}")
        return lines

    return [f"Forms are of unexpected type: {type(forms)}"]


def main():
    """Load the German sample entry, process it, and print a forms report.

    Reads ``german_data_path`` as JSON, overwrites the ``lang_code``,
    ``word``, ``pos`` and ``senses`` fields, prints a short summary of the
    original ``forms``, runs the entry through an ``InflectionProcessor``
    configured with ``GERMAN_VERB_CONFIG`` under the ``'de_verb'`` key, and
    prints what the processed ``forms`` value looks like.

    Prints a "not found" message and returns early when the sample file
    does not exist.
    """
    if not german_data_path.exists():
        print(f"German sample data not found at: {german_data_path}")
        return

    with open(german_data_path, 'r', encoding='utf-8') as f:
        german_data = json.load(f)

    # Add required fields
    german_data["lang_code"] = "de"
    german_data["word"] = "laufen"
    german_data["pos"] = "verb"
    german_data["senses"] = [{"glosses": ["to run"]}]

    print("Original data forms type:", type(german_data.get("forms")))
    print("Original data forms length:", len(german_data.get("forms", [])))
    print("First few forms:")
    for i, form in enumerate(german_data.get("forms", [])[:3]):
        print(f" {i}: {form}")

    # Initialize processor
    processor = InflectionProcessor({
        'de_verb': GERMAN_VERB_CONFIG
    })

    # Process the entry
    # NOTE(review): assumes process() returns a dict-like entry — confirm
    # against InflectionProcessor; a None return would fail on .get() below.
    processed = processor.process(german_data)
    forms = processed.get("forms")

    print("\nProcessed data forms type:", type(forms))
    print("Processed data forms:", forms)
    for line in describe_forms(forms):
        print(line)


if __name__ == "__main__":
    main()