65 lines
2.2 KiB
Python
65 lines
2.2 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Debug German Verb Compression
|
|
=============================
|
|
Debug script to understand what's happening with German verb compression.
|
|
"""
|
|
|
|
import json
|
|
import sys
|
|
import pathlib
|
|
|
|
# Add parent directory to path for imports
|
|
sys.path.append(str(pathlib.Path(__file__).parent.parent))
|
|
|
|
from scripts.InflectionProcessor import InflectionProcessor
|
|
from scripts.lang_config import GERMAN_VERB_CONFIG
|
|
|
|
def describe_forms(forms):
    """Return the report lines describing a processed ``forms`` value.

    The branching mirrors the shapes this debug script distinguishes:
    ``None``, a dictionary, a list, or anything else.

    Args:
        forms: The value stored under the ``"forms"`` key of a processed
            entry (or ``None`` when the key is absent).

    Returns:
        A list of strings, one per line to print, in output order.
    """
    if forms is None:
        return ["Forms are None"]

    if isinstance(forms, dict):
        lines = ["Forms are a dictionary:"]
        lines.extend(f" {key}: {value}" for key, value in forms.items())
        return lines

    if isinstance(forms, list):
        lines = ["Forms are a list:", f" Length: {len(forms)}"]
        # Both first-item lines index forms[0], so both must sit behind the
        # emptiness check; indexing an empty list raises IndexError.
        if forms:
            lines.append(f" First item type: {type(forms[0])}")
            lines.append(f" First item: {forms[0]}")
        return lines

    return [f"Forms are of unexpected type: {type(forms)}"]


# Load German verb sample
samples_dir = pathlib.Path(__file__).parent.parent / "samples"
german_data_path = samples_dir / "german" / "laufen.json"

if german_data_path.exists():
    with open(german_data_path, 'r', encoding='utf-8') as f:
        german_data = json.load(f)

    # Add required fields (set unconditionally, overwriting any values
    # already present in the sample).
    german_data["lang_code"] = "de"
    german_data["word"] = "laufen"
    german_data["pos"] = "verb"
    german_data["senses"] = [{"glosses": ["to run"]}]

    # Show the raw input before processing so it can be compared with the
    # processed output below.
    print("Original data forms type:", type(german_data.get("forms")))
    print("Original data forms length:", len(german_data.get("forms", [])))
    print("First few forms:")
    for i, form in enumerate(german_data.get("forms", [])[:3]):
        print(f" {i}: {form}")

    # Initialize processor
    processor = InflectionProcessor({
        'de_verb': GERMAN_VERB_CONFIG,
    })

    # Process the entry
    processed = processor.process(german_data)
    processed_forms = processed.get("forms")

    print("\nProcessed data forms type:", type(processed_forms))
    print("Processed data forms:", processed_forms)

    for line in describe_forms(processed_forms):
        print(line)
else:
    print(f"German sample data not found at: {german_data_path}")