Migrate to gitea
This commit is contained in:
65
tests/debug_german_compression.py
Normal file
65
tests/debug_german_compression.py
Normal file
@@ -0,0 +1,65 @@
|
||||
#!/usr/bin/env python3
"""
Debug German Verb Compression
=============================
Debug script to understand what's happening with German verb compression.

The script loads the ``samples/german/laufen.json`` sample (relative to the
repository root), fills in the entry fields set below, runs the entry through
``InflectionProcessor.process`` with ``GERMAN_VERB_CONFIG`` and prints the
type and content of the ``forms`` value before and after processing.
"""

import json
import sys
import pathlib

# Add parent directory to path for imports
sys.path.append(str(pathlib.Path(__file__).parent.parent))

from scripts.InflectionProcessor import InflectionProcessor
from scripts.lang_config import GERMAN_VERB_CONFIG

# Load German verb sample
samples_dir = pathlib.Path(__file__).parent.parent / "samples"
german_data_path = samples_dir / "german" / "laufen.json"

if german_data_path.exists():
    with open(german_data_path, 'r', encoding='utf-8') as f:
        german_data = json.load(f)

    # Add required fields
    german_data["lang_code"] = "de"
    german_data["word"] = "laufen"
    german_data["pos"] = "verb"
    german_data["senses"] = [{"glosses": ["to run"]}]

    print("Original data forms type:", type(german_data.get("forms")))
    print("Original data forms length:", len(german_data.get("forms", [])))
    print("First few forms:")
    for i, form in enumerate(german_data.get("forms", [])[:3]):
        print(f" {i}: {form}")

    # Initialize processor
    processor = InflectionProcessor({
        'de_verb': GERMAN_VERB_CONFIG
    })

    # Process the entry
    processed = processor.process(german_data)

    # Read the processed forms once; every report below inspects this value.
    processed_forms = processed.get("forms")

    print("\nProcessed data forms type:", type(processed_forms))
    print("Processed data forms:", processed_forms)

    if processed_forms is None:
        print("Forms are None")
    elif isinstance(processed_forms, dict):
        print("Forms are a dictionary:")
        for key, value in processed_forms.items():
            print(f" {key}: {value}")
    elif isinstance(processed_forms, list):
        print("Forms are a list:")
        print(f" Length: {len(processed_forms)}")
        # Only index into the list when it is non-empty; indexing an empty
        # list would raise IndexError and abort the debug output.
        if processed_forms:
            print(f" First item type: {type(processed_forms[0])}")
            print(f" First item: {processed_forms[0]}")
    else:
        print(f"Forms are of unexpected type: {type(processed_forms)}")

else:
    print(f"German sample data not found at: {german_data_path}")
|
||||
Reference in New Issue
Block a user