#!/usr/bin/env python3
"""
Debug German Verb Compression
=============================

Debug script to understand what's happening with German verb compression.

It loads the ``laufen`` sample entry, passes it through ``InflectionProcessor``
configured with ``GERMAN_VERB_CONFIG`` under the ``'de_verb'`` key, and prints
the type and contents of the ``forms`` field before and after processing.
"""
import json
import pathlib
import sys

# Parent of the directory holding this script; it is put on sys.path so the
# ``scripts`` package can be imported, and it contains the samples directory.
PROJECT_ROOT = pathlib.Path(__file__).parent.parent
GERMAN_SAMPLE_PATH = PROJECT_ROOT / "samples" / "german" / "laufen.json"


def describe_forms(forms):
    """Return the report lines describing a processed ``forms`` value.

    Args:
        forms: The value stored under ``"forms"`` in the processed entry.
            ``None``, ``dict`` and ``list`` get dedicated reports; anything
            else is reported as an unexpected type.

    Returns:
        A list of strings, one per line to print.
    """
    if forms is None:
        return ["Forms are None"]

    if isinstance(forms, dict):
        report = ["Forms are a dictionary:"]
        report.extend(f" {key}: {value}" for key, value in forms.items())
        return report

    if isinstance(forms, list):
        report = ["Forms are a list:", f" Length: {len(forms)}"]
        # Only look at element 0 when there is one; indexing an empty list
        # would raise IndexError and hide the "Length: 0" diagnosis.
        if forms:
            report.append(f" First item type: {type(forms[0])}")
            report.append(f" First item: {forms[0]}")
        return report

    return [f"Forms are of unexpected type: {type(forms)}"]


def main():
    """Load the German sample, process it, and print a report on ``forms``."""
    # Add parent directory to path for imports. The project imports are done
    # here, not at the top of the file, because they only resolve after
    # sys.path has been extended.
    sys.path.append(str(PROJECT_ROOT))
    from scripts.InflectionProcessor import InflectionProcessor
    from scripts.lang_config import GERMAN_VERB_CONFIG

    if not GERMAN_SAMPLE_PATH.exists():
        print(f"German sample data not found at: {GERMAN_SAMPLE_PATH}")
        return

    # Load German verb sample
    with open(GERMAN_SAMPLE_PATH, 'r', encoding='utf-8') as f:
        german_data = json.load(f)

    # Add required fields
    german_data["lang_code"] = "de"
    german_data["word"] = "laufen"
    german_data["pos"] = "verb"
    german_data["senses"] = [{"glosses": ["to run"]}]

    print("Original data forms type:", type(german_data.get("forms")))
    print("Original data forms length:", len(german_data.get("forms", [])))
    print("First few forms:")
    for i, form in enumerate(german_data.get("forms", [])[:3]):
        print(f" {i}: {form}")

    # Initialize processor
    processor = InflectionProcessor({
        'de_verb': GERMAN_VERB_CONFIG,
    })

    # Process the entry
    processed = processor.process(german_data)
    processed_forms = processed.get("forms")

    print("\nProcessed data forms type:", type(processed_forms))
    print("Processed data forms:", processed_forms)
    for line in describe_forms(processed_forms):
        print(line)


if __name__ == "__main__":
    main()