# Declarative verb-paradigm configurations.
#
# Each config is a plain dict with up to four sections:
#   clean_prefixes       - pronoun strings listed for removal from forms
#   normalization_rules  - field/match pairs that add grammatical tags
#   properties           - lemma-level properties, each with a default and rules
#   schema               - named slots ("single") or fixed-size tables ("list")
#                          selected by tag criteria
# NOTE(review): how the consuming engine interprets these keys (rule order,
# match mode defaults, regex search vs. match) is not visible in this file;
# comments below describe only what the data itself states.


def _person_number_indices():
    """Return a fresh six-slot person/number index table.

    Slot order is 1sg, 2sg, 3sg, 1pl, 2pl, 3pl (indices 0-5). A new list of new
    dicts is built on every call so that no two schema entries share mutable
    state.
    """
    slots = (
        ("first-person", "singular"),
        ("second-person", "singular"),
        ("third-person", "singular"),
        ("first-person", "plural"),
        ("second-person", "plural"),
        ("third-person", "plural"),
    )
    return [
        {"index": position, "tags": [person, number]}
        for position, (person, number) in enumerate(slots)
    ]


GERMAN_VERB_CONFIG = {
    "clean_prefixes": ["ich", "du", "er/sie/es", "wir", "ihr", "sie"],
    "normalization_rules": [
        {"field": "pronouns", "match": "ich", "add_tags": ["first-person", "singular", "indicative", "active"]},
        {"field": "pronouns", "match": "du", "add_tags": ["second-person", "singular", "indicative", "active"]},
        {"field": "pronouns", "match": "er", "add_tags": ["third-person", "singular", "indicative", "active"]},
        # NOTE(review): "sie" is listed twice in clean_prefixes (inside
        # "er/sie/es" and on its own), but only a singular rule exists here;
        # there is no rule producing third-person plural. Confirm intended.
        {"field": "pronouns", "match": "sie", "add_tags": ["third-person", "singular", "indicative", "active"]},
        {"field": "pronouns", "match": "es", "add_tags": ["third-person", "singular", "indicative", "active"]},
        {"field": "pronouns", "match": "wir", "add_tags": ["first-person", "plural", "indicative", "active"]},
        {"field": "pronouns", "match": "ihr", "add_tags": ["second-person", "plural", "indicative", "active"]},
    ],
    "properties": [
        {
            "name": "auxiliary",
            # Multivalue property: note the default is a list, not a scalar.
            "multivalue": True,
            "default": ["haben"],
            "rules": [
                # Explicit raw tags naming the auxiliary.
                {"value": "sein", "criteria": {"raw_tags": ["Hilfsverb sein"]}},
                {"value": "haben", "criteria": {"raw_tags": ["Hilfsverb haben"]}},
                # Forms tagged auxiliary+perfect whose text is the auxiliary itself.
                {"value": "sein", "criteria": {"form_regex": "^sein$", "tags": ["auxiliary", "perfect"]}},
                {"value": "haben", "criteria": {"form_regex": "^haben$", "tags": ["auxiliary", "perfect"]}},
            ],
        },
        {
            "name": "separability",
            "default": "inseparable",
            "rules": [
                {"value": "separable", "criteria": {"tags": ["separable"]}},
                {"value": "inseparable", "criteria": {"tags": ["inseparable"]}},
                # Heuristic: a participle-2 with "ge" in the middle but not at
                # the start (e.g. "angekommen").
                {"value": "separable", "criteria": {"tags": ["participle-2"], "form_regex": "^(?!ge).+ge.+$"}},
            ],
        },
    ],
    "schema": {
        "infinitive": {
            "type": "single",
            "criteria": {
                "tags": ["infinitive", "present"],
                "exclude_tags": ["extended", "passive", "reflexive", "zu"],
            },
        },
        "participle_perfect": {
            "type": "single",
            "criteria": {
                "tags": ["participle-2", "perfect"],
                "exclude_tags": ["active", "passive", "auxiliary"],
            },
        },
        "imperative": {
            "type": "list",
            "size": 2,
            "base_criteria": {"tags": ["imperative", "present", "active"]},
            "indices": [
                {"index": 0, "tags": ["singular", "second-person"]},
                {"index": 1, "tags": ["plural", "second-person"]},
            ],
        },
        "present": {
            "type": "list",
            "size": 6,
            "base_criteria": {"tags": ["indicative", "present", "active"], "exclude_tags": ["passive"]},
            "indices": _person_number_indices(),
        },
        "past": {
            "type": "list",
            "size": 6,
            "base_criteria": {"tags": ["indicative", "past", "active"], "exclude_tags": ["passive"]},
            "indices": _person_number_indices(),
        },
        "subjunctive_ii": {
            "type": "list",
            "size": 6,
            "base_criteria": {"tags": ["subjunctive-ii", "past", "active"], "exclude_tags": ["passive"]},
            "indices": _person_number_indices(),
        },
    },
}


FRENCH_VERB_CONFIG = {
    "skip_normalization_if_source": False,
    # Set to False to prevent crashes on idioms, rare words, and defective verbs.
    "validate_completeness": False,
    "clean_prefixes": [
        "qu'", "qu’", "que", "j'", "j’", "je", "tu", "il/elle/on",
        "il", "elle", "on", "nous", "vous", "ils/elles", "ils", "elles",
    ],
    "normalization_rules": [
        # Pronoun matches (regex against the form text).
        {"field": "form", "match": r"\bje\b", "match_mode": "regex", "add_tags": ["first-person", "singular"]},
        {"field": "form", "match": r"\bj[’']", "match_mode": "regex", "add_tags": ["first-person", "singular"]},
        {"field": "form", "match": r"\btu\b", "match_mode": "regex", "add_tags": ["second-person", "singular"]},
        {"field": "form", "match": r"\b(il|elle|on|il/elle/on)\b", "match_mode": "regex", "add_tags": ["third-person", "singular"]},
        {"field": "form", "match": r"\[il/ɛl/ɔ̃\]", "match_mode": "regex", "add_tags": ["third-person", "singular"]},
        {"field": "form", "match": r"\bnous\b", "match_mode": "regex", "add_tags": ["first-person", "plural"]},
        {"field": "form", "match": r"\bvous\b", "match_mode": "regex", "add_tags": ["second-person", "plural"]},
        {"field": "form", "match": r"\b(ils|elles|ils/elles)\b", "match_mode": "regex", "add_tags": ["third-person", "plural"]},
        {"field": "form", "match": r"\[il/ɛl\]", "match_mode": "regex", "add_tags": ["third-person", "plural"]},
        # Suffix heuristics for forms that carry no pronoun.
        {"field": "form", "match": r"ons$", "match_mode": "regex", "add_tags": ["first-person", "plural"]},
        {"field": "form", "match": r"ez$", "match_mode": "regex", "add_tags": ["second-person", "plural"]},
    ],
    "properties": [
        {
            "name": "auxiliary",
            "multivalue": True,
            "default": ["avoir"],
            "rules": [
                {"value": "être", "criteria": {"raw_tags": ["auxiliary être"]}},
                {"value": "avoir", "criteria": {"raw_tags": ["auxiliary avoir"]}},
                {"value": "être", "criteria": {"tags": ["auxiliary-être"]}},
                {"value": "avoir", "criteria": {"tags": ["auxiliary-avoir"]}},
            ],
        },
        {
            "name": "group",
            "default": "unknown",
            "rules": [
                # Explicit group labels from the raw tags.
                {"value": "1st-group", "criteria": {"raw_tags": ["1ᵉʳ groupe"]}},
                {"value": "2nd-group", "criteria": {"raw_tags": ["2ᵉ groupe"]}},
                {"value": "3rd-group", "criteria": {"raw_tags": ["3ᵉ groupe"]}},
                # Ending-based fallbacks.
                {"value": "1st-group", "criteria": {"form_regex": "er$"}},
                # The lookbehind keeps "-oir" verbs (voir, pouvoir, ...) out of
                # the 2nd group; a bare "ir$" would claim them before the
                # "(re|oir)$" rule below could apply.
                {"value": "2nd-group", "criteria": {"form_regex": "(?<!o)ir$"}},
                {"value": "3rd-group", "criteria": {"form_regex": "(re|oir)$"}},
            ],
        },
    ],
    "schema": {
        "infinitive": {
            "type": "single",
            "criteria": {"tags": ["infinitive", "present"]},
        },
        "participle_present": {
            "type": "single",
            "optional": True,
            "criteria": {"tags": ["participle", "present"]},
        },
        "participle_past": {
            "type": "single",
            "optional": True,
            "criteria": {"tags": ["participle", "past"], "exclude_tags": ["multiword-construction"]},
        },
        # All lists are marked optional to handle defective verbs (like
        # 'traire') and sparse data.
        "indicative_present": {
            "type": "list",
            "size": 6,
            "optional": True,
            "base_criteria": {"tags": ["indicative", "present"]},
            "indices": _person_number_indices(),
        },
        "indicative_imperfect": {
            "type": "list",
            "size": 6,
            "optional": True,
            "base_criteria": {"tags": ["indicative", "imperfect"]},
            "indices": _person_number_indices(),
        },
        "indicative_future": {
            "type": "list",
            "size": 6,
            "optional": True,
            "base_criteria": {"tags": ["indicative", "future"], "exclude_tags": ["perfect"]},
            "indices": _person_number_indices(),
        },
        "indicative_simple_past": {
            "type": "list",
            "size": 6,
            "optional": True,  # traire/clore do not have this tense
            "base_criteria": {
                "tags": ["indicative", "past"],
                "exclude_tags": ["multiword-construction", "imperfect", "perfect", "anterior"],
            },
            "indices": _person_number_indices(),
        },
        "subjunctive_present": {
            "type": "list",
            "size": 6,
            "optional": True,
            "base_criteria": {"tags": ["subjunctive", "present"]},
            "indices": _person_number_indices(),
        },
        "conditional_present": {
            "type": "list",
            "size": 6,
            "optional": True,
            "base_criteria": {"tags": ["conditional", "present"]},
            "indices": _person_number_indices(),
        },
        "imperative": {
            "type": "list",
            "size": 3,
            "optional": True,
            "base_criteria": {"tags": ["imperative", "present"]},
            "indices": [
                # Tag-based slots.
                {"index": 0, "tags": ["singular"]},
                {"index": 1, "tags": ["plural", "first-person"]},
                {"index": 2, "tags": ["plural", "second-person"]},
                # Ending-based slots; "[es]$" is listed after "ons$" because
                # "-ons" forms also end in "s".
                {"index": 1, "criteria": {"form_regex": r"ons$"}},
                {"index": 2, "criteria": {"form_regex": r"ez$"}},
                {"index": 0, "criteria": {"form_regex": r"[es]$"}},
            ],
        },
    },
}


# NOTE(review): the name does not say whether this is a legacy copy of the
# French config or a config for the Old French language - confirm with the
# caller. Visible differences from FRENCH_VERB_CONFIG: validate_completeness is
# True, there are no suffix heuristics, the six-person tables are not optional,
# and the imperative has no ending-based slots.
OLD_FRENCH_VERB_CONFIG = {
    "skip_normalization_if_source": False,
    "validate_completeness": True,

    # --- 1. Normalization ---
    "clean_prefixes": [
        "qu'", "qu’", "que", "j'", "j’", "je", "tu", "il/elle/on",
        "il", "elle", "on", "nous", "vous", "ils/elles", "ils", "elles",
    ],
    "normalization_rules": [
        {"field": "form", "match": r"\bje\b", "match_mode": "regex", "add_tags": ["first-person", "singular"]},
        {"field": "form", "match": r"\bj[’']", "match_mode": "regex", "add_tags": ["first-person", "singular"]},
        {"field": "form", "match": r"\btu\b", "match_mode": "regex", "add_tags": ["second-person", "singular"]},
        {"field": "form", "match": r"\b(il|elle|on|il/elle/on)\b", "match_mode": "regex", "add_tags": ["third-person", "singular"]},
        {"field": "form", "match": r"\[il/ɛl/ɔ̃\]", "match_mode": "regex", "add_tags": ["third-person", "singular"]},
        {"field": "form", "match": r"\bnous\b", "match_mode": "regex", "add_tags": ["first-person", "plural"]},
        {"field": "form", "match": r"\bvous\b", "match_mode": "regex", "add_tags": ["second-person", "plural"]},
        {"field": "form", "match": r"\b(ils|elles|ils/elles)\b", "match_mode": "regex", "add_tags": ["third-person", "plural"]},
        {"field": "form", "match": r"\[il/ɛl\]", "match_mode": "regex", "add_tags": ["third-person", "plural"]},
    ],

    # --- 2. Properties ---
    "properties": [
        {
            "name": "auxiliary",
            "multivalue": True,
            "default": ["avoir"],
            "rules": [
                {"value": "être", "criteria": {"raw_tags": ["auxiliary être"]}},
                {"value": "avoir", "criteria": {"raw_tags": ["auxiliary avoir"]}},
                {"value": "être", "criteria": {"tags": ["auxiliary-être"]}},
                {"value": "avoir", "criteria": {"tags": ["auxiliary-avoir"]}},
            ],
        },
        {
            "name": "group",
            "default": "unknown",
            "rules": [
                {"value": "1st-group", "criteria": {"raw_tags": ["1ᵉʳ groupe"]}},
                {"value": "2nd-group", "criteria": {"raw_tags": ["2ᵉ groupe"]}},
                {"value": "3rd-group", "criteria": {"raw_tags": ["3ᵉ groupe"]}},
                {"value": "1st-group", "criteria": {"form_regex": "er$"}},
                # The lookbehind keeps "-oir" verbs out of the 2nd group so the
                # "(re|oir)$" rule below can classify them.
                {"value": "2nd-group", "criteria": {"form_regex": "(?<!o)ir$"}},
                {"value": "3rd-group", "criteria": {"form_regex": "(re|oir)$"}},
            ],
        },
    ],

    # --- 3. Schema ---
    "schema": {
        "infinitive": {
            "type": "single",
            "criteria": {"tags": ["infinitive", "present"]},
        },
        "participle_present": {
            "type": "single",
            "optional": True,  # allows a missing participle
            "criteria": {"tags": ["participle", "present"]},
        },
        "participle_past": {
            "type": "single",
            "optional": True,  # often missing in defective verbs
            "criteria": {"tags": ["participle", "past"], "exclude_tags": ["multiword-construction"]},
        },
        "indicative_present": {
            "type": "list",
            "size": 6,
            "base_criteria": {"tags": ["indicative", "present"]},
            "indices": _person_number_indices(),
        },
        "indicative_imperfect": {
            "type": "list",
            "size": 6,
            "base_criteria": {"tags": ["indicative", "imperfect"]},
            "indices": _person_number_indices(),
        },
        "indicative_future": {
            "type": "list",
            "size": 6,
            "base_criteria": {"tags": ["indicative", "future"], "exclude_tags": ["perfect"]},
            "indices": _person_number_indices(),
        },
        "indicative_simple_past": {
            "type": "list",
            "size": 6,
            "base_criteria": {
                "tags": ["indicative", "past"],
                "exclude_tags": ["multiword-construction", "imperfect", "perfect", "anterior"],
            },
            "indices": _person_number_indices(),
        },
        "subjunctive_present": {
            "type": "list",
            "size": 6,
            "base_criteria": {"tags": ["subjunctive", "present"]},
            "indices": _person_number_indices(),
        },
        "conditional_present": {
            "type": "list",
            "size": 6,
            "base_criteria": {"tags": ["conditional", "present"]},
            "indices": _person_number_indices(),
        },
        "imperative": {
            "type": "list",
            "size": 3,
            "optional": True,  # often missing for phrases/defective verbs
            "base_criteria": {"tags": ["imperative", "present"]},
            "indices": [
                {"index": 0, "tags": ["singular"]},
                {"index": 1, "tags": ["plural", "first-person"]},
                {"index": 2, "tags": ["plural", "second-person"]},
            ],
        },
    },
}