Migrate to gitea

This commit is contained in:
jonasgaudian
2026-02-13 00:10:40 +01:00
commit 6d06a9e14e
38 changed files with 31427 additions and 0 deletions

View File

@@ -0,0 +1,264 @@
#!/usr/bin/env python3
"""
Test Suite for Wiktionary Transformer
======================================
Comprehensive tests for the transform_wiktionary.py module.
"""
import json
import sys
import pathlib
from typing import Dict, Any
# Add parent directory to path for imports
sys.path.append(str(pathlib.Path(__file__).parent.parent))
from tests.test_framework import TestFramework, SchemaValidator, TestDataLoader
from scripts.transform_wiktionary import WiktionaryTransformer
class TestWiktionaryTransformer(TestFramework):
    """Test suite for the WiktionaryTransformer class.

    Each ``test_*`` method passes a hand-built entry to
    ``WiktionaryTransformer.transform_entry`` and records the outcome through
    the assertion helpers inherited from ``TestFramework``.
    ``run_all_tests`` runs every test in a fixed order and prints a summary.
    """

    def __init__(self):
        """Initialise the framework and create the transformer under test."""
        super().__init__()
        self.transformer = WiktionaryTransformer(validate=True)

    def _expect_value_error(
        self,
        entry: Dict[str, Any],
        not_raised_msg: str,
        raised_msg: str,
    ) -> None:
        """Record a pass if transforming *entry* raises ValueError.

        Args:
            entry: Raw entry handed to ``transform_entry``.
            not_raised_msg: Message recorded as a failure when no
                ``ValueError`` is raised.
            raised_msg: Message recorded as a pass when ``ValueError`` is
                raised.
        """
        try:
            self.transformer.transform_entry(entry)
        except ValueError:
            self.assert_true(True, raised_msg)
        else:
            # Reaching here means the transformer accepted the bad entry.
            self.assert_false(True, not_raised_msg)

    def test_required_fields(self) -> None:
        """Test that required fields are properly handled."""
        print("Testing required fields...")

        # Entry carrying word, lang_code, pos and senses.
        valid_entry = {
            "word": "test",
            "lang_code": "en",
            "pos": "noun",
            "senses": [{"glosses": ["a test word"]}],
        }
        try:
            result = self.transformer.transform_entry(valid_entry)
            self.assert_true("word" in result, "Word field should be present")
            self.assert_true("pos" in result, "POS field should be present")
            self.assert_true("senses" in result, "Senses field should be present")
        except Exception as e:
            # Broad on purpose: any exception for a valid entry is recorded
            # as a test failure instead of aborting the run.
            self.assert_false(True, f"Should not raise exception: {e}")

        # Same entry without "senses".
        invalid_entry = {
            "word": "test",
            "lang_code": "en",
            "pos": "noun",
        }
        self._expect_value_error(
            invalid_entry,
            "Should raise exception for missing required field",
            "Should raise ValueError for missing required field",
        )

    def test_phonetics_extraction(self) -> None:
        """Test phonetics extraction and normalization."""
        print("Testing phonetics extraction...")
        entry_with_phonetics = {
            "word": "test",
            "lang_code": "en",
            "pos": "noun",
            "senses": [{"glosses": ["test"]}],
            "sounds": [
                {"ipa": "/tɛst/", "audio": "test.ogg"},
                {"ipa": "/ˈtɛst/", "homophone": "test"},
            ],
        }
        result = self.transformer.transform_entry(entry_with_phonetics)
        self.assert_true("phonetics" in result, "Phonetics should be extracted")
        phonetics = result["phonetics"]
        self.assert_true("ipa" in phonetics, "IPA should be present")
        self.assert_equal(len(phonetics["ipa"]), 2, "Should have 2 IPA entries")
        self.assert_true("homophones" in phonetics, "Homophones should be present")

    def test_hyphenation_extraction(self) -> None:
        """Test hyphenation extraction."""
        print("Testing hyphenation extraction...")
        entry_with_hyphenation = {
            "word": "hyphenation",
            "lang_code": "en",
            "pos": "noun",
            "senses": [{"glosses": ["test"]}],
            "hyphenation": "hy-phen-a-tion",
        }
        result = self.transformer.transform_entry(entry_with_hyphenation)
        self.assert_true("hyphenation" in result, "Hyphenation should be extracted")
        self.assert_is_instance(result["hyphenation"], list, "Hyphenation should be a list")
        self.assert_equal(len(result["hyphenation"]), 4, "Should have 4 parts")

    def test_grammatical_features_extraction(self) -> None:
        """Test grammatical features extraction."""
        print("Testing grammatical features extraction...")
        entry_with_tags = {
            "word": "test",
            "lang_code": "de",
            "pos": "noun",
            "senses": [{"glosses": ["test"]}],
            "tags": ["masculine", "singular"],
        }
        result = self.transformer.transform_entry(entry_with_tags)
        self.assert_true("grammatical_features" in result, "Grammatical features should be extracted")
        features = result["grammatical_features"]
        self.assert_true("gender" in features, "Gender should be present")
        self.assert_equal(features["gender"], "masculine", "Gender should be masculine")
        self.assert_true("number" in features, "Number should be present")
        self.assert_equal(features["number"], "singular", "Number should be singular")

    def test_etymology_extraction(self) -> None:
        """Test etymology extraction."""
        print("Testing etymology extraction...")
        entry_with_etymology = {
            "word": "test",
            "lang_code": "en",
            "pos": "noun",
            "senses": [{"glosses": ["test"]}],
            "etymology_text": "From Latin testum",
            "etymology_number": 1,
        }
        result = self.transformer.transform_entry(entry_with_etymology)
        self.assert_true("etymology" in result, "Etymology should be extracted")
        self.assert_true("text" in result["etymology"], "Etymology text should be present")
        self.assert_true("number" in result["etymology"], "Etymology number should be present")

    def test_relations_extraction(self) -> None:
        """Test relations extraction."""
        print("Testing relations extraction...")
        entry_with_relations = {
            "word": "test",
            "lang_code": "en",
            "pos": "noun",
            "senses": [{"glosses": ["test"]}],
            "synonyms": [{"word": "exam"}],
            "antonyms": [{"word": "ignore"}],
            "related": ["examination", "quiz"],
        }
        result = self.transformer.transform_entry(entry_with_relations)
        self.assert_true("relations" in result, "Relations should be extracted")
        relations = result["relations"]
        self.assert_true("synonyms" in relations, "Synonyms should be present")
        self.assert_true("antonyms" in relations, "Antonyms should be present")
        self.assert_true("related" in relations, "Related terms should be present")

    def test_schema_validation(self) -> None:
        """Test schema validation."""
        print("Testing schema validation...")

        # A complete entry must transform into something the schema accepts.
        valid_entry = {
            "word": "test",
            "lang_code": "en",
            "pos": "noun",
            "senses": [{"glosses": ["a test word"]}],
        }
        result = self.transformer.transform_entry(valid_entry)
        self.assert_true(
            SchemaValidator.validate_universal_schema(result),
            "Valid entry should pass schema validation",
        )

        # Entry without "senses" must be rejected with ValueError.
        invalid_entry = {
            "word": "test",
            "lang_code": "en",
            "pos": "noun",
        }
        self._expect_value_error(
            invalid_entry,
            "Should raise exception for invalid schema",
            "Should raise ValueError for invalid schema",
        )

    def test_real_world_data(self) -> None:
        """Test with real sample data.

        When the "laufen" sample cannot be loaded (FileNotFoundError) the
        test records a pass and returns without exercising the transformer.
        """
        print("Testing with real sample data...")

        # Only the load may be treated as "sample missing"; a
        # FileNotFoundError raised later must not be mistaken for a skip.
        try:
            german_data = TestDataLoader.load_sample_data("laufen")
        except FileNotFoundError:
            self.assert_true(True, "Sample data not available, skipping real data test")
            return

        # Force the fields this test relies on, overwriting any loaded values.
        german_data["lang_code"] = "de"
        german_data["senses"] = [{"glosses": ["to run", "to walk"]}]

        result = self.transformer.transform_entry(german_data)
        self.assert_true(
            SchemaValidator.validate_universal_schema(result),
            "Real data should pass schema validation",
        )
        self.assert_equal(result["word"], "laufen", "Word should be preserved")
        self.assert_equal(result["pos"], "verb", "POS should be preserved")
        self.assert_true("forms" in result, "Forms should be preserved")

    def test_error_handling(self) -> None:
        """Test error handling."""
        print("Testing error handling...")

        # Invalid JSON: json.loads raises before transform_entry is reached,
        # so this case only confirms that the decode error surfaces.
        try:
            invalid_json = "not valid json"
            self.transformer.transform_entry(json.loads(invalid_json))
            self.assert_false(True, "Should raise JSON decode error")
        except json.JSONDecodeError:
            self.assert_true(True, "Should handle JSON decode errors gracefully")

        # Entry without "pos" and "senses": the error text must name the problem.
        incomplete_entry = {
            "word": "test",
            "lang_code": "en",
        }
        try:
            self.transformer.transform_entry(incomplete_entry)
        except ValueError as e:
            self.assert_true(
                "Missing required field" in str(e),
                "Should provide descriptive error message",
            )
        else:
            self.assert_false(True, "Should raise ValueError for missing required fields")

    def run_all_tests(self):
        """Run all tests in this suite.

        Returns:
            The value returned by ``print_summary()``.

        ``cleanup()`` is called even when a test raises; the exception then
        propagates to the caller.
        """
        print("\n" + "=" * 60)
        print("WIKTIONARY TRANSFORMER TEST SUITE")
        print("=" * 60)

        tests = (
            self.test_required_fields,
            self.test_phonetics_extraction,
            self.test_hyphenation_extraction,
            self.test_grammatical_features_extraction,
            self.test_etymology_extraction,
            self.test_relations_extraction,
            self.test_schema_validation,
            self.test_real_world_data,
            self.test_error_handling,
        )
        try:
            for test in tests:
                test()
            success = self.print_summary()
        finally:
            # Release framework resources on every exit path.
            self.cleanup()
        return success
if __name__ == "__main__":
    # Run the suite and turn its result into a banner plus process exit code.
    all_passed = TestWiktionaryTransformer().run_all_tests()
    banner = "\n[SUCCESS] All tests passed!" if all_passed else "\n[FAILED] Some tests failed!"
    print(banner)
    sys.exit(0 if all_passed else 1)