Migrate to gitea
This commit is contained in:
264
tests/test_transform_wiktionary.py
Normal file
264
tests/test_transform_wiktionary.py
Normal file
@@ -0,0 +1,264 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test Suite for Wiktionary Transformer
|
||||
======================================
|
||||
Comprehensive tests for the transform_wiktionary.py module.
|
||||
"""
|
||||
|
||||
import json
|
||||
import sys
|
||||
import pathlib
|
||||
from typing import Dict, Any
|
||||
|
||||
# Add parent directory to path for imports
|
||||
sys.path.append(str(pathlib.Path(__file__).parent.parent))
|
||||
|
||||
from tests.test_framework import TestFramework, SchemaValidator, TestDataLoader
|
||||
from scripts.transform_wiktionary import WiktionaryTransformer
|
||||
|
||||
class TestWiktionaryTransformer(TestFramework):
    """Test suite for the WiktionaryTransformer class.

    Each ``test_*`` method passes a hand-built (or sample) Wiktionary entry
    to ``WiktionaryTransformer.transform_entry`` and reports its checks
    through the ``assert_*`` helpers inherited from ``TestFramework``.
    """

    def __init__(self):
        """Set up the suite with a transformer created with ``validate=True``."""
        super().__init__()
        self.transformer = WiktionaryTransformer(validate=True)

    def _run_test(self, test_method):
        """Run one test method and report an unexpected exception as a failure.

        Without this guard a single raising test would abort every remaining
        test and the summary would never be printed.

        Args:
            test_method: Bound, zero-argument test method of this suite.
        """
        import traceback

        try:
            test_method()
        except Exception as exc:  # runner boundary: report it and keep going
            traceback.print_exc()
            self.assert_false(
                True,
                f"{test_method.__name__} raised {type(exc).__name__}: {exc}",
            )

    def test_required_fields(self):
        """Test that required fields are properly handled."""
        print("Testing required fields...")

        # Entry carrying every required field must transform without error.
        valid_entry = {
            "word": "test",
            "lang_code": "en",
            "pos": "noun",
            "senses": [{"glosses": ["a test word"]}],
        }

        try:
            result = self.transformer.transform_entry(valid_entry)
        except Exception as e:
            self.assert_false(True, f"Should not raise exception: {e}")
        else:
            self.assert_true("word" in result, "Word field should be present")
            self.assert_true("pos" in result, "POS field should be present")
            self.assert_true("senses" in result, "Senses field should be present")

        # Entry without "senses" is expected to be rejected with ValueError.
        invalid_entry = {
            "word": "test",
            "lang_code": "en",
            "pos": "noun",
        }

        try:
            self.transformer.transform_entry(invalid_entry)
        except ValueError:
            self.assert_true(True, "Should raise ValueError for missing required field")
        else:
            self.assert_false(True, "Should raise exception for missing required field")

    def test_phonetics_extraction(self):
        """Test phonetics extraction and normalization."""
        print("Testing phonetics extraction...")

        entry_with_phonetics = {
            "word": "test",
            "lang_code": "en",
            "pos": "noun",
            "senses": [{"glosses": ["test"]}],
            "sounds": [
                {"ipa": "/tɛst/", "audio": "test.ogg"},
                {"ipa": "/ˈtɛst/", "homophone": "test"},
            ],
        }

        result = self.transformer.transform_entry(entry_with_phonetics)

        self.assert_true("phonetics" in result, "Phonetics should be extracted")
        # Fall back to empty containers so a missing key shows up as failed
        # checks below rather than as a KeyError.
        phonetics = result.get("phonetics", {})
        self.assert_true("ipa" in phonetics, "IPA should be present")
        self.assert_equal(len(phonetics.get("ipa", [])), 2, "Should have 2 IPA entries")
        self.assert_true("homophones" in phonetics, "Homophones should be present")

    def test_hyphenation_extraction(self):
        """Test hyphenation extraction."""
        print("Testing hyphenation extraction...")

        entry_with_hyphenation = {
            "word": "hyphenation",
            "lang_code": "en",
            "pos": "noun",
            "senses": [{"glosses": ["test"]}],
            "hyphenation": "hy-phen-a-tion",
        }

        result = self.transformer.transform_entry(entry_with_hyphenation)

        self.assert_true("hyphenation" in result, "Hyphenation should be extracted")
        self.assert_is_instance(result.get("hyphenation"), list, "Hyphenation should be a list")
        self.assert_equal(len(result.get("hyphenation", [])), 4, "Should have 4 parts")

    def test_grammatical_features_extraction(self):
        """Test grammatical features extraction."""
        print("Testing grammatical features extraction...")

        entry_with_tags = {
            "word": "test",
            "lang_code": "de",
            "pos": "noun",
            "senses": [{"glosses": ["test"]}],
            "tags": ["masculine", "singular"],
        }

        result = self.transformer.transform_entry(entry_with_tags)

        self.assert_true("grammatical_features" in result, "Grammatical features should be extracted")
        features = result.get("grammatical_features", {})
        self.assert_true("gender" in features, "Gender should be present")
        self.assert_equal(features.get("gender"), "masculine", "Gender should be masculine")
        self.assert_true("number" in features, "Number should be present")
        self.assert_equal(features.get("number"), "singular", "Number should be singular")

    def test_etymology_extraction(self):
        """Test etymology extraction."""
        print("Testing etymology extraction...")

        entry_with_etymology = {
            "word": "test",
            "lang_code": "en",
            "pos": "noun",
            "senses": [{"glosses": ["test"]}],
            "etymology_text": "From Latin testum",
            "etymology_number": 1,
        }

        result = self.transformer.transform_entry(entry_with_etymology)

        self.assert_true("etymology" in result, "Etymology should be extracted")
        etymology = result.get("etymology", {})
        self.assert_true("text" in etymology, "Etymology text should be present")
        self.assert_true("number" in etymology, "Etymology number should be present")

    def test_relations_extraction(self):
        """Test relations extraction."""
        print("Testing relations extraction...")

        entry_with_relations = {
            "word": "test",
            "lang_code": "en",
            "pos": "noun",
            "senses": [{"glosses": ["test"]}],
            "synonyms": [{"word": "exam"}],
            "antonyms": [{"word": "ignore"}],
            "related": ["examination", "quiz"],
        }

        result = self.transformer.transform_entry(entry_with_relations)

        self.assert_true("relations" in result, "Relations should be extracted")
        relations = result.get("relations", {})
        self.assert_true("synonyms" in relations, "Synonyms should be present")
        self.assert_true("antonyms" in relations, "Antonyms should be present")
        self.assert_true("related" in relations, "Related terms should be present")

    def test_schema_validation(self):
        """Test schema validation."""
        print("Testing schema validation...")

        # A complete entry must produce output accepted by the schema validator.
        valid_entry = {
            "word": "test",
            "lang_code": "en",
            "pos": "noun",
            "senses": [{"glosses": ["a test word"]}],
        }

        result = self.transformer.transform_entry(valid_entry)
        self.assert_true(SchemaValidator.validate_universal_schema(result), "Valid entry should pass schema validation")

        # An entry without "senses" is expected to be rejected with ValueError.
        invalid_entry = {
            "word": "test",
            "lang_code": "en",
            "pos": "noun",
        }

        try:
            self.transformer.transform_entry(invalid_entry)
        except ValueError:
            self.assert_true(True, "Should raise ValueError for invalid schema")
        else:
            self.assert_false(True, "Should raise exception for invalid schema")

    def test_real_world_data(self):
        """Test with real sample data."""
        print("Testing with real sample data...")

        # Only the load is guarded: a FileNotFoundError raised later by the
        # transformer or the validator must not be mistaken for missing
        # sample data.
        try:
            german_data = TestDataLoader.load_sample_data("laufen")
        except FileNotFoundError:
            self.assert_true(True, "Sample data not available, skipping real data test")
            return

        # Overwrite (not merely default) these fields so the entry always
        # carries a language code and at least one sense.
        german_data["lang_code"] = "de"
        german_data["senses"] = [{"glosses": ["to run", "to walk"]}]

        result = self.transformer.transform_entry(german_data)

        self.assert_true(SchemaValidator.validate_universal_schema(result), "Real data should pass schema validation")
        self.assert_equal(result.get("word"), "laufen", "Word should be preserved")
        self.assert_equal(result.get("pos"), "verb", "POS should be preserved")
        self.assert_true("forms" in result, "Forms should be preserved")

    def test_error_handling(self):
        """Test error handling."""
        print("Testing error handling...")

        # NOTE(review): json.loads raises before transform_entry is called,
        # so this check exercises the json module, not the transformer.
        invalid_json = "not valid json"
        try:
            self.transformer.transform_entry(json.loads(invalid_json))
        except json.JSONDecodeError:
            self.assert_true(True, "Should handle JSON decode errors gracefully")
        else:
            self.assert_false(True, "Should raise JSON decode error")

        # Entry lacking both "pos" and "senses" must be rejected with a
        # descriptive ValueError.
        incomplete_entry = {
            "word": "test",
            "lang_code": "en",
        }
        try:
            self.transformer.transform_entry(incomplete_entry)
        except ValueError as e:
            self.assert_true("Missing required field" in str(e), "Should provide descriptive error message")
        else:
            self.assert_false(True, "Should raise ValueError for missing required fields")

    def run_all_tests(self):
        """Run all tests in this suite.

        Every test is run through ``_run_test`` so that one raising test
        cannot prevent the others from running, and ``cleanup()`` is called
        even if the run is interrupted.

        Returns:
            The value returned by ``print_summary()``; the script entry point
            treats a truthy value as overall success.
        """
        print("\n" + "="*60)
        print("WIKTIONARY TRANSFORMER TEST SUITE")
        print("="*60)

        tests = (
            self.test_required_fields,
            self.test_phonetics_extraction,
            self.test_hyphenation_extraction,
            self.test_grammatical_features_extraction,
            self.test_etymology_extraction,
            self.test_relations_extraction,
            self.test_schema_validation,
            self.test_real_world_data,
            self.test_error_handling,
        )

        try:
            for test_method in tests:
                self._run_test(test_method)
            return self.print_summary()
        finally:
            self.cleanup()
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run the whole suite and mirror its outcome in the
    # process exit status (0 on success, 1 on failure).
    test_suite = TestWiktionaryTransformer()

    if not test_suite.run_all_tests():
        print("\n[FAILED] Some tests failed!")
        sys.exit(1)

    print("\n[SUCCESS] All tests passed!")
    sys.exit(0)
|
||||
Reference in New Issue
Block a user