#!/usr/bin/env python3
"""
Test Suite for Wiktionary Transformer
======================================

Comprehensive tests for the transform_wiktionary.py module.
"""

import json
import sys
import pathlib
from typing import Dict, Any

# Add parent directory to path for imports
sys.path.append(str(pathlib.Path(__file__).parent.parent))

from tests.test_framework import TestFramework, SchemaValidator, TestDataLoader
from scripts.transform_wiktionary import WiktionaryTransformer


class TestWiktionaryTransformer(TestFramework):
    """Test suite for WiktionaryTransformer class.

    Each ``test_*`` method passes an entry dict to
    ``WiktionaryTransformer.transform_entry`` and reports outcomes through
    the ``assert_*`` helpers inherited from ``TestFramework``.

    NOTE(review): the suite uses ``assert_false(True, msg)`` to flag a
    failure, which suggests the helpers record results rather than raise --
    confirm against ``tests.test_framework``.
    """

    def __init__(self):
        super().__init__()
        # One transformer, created with validate=True, is shared by all tests.
        self.transformer = WiktionaryTransformer(validate=True)

    def test_required_fields(self) -> None:
        """Test that required fields are properly handled.

        Checks that an entry with word/lang_code/pos/senses transforms into a
        result containing "word", "pos" and "senses", and that an entry
        without "senses" makes ``transform_entry`` raise ``ValueError``.
        """
        print("Testing required fields...")

        # Test with all required fields
        valid_entry = {
            "word": "test",
            "lang_code": "en",
            "pos": "noun",
            "senses": [{"glosses": ["a test word"]}],
        }

        try:
            result = self.transformer.transform_entry(valid_entry)
        except Exception as e:
            self.assert_false(True, f"Should not raise exception: {e}")
        else:
            self.assert_true("word" in result, "Word field should be present")
            self.assert_true("pos" in result, "POS field should be present")
            self.assert_true("senses" in result, "Senses field should be present")

        # Test with missing required field
        invalid_entry = {
            "word": "test",
            "lang_code": "en",
            "pos": "noun",
            # Missing "senses"
        }

        try:
            self.transformer.transform_entry(invalid_entry)
        except ValueError:
            self.assert_true(True, "Should raise ValueError for missing required field")
        else:
            # Reaching here means no exception was raised at all.
            self.assert_false(True, "Should raise exception for missing required field")

    def test_phonetics_extraction(self) -> None:
        """Test phonetics extraction and normalization.

        Supplies two "sounds" items (both with "ipa", one with "homophone")
        and expects a "phonetics" mapping holding two IPA entries and a
        "homophones" key.
        """
        print("Testing phonetics extraction...")

        entry_with_phonetics = {
            "word": "test",
            "lang_code": "en",
            "pos": "noun",
            "senses": [{"glosses": ["test"]}],
            "sounds": [
                {"ipa": "/tɛst/", "audio": "test.ogg"},
                {"ipa": "/ˈtɛst/", "homophone": "test"},
            ],
        }

        result = self.transformer.transform_entry(entry_with_phonetics)
        self.assert_true("phonetics" in result, "Phonetics should be extracted")
        self.assert_true("ipa" in result["phonetics"], "IPA should be present")
        self.assert_equal(len(result["phonetics"]["ipa"]), 2, "Should have 2 IPA entries")
        self.assert_true("homophones" in result["phonetics"], "Homophones should be present")

    def test_hyphenation_extraction(self) -> None:
        """Test hyphenation extraction.

        Supplies the string "hy-phen-a-tion" and expects the result's
        "hyphenation" value to be a list of four parts.
        """
        print("Testing hyphenation extraction...")

        entry_with_hyphenation = {
            "word": "hyphenation",
            "lang_code": "en",
            "pos": "noun",
            "senses": [{"glosses": ["test"]}],
            "hyphenation": "hy-phen-a-tion",
        }

        result = self.transformer.transform_entry(entry_with_hyphenation)
        self.assert_true("hyphenation" in result, "Hyphenation should be extracted")
        self.assert_is_instance(result["hyphenation"], list, "Hyphenation should be a list")
        self.assert_equal(len(result["hyphenation"]), 4, "Should have 4 parts")

    def test_grammatical_features_extraction(self) -> None:
        """Test grammatical features extraction.

        Supplies the tags "masculine" and "singular" and expects them under
        ``grammatical_features`` as "gender" and "number" respectively.
        """
        print("Testing grammatical features extraction...")

        entry_with_tags = {
            "word": "test",
            "lang_code": "de",
            "pos": "noun",
            "senses": [{"glosses": ["test"]}],
            "tags": ["masculine", "singular"],
        }

        result = self.transformer.transform_entry(entry_with_tags)
        self.assert_true("grammatical_features" in result, "Grammatical features should be extracted")
        self.assert_true("gender" in result["grammatical_features"], "Gender should be present")
        self.assert_equal(result["grammatical_features"]["gender"], "masculine", "Gender should be masculine")
        self.assert_true("number" in result["grammatical_features"], "Number should be present")
        self.assert_equal(result["grammatical_features"]["number"], "singular", "Number should be singular")

    def test_etymology_extraction(self) -> None:
        """Test etymology extraction.

        Supplies "etymology_text" and "etymology_number" and expects an
        "etymology" mapping with "text" and "number" keys.
        """
        print("Testing etymology extraction...")

        entry_with_etymology = {
            "word": "test",
            "lang_code": "en",
            "pos": "noun",
            "senses": [{"glosses": ["test"]}],
            "etymology_text": "From Latin testum",
            "etymology_number": 1,
        }

        result = self.transformer.transform_entry(entry_with_etymology)
        self.assert_true("etymology" in result, "Etymology should be extracted")
        self.assert_true("text" in result["etymology"], "Etymology text should be present")
        self.assert_true("number" in result["etymology"], "Etymology number should be present")

    def test_relations_extraction(self) -> None:
        """Test relations extraction.

        Supplies "synonyms", "antonyms" and "related" and expects all three
        keys to appear under the result's "relations" mapping.
        """
        print("Testing relations extraction...")

        entry_with_relations = {
            "word": "test",
            "lang_code": "en",
            "pos": "noun",
            "senses": [{"glosses": ["test"]}],
            "synonyms": [{"word": "exam"}],
            "antonyms": [{"word": "ignore"}],
            "related": ["examination", "quiz"],
        }

        result = self.transformer.transform_entry(entry_with_relations)
        self.assert_true("relations" in result, "Relations should be extracted")
        self.assert_true("synonyms" in result["relations"], "Synonyms should be present")
        self.assert_true("antonyms" in result["relations"], "Antonyms should be present")
        self.assert_true("related" in result["relations"], "Related terms should be present")

    def test_schema_validation(self) -> None:
        """Test schema validation.

        Checks that a minimal valid entry's result is accepted by
        ``SchemaValidator.validate_universal_schema``, and that an entry
        without "senses" makes ``transform_entry`` raise ``ValueError``.
        """
        print("Testing schema validation...")

        # Test valid entry
        valid_entry = {
            "word": "test",
            "lang_code": "en",
            "pos": "noun",
            "senses": [{"glosses": ["a test word"]}],
        }

        result = self.transformer.transform_entry(valid_entry)
        self.assert_true(
            SchemaValidator.validate_universal_schema(result),
            "Valid entry should pass schema validation",
        )

        # Test entry with missing required field
        invalid_entry = {
            "word": "test",
            "lang_code": "en",
            "pos": "noun",
            # Missing senses
        }

        try:
            self.transformer.transform_entry(invalid_entry)
        except ValueError:
            self.assert_true(True, "Should raise ValueError for invalid schema")
        else:
            self.assert_false(True, "Should raise exception for invalid schema")

    def test_real_world_data(self) -> None:
        """Test with real sample data.

        Loads the "laufen" sample through ``TestDataLoader``; when the sample
        file is missing the test is reported as skipped (recorded as a pass)
        and returns early.
        """
        print("Testing with real sample data...")

        # Only the fixture load is guarded: a FileNotFoundError raised later
        # by the transformer must not be mistaken for a missing sample.
        try:
            # Load German sample data
            german_data = TestDataLoader.load_sample_data("laufen")
        except FileNotFoundError:
            self.assert_true(True, "Sample data not available, skipping real data test")
            return

        # Set the required fields (unconditionally overwrites any values
        # already present in the sample).
        german_data["lang_code"] = "de"
        german_data["senses"] = [{"glosses": ["to run", "to walk"]}]

        result = self.transformer.transform_entry(german_data)
        self.assert_true(
            SchemaValidator.validate_universal_schema(result),
            "Real data should pass schema validation",
        )
        self.assert_equal(result["word"], "laufen", "Word should be preserved")
        self.assert_equal(result["pos"], "verb", "POS should be preserved")
        self.assert_true("forms" in result, "Forms should be preserved")

    def test_error_handling(self) -> None:
        """Test error handling.

        Checks that malformed JSON text raises ``json.JSONDecodeError`` and
        that an entry lacking "pos" and "senses" raises a ``ValueError``
        whose message contains "Missing required field".
        """
        print("Testing error handling...")

        # Test with invalid JSON
        # NOTE: json.loads raises before transform_entry is ever invoked, so
        # this case exercises the JSON parser, not the transformer.
        invalid_json = "not valid json"
        try:
            self.transformer.transform_entry(json.loads(invalid_json))
        except json.JSONDecodeError:
            self.assert_true(True, "Should handle JSON decode errors gracefully")
        else:
            self.assert_false(True, "Should raise JSON decode error")

        # Test with missing required field
        incomplete_entry = {
            "word": "test",
            "lang_code": "en",
            # Missing pos and senses
        }
        try:
            self.transformer.transform_entry(incomplete_entry)
        except ValueError as e:
            self.assert_true(
                "Missing required field" in str(e),
                "Should provide descriptive error message",
            )
        else:
            self.assert_false(True, "Should raise ValueError for missing required fields")

    def _run_isolated(self, test) -> None:
        """Run one bound test method, turning an escaped exception into a failure.

        Without this, a single unexpected exception (for example a KeyError
        from indexing a key a preceding assertion just reported missing)
        would abort every test scheduled after it.
        """
        try:
            test()
        except Exception as exc:
            # Same failure-recording idiom the tests themselves use.
            self.assert_false(
                True,
                f"{test.__name__} raised unexpected {type(exc).__name__}: {exc}",
            )

    def run_all_tests(self):
        """Run all tests in this suite.

        Returns:
            Whatever ``self.print_summary()`` returns; the ``__main__`` guard
            treats a truthy value as overall success.
        """
        print("\n" + "="*60)
        print("WIKTIONARY TRANSFORMER TEST SUITE")
        print("="*60)

        tests = (
            self.test_required_fields,
            self.test_phonetics_extraction,
            self.test_hyphenation_extraction,
            self.test_grammatical_features_extraction,
            self.test_etymology_extraction,
            self.test_relations_extraction,
            self.test_schema_validation,
            self.test_real_world_data,
            self.test_error_handling,
        )

        try:
            for test in tests:
                self._run_isolated(test)
            success = self.print_summary()
        finally:
            # Always run the framework's cleanup hook, even if something above
            # raised.
            self.cleanup()
        return success


if __name__ == "__main__":
    test_suite = TestWiktionaryTransformer()
    success = test_suite.run_all_tests()

    if success:
        print("\n[SUCCESS] All tests passed!")
        sys.exit(0)
    else:
        print("\n[FAILED] Some tests failed!")
        sys.exit(1)