commit 7c17f0f0cffeb18daa056b3cebf845611d223795 Author: jonasgaudian <43753916+jonasgaudian@users.noreply.github.com> Date: Sat Feb 14 18:12:28 2026 +0100 migrate to gitea diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..faeec5a --- /dev/null +++ b/.gitignore @@ -0,0 +1,72 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Virtual environments +venv/ +ENV/ +env/ +.venv + +# IDEs +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# OS generated files +.DS_Store +.DS_Store? 
+._* +.Spotlight-V100 +.Trashes +ehthumbs.db +Thumbs.db diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..74e663a --- /dev/null +++ b/__init__.py @@ -0,0 +1,22 @@ +""" +Android XML Translation Tool +A tool for translating Android XML values files using local LLM +""" + +from .config import Config +from .models import TranslationItem, TranslationBatch +from .llm_client import LLMClient +from .xml_processor import XMLProcessor +from .ui import UI +from .translation_tool import TranslationTool + +__version__ = "1.0.0" +__all__ = [ + 'Config', + 'TranslationItem', + 'TranslationBatch', + 'LLMClient', + 'XMLProcessor', + 'UI', + 'TranslationTool' +] diff --git a/config.py b/config.py new file mode 100644 index 0000000..68b37c8 --- /dev/null +++ b/config.py @@ -0,0 +1,73 @@ +""" +Configuration management for Android XML Translation Tool +""" + +import os +import sys +import yaml +from pathlib import Path +from typing import Dict, List, Optional, Any + + +class Config: + """Configuration loader and validator""" + + def __init__(self, config_path: str = "config.yaml"): + self.config_path = config_path + self.data = self._load_config() + if self.data is not None: # Only validate if data was loaded + self._validate_config() + + def _load_config(self) -> Dict[str, Any]: + """Load configuration from YAML file""" + try: + with open(self.config_path, 'r', encoding='utf-8') as f: + return yaml.safe_load(f) + except FileNotFoundError: + print(f"Configuration file {self.config_path} not found!") + sys.exit(1) + except yaml.YAMLError as e: + print(f"Error parsing configuration file: {e}") + sys.exit(1) + + def _validate_config(self): + """Validate required configuration fields""" + if self.data is None: + print(f"Configuration file {self.config_path} is empty or invalid!") + sys.exit(1) + + required_sections = ['llm', 'android', 'translation'] + for section in required_sections: + if section not in self.data: + print(f"Missing required section: {section}") + 
sys.exit(1) + + @property + def llm_config(self) -> Dict[str, Any]: + return self.data['llm'] + + @property + def android_config(self) -> Dict[str, Any]: + return self.data['android'] + + @property + def translation_config(self) -> Dict[str, Any]: + return self.data['translation'] + + @property + def output_config(self) -> Dict[str, Any]: + return self.data.get('output', {}) + + @property + def examples_config(self) -> Dict[str, Any]: + return self.data.get('examples', {}) + + def has_examples_config(self) -> bool: + """Check if examples configuration is present and valid""" + examples = self.data.get('examples', {}) + return bool( + examples and + examples.get('input_folder') and + examples.get('base_folder') and + examples.get('target_folders') + ) diff --git a/config.yaml b/config.yaml new file mode 100644 index 0000000..87916ca --- /dev/null +++ b/config.yaml @@ -0,0 +1,62 @@ +# Android XML Translation Tool Configuration + +# LLM Configuration (LM Studio) +llm: + #base_url: "http://169.254.123.98:45612/v1" # LM Studio server URL + base_url: "https://api.deepseek.com/v1" # DeepSeek API base URL (should end in /v1) + api_key: "sk-2f2e6ad638e849ee827feabc0fde0dda" # Your DeepSeek API key + model: "deepseek-chat" # DeepSeek model name + timeout: 30 # Request timeout in seconds + max_retries: 3 # Maximum retry attempts + +# Android Project Configuration +android: + input_folder: "C:/dev/Polly/app/src/main" # Path to Android resources folder + base_values_folder: "res/values" # Base language folder (usually English) + + # Target language folders to translate to + target_folders: + - "res/values-de-rDE" # German (Germany) + - "res/values-pt-rBR" # Portuguese (Brazil) + #- "res/values-es" # Spanish + #- "res/values-fr" # French + + # XML files to translate (relative to values folders) + files_to_translate: + - "strings.xml" + #- "arrays.xml" + #- "intro_strings.xml" + +# Examples Folder Configuration (for .md file translation) +examples: + input_folder: 
"C:/dev/Polly/app/src/main" # Path to examples folder + base_folder: "assets/hints" # Base folder with .md files (English) + + # Target language folders to translate to (relative to examples/input_folder) + target_folders: + - "assets/hints-de-rDE" # German (Germany) + - "assets/hints-pt-rBR" # Portuguese (Brazil) + + # File extension to translate + file_extension: ".md" + +# Translation Configuration +translation: + batch_size: 3 # Number of strings to translate in one batch + interactive_approval: true # Ask for approval before adding translations + + # Language-specific translation instructions + language_instructions: + "values-de-rDE": "Translate to German (Germany). Use informal 'Du' and do NOT use 'Sie' for 'you'. Keep technical terms in English if commonly used." + "values-pt-rBR": "Translate to Portuguese (Brazil). Use informal Brazilian Portuguese. Try to keep the translation natural and short. Use idioma to translate the word language." + "values-es": "Translate to Spanish. Use informal 'tú' form. Adapt cultural references for Spanish-speaking audiences." + "values-fr": "Translate to French. Use informal 'tu' form. Keep technical terms in English if commonly used in French tech context." + "assets/hints-de-rDE": "Translate to German (Germany). Use informal 'du' form. This is for hint/help content." + "assets/hints-pt-rBR": "Translate to Portuguese (Brazil). Use informal Brazilian Portuguese. This is for hint/help content." 
+ +# Output Configuration +output: + create_backups: false # Create backup files before modifying + backup_suffix: ".backup" # Suffix for backup files + preserve_formatting: true # Preserve original XML formatting and comments + log_level: "INFO" # DEBUG, INFO, WARNING, ERROR diff --git a/debug_llm.py b/debug_llm.py new file mode 100644 index 0000000..b9c88fc --- /dev/null +++ b/debug_llm.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python3 +""" +Debug script to test LLM connection +""" + +import openai +from config import Config + +def test_llm_connection(): + """Test basic LLM connection""" + config = Config() + + client = openai.OpenAI( + base_url=config.llm_config['base_url'], + api_key=config.llm_config.get('api_key', 'not-needed') + ) + + try: + print(f"Testing connection to {config.llm_config['base_url']}") + print(f"Using model: {config.llm_config['model']}") + + response = client.chat.completions.create( + model=config.llm_config['model'], + messages=[ + {"role": "system", "content": "You are a helpful translator."}, + {"role": "user", "content": "Translate 'Hello' to German. 
Respond with just the translation."} + ], + temperature=0.3, + timeout=30 + ) + + print(f"Response type: {type(response)}") + print(f"Response: {response}") + + if hasattr(response, 'choices') and response.choices: + choice = response.choices[0] + print(f"Choice type: {type(choice)}") + print(f"Choice: {choice}") + + if hasattr(choice, 'message'): + message = choice.message + print(f"Message type: {type(message)}") + print(f"Message: {message}") + + if hasattr(message, 'content'): + content = message.content + print(f"Content type: {type(content)}") + print(f"Content: {content}") + else: + print("No content attribute in message") + else: + print("No message attribute in choice") + else: + print("No choices in response") + + except Exception as e: + print(f"Error: {e}") + import traceback + traceback.print_exc() + +if __name__ == "__main__": + test_llm_connection() diff --git a/debug_translation.py b/debug_translation.py new file mode 100644 index 0000000..d3f5a20 --- /dev/null +++ b/debug_translation.py @@ -0,0 +1,79 @@ +#!/usr/bin/env python3 +""" +Debug script to test translation with actual prompt +""" + +import openai +from config import Config +from models import TranslationBatch, TranslationItem + +def test_translation(): + """Test translation with actual prompt""" + config = Config() + + client = openai.OpenAI( + base_url=config.llm_config['base_url'], + api_key=config.llm_config.get('api_key', 'not-needed') + ) + + # Create a test batch + items = [ + TranslationItem(name="app_name", value="Test App"), + TranslationItem(name="welcome", value="Welcome to our app!") + ] + + batch = TranslationBatch( + items=items, + target_language="values-de-rDE", + target_file="strings.xml" + ) + + # Build the actual prompt + instruction = "Translate to German (Germany). Use informal 'du' form for user-facing text. Keep technical terms in English if commonly used." 
+ + prompt = f"Translate the following Android strings to {batch.target_language}.\n\n" + prompt += f"Instructions: {instruction}\n\n" + prompt += "Format your response as a JSON array with the same order as input:\n" + prompt += "[\"translation1\", \"translation2\", ...]\n\n" + prompt += "Strings to translate:\n" + + for i, item in enumerate(batch.items): + prompt += f"{i + 1}. {item.value}\n" + + print("=== PROMPT ===") + print(prompt) + print("=== END PROMPT ===\n") + + try: + response = client.chat.completions.create( + model=config.llm_config['model'], + messages=[ + {"role": "system", "content": instruction}, + {"role": "user", "content": prompt} + ], + temperature=0.3, + timeout=30 + ) + + print(f"Response type: {type(response)}") + + if hasattr(response, 'choices') and response.choices: + choice = response.choices[0] + if hasattr(choice, 'message'): + content = choice.message.content + print(f"Content type: {type(content)}") + print(f"Content: {repr(content)}") + + if content is None: + print("CONTENT IS NONE!") + else: + print(f"Content length: {len(content)}") + print(f"Content stripped: '{content.strip()}'") + + except Exception as e: + print(f"Error: {e}") + import traceback + traceback.print_exc() + +if __name__ == "__main__": + test_translation() diff --git a/llm_client.py b/llm_client.py new file mode 100644 index 0000000..f71f7d4 --- /dev/null +++ b/llm_client.py @@ -0,0 +1,144 @@ +""" +OpenAI-compatible LLM client for LM Studio integration +""" + +import json +import openai +from typing import List +from models import TranslationBatch +from config import Config + + +class LLMClient: + """OpenAI-compatible LLM client for LM Studio""" + + def __init__(self, config: Config): + self.config = config + self.client = openai.OpenAI( + base_url=config.llm_config['base_url'], + api_key=config.llm_config.get('api_key', 'not-needed') + ) + self.model = config.llm_config['model'] + self.timeout = config.llm_config.get('timeout', 30) + self.max_retries = 
config.llm_config.get('max_retries', 3) + + def translate_batch(self, batch: TranslationBatch, instruction: str) -> List[str]: + """Translate a batch of strings""" + # Prepare the prompt + prompt = self._build_translation_prompt(batch, instruction) + + # Build system message with strong emphasis on instructions + system_message = ( + "You are a professional translator. " + "You must follow the translation instructions EXACTLY as provided. " + "Pay special attention to formality level, tone, and any specific requirements mentioned. " + "Your response must be a valid JSON array." + ) + + for attempt in range(self.max_retries): + try: + response = self.client.chat.completions.create( + model=self.model, + messages=[ + {"role": "system", "content": system_message}, + {"role": "user", "content": prompt} + ], + temperature=0.3, + timeout=self.timeout + ) + + # Parse the response + if not response or not response.choices: + print(f"Empty response from LLM") + continue + + choice = response.choices[0] + if not hasattr(choice, 'message') or not choice.message: + print(f"Invalid response structure: {response}") + continue + + content = choice.message.content + if content is None: + print(f"Empty content in response: {response}") + continue + + translations = self._parse_translation_response(content) + return translations + + except Exception as e: + print(f"Translation attempt {attempt + 1} failed: {e}") + if attempt == self.max_retries - 1: + print(f"All translation attempts failed for batch") + return [] + + return [] + + def _build_translation_prompt(self, batch: TranslationBatch, instruction: str) -> str: + """Build translation prompt for the batch""" + prompt = "=== TRANSLATION TASK ===\n\n" + prompt += f"CRITICAL INSTRUCTIONS (you MUST follow these):\n{instruction}\n\n" + prompt += "IMPORTANT: Pay special attention to the tone and formality level specified above. 
" + prompt += "This is crucial for the quality of the translation.\n\n" + + # Check if we have any string arrays in the batch + has_arrays = any(item.item_type == 'string-array' for item in batch.items) + + if has_arrays: + prompt += "Note: Some items are string arrays (multiple items separated by ' | '). " + prompt += "For string arrays, keep the same number of items and use ' | ' as separator.\n\n" + + prompt += "=== CONTENT TO TRANSLATE ===\n" + prompt += f"Target language: {batch.target_language}\n\n" + + for i, item in enumerate(batch.items): + if item.item_type == 'string-array': + prompt += f"{i + 1}. [ARRAY] {item.name}: {item.value}\n" + else: + prompt += f"{i + 1}. {item.value}\n" + + prompt += "\n=== RESPONSE FORMAT ===\n" + prompt += "Return ONLY a JSON array with the translations in the same order:\n" + prompt += '["translation1", "translation2", ...]\n\n' + prompt += "=== REMINDER ===\n" + prompt += f"Remember to follow these instructions: {instruction}\n" + + return prompt + + def _parse_translation_response(self, response: str) -> List[str]: + """Parse translation response from LLM""" + if not response or not response.strip(): + print(f"Empty response to parse: {response}") + return [] + + try: + # Try to parse as JSON array + if response.strip().startswith('['): + return json.loads(response.strip()) + + # Try to extract JSON from response + start = response.find('[') + end = response.rfind(']') + 1 + if start != -1 and end != 0: + return json.loads(response[start:end]) + + # Fallback: split by lines and clean up + lines = response.strip().split('\n') + translations = [] + for line in lines: + line = line.strip() + # Remove numbering and quotes + if line and not line.startswith('[') and not line.startswith(']'): + # Remove numbers, quotes, and extra formatting + clean_line = line + if '. ' in clean_line: + clean_line = clean_line.split('. 
', 1)[1] + clean_line = clean_line.strip('"\'') + if clean_line: + translations.append(clean_line) + + return translations + + except Exception as e: + print(f"Error parsing translation response: {e}") + print(f"Raw response: {response}") + return [] diff --git a/main.py b/main.py new file mode 100644 index 0000000..1c86568 --- /dev/null +++ b/main.py @@ -0,0 +1,33 @@ +#!/usr/bin/env python3 +""" +Android XML Translation Tool +Translates Android values XML files using local LLM (LM Studio compatible) +""" + +import sys +from translation_tool import TranslationTool + + +def main(): + """Main entry point""" + if len(sys.argv) > 1: + config_path = sys.argv[1] + else: + config_path = "config.yaml" + + try: + tool = TranslationTool(config_path) + tool.run() + except KeyboardInterrupt: + from ui import UI + ui = UI() + ui.show_interrupted() + except Exception as e: + from ui import UI + ui = UI() + ui.show_error(str(e)) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/md_processor.py b/md_processor.py new file mode 100644 index 0000000..fd32561 --- /dev/null +++ b/md_processor.py @@ -0,0 +1,136 @@ +""" +Markdown file processor for translation +""" + +import os +from typing import Dict, List, Tuple +from dataclasses import dataclass, field + + +@dataclass +class MDTranslationItem: + """Represents a single markdown file translation item""" + filename: str + content: str + relative_path: str = "" # For subdirectories if needed + + +class MDProcessor: + """Markdown file processor for translation""" + + def __init__(self, file_extension: str = ".md"): + self.file_extension = file_extension + + def get_md_files(self, folder_path: str) -> List[str]: + """ + Get all .md files in the specified folder (non-recursive). + Returns a sorted list of filenames. 
+ """ + if not os.path.exists(folder_path): + return [] + + md_files = [] + for item in os.listdir(folder_path): + item_path = os.path.join(folder_path, item) + if os.path.isfile(item_path) and item.endswith(self.file_extension): + md_files.append(item) + + return sorted(md_files) + + def load_md_file(self, file_path: str) -> str: + """Load and return the content of a markdown file""" + try: + with open(file_path, 'r', encoding='utf-8') as f: + return f.read() + except Exception as e: + print(f"Error loading markdown file {file_path}: {e}") + return "" + + def save_md_file(self, content: str, file_path: str): + """Save content to a markdown file""" + try: + # Create parent directory if it doesn't exist + parent_dir = os.path.dirname(file_path) + if parent_dir and not os.path.exists(parent_dir): + os.makedirs(parent_dir, exist_ok=True) + + with open(file_path, 'w', encoding='utf-8') as f: + f.write(content) + except Exception as e: + print(f"Error saving markdown file {file_path}: {e}") + + def extract_content(self, folder_path: str) -> Dict[str, MDTranslationItem]: + """ + Extract content from all .md files in the folder. + Returns a dict mapping filename to MDTranslationItem. + """ + items = {} + md_files = self.get_md_files(folder_path) + + for filename in md_files: + file_path = os.path.join(folder_path, filename) + content = self.load_md_file(file_path) + if content: # Only add if we successfully loaded content + items[filename] = MDTranslationItem( + filename=filename, + content=content + ) + + return items + + def check_asset_counts(self, base_folder: str, target_folders: List[str]) -> Tuple[bool, List[str]]: + """ + Check that all target folders have the same number of .md assets as the base folder. 
+ + Args: + base_folder: Path to the base folder (source of truth) + target_folders: List of paths to target folders + + Returns: + Tuple of (is_valid, list of error messages) + """ + errors = [] + + # Get base folder count + base_files = self.get_md_files(base_folder) + base_count = len(base_files) + + if base_count == 0: + errors.append(f"Base folder {base_folder} contains no .md files") + return False, errors + + # Check each target folder + for target_folder in target_folders: + target_files = self.get_md_files(target_folder) + target_count = len(target_files) + + if target_count != base_count: + # Find missing/extra files + base_set = set(base_files) + target_set = set(target_files) + + missing_in_target = base_set - target_set + extra_in_target = target_set - base_set + + error_msg = f"Asset count mismatch in {target_folder}: expected {base_count}, found {target_count}" + + if missing_in_target: + error_msg += f"\n Missing files: {', '.join(sorted(missing_in_target))}" + if extra_in_target: + error_msg += f"\n Extra files: {', '.join(sorted(extra_in_target))}" + + errors.append(error_msg) + + return len(errors) == 0, errors + + def find_missing_files(self, base_folder: str, target_folder: str) -> List[str]: + """ + Find .md files that exist in base folder but not in target folder. 
+ + Returns: + List of filenames that need to be translated + """ + base_files = set(self.get_md_files(base_folder)) + target_files = set(self.get_md_files(target_folder)) + + return sorted(list(base_files - target_files)) diff --git a/models.py b/models.py new file mode 100644 index 0000000..f874f71 --- /dev/null +++ b/models.py @@ -0,0 +1,24 @@ +""" +Data models for Android XML Translation Tool +""" + +from dataclasses import dataclass, field +from typing import Optional, List + + +@dataclass +class TranslationItem: + """Represents a single translation item (string or string-array)""" + name: str + value: str + comment: Optional[str] = None + item_type: str = "string" # "string" or "string-array" + items: List[str] = field(default_factory=list) # For string-array items + + +@dataclass +class TranslationBatch: + """Represents a batch of translations to process""" + items: list[TranslationItem] + target_language: str + target_file: str diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..875a3b5 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,6 @@ +requests>=2.31.0 +pyyaml>=6.0.1 +lxml>=4.9.3 +openai>=1.3.0 +colorama>=0.4.6 +rich>=13.6.0 diff --git a/run_tests.py b/run_tests.py new file mode 100644 index 0000000..4fb6b1f --- /dev/null +++ b/run_tests.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python3 +""" +Test runner for Android XML Translation Tool +""" + +import unittest +import sys +import os + +# Add the project root to Python path +project_root = os.path.dirname(os.path.abspath(__file__)) +sys.path.insert(0, project_root) + +def run_tests(): + """Discover and run all tests""" + # Discover tests in the tests directory + loader = unittest.TestLoader() + start_dir = os.path.join(project_root, 'tests') + suite = loader.discover(start_dir, pattern='test_*.py') + + # Run the tests + runner = unittest.TextTestRunner(verbosity=2) + result = runner.run(suite) + + # Return exit code based on test results + return 0 if result.wasSuccessful() 
else 1 + +if __name__ == '__main__': + sys.exit(run_tests()) diff --git a/test_xml_format.py b/test_xml_format.py new file mode 100644 index 0000000..7a2aa46 --- /dev/null +++ b/test_xml_format.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python3 +""" +Test XML formatting +""" + +import tempfile +import os +from lxml import etree +from xml_processor import XMLProcessor +from config import Config + +def test_xml_formatting(): + """Test that XML is properly formatted""" + + # Create a mock config + class MockConfig: + def __init__(self): + self.output_config = {'create_backups': False} + + config = MockConfig() + processor = XMLProcessor(config) + + # Create initial XML + initial_xml = ''' + + Existing value +''' + + # Write to temp file + with tempfile.NamedTemporaryFile(mode='w', suffix='.xml', delete=False) as f: + f.write(initial_xml) + temp_path = f.name + + try: + # Load the XML + root = processor.load_xml_file(temp_path) + + # Add new strings + new_strings = [ + ('new_key1', 'New value 1'), + ('new_key2', 'New value 2') + ] + + processor.add_missing_strings(root, new_strings) + + # Save the file + processor.save_xml_file(root, temp_path) + + # Read and display the result + with open(temp_path, 'r', encoding='utf-8') as f: + result = f.read() + + print("=== FORMATTED XML ===") + print(result) + print("=== END XML ===") + + finally: + os.unlink(temp_path) + +if __name__ == "__main__": + test_xml_formatting() diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..66a653b --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1,3 @@ +""" +Test package for Android XML Translation Tool +""" diff --git a/tests/test_config.py b/tests/test_config.py new file mode 100644 index 0000000..003bd0f --- /dev/null +++ b/tests/test_config.py @@ -0,0 +1,141 @@ +""" +Tests for configuration module +""" + +import unittest +import tempfile +import os +import yaml +from unittest.mock import patch +from config import Config + + +class TestConfig(unittest.TestCase): + 
"""Test cases for Config class""" + + def setUp(self): + """Set up test fixtures""" + self.test_config_data = { + 'llm': { + 'base_url': 'http://localhost:1234', + 'api_key': 'test-key', + 'model': 'test-model' + }, + 'android': { + 'input_folder': 'app/src/main/res', + 'base_values_folder': 'values', + 'target_folders': ['values-de-rDE'], + 'files_to_translate': ['strings.xml'] + }, + 'translation': { + 'batch_size': 5, + 'interactive_approval': True + } + } + + def test_load_valid_config(self): + """Test loading a valid configuration file""" + with tempfile.NamedTemporaryFile(mode='w', suffix='.yaml', delete=False) as f: + yaml.dump(self.test_config_data, f) + temp_path = f.name + + try: + config = Config(temp_path) + self.assertEqual(config.llm_config['base_url'], 'http://localhost:1234') + self.assertEqual(config.android_config['input_folder'], 'app/src/main/res') + self.assertEqual(config.translation_config['batch_size'], 5) + finally: + os.unlink(temp_path) + + def test_missing_file_error(self): + """Test error handling for missing configuration file""" + with patch('sys.exit') as mock_exit: + Config('nonexistent.yaml') + mock_exit.assert_called_once_with(1) + + def test_missing_required_section(self): + """Test error handling for missing required sections""" + incomplete_data = {'llm': self.test_config_data['llm']} # Missing android and translation + + with tempfile.NamedTemporaryFile(mode='w', suffix='.yaml', delete=False) as f: + yaml.dump(incomplete_data, f) + temp_path = f.name + + try: + with patch('sys.exit') as mock_exit: + Config(temp_path) + mock_exit.assert_called_with(1) # Remove assert_called_once + finally: + os.unlink(temp_path) + + def test_output_config_default(self): + """Test default output configuration""" + with tempfile.NamedTemporaryFile(mode='w', suffix='.yaml', delete=False) as f: + yaml.dump(self.test_config_data, f) + temp_path = f.name + + try: + config = Config(temp_path) + output_config = config.output_config + 
self.assertEqual(output_config, {}) # Should be empty dict when not specified + finally: + os.unlink(temp_path) + + def test_examples_config_missing(self): + """Test examples config when not specified""" + with tempfile.NamedTemporaryFile(mode='w', suffix='.yaml', delete=False) as f: + yaml.dump(self.test_config_data, f) + temp_path = f.name + + try: + config = Config(temp_path) + self.assertFalse(config.has_examples_config()) + self.assertEqual(config.examples_config, {}) + finally: + os.unlink(temp_path) + + def test_examples_config_present(self): + """Test examples config when specified""" + config_with_examples = self.test_config_data.copy() + config_with_examples['examples'] = { + 'input_folder': 'examples', + 'base_folder': 'assets/hints', + 'target_folders': ['assets/hints-de-rDE'], + 'file_extension': '.md' + } + + with tempfile.NamedTemporaryFile(mode='w', suffix='.yaml', delete=False) as f: + yaml.dump(config_with_examples, f) + temp_path = f.name + + try: + config = Config(temp_path) + self.assertTrue(config.has_examples_config()) + self.assertEqual(config.examples_config['input_folder'], 'examples') + self.assertEqual(config.examples_config['base_folder'], 'assets/hints') + self.assertEqual(config.examples_config['target_folders'], ['assets/hints-de-rDE']) + self.assertEqual(config.examples_config['file_extension'], '.md') + finally: + os.unlink(temp_path) + + def test_examples_config_incomplete(self): + """Test has_examples_config returns False for incomplete config""" + config_with_examples = self.test_config_data.copy() + config_with_examples['examples'] = { + 'input_folder': 'examples', + # Missing base_folder and target_folders + } + + with tempfile.NamedTemporaryFile(mode='w', suffix='.yaml', delete=False) as f: + yaml.dump(config_with_examples, f) + temp_path = f.name + + try: + config = Config(temp_path) + self.assertFalse(config.has_examples_config()) + finally: + os.unlink(temp_path) + + +if __name__ == '__main__': + unittest.main() diff --git 
a/tests/test_md_processor.py b/tests/test_md_processor.py new file mode 100644 index 0000000..9aa682d --- /dev/null +++ b/tests/test_md_processor.py @@ -0,0 +1,272 @@ +""" +Tests for MD processor module +""" + +import unittest +import tempfile +import os +from md_processor import MDProcessor, MDTranslationItem + + +class TestMDProcessor(unittest.TestCase): + """Test cases for MDProcessor class""" + + def setUp(self): + """Set up test fixtures""" + self.processor = MDProcessor() + + def test_get_md_files(self): + """Test getting .md files from a folder""" + with tempfile.TemporaryDirectory() as tmpdir: + # Create test files + open(os.path.join(tmpdir, "file1.md"), 'w').close() + open(os.path.join(tmpdir, "file2.md"), 'w').close() + open(os.path.join(tmpdir, "not_md.txt"), 'w').close() + os.makedirs(os.path.join(tmpdir, "subdir")) + open(os.path.join(tmpdir, "subdir", "file3.md"), 'w').close() + + # Get .md files (non-recursive) + files = self.processor.get_md_files(tmpdir) + + self.assertEqual(len(files), 2) + self.assertIn("file1.md", files) + self.assertIn("file2.md", files) + self.assertNotIn("not_md.txt", files) + self.assertNotIn("file3.md", files) # In subdir, not included + + def test_get_md_files_nonexistent_folder(self): + """Test getting .md files from a non-existent folder""" + files = self.processor.get_md_files("/nonexistent/path") + self.assertEqual(files, []) + + def test_load_md_file(self): + """Test loading markdown file content""" + with tempfile.NamedTemporaryFile(mode='w', suffix='.md', delete=False) as f: + f.write("# Test Content\n\nThis is a test.") + temp_path = f.name + + try: + content = self.processor.load_md_file(temp_path) + self.assertEqual(content, "# Test Content\n\nThis is a test.") + finally: + os.unlink(temp_path) + + def test_load_md_file_nonexistent(self): + """Test loading a non-existent markdown file""" + content = self.processor.load_md_file("/nonexistent/file.md") + self.assertEqual(content, "") + + def 
test_save_md_file(self): + """Test saving markdown file""" + with tempfile.TemporaryDirectory() as tmpdir: + file_path = os.path.join(tmpdir, "test.md") + content = "# Test Content\n\nThis is a test." + + self.processor.save_md_file(content, file_path) + + # Verify file was created + self.assertTrue(os.path.exists(file_path)) + + # Verify content + with open(file_path, 'r', encoding='utf-8') as f: + saved_content = f.read() + self.assertEqual(saved_content, content) + + def test_save_md_file_creates_directories(self): + """Test that save_md_file creates parent directories""" + with tempfile.TemporaryDirectory() as tmpdir: + nested_path = os.path.join(tmpdir, "subdir1", "subdir2", "test.md") + content = "# Nested Content" + + self.processor.save_md_file(content, nested_path) + + # Verify file was created in nested directory + self.assertTrue(os.path.exists(nested_path)) + + def test_extract_content(self): + """Test extracting content from all .md files""" + with tempfile.TemporaryDirectory() as tmpdir: + # Create test files + with open(os.path.join(tmpdir, "file1.md"), 'w') as f: + f.write("# File 1") + with open(os.path.join(tmpdir, "file2.md"), 'w') as f: + f.write("# File 2") + + items = self.processor.extract_content(tmpdir) + + self.assertEqual(len(items), 2) + self.assertIn("file1.md", items) + self.assertIn("file2.md", items) + self.assertEqual(items["file1.md"].content, "# File 1") + self.assertEqual(items["file2.md"].content, "# File 2") + self.assertEqual(items["file1.md"].filename, "file1.md") + + def test_extract_content_empty_folder(self): + """Test extracting content from empty folder""" + with tempfile.TemporaryDirectory() as tmpdir: + items = self.processor.extract_content(tmpdir) + self.assertEqual(items, {}) + + def test_check_asset_counts_valid(self): + """Test asset count check when all folders have same count""" + with tempfile.TemporaryDirectory() as tmpdir: + # Create base folder with files + base_folder = os.path.join(tmpdir, "base") + 
os.makedirs(base_folder) + open(os.path.join(base_folder, "file1.md"), 'w').close() + open(os.path.join(base_folder, "file2.md"), 'w').close() + + # Create target folders with same files + target1 = os.path.join(tmpdir, "target1") + os.makedirs(target1) + open(os.path.join(target1, "file1.md"), 'w').close() + open(os.path.join(target1, "file2.md"), 'w').close() + + target2 = os.path.join(tmpdir, "target2") + os.makedirs(target2) + open(os.path.join(target2, "file1.md"), 'w').close() + open(os.path.join(target2, "file2.md"), 'w').close() + + is_valid, errors = self.processor.check_asset_counts(base_folder, [target1, target2]) + + self.assertTrue(is_valid) + self.assertEqual(errors, []) + + def test_check_asset_counts_missing_files(self): + """Test asset count check when target is missing files""" + with tempfile.TemporaryDirectory() as tmpdir: + # Create base folder with files + base_folder = os.path.join(tmpdir, "base") + os.makedirs(base_folder) + open(os.path.join(base_folder, "file1.md"), 'w').close() + open(os.path.join(base_folder, "file2.md"), 'w').close() + + # Create target folder with missing file + target1 = os.path.join(tmpdir, "target1") + os.makedirs(target1) + open(os.path.join(target1, "file1.md"), 'w').close() + # file2.md is missing + + is_valid, errors = self.processor.check_asset_counts(base_folder, [target1]) + + self.assertFalse(is_valid) + self.assertEqual(len(errors), 1) + self.assertIn("file2.md", errors[0]) + + def test_check_asset_counts_extra_files(self): + """Test asset count check when target has extra files""" + with tempfile.TemporaryDirectory() as tmpdir: + # Create base folder with files + base_folder = os.path.join(tmpdir, "base") + os.makedirs(base_folder) + open(os.path.join(base_folder, "file1.md"), 'w').close() + + # Create target folder with extra file + target1 = os.path.join(tmpdir, "target1") + os.makedirs(target1) + open(os.path.join(target1, "file1.md"), 'w').close() + open(os.path.join(target1, "extra.md"), 'w').close() 
+ + is_valid, errors = self.processor.check_asset_counts(base_folder, [target1]) + + self.assertFalse(is_valid) + self.assertEqual(len(errors), 1) + self.assertIn("extra.md", errors[0]) + + def test_check_asset_counts_empty_base(self): + """Test asset count check with empty base folder""" + with tempfile.TemporaryDirectory() as tmpdir: + base_folder = os.path.join(tmpdir, "base") + os.makedirs(base_folder) + + target1 = os.path.join(tmpdir, "target1") + os.makedirs(target1) + + is_valid, errors = self.processor.check_asset_counts(base_folder, [target1]) + + self.assertFalse(is_valid) + self.assertEqual(len(errors), 1) + self.assertIn("no .md files", errors[0]) + + def test_find_missing_files(self): + """Test finding missing files in target folder""" + with tempfile.TemporaryDirectory() as tmpdir: + # Create base folder with files + base_folder = os.path.join(tmpdir, "base") + os.makedirs(base_folder) + open(os.path.join(base_folder, "file1.md"), 'w').close() + open(os.path.join(base_folder, "file2.md"), 'w').close() + open(os.path.join(base_folder, "file3.md"), 'w').close() + + # Create target folder with some files + target_folder = os.path.join(tmpdir, "target") + os.makedirs(target_folder) + open(os.path.join(target_folder, "file1.md"), 'w').close() + # file2.md and file3.md are missing + + missing = self.processor.find_missing_files(base_folder, target_folder) + + self.assertEqual(len(missing), 2) + self.assertIn("file2.md", missing) + self.assertIn("file3.md", missing) + self.assertNotIn("file1.md", missing) + + def test_find_missing_files_all_present(self): + """Test finding missing files when all are present""" + with tempfile.TemporaryDirectory() as tmpdir: + base_folder = os.path.join(tmpdir, "base") + os.makedirs(base_folder) + open(os.path.join(base_folder, "file1.md"), 'w').close() + + target_folder = os.path.join(tmpdir, "target") + os.makedirs(target_folder) + open(os.path.join(target_folder, "file1.md"), 'w').close() + + missing = 
self.processor.find_missing_files(base_folder, target_folder) + + self.assertEqual(missing, []) + + def test_custom_extension(self): + """Test MDProcessor with custom file extension""" + processor = MDProcessor(file_extension=".txt") + + with tempfile.TemporaryDirectory() as tmpdir: + open(os.path.join(tmpdir, "file1.txt"), 'w').close() + open(os.path.join(tmpdir, "file2.md"), 'w').close() + + files = processor.get_md_files(tmpdir) + + self.assertEqual(len(files), 1) + self.assertIn("file1.txt", files) + self.assertNotIn("file2.md", files) + + +class TestMDTranslationItem(unittest.TestCase): + """Test cases for MDTranslationItem dataclass""" + + def test_create_item(self): + """Test creating MDTranslationItem""" + item = MDTranslationItem( + filename="test.md", + content="# Test", + relative_path="subdir" + ) + + self.assertEqual(item.filename, "test.md") + self.assertEqual(item.content, "# Test") + self.assertEqual(item.relative_path, "subdir") + + def test_create_item_defaults(self): + """Test creating MDTranslationItem with defaults""" + item = MDTranslationItem( + filename="test.md", + content="# Test" + ) + + self.assertEqual(item.filename, "test.md") + self.assertEqual(item.content, "# Test") + self.assertEqual(item.relative_path, "") + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_models.py b/tests/test_models.py new file mode 100644 index 0000000..a7f2161 --- /dev/null +++ b/tests/test_models.py @@ -0,0 +1,56 @@ +""" +Tests for data models +""" + +import unittest +from models import TranslationItem, TranslationBatch + + +class TestTranslationItem(unittest.TestCase): + """Test cases for TranslationItem model""" + + def test_translation_item_creation(self): + """Test creating a TranslationItem""" + item = TranslationItem(name="test_key", value="Test value") + self.assertEqual(item.name, "test_key") + self.assertEqual(item.value, "Test value") + self.assertIsNone(item.comment) + + def test_translation_item_with_comment(self): + 
"""Test creating a TranslationItem with comment""" + item = TranslationItem( + name="test_key", + value="Test value", + comment="Test comment" + ) + self.assertEqual(item.name, "test_key") + self.assertEqual(item.value, "Test value") + self.assertEqual(item.comment, "Test comment") + + +class TestTranslationBatch(unittest.TestCase): + """Test cases for TranslationBatch model""" + + def setUp(self): + """Set up test fixtures""" + self.items = [ + TranslationItem(name="key1", value="Value 1"), + TranslationItem(name="key2", value="Value 2") + ] + + def test_translation_batch_creation(self): + """Test creating a TranslationBatch""" + batch = TranslationBatch( + items=self.items, + target_language="values-de-rDE", + target_file="strings.xml" + ) + self.assertEqual(len(batch.items), 2) + self.assertEqual(batch.target_language, "values-de-rDE") + self.assertEqual(batch.target_file, "strings.xml") + self.assertEqual(batch.items[0].name, "key1") + self.assertEqual(batch.items[1].value, "Value 2") + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_xml_processor.py b/tests/test_xml_processor.py new file mode 100644 index 0000000..0aedc82 --- /dev/null +++ b/tests/test_xml_processor.py @@ -0,0 +1,188 @@ +""" +Tests for XML processor module +""" + +import unittest +import tempfile +import os +from lxml import etree +from unittest.mock import patch, MagicMock +from config import Config +from models import TranslationItem +from xml_processor import XMLProcessor + + +class TestXMLProcessor(unittest.TestCase): + """Test cases for XMLProcessor class""" + + def setUp(self): + """Set up test fixtures""" + # Create a mock config + self.mock_config = MagicMock(spec=Config) + self.mock_config.output_config = { + 'create_backups': True, + 'backup_suffix': '.backup' + } + + self.processor = XMLProcessor(self.mock_config) + + # Sample XML content (without XML declaration for testing) + self.sample_xml = ''' + Test App + Welcome to our app! 
+ + OK +''' + + def test_load_xml_file_success(self): + """Test successful XML file loading""" + with tempfile.NamedTemporaryFile(mode='w', suffix='.xml', delete=False) as f: + f.write(self.sample_xml) + temp_path = f.name + + try: + root = self.processor.load_xml_file(temp_path) + self.assertIsNotNone(root) + self.assertEqual(root.tag, 'resources') + self.assertEqual(len(root.findall('string')), 3) + finally: + os.unlink(temp_path) + + def test_load_xml_file_error(self): + """Test error handling when loading invalid XML file""" + with tempfile.NamedTemporaryFile(mode='w', suffix='.xml', delete=False) as f: + f.write('invalid xml content') + temp_path = f.name + + try: + root = self.processor.load_xml_file(temp_path) + self.assertIsNone(root) + finally: + os.unlink(temp_path) + + def test_extract_strings(self): + """Test extracting strings from XML""" + root = etree.fromstring(self.sample_xml) + strings = self.processor.extract_strings(root) + + self.assertEqual(len(strings), 3) + self.assertIn('app_name', strings) + self.assertIn('welcome_message', strings) + self.assertIn('button_ok', strings) + + # Check string values + self.assertEqual(strings['app_name'].value, 'Test App') + self.assertEqual(strings['welcome_message'].value, 'Welcome to our app!') + self.assertEqual(strings['button_ok'].value, 'OK') + + # Check that all items are TranslationItem instances + for item in strings.values(): + self.assertIsInstance(item, TranslationItem) + + def test_add_missing_strings(self): + """Test adding missing strings to XML""" + # Create target XML with one existing string + target_xml = ''' + Existing value +''' + + target_root = etree.fromstring(target_xml) + missing_strings = [ + ('new_key1', 'New value 1', 'string', []), + ('new_key2', 'New value 2', 'string', []), + ('existing_key', 'Should not be added', 'string', []) # This should be ignored + ] + + self.processor.add_missing_strings(target_root, missing_strings) + + # Check that new strings were added + strings = 
target_root.findall('string') + self.assertEqual(len(strings), 3) # 1 existing + 2 new + + # Check specific strings + new_key1 = target_root.find(".//string[@name='new_key1']") + self.assertIsNotNone(new_key1) + self.assertEqual(new_key1.text, 'New value 1') + + new_key2 = target_root.find(".//string[@name='new_key2']") + self.assertIsNotNone(new_key2) + self.assertEqual(new_key2.text, 'New value 2') + + # Check existing string wasn't duplicated + existing_strings = target_root.findall(".//string[@name='existing_key']") + self.assertEqual(len(existing_strings), 1) + + def test_extract_string_array(self): + """Test extracting string arrays from XML""" + xml_with_array = ''' + Test App + + Basic greetings + Irregular verbs + Vocabulary at the airport + +''' + + root = etree.fromstring(xml_with_array) + strings = self.processor.extract_strings(root) + + self.assertEqual(len(strings), 2) + self.assertIn('app_name', strings) + self.assertIn('vocabulary_hints', strings) + + # Check string array + array_item = strings['vocabulary_hints'] + self.assertEqual(array_item.item_type, 'string-array') + self.assertEqual(len(array_item.items), 3) + self.assertEqual(array_item.items[0], 'Basic greetings') + self.assertEqual(array_item.items[1], 'Irregular verbs') + self.assertEqual(array_item.items[2], 'Vocabulary at the airport') + self.assertEqual(array_item.value, 'Basic greetings | Irregular verbs | Vocabulary at the airport') + + def test_add_missing_string_array(self): + """Test adding missing string arrays to XML""" + target_xml = ''' + Existing value +''' + + target_root = etree.fromstring(target_xml) + missing_strings = [ + ('vocabulary_hints', '', 'string-array', ['Basic greetings', 'Irregular verbs']) + ] + + self.processor.add_missing_strings(target_root, missing_strings) + + # Check that string-array was added + string_array = target_root.find(".//string-array[@name='vocabulary_hints']") + self.assertIsNotNone(string_array) + + # Check items + items = 
class TranslationTool:
    """Drive the translation of Android XML resources and Markdown examples.

    The tool wires together the configuration, the LLM client, the XML and
    Markdown processors and the console UI. For every configured target
    folder it determines what is missing compared to the base folder, asks
    the LLM for translations and writes the approved results.
    """

    # Resource kinds that can be sent to the LLM as plain text and written
    # back by XMLProcessor.add_missing_strings.
    _TRANSLATABLE_TYPES = ('string', 'string-array')

    # Number of characters of a Markdown file shown in the approval table.
    _PREVIEW_LIMIT = 500

    def __init__(self, config_path: str = "config.yaml"):
        """Load the configuration and create all collaborating components."""
        self.config = Config(config_path)
        self.llm_client = LLMClient(self.config)
        self.xml_processor = XMLProcessor(self.config)
        self.md_processor = MDProcessor()
        self.ui = UI()

        # Setup logging
        self._setup_logging()

    def _setup_logging(self):
        """Configure logging from ``output_config['log_level']``.

        The level name is matched case-insensitively. Anything that does not
        resolve to a numeric logging level falls back to ``INFO`` instead of
        raising, so a typo in the configuration cannot abort start-up.
        """
        configured = self.config.output_config.get('log_level', 'INFO')
        level = getattr(logging, str(configured).upper(), None)
        # getattr can also return functions or flags (e.g. logging.debug);
        # only a real integer level is acceptable here.
        if isinstance(level, bool) or not isinstance(level, int):
            level = logging.INFO

        logging.basicConfig(
            level=level,
            format='%(asctime)s - %(levelname)s - %(message)s'
        )
        self.logger = logging.getLogger(__name__)

    def run(self):
        """Main execution method"""
        self.ui.show_header()

        # Process Android XML files
        self._process_android_files()

        # Process Markdown files if configured
        if self.config.has_examples_config():
            self._process_md_files()

        self.ui.show_success("Translation process completed!")

    def _process_android_files(self):
        """Translate the configured XML files for every target folder."""
        android_config = self.config.android_config
        input_folder = android_config['input_folder']
        base_folder = android_config['base_values_folder']
        target_folders = android_config['target_folders']
        files_to_translate = android_config['files_to_translate']

        # Check if input folder exists
        if not os.path.exists(input_folder):
            self.ui.show_error(f"Input folder {input_folder} not found!")
            return

        base_values_path = os.path.join(input_folder, base_folder)
        if not os.path.exists(base_values_path):
            self.ui.show_error(f"Base values folder {base_values_path} not found!")
            return

        # Process each target language
        for target_folder in target_folders:
            self.ui.show_processing_language(target_folder)
            self._process_language(input_folder, base_folder, target_folder, files_to_translate)

    def _process_md_files(self):
        """Translate the Markdown files of the examples folder."""
        examples_config = self.config.examples_config
        input_folder = examples_config['input_folder']
        base_folder = examples_config['base_folder']
        target_folders = examples_config['target_folders']
        file_extension = examples_config.get('file_extension', '.md')

        # Update MD processor with configured extension
        self.md_processor = MDProcessor(file_extension)

        # Check if input folder exists
        if not os.path.exists(input_folder):
            self.ui.show_warning(f"Examples input folder {input_folder} not found, skipping...")
            return

        base_path = os.path.join(input_folder, base_folder)
        if not os.path.exists(base_path):
            self.ui.show_warning(f"Examples base folder {base_path} not found, skipping...")
            return

        # Check asset counts across all locales; a failed check is only
        # reported, the missing files are translated below regardless.
        target_paths = [os.path.join(input_folder, tf) for tf in target_folders]
        is_valid, errors = self.md_processor.check_asset_counts(base_path, target_paths)

        if not is_valid:
            self.ui.show_warning("Asset count check failed:")
            for error in errors:
                self.ui.show_warning(f" - {error}")
        else:
            self.ui.show_asset_count_check(len(self.md_processor.get_md_files(base_path)))

        # Process each target language
        for target_folder in target_folders:
            self.ui.show_processing_language(f"examples/{target_folder}")
            self._process_md_language(input_folder, base_folder, target_folder)

    @classmethod
    def _preview(cls, text: str) -> str:
        """Return *text* shortened to the preview limit, with an ellipsis."""
        if len(text) > cls._PREVIEW_LIMIT:
            return text[:cls._PREVIEW_LIMIT] + "..."
        return text

    def _process_md_language(self, input_folder: str, base_folder: str, target_folder: str):
        """Translate the Markdown files missing from one target folder.

        Files are sent to the LLM one at a time (no batching). A file is only
        written when exactly one non-empty translation comes back and, in
        interactive mode, the user approves it.
        """
        base_path = os.path.join(input_folder, base_folder)
        target_path = os.path.join(input_folder, target_folder)

        # Create target folder if it doesn't exist
        os.makedirs(target_path, exist_ok=True)

        # Find missing files
        missing_files = self.md_processor.find_missing_files(base_path, target_path)

        if not missing_files:
            self.ui.show_all_translated(target_path)
            return

        self.ui.show_missing_strings(len(missing_files), target_path)

        # Get language instruction
        instruction = self._get_instruction(target_folder)

        interactive = self.config.translation_config['interactive_approval']

        # Process each missing file one by one (no batching for .md files)
        for i, filename in enumerate(missing_files, 1):
            file_path = os.path.join(base_path, filename)
            content = self.md_processor.load_md_file(file_path)

            if not content:
                self.ui.show_warning(f"Could not load {filename}, skipping...")
                continue

            # Create a single-item batch for this file
            item = TranslationItem(
                name=filename,
                value=content,
                item_type='string'
            )

            batch = TranslationBatch(
                items=[item],
                target_language=target_folder,
                target_file=target_path
            )

            self.ui.show_processing_file(filename)

            # Translate single file
            translations = self.llm_client.translate_batch(batch, instruction)

            if not translations or len(translations) != 1:
                self.ui.show_batch_failed(i)
                continue

            translation = translations[0]

            # An empty result must not be saved: the file would then exist in
            # the target folder and never be reported as missing again.
            if not str(translation or "").strip():
                self.ui.show_batch_failed(i)
                continue

            # Display translation for approval (truncated for display)
            if interactive:
                display_batch = TranslationBatch(
                    items=[TranslationItem(
                        name=filename,
                        value=self._preview(content),
                        item_type='string'
                    )],
                    target_language=target_folder,
                    target_file=target_path
                )
                approved = self.ui.show_batch_approval(display_batch, [self._preview(translation)])
                if not approved:
                    self.ui.show_batch_skipped(i)
                    continue

            # Save translated file
            target_file = os.path.join(target_path, filename)
            self.md_processor.save_md_file(translation, target_file)

            self.ui.show_batch_added(i, filename)

    def _get_instruction(self, target_folder: str) -> str:
        """Return the LLM instruction configured for *target_folder*.

        Lookup order: the folder exactly as given, its last path component,
        and for ``hints-*`` folders the matching ``values-*`` key. When none
        is configured, a generic ``Translate to <target_folder>`` is returned.
        """
        language_instructions = self.config.translation_config.get('language_instructions', {})

        # Try direct lookup first
        if target_folder in language_instructions:
            return language_instructions[target_folder]

        # Extract folder name (e.g., "res/values-de-rDE" -> "values-de-rDE")
        folder_name = os.path.basename(target_folder)
        if folder_name in language_instructions:
            return language_instructions[folder_name]

        # Try hints folder mapping (e.g., "assets/hints-de-rDE" -> "values-de-rDE")
        if 'hints-' in folder_name:
            values_key = folder_name.replace('hints-', 'values-')
            if values_key in language_instructions:
                return language_instructions[values_key]

        # Default fallback
        return f"Translate to {target_folder}"

    def _process_language(self, input_folder: str, base_folder: str, target_folder: str, files_to_translate: List[str]):
        """Translate every configured XML file for one target folder."""
        base_values_path = os.path.join(input_folder, base_folder)
        target_values_path = os.path.join(input_folder, target_folder)

        # Create target folder if it doesn't exist
        os.makedirs(target_values_path, exist_ok=True)

        # Get language instruction
        instruction = self._get_instruction(target_folder)

        # Process each file
        for filename in files_to_translate:
            base_file = os.path.join(base_values_path, filename)
            target_file = os.path.join(target_values_path, filename)

            if not os.path.exists(base_file):
                self.ui.show_warning(f"Base file {base_file} not found, skipping...")
                continue

            self.ui.show_processing_file(filename)
            self._process_file(base_file, target_file, target_folder, instruction)

    def _process_file(self, base_file: str, target_file: str, target_folder: str, instruction: str):
        """Translate the entries of *base_file* that *target_file* lacks.

        Missing entries are translated in batches of ``batch_size``. The
        target file is saved after every accepted batch. Entries that are
        neither strings nor string-arrays (e.g. plurals) are reported and
        left untouched, because only those two kinds can be written back.
        """
        # Load base XML
        base_root = self.xml_processor.load_xml_file(base_file)
        if base_root is None:
            return

        # Load or create target XML
        if os.path.exists(target_file):
            target_root = self.xml_processor.load_xml_file(target_file)
        else:
            # Create new XML structure
            target_root = etree.Element('resources')

        if target_root is None:
            return

        # Extract strings
        base_strings = self.xml_processor.extract_strings(base_root)
        target_strings = self.xml_processor.extract_strings(target_root)

        # Find missing strings, separating out kinds that cannot be handled
        missing_strings = []
        unsupported = []
        for name, item in base_strings.items():
            if name in target_strings:
                continue
            if getattr(item, 'item_type', 'string') in self._TRANSLATABLE_TYPES:
                missing_strings.append(item)
            else:
                unsupported.append(item)

        if unsupported:
            self.ui.show_warning(
                f"Skipping {len(unsupported)} unsupported resource(s) in {base_file}: "
                + ", ".join(str(item.name) for item in unsupported)
            )

        if not missing_strings:
            self.ui.show_all_translated(target_file)
            return

        self.ui.show_missing_strings(len(missing_strings), target_file)

        # Process in batches
        batch_size = self.config.translation_config['batch_size']
        interactive = self.config.translation_config['interactive_approval']

        for i in range(0, len(missing_strings), batch_size):
            batch_num = i // batch_size + 1
            batch_items = missing_strings[i:i + batch_size]
            batch = TranslationBatch(
                items=batch_items,
                target_language=target_folder,
                target_file=target_file
            )

            # Translate batch
            translations = self.llm_client.translate_batch(batch, instruction)

            if not translations or len(translations) != len(batch_items):
                self.ui.show_batch_failed(batch_num)
                continue

            # Display translations for approval
            if interactive:
                approved = self.ui.show_batch_approval(batch, translations)
                if not approved:
                    self.ui.show_batch_skipped(batch_num)
                    continue

            # Add translations to target XML
            new_translations = []
            for item, translation in zip(batch_items, translations):
                if item.item_type == 'string-array':
                    # For string arrays, parse the translation into individual items
                    # Expected format: item1 | item2 | item3 (same separator as input)
                    source_items = item.items or []
                    if source_items:
                        translated_items = [t.strip() for t in translation.split('|')]
                    else:
                        # "".split('|') would yield one empty item for an empty array
                        translated_items = []

                    # Keep the translated array aligned with the base array
                    if len(translated_items) != len(source_items):
                        self.ui.show_warning(
                            f"Translation of array {item.name} has {len(translated_items)} "
                            f"item(s), expected {len(source_items)}; skipping..."
                        )
                        continue

                    new_translations.append((item.name, translation, 'string-array', translated_items))
                else:
                    # Regular string
                    new_translations.append((item.name, translation, 'string', []))

            if not new_translations:
                self.ui.show_batch_failed(batch_num)
                continue

            self.xml_processor.add_missing_strings(target_root, new_translations)

            # Save the file
            self.xml_processor.save_xml_file(target_root, target_file)
            self.ui.show_batch_added(batch_num, target_file)
processed""" + self.console.print(f"[dim]Processing file: {filename}[/dim]") + + def show_processing_folder(self, folder: str): + """Display current folder being processed""" + self.console.print(f"[dim]Processing folder: {folder}[/dim]") + + def show_asset_count_check(self, count: int): + """Display asset count check passed message""" + self.console.print(f"[green]βœ“ Asset count check passed: {count} .md files in all locales[/green]") + + def show_missing_strings(self, count: int, filename: str): + """Display count of missing strings""" + self.console.print(f"[yellow]Found {count} missing strings in {filename}[/yellow]") + + def show_all_translated(self, filename: str): + """Display message when all strings are already translated""" + self.console.print(f"[green]βœ“ {filename}: All strings are already translated[/green]") + + def show_batch_approval(self, batch: TranslationBatch, translations: list[str]) -> bool: + """Show batch translations for user approval""" + self.console.print(f"\n[bold]πŸ“ Translation Batch Review[/bold]") + self.console.print(f"[dim]Target: {batch.target_language} | File: {batch.target_file}[/dim]\n") + + # Create table for display + table = Table(show_header=True, header_style="bold magenta") + table.add_column("#", style="cyan", width=4) + table.add_column("Original", style="white") + table.add_column("Translation", style="green") + + for i, (item, translation) in enumerate(zip(batch.items, translations)): + table.add_row(str(i + 1), item.value, translation) + + self.console.print(table) + + # Ask for approval + return Confirm.ask("\n[yellow]Approve these translations?[/yellow]", default=True) + + def show_batch_skipped(self, batch_num: int): + """Display message when batch is skipped""" + self.console.print(f"[yellow]⏭️ Batch {batch_num} skipped[/yellow]") + + def show_batch_failed(self, batch_num: int): + """Display message when batch translation fails""" + self.console.print(f"[red]❌ Translation failed for batch {batch_num}[/red]") 
+ + def show_batch_added(self, batch_num: int, filename: str): + """Display message when batch is successfully added""" + self.console.print(f"[green]βœ… Batch {batch_num} added to {filename}[/green]") + + def show_warning(self, message: str): + """Display warning message""" + self.console.print(f"[yellow]Warning: {message}[/yellow]") + + def show_error(self, message: str): + """Display error message""" + self.console.print(f"[red]Error: {message}[/red]") + + def show_success(self, message: str): + """Display success message""" + self.console.print(f"[bold green]βœ… {message}[/bold green]") + + def show_interrupted(self): + """Display interruption message""" + self.console.print("\n[yellow]⚠️ Translation process interrupted by user[/yellow]") diff --git a/xml_processor.py b/xml_processor.py new file mode 100644 index 0000000..fa7f291 --- /dev/null +++ b/xml_processor.py @@ -0,0 +1,173 @@ +""" +XML file processor for Android resources +""" + +import os +from lxml import etree +from typing import Dict, Tuple, List, Union +from models import TranslationItem +from config import Config + + +class XMLProcessor: + """XML file processor for Android resources""" + + def __init__(self, config: Config): + self.config = config + self.parser = etree.XMLParser(remove_blank_text=False, strip_cdata=False) + + def load_xml_file(self, file_path: str) -> etree.Element: + """Load and parse XML file""" + try: + tree = etree.parse(file_path, self.parser) + return tree.getroot() + except Exception as e: + print(f"Error loading XML file {file_path}: {e}") + return None + + def save_xml_file(self, root: etree.Element, file_path: str): + """Save XML file with formatting""" + try: + # Ensure proper formatting for the root element + if root.text is None: + root.text = "\n " + if root.tail is None: + root.tail = "\n" + + # Ensure all children have proper tails for formatting + for i, child in enumerate(root): + if child.tail is None or child.tail.strip() == "": + child.tail = "\n " + + # Make 
sure the last child ends with proper indentation + if len(root) > 0: + last_child = root[-1] + if not last_child.tail.endswith("\n"): + last_child.tail = "\n" + + # Create backup if enabled + if self.config.output_config.get('create_backups', True): + backup_path = file_path + self.config.output_config.get('backup_suffix', '.backup') + if os.path.exists(file_path): + backup_tree = etree.ElementTree(root) + backup_tree.write(backup_path, + encoding='utf-8', + xml_declaration=True, + pretty_print=True) + + # Save the modified file with pretty printing + tree = etree.ElementTree(root) + tree.write(file_path, encoding='utf-8', xml_declaration=True, pretty_print=True) + + except Exception as e: + print(f"Error saving XML file {file_path}: {e}") + + def extract_strings(self, root: etree.Element) -> Dict[str, TranslationItem]: + """Extract strings and string-arrays from XML root element""" + strings = {} + + for element in root: + if element.tag == 'string': + name = element.get('name') + value = element.text or '' + + # Skip strings marked as non-translatable + translatable = element.get('translatable', 'true') + if translatable.lower() == 'false': + continue + + # Handle CDATA and special characters + if isinstance(value, str): + value = value.strip() + + # Get comment if exists + comment = None + if element.tail and element.tail.strip(): + comment = element.tail.strip() + + strings[name] = TranslationItem(name=name, value=value, comment=comment, item_type='string') + + elif element.tag == 'string-array': + name = element.get('name') + + # Skip arrays marked as non-translatable + translatable = element.get('translatable', 'true') + if translatable.lower() == 'false': + continue + + # Extract all items from the string-array + items = [] + for item in element: + if item.tag == 'item': + item_value = item.text or '' + if isinstance(item_value, str): + item_value = item_value.strip() + items.append(item_value) + + # Get comment if exists + comment = None + if element.tail 
and element.tail.strip(): + comment = element.tail.strip() + + # Store with combined value for display + combined_value = " | ".join(items) if items else "" + strings[name] = TranslationItem( + name=name, + value=combined_value, + comment=comment, + item_type='string-array', + items=items + ) + + elif element.tag == 'plurals': + # Handle plurals - for now, skip or handle separately + name = element.get('name') + strings[name] = TranslationItem(name=name, value=f"<{element.tag}>", comment="Complex type", item_type='plurals') + + return strings + + def add_missing_strings(self, target_root: etree.Element, missing_strings: List[Tuple]): + """Add missing strings and string-arrays to target XML""" + for item_data in missing_strings: + if len(item_data) == 4: + # Extended format: (name, value, item_type, items) + name, value, item_type, items = item_data + else: + # Regular string: (name, value) + name, value = item_data[0], item_data[1] + item_type = 'string' + items = [] + + if item_type == 'string-array': + # Check if string-array already exists + existing = target_root.find(f".//string-array[@name='{name}']") + if existing is None: + # Create new string-array element + new_array = etree.SubElement(target_root, 'string-array', name=name) + + # Add items to the array + for i, item_value in enumerate(items): + item_elem = etree.SubElement(new_array, 'item') + item_elem.text = item_value + # Add proper indentation between items + item_elem.tail = "\n " + + # Add proper tail for the array element itself + new_array.tail = "\n " + + else: + # Regular string + existing = target_root.find(f".//string[@name='{name}']") + if existing is None: + # Create new string element with proper indentation + new_string = etree.SubElement(target_root, 'string', name=name) + new_string.text = value + + # Add proper indentation and newlines + new_string.tail = "\n " + + # Ensure the last element has proper closing + if len(target_root) > 0: + last_child = target_root[-1] + if 
last_child.tail and not last_child.tail.endswith("\n"): + last_child.tail += "\n"