migrate to gitea

2026-02-14 18:12:28 +01:00
commit 7c17f0f0cf
21 changed files with 2037 additions and 0 deletions
--- a/llm_client.py
+++ b/llm_client.py
@@ -0,0 +1,144 @@
+"""
+OpenAI-compatible LLM client for LM Studio integration
+"""
+
+import json
+import openai
+from typing import List
+from models import TranslationBatch
+from config import Config
+
+
class LLMClient:
    """OpenAI-compatible chat client used to translate batches of strings.

    The client talks to any endpoint that implements the OpenAI chat
    completions API (the module targets LM Studio). All failures are
    reported on stdout and surfaced to the caller as an empty list; no
    method of this class raises on a bad or missing LLM reply.
    """

    def __init__(self, config: Config):
        """Create the underlying OpenAI client from ``config.llm_config``.

        Required keys: ``base_url`` and ``model``.
        Optional keys: ``api_key`` (default ``'not-needed'``), ``timeout``
        (default ``30``, passed as the per-request timeout) and
        ``max_retries`` (default ``3``, number of request attempts per batch).

        Raises:
            KeyError: if ``base_url`` or ``model`` is missing.
        """
        llm_config = config.llm_config
        self.config = config
        self.client = openai.OpenAI(
            base_url=llm_config['base_url'],
            api_key=llm_config.get('api_key', 'not-needed'),
        )
        self.model = llm_config['model']
        self.timeout = llm_config.get('timeout', 30)
        self.max_retries = llm_config.get('max_retries', 3)

    def translate_batch(self, batch: TranslationBatch, instruction: str) -> List[str]:
        """Translate every item of ``batch`` with a single LLM request.

        The request is attempted up to ``self.max_retries`` times. An attempt
        counts as failed when the call raises, when the reply has no usable
        message content, or when no translation can be parsed out of it.

        Args:
            batch: items to translate plus the target language.
            instruction: free-form translation instructions for the model.

        Returns:
            The parsed translations in the order the model returned them, or
            an empty list when the batch is empty or every attempt failed.
            The length is NOT checked against ``len(batch.items)``.
        """
        if not batch.items:
            # Nothing to translate: do not spend a request on an empty prompt.
            return []

        prompt = self._build_translation_prompt(batch, instruction)

        # System message with strong emphasis on following the instructions.
        system_message = (
            "You are a professional translator. "
            "You must follow the translation instructions EXACTLY as provided. "
            "Pay special attention to formality level, tone, and any specific requirements mentioned. "
            "Your response must be a valid JSON array."
        )
        messages = [
            {"role": "system", "content": system_message},
            {"role": "user", "content": prompt},
        ]

        for attempt in range(self.max_retries):
            # Broad catch on purpose: this is the retry boundary around a
            # network call and a best-effort parser; any error means "retry".
            try:
                response = self.client.chat.completions.create(
                    model=self.model,
                    messages=messages,
                    temperature=0.3,
                    timeout=self.timeout,
                )
                content = self._extract_content(response)
                if content is None:
                    continue
                translations = self._parse_translation_response(content)
            except Exception as e:
                print(f"Translation attempt {attempt + 1} failed: {e}")
                continue

            if translations:
                return translations
            # An unparsable reply is a failed attempt, not a final answer.
            print(f"Translation attempt {attempt + 1} returned no translations")

        print("All translation attempts failed for batch")
        return []

    @staticmethod
    def _extract_content(response):
        """Return the text of the first choice, or ``None`` if it is missing.

        Prints a diagnostic describing which part of the response structure
        was absent.
        """
        if not response or not response.choices:
            print("Empty response from LLM")
            return None

        message = getattr(response.choices[0], 'message', None)
        if not message:
            print(f"Invalid response structure: {response}")
            return None

        if message.content is None:
            print(f"Empty content in response: {response}")
            return None

        return message.content

    def _build_translation_prompt(self, batch: TranslationBatch, instruction: str) -> str:
        """Build the user prompt for ``batch``.

        The prompt states the instructions, lists the items as a 1-based
        numbered list (string arrays are tagged ``[ARRAY]`` and prefixed with
        their name), asks for a JSON array in the same order, and repeats the
        instructions at the end.
        """
        parts = [
            "=== TRANSLATION TASK ===\n\n",
            f"CRITICAL INSTRUCTIONS (you MUST follow these):\n{instruction}\n\n",
            "IMPORTANT: Pay special attention to the tone and formality level specified above. ",
            "This is crucial for the quality of the translation.\n\n",
        ]

        # Only explain the array convention when the batch actually needs it.
        if any(item.item_type == 'string-array' for item in batch.items):
            parts.append("Note: Some items are string arrays (multiple items separated by ' | '). ")
            parts.append("For string arrays, keep the same number of items and use ' | ' as separator.\n\n")

        parts.append("=== CONTENT TO TRANSLATE ===\n")
        parts.append(f"Target language: {batch.target_language}\n\n")

        for number, item in enumerate(batch.items, start=1):
            if item.item_type == 'string-array':
                parts.append(f"{number}. [ARRAY] {item.name}: {item.value}\n")
            else:
                parts.append(f"{number}. {item.value}\n")

        parts.append("\n=== RESPONSE FORMAT ===\n")
        parts.append("Return ONLY a JSON array with the translations in the same order:\n")
        parts.append('["translation1", "translation2", ...]\n\n')
        parts.append("=== REMINDER ===\n")
        parts.append(f"Remember to follow these instructions: {instruction}\n")

        return "".join(parts)

    def _parse_translation_response(self, response: str) -> List[str]:
        """Extract the list of translations from the raw LLM reply.

        Strategy, in order:

        1. Decode the JSON array that starts at the first ``[``. Text before
           it (preamble, code fences) and after it (closing fence, remarks)
           is ignored.
        2. Otherwise treat the reply as one translation per line, dropping
           bracket/fence lines, a leading ``N. `` list number and surrounding
           quotes.

        Returns an empty list when nothing usable is found.
        """
        if not response or not response.strip():
            print(f"Empty response to parse: {response}")
            return []

        text = response.strip()

        start = text.find('[')
        if start != -1:
            try:
                # raw_decode stops at the end of the array, so trailing text
                # after a valid array does not invalidate the whole reply.
                parsed, _ = json.JSONDecoder().raw_decode(text, start)
                return parsed
            except ValueError as e:
                print(f"Error parsing translation response: {e}")
                print(f"Raw response: {response}")

        # Fallback: one translation per non-empty line.
        translations = []
        for raw_line in text.split('\n'):
            line = raw_line.strip()
            if not line or line.startswith(('[', ']', '```')):
                continue
            # Strip list numbering only when the prefix really is a number,
            # so sentences containing ". " are not truncated.
            number, separator, remainder = line.partition('. ')
            if separator and number.isdecimal():
                line = remainder
            line = line.strip('"\'')
            if line:
                translations.append(line)

        return translations