migrate to gitea

This commit is contained in:
jonasgaudian
2026-02-14 18:12:28 +01:00
commit 7c17f0f0cf
21 changed files with 2037 additions and 0 deletions

144
llm_client.py Normal file
View File

@@ -0,0 +1,144 @@
"""
OpenAI-compatible LLM client for LM Studio integration
"""
import json
import openai
from typing import List
from models import TranslationBatch
from config import Config
class LLMClient:
    """OpenAI-compatible LLM client for LM Studio.

    Wraps an ``openai.OpenAI`` client and exposes :meth:`translate_batch`,
    which sends one prompt per batch and parses the reply into a list of
    translations. Diagnostics are reported with ``print``; the public
    methods signal failure by returning an empty list instead of raising.
    """

    def __init__(self, config: Config):
        """Create the underlying OpenAI client from ``config.llm_config``.

        Keys read from ``config.llm_config``:
            base_url:    required, endpoint of the OpenAI-compatible server.
            model:       required, model name sent with every request.
            api_key:     optional, defaults to ``'not-needed'``.
            timeout:     optional, defaults to ``30``; passed as the request
                         timeout of each completion call.
            max_retries: optional, defaults to ``3``; number of attempts
                         made by :meth:`translate_batch`.
        """
        self.config = config
        self.client = openai.OpenAI(
            base_url=config.llm_config['base_url'],
            api_key=config.llm_config.get('api_key', 'not-needed'),
        )
        self.model = config.llm_config['model']
        self.timeout = config.llm_config.get('timeout', 30)
        self.max_retries = config.llm_config.get('max_retries', 3)

    def translate_batch(self, batch: TranslationBatch, instruction: str) -> List[str]:
        """Translate a batch of strings.

        Args:
            batch: Batch whose ``items`` (each with ``item_type``, ``name``
                and ``value``) and ``target_language`` are rendered into the
                prompt.
            instruction: Free-form translation instructions embedded in the
                prompt.

        Returns:
            The parsed translations, or ``[]`` when the batch is empty or
            every attempt failed. The number of translations is NOT checked
            against ``len(batch.items)``; callers should validate it.
        """
        # Nothing to translate: avoid a pointless round-trip to the model.
        if not batch.items:
            return []

        prompt = self._build_translation_prompt(batch, instruction)
        # Build system message with strong emphasis on instructions
        system_message = (
            "You are a professional translator. "
            "You must follow the translation instructions EXACTLY as provided. "
            "Pay special attention to formality level, tone, and any specific requirements mentioned. "
            "Your response must be a valid JSON array."
        )

        for attempt in range(self.max_retries):
            try:
                response = self.client.chat.completions.create(
                    model=self.model,
                    messages=[
                        {"role": "system", "content": system_message},
                        {"role": "user", "content": prompt},
                    ],
                    temperature=0.3,
                    timeout=self.timeout,
                )

                if not response or not response.choices:
                    print("Empty response from LLM")
                    continue

                choice = response.choices[0]
                if not hasattr(choice, 'message') or not choice.message:
                    print(f"Invalid response structure: {response}")
                    continue

                content = choice.message.content
                if content is None:
                    print(f"Empty content in response: {response}")
                    continue

                translations = self._parse_translation_response(content)
                if translations:
                    return translations
                # An unparseable reply is treated like any other failed
                # attempt so the remaining retries are still used.
                print(f"Translation attempt {attempt + 1} returned no translations")
            except Exception as e:
                # Deliberately broad: any client/network error just consumes
                # one attempt, this method never raises to its caller.
                print(f"Translation attempt {attempt + 1} failed: {e}")

        print("All translation attempts failed for batch")
        return []

    def _build_translation_prompt(self, batch: TranslationBatch, instruction: str) -> str:
        """Build the user prompt for ``batch``.

        The prompt lists the instructions, the target language, the items as
        a 1-based numbered list (string arrays are tagged ``[ARRAY]`` and
        prefixed with their name), the expected JSON response format, and a
        final reminder of the instructions.
        """
        parts = [
            "=== TRANSLATION TASK ===\n\n",
            f"CRITICAL INSTRUCTIONS (you MUST follow these):\n{instruction}\n\n",
            "IMPORTANT: Pay special attention to the tone and formality level specified above. ",
            "This is crucial for the quality of the translation.\n\n",
        ]

        # Only explain the array convention when the batch actually uses it.
        if any(item.item_type == 'string-array' for item in batch.items):
            parts.append("Note: Some items are string arrays (multiple items separated by ' | '). ")
            parts.append("For string arrays, keep the same number of items and use ' | ' as separator.\n\n")

        parts.append("=== CONTENT TO TRANSLATE ===\n")
        parts.append(f"Target language: {batch.target_language}\n\n")

        for number, item in enumerate(batch.items, start=1):
            if item.item_type == 'string-array':
                parts.append(f"{number}. [ARRAY] {item.name}: {item.value}\n")
            else:
                parts.append(f"{number}. {item.value}\n")

        parts.append("\n=== RESPONSE FORMAT ===\n")
        parts.append("Return ONLY a JSON array with the translations in the same order:\n")
        parts.append('["translation1", "translation2", ...]\n\n')
        parts.append("=== REMINDER ===\n")
        parts.append(f"Remember to follow these instructions: {instruction}\n")
        return "".join(parts)

    def _parse_translation_response(self, response: str) -> List[str]:
        """Parse the model's reply into a list of translations.

        Strategy:
            1. If the text contains a ``[`` ... ``]`` span, parse that span as
               JSON. This tolerates commentary or code fences around the
               array. Invalid JSON yields ``[]``.
            2. Otherwise fall back to one translation per non-empty line,
               removing a leading ``"<digits>. "`` list prefix and
               surrounding quotes.

        Returns:
            The translations, or ``[]`` for empty or unparseable input.
        """
        if not response or not response.strip():
            print(f"Empty response to parse: {response}")
            return []

        text = response.strip()

        # Slice from the first '[' to the last ']' so that text before or
        # after the array does not break JSON decoding.
        start = text.find('[')
        end = text.rfind(']')
        if start != -1 and end > start:
            try:
                return json.loads(text[start:end + 1])
            except ValueError as e:  # json.JSONDecodeError is a ValueError
                print(f"Error parsing translation response: {e}")
                print(f"Raw response: {response}")
                return []

        # Fallback: split by lines and clean up
        translations = []
        for raw_line in text.split('\n'):
            line = raw_line.strip()
            if not line or line.startswith(('[', ']')):
                continue
            # Drop a list prefix such as "3. " only when it really is a
            # number, so sentences containing ". " are kept intact.
            head, separator, tail = line.partition('. ')
            if separator and head.isdigit():
                line = tail.lstrip()
            line = line.strip('"\'')
            if line:
                translations.append(line)
        return translations