# ResourceTranslate/llm_client.py

"""
OpenAI-compatible LLM client for LM Studio integration
"""

import json
import openai
from typing import List
from models import TranslationBatch
from config import Config


class LLMClient:
    """OpenAI-compatible chat client used to translate batches via LM Studio.

    The client sends one chat-completion request per batch, asks the model
    for a JSON array of translations and parses the reply leniently, since
    models frequently wrap the array in extra text.
    """

    def __init__(self, config: "Config"):
        """Create the underlying OpenAI client from ``config.llm_config``.

        Args:
            config: Object whose ``llm_config`` mapping provides the required
                keys ``base_url`` and ``model`` and the optional keys
                ``api_key`` (default ``'not-needed'``), ``timeout``
                (default 30) and ``max_retries`` (default 3).

        Raises:
            KeyError: If ``base_url`` or ``model`` is missing.
        """
        self.config = config
        llm_config = config.llm_config
        self.client = openai.OpenAI(
            base_url=llm_config['base_url'],
            api_key=llm_config.get('api_key', 'not-needed'),
        )
        self.model = llm_config['model']
        # Passed as the per-request ``timeout`` of every completion call.
        self.timeout = llm_config.get('timeout', 30)
        # Upper bound on completion requests made per batch.
        self.max_retries = llm_config.get('max_retries', 3)

    def translate_batch(self, batch: "TranslationBatch", instruction: str) -> List[str]:
        """Translate every item of *batch* and return the results in order.

        A request is retried (up to ``max_retries`` attempts, at least one)
        when it raises, returns no usable content, cannot be parsed, or
        yields a different number of translations than the batch has items.

        Args:
            batch: Batch exposing ``items`` (each with ``item_type``,
                ``name`` and ``value``) and ``target_language``.
            instruction: Free-form translation instructions inserted into
                the prompt.

        Returns:
            The translated strings. An empty list is returned when the batch
            has no items or every attempt failed. If no attempt produced
            exactly one translation per item, the last non-empty result is
            returned unchanged, so callers should still verify the length.
        """
        if not batch.items:
            # Nothing to translate; do not spend a request on it.
            return []

        prompt = self._build_translation_prompt(batch, instruction)

        # Build system message with strong emphasis on instructions
        system_message = (
            "You are a professional translator. "
            "You must follow the translation instructions EXACTLY as provided. "
            "Pay special attention to formality level, tone, and any specific requirements mentioned. "
            "Your response must be a valid JSON array."
        )

        expected_count = len(batch.items)
        # Last non-empty result with the wrong length; returned only if no
        # attempt produces a correctly sized result.
        best_effort: List[str] = []
        # A configured value of 0 (or less) must still allow one request.
        attempts = max(1, self.max_retries)

        for attempt in range(attempts):
            try:
                response = self.client.chat.completions.create(
                    model=self.model,
                    messages=[
                        {"role": "system", "content": system_message},
                        {"role": "user", "content": prompt},
                    ],
                    temperature=0.3,
                    timeout=self.timeout,
                )

                if not response or not response.choices:
                    print("Empty response from LLM")
                    continue

                message = getattr(response.choices[0], 'message', None)
                if not message:
                    print(f"Invalid response structure: {response}")
                    continue

                content = message.content
                if content is None:
                    print(f"Empty content in response: {response}")
                    continue

                translations = self._parse_translation_response(content)
            except Exception as e:
                # Boundary with a remote service: log and try again.
                print(f"Translation attempt {attempt + 1} failed: {e}")
                continue

            if not translations:
                print(f"Translation attempt {attempt + 1} produced no translations")
                continue

            if len(translations) == expected_count:
                return translations

            print(
                f"Translation attempt {attempt + 1} returned "
                f"{len(translations)} items, expected {expected_count}"
            )
            best_effort = translations

        if best_effort:
            return best_effort

        print("All translation attempts failed for batch")
        return []

    def _build_translation_prompt(self, batch: "TranslationBatch", instruction: str) -> str:
        """Build the user prompt listing every batch item to translate.

        Items are numbered from 1 in batch order. Items whose ``item_type``
        is ``'string-array'`` are tagged ``[ARRAY]`` together with their
        name, and an explanatory note about the ``' | '`` separator is added
        when at least one such item is present.

        Args:
            batch: Batch exposing ``items`` and ``target_language``.
            instruction: Translation instructions; stated at the top of the
                prompt and repeated at the end as a reminder.

        Returns:
            The complete prompt text.
        """
        parts = [
            "=== TRANSLATION TASK ===\n\n",
            f"CRITICAL INSTRUCTIONS (you MUST follow these):\n{instruction}\n\n",
            "IMPORTANT: Pay special attention to the tone and formality level specified above. ",
            "This is crucial for the quality of the translation.\n\n",
        ]

        if any(item.item_type == 'string-array' for item in batch.items):
            parts.append("Note: Some items are string arrays (multiple items separated by ' | '). ")
            parts.append("For string arrays, keep the same number of items and use ' | ' as separator.\n\n")

        parts.append("=== CONTENT TO TRANSLATE ===\n")
        parts.append(f"Target language: {batch.target_language}\n\n")

        for number, item in enumerate(batch.items, start=1):
            if item.item_type == 'string-array':
                parts.append(f"{number}. [ARRAY] {item.name}: {item.value}\n")
            else:
                parts.append(f"{number}. {item.value}\n")

        parts.append("\n=== RESPONSE FORMAT ===\n")
        parts.append("Return ONLY a JSON array with the translations in the same order:\n")
        parts.append('["translation1", "translation2", ...]\n\n')
        parts.append("=== REMINDER ===\n")
        parts.append(f"Remember to follow these instructions: {instruction}\n")

        return "".join(parts)

    def _parse_translation_response(self, response: str) -> List[str]:
        """Extract the list of translations from the model's reply.

        Strategies are tried in order, each one only if the previous failed:

        1. the whole reply as a JSON array;
        2. the text between the first ``[`` and the last ``]`` as a JSON
           array (handles code fences and surrounding chatter);
        3. one translation per non-empty line, with leading ``N. ``
           numbering and surrounding quotes removed.

        Args:
            response: Raw message content returned by the model.

        Returns:
            The translations as strings, or an empty list when nothing
            usable could be extracted.
        """
        if not response or not response.strip():
            print(f"Empty response to parse: {response}")
            return []

        text = response.strip()

        candidates = []
        if text.startswith('['):
            candidates.append(text)
        start = text.find('[')
        end = text.rfind(']')
        # Require the closing bracket to come after the opening one.
        if start != -1 and end > start:
            span = text[start:end + 1]
            if span not in candidates:
                candidates.append(span)

        for candidate in candidates:
            try:
                parsed = json.loads(candidate)
            except ValueError:
                # Not valid JSON; fall through to the next strategy.
                continue
            # Each candidate starts with '[', so a successful parse is a list.
            return [self._coerce_translation(item) for item in parsed]

        return self._parse_numbered_lines(text)

    @staticmethod
    def _coerce_translation(item) -> str:
        """Turn one parsed JSON element into a translation string."""
        if isinstance(item, str):
            return item
        if item is None:
            return ""
        if isinstance(item, list):
            # A nested array is how a model may answer a string-array item;
            # flatten it with the separator the prompt asks for.
            return " | ".join(LLMClient._coerce_translation(part) for part in item)
        return str(item)

    @staticmethod
    def _parse_numbered_lines(text: str) -> List[str]:
        """Fallback parser: treat each non-empty line as one translation."""
        translations = []
        for raw_line in text.split('\n'):
            line = raw_line.strip()
            # Skip blanks, bare array brackets and Markdown code fences.
            if not line or line.startswith(('[', ']', '```')):
                continue

            # Only strip a prefix that really is list numbering ("12. "),
            # so sentences containing ". " are left intact.
            head, sep, tail = line.partition('. ')
            if sep and head.isdigit():
                line = tail.strip()

            # Drop the element separator left over from a broken JSON array.
            if line.endswith(('",', "',")):
                line = line[:-1]

            line = line.strip('"\'')
            if line:
                translations.append(line)

        return translations