""" Markdown file processor for translation """ import os from typing import Dict, List, Tuple from dataclasses import dataclass, field @dataclass class MDTranslationItem: """Represents a single markdown file translation item""" filename: str content: str relative_path: str = "" # For subdirectories if needed class MDProcessor: """Markdown file processor for translation""" def __init__(self, file_extension: str = ".md"): self.file_extension = file_extension def get_md_files(self, folder_path: str) -> List[str]: """ Get all .md files in the specified folder (non-recursive). Returns a sorted list of filenames. """ if not os.path.exists(folder_path): return [] md_files = [] for item in os.listdir(folder_path): item_path = os.path.join(folder_path, item) if os.path.isfile(item_path) and item.endswith(self.file_extension): md_files.append(item) return sorted(md_files) def load_md_file(self, file_path: str) -> str: """Load and return the content of a markdown file""" try: with open(file_path, 'r', encoding='utf-8') as f: return f.read() except Exception as e: print(f"Error loading markdown file {file_path}: {e}") return "" def save_md_file(self, content: str, file_path: str): """Save content to a markdown file""" try: # Create parent directory if it doesn't exist parent_dir = os.path.dirname(file_path) if parent_dir and not os.path.exists(parent_dir): os.makedirs(parent_dir, exist_ok=True) with open(file_path, 'w', encoding='utf-8') as f: f.write(content) except Exception as e: print(f"Error saving markdown file {file_path}: {e}") def extract_content(self, folder_path: str) -> Dict[str, MDTranslationItem]: """ Extract content from all .md files in the folder. Returns a dict mapping filename to MDTranslationItem. """ items = {} md_files = self.get_md_files(folder_path) for filename in md_files: file_path = os.path.join(folder_path, filename) content = self.load_md_file(file_path) if content: # Only add if we successfully loaded content items[filename] = MDTranslationItem( filename=filename, content=content ) return items def check_asset_counts(self, base_folder: str, target_folders: List[str]) -> Tuple[bool, List[str]]: """ Check that all target folders have the same number of .md assets as the base folder. Args: base_folder: Path to the base folder (source of truth) target_folders: List of paths to target folders Returns: Tuple of (is_valid, list of error messages) """ errors = [] # Get base folder count base_files = self.get_md_files(base_folder) base_count = len(base_files) if base_count == 0: errors.append(f"Base folder {base_folder} contains no .md files") return False, errors # Check each target folder for target_folder in target_folders: target_files = self.get_md_files(target_folder) target_count = len(target_files) if target_count != base_count: # Find missing/extra files base_set = set(base_files) target_set = set(target_files) missing_in_target = base_set - target_set extra_in_target = target_set - base_set error_msg = f"Asset count mismatch in {target_folder}: expected {base_count}, found {target_count}" if missing_in_target: error_msg += f"\n Missing files: {', '.join(sorted(missing_in_target))}" if extra_in_target: error_msg += f"\n Extra files: {', '.join(sorted(extra_in_target))}" errors.append(error_msg) return len(errors) == 0, errors def find_missing_files(self, base_folder: str, target_folder: str) -> List[str]: """ Find .md files that exist in base folder but not in target folder. Returns: List of filenames that need to be translated """ base_files = set(self.get_md_files(base_folder)) target_files = set(self.get_md_files(target_folder)) return sorted(list(base_files - target_files))