137 lines
4.8 KiB
Python
137 lines
4.8 KiB
Python
"""
|
|
Markdown file processor for translation
|
|
"""
|
|
|
|
import os
|
|
from typing import Dict, List, Tuple
|
|
from dataclasses import dataclass, field
|
|
|
|
|
|
@dataclass
class MDTranslationItem:
    """Represents a single markdown file translation item"""

    # Name of the markdown file (as listed in its folder).
    filename: str
    # Full text content of the file.
    content: str
    # For subdirectories if needed; defaults to the empty string.
    relative_path: str = ""
|
|
|
|
|
|
class MDProcessor:
    """Markdown file processor for translation.

    Lists, loads and saves markdown files in a folder, and compares the
    files present in a base folder against one or more target folders.
    """

    def __init__(self, file_extension: str = ".md"):
        """Create a processor for files whose names end in *file_extension*."""
        self.file_extension = file_extension

    def get_md_files(self, folder_path: str) -> List[str]:
        """
        Get all files with the configured extension in the folder (non-recursive).

        Args:
            folder_path: Path of the folder to scan

        Returns:
            Sorted list of filenames (names only, not full paths). Empty when
            the path does not exist or is not a directory.
        """
        # isdir rather than exists: a path that points at a regular file must
        # yield [] instead of letting os.listdir raise NotADirectoryError.
        if not os.path.isdir(folder_path):
            return []

        return sorted(
            name
            for name in os.listdir(folder_path)
            if name.endswith(self.file_extension)
            and os.path.isfile(os.path.join(folder_path, name))
        )

    def load_md_file(self, file_path: str) -> str:
        """
        Load and return the content of a markdown file.

        Best-effort: any failure is printed and reported to the caller as an
        empty string rather than raised.
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                return f.read()
        except Exception as e:
            print(f"Error loading markdown file {file_path}: {e}")
            return ""

    def save_md_file(self, content: str, file_path: str):
        """
        Save content to a markdown file (UTF-8), creating parent folders.

        Best-effort: any failure is printed, not raised.
        """
        try:
            # Create parent directory if it doesn't exist. exist_ok=True makes
            # a separate existence check unnecessary.
            parent_dir = os.path.dirname(file_path)
            if parent_dir:
                os.makedirs(parent_dir, exist_ok=True)

            with open(file_path, 'w', encoding='utf-8') as f:
                f.write(content)
        except Exception as e:
            print(f"Error saving markdown file {file_path}: {e}")

    def extract_content(self, folder_path: str) -> Dict[str, "MDTranslationItem"]:
        """
        Extract content from all matching files in the folder.

        Files whose loaded content is empty are skipped. Because
        load_md_file returns "" on failure, this covers both unreadable
        files and files that are genuinely empty.

        Returns:
            Dict mapping filename to MDTranslationItem.
        """
        items = {}

        for filename in self.get_md_files(folder_path):
            content = self.load_md_file(os.path.join(folder_path, filename))
            if content:  # Only add if we successfully loaded content
                items[filename] = MDTranslationItem(
                    filename=filename,
                    content=content,
                )

        return items

    def check_asset_counts(self, base_folder: str, target_folders: List[str]) -> Tuple[bool, List[str]]:
        """
        Check that all target folders have the same number of .md assets as the base folder.

        Only the counts are compared: a target holding the same number of
        files under different names is accepted. Filenames are consulted only
        to explain a count mismatch.

        Args:
            base_folder: Path to the base folder (source of truth)
            target_folders: List of paths to target folders

        Returns:
            Tuple of (is_valid, list of error messages)
        """
        errors = []

        # Get base folder count
        base_files = self.get_md_files(base_folder)
        base_count = len(base_files)

        if base_count == 0:
            errors.append(f"Base folder {base_folder} contains no .md files")
            return False, errors

        # Loop-invariant, so build it once for all targets.
        base_set = set(base_files)

        # Check each target folder
        for target_folder in target_folders:
            target_files = self.get_md_files(target_folder)
            target_count = len(target_files)

            if target_count == base_count:
                continue

            # Find missing/extra files
            target_set = set(target_files)
            missing_in_target = base_set - target_set
            extra_in_target = target_set - base_set

            error_msg = f"Asset count mismatch in {target_folder}: expected {base_count}, found {target_count}"

            if missing_in_target:
                error_msg += f"\n Missing files: {', '.join(sorted(missing_in_target))}"
            if extra_in_target:
                error_msg += f"\n Extra files: {', '.join(sorted(extra_in_target))}"

            errors.append(error_msg)

        return len(errors) == 0, errors

    def find_missing_files(self, base_folder: str, target_folder: str) -> List[str]:
        """
        Find .md files that exist in base folder but not in target folder.

        Returns:
            Sorted list of filenames that need to be translated
        """
        base_files = set(self.get_md_files(base_folder))
        target_files = set(self.get_md_files(target_folder))

        return sorted(base_files - target_files)
|