migrate to gitea
This commit is contained in:
136
md_processor.py
Normal file
136
md_processor.py
Normal file
@@ -0,0 +1,136 @@
|
||||
"""
|
||||
Markdown file processor for translation
|
||||
"""
|
||||
|
||||
import os
|
||||
from typing import Dict, List, Tuple
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
|
||||
@dataclass
class MDTranslationItem:
    """Container for one markdown file that is to be translated."""

    # Name of the markdown file.
    filename: str
    # Text content of the markdown file.
    content: str
    # For subdirectories if needed; defaults to an empty string.
    relative_path: str = ""
|
||||
|
||||
|
||||
class MDProcessor:
    """Markdown file processor for translation.

    Lists, loads and saves markdown files inside a folder, and compares the
    set of files in a base folder against one or more target folders.
    """

    def __init__(self, file_extension: str = ".md") -> None:
        """Create a processor.

        Args:
            file_extension: Suffix a filename must end with to be picked up
                by get_md_files. The comparison is case-sensitive.
        """
        self.file_extension = file_extension

    def get_md_files(self, folder_path: str) -> List[str]:
        """Get all markdown files in the specified folder (non-recursive).

        Args:
            folder_path: Folder to list.

        Returns:
            Sorted list of filenames (not full paths) of regular files whose
            name ends with ``self.file_extension``. An empty list when
            ``folder_path`` is not an existing directory.
        """
        # isdir rather than exists: a path that points at a regular file
        # would otherwise reach os.listdir and raise NotADirectoryError.
        if not os.path.isdir(folder_path):
            return []

        return sorted(
            name
            for name in os.listdir(folder_path)
            if name.endswith(self.file_extension)
            and os.path.isfile(os.path.join(folder_path, name))
        )

    def load_md_file(self, file_path: str) -> str:
        """Load and return the content of a markdown file.

        Args:
            file_path: Path of the file to read (decoded as UTF-8).

        Returns:
            The file content, or an empty string if reading failed. The
            failure is reported on stdout rather than raised.
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                return f.read()
        except Exception as e:
            # Best-effort by design: callers treat "" as "nothing loaded".
            print(f"Error loading markdown file {file_path}: {e}")
            return ""

    def save_md_file(self, content: str, file_path: str) -> None:
        """Save content to a markdown file.

        Missing parent directories are created first. Any failure is
        reported on stdout rather than raised.

        Args:
            content: Text to write (encoded as UTF-8).
            file_path: Destination path; an existing file is overwritten.
        """
        try:
            parent_dir = os.path.dirname(file_path)
            if parent_dir:
                # exist_ok=True already tolerates an existing directory, so
                # no separate (race-prone) existence check is needed.
                os.makedirs(parent_dir, exist_ok=True)

            with open(file_path, 'w', encoding='utf-8') as f:
                f.write(content)
        except Exception as e:
            print(f"Error saving markdown file {file_path}: {e}")

    def extract_content(self, folder_path: str) -> "Dict[str, MDTranslationItem]":
        """Extract content from all markdown files in the folder.

        Args:
            folder_path: Folder whose markdown files are read.

        Returns:
            Dict mapping filename to MDTranslationItem. Files whose loaded
            content is empty are left out; this covers both files that
            failed to load and files that are genuinely empty, because
            load_md_file returns "" in either case.
        """
        items = {}

        for filename in self.get_md_files(folder_path):
            content = self.load_md_file(os.path.join(folder_path, filename))
            if content:
                items[filename] = MDTranslationItem(
                    filename=filename,
                    content=content,
                )

        return items

    def check_asset_counts(self, base_folder: str, target_folders: List[str]) -> Tuple[bool, List[str]]:
        """Check that every target folder has as many markdown files as the base.

        Only the counts are compared: a target with the same number of files
        but different filenames passes this check. Use find_missing_files
        for a per-file comparison.

        Args:
            base_folder: Path to the base folder (source of truth).
            target_folders: List of paths to target folders.

        Returns:
            Tuple of (is_valid, list of error messages). When the base
            folder has no markdown files the result is invalid and the
            targets are not inspected.
        """
        errors = []

        base_files = self.get_md_files(base_folder)
        base_count = len(base_files)

        if base_count == 0:
            errors.append(f"Base folder {base_folder} contains no .md files")
            return False, errors

        # Loop-invariant: build the base set once for all targets.
        base_set = set(base_files)

        for target_folder in target_folders:
            target_files = self.get_md_files(target_folder)
            target_count = len(target_files)

            if target_count == base_count:
                continue

            # Name the differing files so the mismatch is actionable.
            target_set = set(target_files)
            missing_in_target = base_set - target_set
            extra_in_target = target_set - base_set

            error_msg = f"Asset count mismatch in {target_folder}: expected {base_count}, found {target_count}"

            if missing_in_target:
                error_msg += f"\n Missing files: {', '.join(sorted(missing_in_target))}"
            if extra_in_target:
                error_msg += f"\n Extra files: {', '.join(sorted(extra_in_target))}"

            errors.append(error_msg)

        return len(errors) == 0, errors

    def find_missing_files(self, base_folder: str, target_folder: str) -> List[str]:
        """Find markdown files that exist in the base folder but not in the target.

        Args:
            base_folder: Folder holding the source files.
            target_folder: Folder to compare against.

        Returns:
            Sorted list of filenames that need to be translated.
        """
        base_files = set(self.get_md_files(base_folder))
        target_files = set(self.get_md_files(target_folder))

        return sorted(base_files - target_files)
|
||||
Reference in New Issue
Block a user