Files
Wictionary-Data-Parser/scripts/printline.py
2026-02-13 00:10:40 +01:00

39 lines
1.3 KiB
Python

import json
import pathlib
from datetime import datetime
# --- Input location ---
# File name of the JSON Lines dump to sample from.
INPUT_FILE_NAME = "fr_raw-wiktextract-data.jsonl"

# Directory holding this script, and the directory one level above it.
SCRIPT_DIR = pathlib.Path(__file__).parent
ROOT_DIR = SCRIPT_DIR.parent

# The dump is looked up under <ROOT_DIR>/raw_data/.
INPUT_FILE = ROOT_DIR.joinpath("raw_data", INPUT_FILE_NAME)

# --- Configuration ---
# First line to extract, counted from 1 (the first line of the file is 1).
START_LINE = 99
# How many consecutive lines/objects to write, starting at START_LINE.
NUM_LINES = 99
def extract_lines_to_file(file_path, start_line, num_lines):
    """Pretty-print a window of lines from a JSON Lines file into a new file.

    Lines ``start_line`` .. ``start_line + num_lines - 1`` (1-based,
    inclusive) of ``file_path`` are each parsed as JSON and written, indented
    by two spaces and followed by a newline, to a file named
    ``<YYYYMMDD_HHMMSS>.json`` created next to ``file_path``. A line in the
    window that is not valid JSON is replaced in the output by an
    ``Error: Line <n> is not valid JSON.`` marker instead of aborting.

    Args:
        file_path: Path object of the input file; its ``.parent`` is used as
            the output directory.
        start_line: 1-based number of the first line to extract.
        num_lines: Number of consecutive lines to extract.

    Returns:
        The path of the output file that was written.

    Raises:
        OSError: If the input cannot be opened or the output cannot be
            created.
    """
    # Generate timestamp filename
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_file = file_path.parent / f"{timestamp}.json"

    # Exclusive upper bound of the requested window.
    end_line = start_line + num_lines

    with open(file_path, 'r', encoding='utf-8') as infile, \
            open(output_file, 'w', encoding='utf-8') as outfile:
        for i, line in enumerate(infile, start=1):
            if i >= end_line:
                # Past the window: stop instead of scanning the rest of a
                # potentially very large input file.
                break
            if i < start_line:
                continue
            try:
                element = json.loads(line)
            except json.JSONDecodeError:
                outfile.write(f"Error: Line {i} is not valid JSON.\n")
                continue
            outfile.write(json.dumps(element, indent=2, ensure_ascii=False))
            outfile.write('\n')

    print(f"Output written to: {output_file}")
    return output_file
# Script entry point: run the extraction with the module-level settings.
if __name__ == "__main__":
    extract_lines_to_file(
        file_path=INPUT_FILE,
        start_line=START_LINE,
        num_lines=NUM_LINES,
    )