"""Copy a window of lines from a JSONL file into a timestamped, pretty-printed file.

Each selected input line is parsed as JSON and re-serialized with a two-space
indent. Lines that fail to parse are replaced by an error message in the output.
"""

import json
import pathlib
from datetime import datetime
from typing import Union

INPUT_FILE_NAME = "fr_raw-wiktextract-data.jsonl"
SCRIPT_DIR = pathlib.Path(__file__).parent
ROOT_DIR = SCRIPT_DIR.parent
INPUT_FILE = ROOT_DIR / "raw_data" / INPUT_FILE_NAME

# --- Configuration ---
START_LINE = 99  # 1-based index (first line is 1)
NUM_LINES = 99  # Number of lines/objects to write


def extract_lines_to_file(
    file_path: Union[str, pathlib.Path],
    start_line: int,
    num_lines: int,
) -> pathlib.Path:
    """Write ``num_lines`` lines of ``file_path``, starting at ``start_line``, to a new file.

    The output file is created next to the input file and is named after the
    current local time (``YYYYmmdd_HHMMSS.json``). Every selected line is
    parsed with ``json.loads`` and written back with ``indent=2`` and
    ``ensure_ascii=False``, followed by a newline. A selected line that is not
    valid JSON produces ``"Error: Line <n> is not valid JSON."`` instead.

    Args:
        file_path: Path of the line-delimited JSON input file (``str`` or
            ``pathlib.Path``).
        start_line: 1-based number of the first line to extract.
        num_lines: Number of consecutive lines to extract. A value of zero or
            less yields an empty output file.

    Returns:
        The path of the output file that was written.

    Raises:
        OSError: If the input file cannot be opened or the output file cannot
            be created.
    """
    # Accept plain strings as well as Path objects; ``.parent`` needs a Path.
    source_path = pathlib.Path(file_path)

    # Generate timestamp filename
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_file = source_path.parent / f"{timestamp}.json"

    # Exclusive upper bound of the window of line numbers to extract.
    stop_line = start_line + num_lines

    with open(source_path, "r", encoding="utf-8") as infile, \
            open(output_file, "w", encoding="utf-8") as outfile:
        for line_no, line in enumerate(infile, start=1):
            if line_no >= stop_line:
                # Past the window: stop instead of scanning the rest of a
                # potentially very large input file.
                break
            if line_no < start_line:
                continue
            try:
                element = json.loads(line)
            except json.JSONDecodeError:
                outfile.write(f"Error: Line {line_no} is not valid JSON.\n")
            else:
                outfile.write(json.dumps(element, indent=2, ensure_ascii=False))
                outfile.write("\n")

    print(f"Output written to: {output_file}")
    return output_file


if __name__ == "__main__":
    extract_lines_to_file(INPUT_FILE, START_LINE, NUM_LINES)