# Source listing: 39 lines, 1.3 KiB, Python
import json
|
|
import pathlib
|
|
from datetime import datetime
|
|
|
|
|
|
# --- Input location ---
# File name of the JSONL input to sample from.
INPUT_FILE_NAME = "fr_raw-wiktextract-data.jsonl"
# Directory that contains this script.
SCRIPT_DIR = pathlib.Path(__file__).parent
# Parent of the script directory; the "raw_data" folder is resolved under it.
ROOT_DIR = SCRIPT_DIR.parent
INPUT_FILE = ROOT_DIR / "raw_data" / INPUT_FILE_NAME


# --- Configuration ---
START_LINE = 99  # 1-based index (first line is 1)
NUM_LINES = 99  # Number of lines/objects to write
|
|
|
|
|
|
def extract_lines_to_file(
    file_path: pathlib.Path, start_line: int, num_lines: int
) -> None:
    """Copy a window of JSONL lines into a timestamped, pretty-printed file.

    Lines ``start_line`` through ``start_line + num_lines - 1`` (1-based) of
    *file_path* are each parsed as JSON and written, indented by two spaces
    and followed by a newline, to ``<YYYYmmdd_HHMMSS>.json`` in the same
    directory as the input. A line in the window that is not valid JSON is
    replaced in the output by an error message naming its line number.
    The output is a sequence of pretty-printed values, not a single JSON
    document. The output path is printed when the function finishes.

    Args:
        file_path: Path object of the UTF-8 encoded input file; its
            ``.parent`` is used as the output directory.
        start_line: 1-based number of the first line to extract.
        num_lines: Number of consecutive lines to extract.
    """
    # Generate timestamp filename
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_file = file_path.parent / f"{timestamp}.json"

    # First line number past the requested window (exclusive upper bound).
    end_line = start_line + num_lines

    with open(file_path, 'r', encoding='utf-8') as infile:
        with open(output_file, 'w', encoding='utf-8') as outfile:
            for i, line in enumerate(infile, start=1):
                if i >= end_line:
                    # Window exhausted: stop instead of scanning (and
                    # decoding) the rest of a potentially very large file.
                    break
                if i < start_line:
                    continue
                try:
                    element = json.loads(line)
                except json.JSONDecodeError:
                    outfile.write(f"Error: Line {i} is not valid JSON.\n")
                else:
                    outfile.write(
                        json.dumps(element, indent=2, ensure_ascii=False)
                    )
                    outfile.write('\n')

    print(f"Output written to: {output_file}")
|
|
|
|
|
|
# Script entry point: extract the configured window from the configured input.
if __name__ == "__main__":
    extract_lines_to_file(INPUT_FILE, START_LINE, NUM_LINES)
|