Monte Carlo Benchmarking Engine
High-performance SIMD Monte Carlo engine (AVX2/NEON) with custom memory allocators and perf logging.
 
Loading...
Searching...
No Matches
combine_batch_parquets.py
Go to the documentation of this file.
1# ===========================================
2# combine_batch_parquets.py
3# ===========================================
4
5
36
37
38from scripts.config import DB_PATH
39from pathlib import Path
40import polars as pl
41import sys
42
# --- 0. Parse command-line arguments ---
# Exactly two positional arguments must follow the script name:
# the directory to scan for batch files and the path of the merged output.
cli_args = sys.argv[1:]
if len(cli_args) != 2:
    print("Usage: combine_batch_parquets.py <batch_dir> <output_file>")
    sys.exit(1)

batch_dir, output_path = (Path(arg) for arg in cli_args)
# Path object built from DB_PATH as imported from scripts.config.
global_db_path = Path(DB_PATH)
50
# --- 1. Combine batch parquet files ---
# Collect the per-batch result files, skipping any file whose name equals the
# output file's name so a previously merged result is not merged into itself.
# Sorting makes the read order independent of the filesystem's listing order.
files = sorted(
    f for f in batch_dir.glob("perf_results_*.parquet") if f.name != output_path.name
)
if not files:
    # Without inputs no merged file can be produced: report on stderr and exit
    # non-zero so calling scripts do not mistake this run for a success.
    print(
        f"[ERROR] No perf_results_*.parquet files found in {batch_dir}",
        file=sys.stderr,
    )
    sys.exit(1)

# Stack all batches row-wise ("vertical_relaxed" concat strategy), then order
# the combined rows by the "Timestamp" column.
merged = pl.concat(
    [pl.read_parquet(f) for f in files], how="vertical_relaxed"
).sort("Timestamp")

# Make sure the destination directory exists before writing the merged file.
output_path.parent.mkdir(parents=True, exist_ok=True)
merged.write_parquet(output_path, compression="zstd")
print(f"[INFO] Merged batch saved: {output_path}")
60
61
# --- 2. Append to global db.parquet ---
# Build the new database contents: the existing rows followed by this batch,
# or just this batch when no database file exists yet.
if global_db_path.exists():
    db = pl.read_parquet(global_db_path)
    db = pl.concat([db, merged], how="vertical_relaxed")
    print("[INFO] Appended to existing db.parquet")
else:
    db = merged
    print("[INFO] Created new db.parquet")

# Write to a sibling temporary file and swap it into place afterwards, so a
# failed or interrupted write cannot truncate the existing database file.
global_db_path.parent.mkdir(parents=True, exist_ok=True)
tmp_db_path = global_db_path.with_name(global_db_path.name + ".tmp")
try:
    db.write_parquet(tmp_db_path, compression="zstd")
    tmp_db_path.replace(global_db_path)
finally:
    # No-op after a successful replace; removes the partial file on failure.
    tmp_db_path.unlink(missing_ok=True)
print(f"[INFO] Parquet db updated: {global_db_path}")