from pathlib import Path

# ---------------------------------------------------------------------------
# Combine every per-batch "perf_results_*.parquet" file found in <batch_dir>
# into one parquet file (<output_file>), then fold the merged rows into the
# global parquet database located at DB_PATH.
#
# NOTE(review): `sys`, `pl` and `DB_PATH` are expected to be brought into
# scope earlier in this file (not visible in this section) -- confirm.
# ---------------------------------------------------------------------------

# Both positional arguments are required; without them there is nothing
# meaningful to do, so show the usage line and stop instead of failing later
# with an IndexError on sys.argv.
if len(sys.argv) < 3:
    print("Usage: combine_batch_parquets.py <batch_dir> <output_file>")
    sys.exit(1)

batch_dir = Path(sys.argv[1])
output_path = Path(sys.argv[2])
global_db_path = Path(DB_PATH)

# Skip any candidate whose file name equals the output file's name so that a
# previous merge result is not fed back into the merge. Sorting makes the
# concat input order deterministic (glob order is filesystem-dependent).
files = sorted(
    f
    for f in batch_dir.glob("perf_results_*.parquet")
    if f.name != output_path.name
)

# Concatenating an empty list would raise; report the problem and stop.
if not files:
    print(f"[ERROR] No .parquet files found in {batch_dir}")
    sys.exit(1)

# Stack all batch frames vertically and order the result by "Timestamp".
merged = pl.concat(
    [pl.read_parquet(f) for f in files],
    how="vertical_relaxed",
).sort("Timestamp")
merged.write_parquet(output_path, compression="zstd")
print(f"[INFO] Merged batch saved: {output_path}")

# Append to the global db when it already exists; otherwise the merged batch
# itself becomes the initial db content, so `db` is bound on both paths.
if global_db_path.exists():
    db = pl.read_parquet(global_db_path)
    db = pl.concat([db, merged], how="vertical_relaxed")
    print("[INFO] Appended to existing db.parquet")
else:
    db = merged
    print("[INFO] Created new db.parquet")

db.write_parquet(global_db_path, compression="zstd")
print(f"[INFO] Parquet db updated: {global_db_path}")