Monte Carlo Benchmarking Engine
High-performance SIMD Monte Carlo engine (AVX2/NEON) with custom memory allocators and perf logging.
 
Loading...
Searching...
No Matches
insert_to_clickhouse.py
Go to the documentation of this file.
1# ===========================================
2# insert_to_clickhouse.py
3# ===========================================
4
5
25
26
27import argparse
28import polars as pl
29from clickhouse_driver import Client
30from pipeline.schema import SCHEMA
31from pipeline.utils import safe_vector_cast
32from scripts.config import *
33
34
def insert_batch(batch_id: str) -> None:
    """!Filters and inserts a batch of records into ClickHouse.

    Loads data from the Parquet file at DB_PATH, keeps only the rows whose
    BatchID column equals batch_id, passes them through safe_vector_cast
    with SCHEMA, and inserts the resulting records into the
    `benchmark.performance` table. The ClickHouse client is always
    disconnected before this function returns or raises.

    @param batch_id The BatchID to filter the dataset on.

    @throws Exception If the ClickHouse insert fails; the error is printed
            and then re-raised unchanged.
    """
    df = pl.read_parquet(DB_PATH)
    df = df.filter(pl.col("BatchID") == batch_id)

    # Optional: enforce schema casting
    # NOTE(review): presumably coerces column dtypes to match SCHEMA —
    # confirm against pipeline.utils.safe_vector_cast.
    df = safe_vector_cast(df, SCHEMA)

    # One dict per row, keyed by column name.
    records = df.to_dicts()

    client = Client(
        host=CLICKHOUSE_HOST,
        port=CLICKHOUSE_TCP_PORT,
        user=CLICKHOUSE_USER,
        password=CLICKHOUSE_PASSWORD,
    )

    try:
        client.execute("INSERT INTO benchmark.performance VALUES", records)
    except Exception as e:
        print(f"[ERROR] Error inserting records into ClickHouse: {e}")
        raise
    finally:
        # Release the connection on both the success and the failure path;
        # previously the client was never closed.
        client.disconnect()

    print(f"[INFO] Inserted {len(records)} records into ClickHouse for batch '{batch_id}'.")
67
68
def main():
    """!CLI entrypoint for inserting a batch into ClickHouse.

    Builds the argument parser, reads the mandatory --batchid option from
    the command line, and forwards its value to insert_batch().
    """
    cli = argparse.ArgumentParser(
        description="Insert benchmarking logs into ClickHouse",
    )
    cli.add_argument(
        "--batchid",
        type=str,
        required=True,
        help="Batch ID to ingest",
    )

    # argparse exits with a usage error if --batchid is missing.
    batch_id = cli.parse_args().batchid
    insert_batch(batch_id)
79
80
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
None insert_batch(str batch_id)
Filters and inserts a batch of records into ClickHouse.
main()
CLI entrypoint for inserting a batch into ClickHouse.