40from pathlib
import Path
41from clickhouse_driver
import Client
51 """!Prints an info message to stdout.
53 @param msg The message string.
56 print(f
"[INFO] {msg}")
59 """!Prints an error message to stdout.
60 @param msg The message string.
62 print(f
"[ERROR] {msg}")
65 """!Executes a shell command with logging.
66 @param cmd The command string to run.
67 @throws subprocess.CalledProcessError if the command fails.
69 log(f
"Running command: {cmd}")
70 subprocess.run(cmd, shell=
True, check=
True)
73 """!Waits for the ClickHouse server to become ready, retrying for up to 30 attempts.
74 @return A connected ClickHouse Client instance.
75 @throws RuntimeError if ClickHouse doesn't respond after all attempts.
77 log(
"Waiting for ClickHouse...")
79 for attempt
in range(30):
81 log(f
"Connecting to ClickHouse at {CLICKHOUSE_HOST}:{CLICKHOUSE_TCP_PORT} as user '{CLICKHOUSE_USER}'")
82 client = Client(host=CLICKHOUSE_HOST, port=CLICKHOUSE_TCP_PORT, user=CLICKHOUSE_USER, password=CLICKHOUSE_PASSWORD)
83 client.execute(
"SELECT 1")
84 log(
"ClickHouse is ready.")
87 except Exception
as e:
88 log(f
"Attempt {attempt+1}/30 failed: {e}")
91 raise RuntimeError(
"ClickHouse did not start after 30 attempts.")
94 """!Creates the ClickHouse database and performance table if they don't exist.
95 @param client The connected ClickHouse client.
97 log(
"Setting up ClickHouse database and table.")
98 client.execute(
"CREATE DATABASE IF NOT EXISTS benchmark")
99 client.execute(generate_clickhouse_table())
100 log(
"Schema loaded into ClickHouse.")
103 """!Wipes previous data and loads data from a Parquet file into ClickHouse.
105 Casts data using the shared schema and inserts it into the benchmark.performance table.
106 Existing table data will be truncated.
108 @param client The connected ClickHouse client.
109 @param db_path Path to the Parquet file to load.
110 @throws FileNotFoundError if the file does not exist.
111 @throws Exception if insertion fails.
113 if not db_path.exists():
114 raise FileNotFoundError(f
"{db_path} not found")
116 log(f
"Loading data from: {db_path}")
117 df = pl.read_parquet(db_path)
118 df = safe_vector_cast(df, SCHEMA)
120 records = df.to_dicts()
123 log(
"No records to insert.")
127 client.execute(
"TRUNCATE TABLE benchmark.performance")
128 client.execute(
"INSERT INTO benchmark.performance VALUES", records)
129 except Exception
as e:
130 err(f
"Error inserting records into ClickHouse: {e}")
135 """!CLI entrypoint. Parses arguments and coordinates Docker, schema setup, and data loading.
137 parser = argparse.ArgumentParser(description=
"Setup and load benchmark data into ClickHouse")
138 parser.add_argument(
"--load-from-sample", action=
"store_true", help=
"Setup and restore from db_sample.parquet")
139 parser.add_argument(
"--load-from-db", action=
"store_true", help=
"Setup and use existing db.parquet")
140 parser.add_argument(
"--docker-compose", action=
"store_true", help=
"Use Docker Compose to start ClickHouse & Grafana")
141 parser.add_argument(
"--setup-clickhouse", action=
"store_true", help=
"Setup ClickHouse database and table")
143 args = parser.parse_args()
145 os.makedirs(
"db", exist_ok=
True)
146 os.makedirs(
"samples", exist_ok=
True)
147 os.makedirs(
"db/logs", exist_ok=
True)
149 if args.docker_compose:
150 log(
"Setting up docker instance to be ready for ClickHouse database and Grafana.")
156 if args.setup_clickhouse
or args.docker_compose:
159 if args.load_from_sample:
160 log(f
"Loading from sample data: {SAMPLE_PATH}")
161 shutil.copy(SAMPLE_PATH, DB_PATH)
164 elif args.load_from_db:
165 log(f
"Loading from existing data: {DB_PATH}")
168 if args.load_from_sample
or args.load_from_db:
172if __name__ ==
"__main__":
setup_clickhouse(Client client)
Creates the ClickHouse database and performance table if they don't exist.
Client wait_for_clickhouse()
Waits for the ClickHouse server to become ready, retrying for up to 30 attempts.
err(str msg)
Prints an error message to stdout.
run_command(str cmd)
Executes a shell command with logging.
load_db_to_clickhouse(Client client, Path db_path)
Wipes previous data and loads data from a Parquet file into ClickHouse.
log(str msg)
Prints an info message to stdout.