Monte Carlo Benchmarking Engine
High-performance SIMD Monte Carlo engine (AVX2/NEON) with custom memory allocators and perf logging.
 
Loading...
Searching...
No Matches
schema_to_clickhouse.py
Go to the documentation of this file.
1# ===========================================
2# schema_to_clickhouse.py
3# ===========================================
4
5
48
49from pipeline.schema import SCHEMA
50import polars as pl
51
# Polars dtype name -> ClickHouse column type. A single table serves both the
# string path and the class/instance path so the two cannot drift apart.
_CLICKHOUSE_TYPES = {
    "String": "String",
    "Utf8": "String",
    "Int64": "Int64",
    "Float64": "Float64",
    "Datetime": "DateTime64(3)",
}


def polars_to_clickhouse_dtype(dtype, nullable: bool) -> str:
    """!Converts a Polars data type to a valid ClickHouse column type.

    The input is first reduced to a bare dtype name: the string itself, the
    class name for a dtype class (e.g., pl.Int64), or the name of the
    instance's class for an instantiated dtype. That name is then looked up
    in one shared mapping table.

    @param dtype The input data type (string, Polars class, or Polars dtype object).
    @param nullable Whether to wrap the type in ClickHouse's Nullable().

    @return A string representing the ClickHouse-compatible column type.

    @throws ValueError If the dtype is not supported or recognized.
    """
    if isinstance(dtype, str):
        dtype_name = dtype
        label = "string dtype"
    else:
        # Read the class name directly rather than instantiating the class:
        # a dtype class whose constructor requires arguments would otherwise
        # fail with TypeError before the documented ValueError could be raised.
        dtype_cls = dtype if isinstance(dtype, type) else type(dtype)
        dtype_name = dtype_cls.__name__
        label = "dtype"

    ch_type = _CLICKHOUSE_TYPES.get(dtype_name)
    if ch_type is None:
        raise ValueError(f"Unsupported {label}: {dtype_name}")

    return f"Nullable({ch_type})" if nullable else ch_type
91
92
def generate_clickhouse_table(table_name="benchmark.performance"):
    """!Builds the ClickHouse CREATE TABLE statement for the pipeline schema.

    Each SCHEMA entry maps a column name to a (dtype, nullable) pair. The pair
    is translated with polars_to_clickhouse_dtype() and emitted as one
    backtick-quoted column definition, in SCHEMA's iteration order.

    @param table_name The name of the target ClickHouse table.

    @return A multi-line SQL string to define the table in ClickHouse.

    @note Uses MergeTree engine and orders by (Method, Timestamp).
    """
    column_defs = [
        f" `{column}` {polars_to_clickhouse_dtype(polars_type, is_nullable)}"
        for column, (polars_type, is_nullable) in SCHEMA.items()
    ]
    # Comma-separate the definitions; the last column carries no trailing comma.
    columns_sql = ",\n".join(column_defs)

    return (
        f"CREATE TABLE IF NOT EXISTS {table_name} (\n"
        f"{columns_sql}\n"
        ") ENGINE = MergeTree()\n"
        "ORDER BY (Method, Timestamp);"
    )
115
116
117if __name__ == "__main__":
polars_to_clickhouse_dtype(dtype, nullable)
Converts a Polars data type to a valid ClickHouse column type.
generate_clickhouse_table(table_name="benchmark.performance")
Generates a CREATE TABLE SQL statement for ClickHouse.