Monte Carlo Benchmarking Engine
High-performance SIMD Monte Carlo engine (AVX2/NEON) with custom memory allocators and perf logging.
 
Loading...
Searching...
No Matches
pipeline.gen_perf_parquet_logs Namespace Reference

Functions

 parse_args ()
 
 update_parquet (args)
 

Variables

 args = parse_args()
 

Function Documentation

◆ parse_args()

pipeline.gen_perf_parquet_logs.parse_args ( )

Definition at line 57 of file gen_perf_parquet_logs.py.

def parse_args():
    """Parse the perf-counter CLI flags for one Monte Carlo benchmark run.

    Every flag is mandatory and is kept as a raw string; downstream code
    (update_parquet) handles "NA" sentinels and schema casting.
    """
    parser = argparse.ArgumentParser(description="Parse perf stats for Monte Carlo benchmarking.")

    # (flag, help text) pairs — all required, all string-valued.
    required_flags = [
        ("--out_path", "Output .parquet file path"),
        ("--wall_time_s", "Wall time (seconds)"),
        ("--wall_time_ns", "Wall time (nanoseconds)"),
        ("--timestamp", "Timestamp for the benchmark run"),
        ("--batchid", "Unique ID for this batch of trials"),
        ("--method", "Benchmarking method (e.g., SIMD, Pool, etc.)"),
        ("--trials", "Number of trials run"),
        ("--cycles", "CPU cycles"),
        ("--instr", "Instructions executed"),
        ("--ipc", "Instructions per cycle"),
        ("--cache_loads", "Cache loads"),
        ("--cache_miss", "Cache misses"),
        ("--l1_loads", "L1 data cache loads"),
        ("--l1_misses", "L1 data cache misses"),
        ("--l2_loads", "L2 data cache loads"),
        ("--l2_misses", "L2 data cache misses"),
        ("--l3_loads", "L3 data cache loads"),
        ("--l3_misses", "L3 data cache misses"),
        ("--tlb_loads", "TLB loads"),
        ("--tlb_misses", "TLB misses"),
        ("--branch_instr", "Branch instructions"),
        ("--branch_misses", "Branch misses"),
        ("--miss_per_trial", "Cache+TLB misses per trial"),
        ("--cycles_per_trial", "Cycles per trial"),
    ]
    for flag, text in required_flags:
        parser.add_argument(flag, required=True, help=text)

    return parser.parse_args()

◆ update_parquet()

pipeline.gen_perf_parquet_logs.update_parquet ( args)

Definition at line 96 of file gen_perf_parquet_logs.py.

def update_parquet(args):
    """Write one benchmark run's perf counters to a zstd parquet file.

    Locates the newest batch directory matching args.batchid under db/logs/,
    assembles one row of raw and derived (miss-%) counters, converts the
    timestamp to a datetime, casts against SCHEMA and writes the parquet.

    Raises:
        FileNotFoundError: if no batch directory exists for args.batchid.
    """
    candidates = sorted(glob(f"db/logs/batch_{args.batchid}_*"))
    if not candidates:
        raise FileNotFoundError(f"No batch directory found for batch ID {args.batchid}")
    # sorted() + [-1] picks the lexicographically last match, i.e. the newest run.
    latest_batch = Path(candidates[-1])

    out_file = latest_batch / f"perf_results_{args.method}_{args.timestamp}_{args.batchid}.parquet"

    # Raw row straight off the CLI; field names must match SCHEMA exactly.
    # Miss-rate percentages are derived via safe_div_percent alongside.
    record = {
        "Timestamp": args.timestamp,
        "BatchID": args.batchid,
        "Method": args.method,
        "Trials": args.trials,
        "Cycles": args.cycles,
        "Instructions": args.instr,
        "IPC": args.ipc,
        "Wall Time (s)": args.wall_time_s,
        "Wall Time (ns)": args.wall_time_ns,
        "Cache Loads": args.cache_loads,
        "Cache Misses": args.cache_miss,
        "Cache Miss %": safe_div_percent(args.cache_miss, args.cache_loads),
        "L1 Loads": args.l1_loads,
        "L1 Misses": args.l1_misses,
        "L1 Miss %": safe_div_percent(args.l1_misses, args.l1_loads),
        "L2 Loads": args.l2_loads,
        "L2 Misses": args.l2_misses,
        "L2 Miss %": safe_div_percent(args.l2_misses, args.l2_loads),
        "L3 Loads": args.l3_loads,
        "L3 Misses": args.l3_misses,
        "L3 Miss %": safe_div_percent(args.l3_misses, args.l3_loads),
        "TLB Loads": args.tlb_loads,
        "TLB Misses": args.tlb_misses,
        "TLB Miss %": safe_div_percent(args.tlb_misses, args.tlb_loads),
        "Branch Instructions": args.branch_instr,
        "Branch Misses": args.branch_misses,
        "Branch Miss %": safe_div_percent(args.branch_misses, args.branch_instr),
        "Misses/Trial": args.miss_per_trial,
        "Cycles/Trial": args.cycles_per_trial,
    }

    # Upstream emits "NA" for unavailable counters; map those to nulls.
    record = {field: (None if value == "NA" else value) for field, value in record.items()}

    frame = pl.DataFrame([record])

    # String timestamp -> millisecond-precision datetime (non-matching values become null).
    frame = frame.with_columns(
        pl.col("Timestamp").str.strptime(pl.Datetime("ms"), "%Y-%m-%d %H:%M:%S", strict=False)
    )

    frame = safe_vector_cast(frame, SCHEMA)

    frame.write_parquet(out_file, compression="zstd")

    print(f"[INFO] Parquet saved: {out_file}")

Variable Documentation

◆ args

pipeline.gen_perf_parquet_logs.args = parse_args()

Definition at line 154 of file gen_perf_parquet_logs.py.