Barretenberg
The ZK-SNARK library at the core of Aztec
Loading...
Searching...
No Matches
avm_tx_corpus_analyzer.cpp
Go to the documentation of this file.
1
14
15#include <algorithm>
16#include <cmath>
17#include <filesystem>
18#include <fstream>
19#include <iomanip>
20#include <iostream>
21#include <map>
22#include <numeric>
23#include <vector>
24
25namespace fs = std::filesystem;
26using namespace bb::avm2;
27using namespace bb::avm2::fuzzer;
28
29// Statistics structure for a distribution
30struct Stats {
31 double mean = 0.0;
32 double median = 0.0;
33 size_t mode = 0;
34 std::map<size_t, size_t> histogram; // value -> count
35};
36
37// Compute mean, median, mode and histogram from a vector of values
38Stats compute_stats(const std::vector<size_t>& values)
39{
40 Stats stats;
41
42 if (values.empty()) {
43 return stats;
44 }
45
46 // Histogram
47 for (size_t v : values) {
48 stats.histogram[v]++;
49 }
50
51 // Mean
52 double sum = std::accumulate(values.begin(), values.end(), 0.0);
53 stats.mean = sum / static_cast<double>(values.size());
54
55 // Median
56 std::vector<size_t> sorted = values;
57 std::sort(sorted.begin(), sorted.end());
58 size_t n = sorted.size();
59 if (n % 2 == 0) {
60 stats.median = (static_cast<double>(sorted[n / 2 - 1]) + static_cast<double>(sorted[n / 2])) / 2.0;
61 } else {
62 stats.median = static_cast<double>(sorted[n / 2]);
63 }
64
65 // Mode (value with highest count)
66 size_t max_count = 0;
67 for (const auto& [value, count] : stats.histogram) {
68 if (count > max_count) {
69 max_count = count;
70 stats.mode = value;
71 }
72 }
73
74 return stats;
75}
76
77// Count opcodes in bytecode
78void count_opcodes(const std::vector<uint8_t>& bytecode, std::map<WireOpCode, size_t>& opcode_counts)
79{
80 size_t pos = 0;
81 while (pos < bytecode.size()) {
82 try {
84 opcode_counts[instruction.opcode]++;
86 } catch (const std::exception&) {
87 // Invalid bytecode, stop parsing
88 break;
89 }
90 }
91}
92
93// Get opcode name as string
94std::string opcode_name(WireOpCode opcode)
95{
96 std::ostringstream oss;
97 oss << opcode;
98 return oss.str();
99}
100
101// Print a visual histogram bar
102std::string histogram_bar(size_t count, size_t max_count, size_t max_width = 40)
103{
104 if (max_count == 0) {
105 return "";
106 }
107 size_t bar_len = static_cast<size_t>(
108 std::round(static_cast<double>(count) / static_cast<double>(max_count) * static_cast<double>(max_width)));
109 return std::string(bar_len, '#');
110}
111
112// Print opcode histogram
114{
115 std::cout << "\n=== Opcode Histogram ===\n";
116
117 if (opcode_counts.empty()) {
118 std::cout << "No opcodes found.\n";
119 return;
120 }
121
122 // Find max count for scaling bars
123 size_t max_count = 0;
124 size_t total_instructions = 0;
125 for (const auto& [opcode, count] : opcode_counts) {
126 max_count = std::max(max_count, count);
127 total_instructions += count;
128 }
129
130 // Find max opcode name length for alignment
131 size_t max_name_len = 0;
132 for (const auto& [opcode, count] : opcode_counts) {
133 max_name_len = std::max(max_name_len, opcode_name(opcode).length());
134 }
135
136 // Sort by count (descending)
137 std::vector<std::pair<WireOpCode, size_t>> sorted_counts(opcode_counts.begin(), opcode_counts.end());
138 std::sort(
139 sorted_counts.begin(), sorted_counts.end(), [](const auto& a, const auto& b) { return a.second > b.second; });
140
141 for (const auto& [opcode, count] : sorted_counts) {
142 std::cout << std::setw(static_cast<int>(max_name_len)) << std::left << opcode_name(opcode) << ": "
143 << std::setw(8) << std::right << count << " " << histogram_bar(count, max_count) << "\n";
144 }
145
146 // Summary stats
147 std::cout << "\n=== Opcode Statistics ===\n";
148 std::cout << "Total instructions: " << total_instructions << "\n";
149
150 size_t total_opcodes = static_cast<size_t>(WireOpCode::LAST_OPCODE_SENTINEL);
151 std::cout << "Unique opcodes used: " << opcode_counts.size() << "/" << total_opcodes << "\n";
152
153 // Find and display missing opcodes
154 std::vector<WireOpCode> missing_opcodes;
155 for (size_t i = 0; i < total_opcodes; i++) {
156 auto opcode = static_cast<WireOpCode>(i);
157 if (opcode_counts.find(opcode) == opcode_counts.end()) {
158 missing_opcodes.push_back(opcode);
159 }
160 }
161
162 if (!missing_opcodes.empty()) {
163 std::cout << "Missing opcodes (" << missing_opcodes.size() << "): ";
164 for (size_t i = 0; i < missing_opcodes.size(); i++) {
165 if (i > 0) {
166 std::cout << ", ";
167 }
168 std::cout << opcode_name(missing_opcodes[i]);
169 }
170 std::cout << "\n";
171 }
172
173 if (!sorted_counts.empty()) {
174 std::cout << "Most common: " << opcode_name(sorted_counts.front().first) << " (" << sorted_counts.front().second
175 << ")\n";
176 std::cout << "Least common: " << opcode_name(sorted_counts.back().first) << " (" << sorted_counts.back().second
177 << ")\n";
178 }
179}
180
181// Structure to track multi-phase transaction statistics
189
190// Print enqueued calls statistics
192 const Stats& app_logic,
193 const Stats& teardown,
194 const MultiPhaseStats& multi_phase)
195{
196 std::cout << "\n=== Enqueued Calls Statistics ===\n";
197
198 auto print_stats = [](const std::string& name, const Stats& s) {
199 std::cout << "\n" << name << ":\n";
200 std::cout << " Mean: " << std::fixed << std::setprecision(2) << s.mean << ", Median: " << s.median
201 << ", Mode: " << s.mode << "\n";
202 std::cout << " Histogram: ";
203 for (const auto& [value, count] : s.histogram) {
204 std::cout << value << "(" << count << ") ";
205 }
206 std::cout << "\n";
207 };
208
209 print_stats("Setup Calls", setup);
210 print_stats("App Logic Calls", app_logic);
211 print_stats("Teardown Calls", teardown);
212
213 std::cout << "\nMulti-Phase Transactions:\n";
214 std::cout << " Txs with calls in multiple phases: " << multi_phase.txs_with_multiple_phases << "\n";
215 std::cout << " Txs with setup + app_logic only: " << multi_phase.txs_with_setup_and_app_logic << "\n";
216 std::cout << " Txs with setup + teardown only: " << multi_phase.txs_with_setup_and_teardown << "\n";
217 std::cout << " Txs with app_logic + teardown only: " << multi_phase.txs_with_app_logic_and_teardown << "\n";
218 std::cout << " Txs with all three phases: " << multi_phase.txs_with_all_three_phases << "\n";
219}
220
221int main(int argc, char** argv)
222{
223 // Default corpus path (relative to where we run from)
224 std::string corpus_dir = "corpus/tx";
225 if (argc > 1) {
226 corpus_dir = argv[1];
227 }
228
229 // Check if corpus directory exists
230 if (!fs::exists(corpus_dir)) {
231 std::cerr << "Error: Corpus directory does not exist: " << corpus_dir << "\n";
232 return 1;
233 }
234
235 if (!fs::is_directory(corpus_dir)) {
236 std::cerr << "Error: Not a directory: " << corpus_dir << "\n";
237 return 1;
238 }
239
240 std::cout << "=== AVM Fuzzer Corpus Analysis ===\n";
241 std::cout << "Corpus directory: " << corpus_dir << "\n";
242
243 // Statistics accumulators
244 std::map<WireOpCode, size_t> total_opcode_counts;
245 std::vector<size_t> setup_call_counts;
246 std::vector<size_t> app_logic_call_counts;
247 std::vector<size_t> teardown_call_counts;
248 MultiPhaseStats multi_phase_stats;
249 size_t files_processed = 0;
250 size_t files_failed = 0;
251 size_t total_input_programs = 0;
252
253 // Iterate over all files in the corpus directory
254 for (const auto& entry : fs::directory_iterator(corpus_dir)) {
255 if (!entry.is_regular_file()) {
256 continue;
257 }
258
259 const auto& path = entry.path();
260
261 // Read file contents
262 std::ifstream file(path, std::ios::binary);
263 if (!file) {
264 files_failed++;
265 continue;
266 }
267
269 file.close();
270
271 // Deserialize FuzzerTxData
272 FuzzerTxData tx_data;
273 try {
274 msgpack::unpack(reinterpret_cast<const char*>(buffer.data()), buffer.size()).get().convert(tx_data);
275 } catch (const std::exception& e) {
276 files_failed++;
277 continue;
278 }
279
280 files_processed++;
281
282 // Count enqueued calls
283 size_t setup_count = tx_data.tx.setup_enqueued_calls.size();
284 size_t app_logic_count = tx_data.tx.app_logic_enqueued_calls.size();
285 size_t teardown_count = tx_data.tx.teardown_enqueued_call.has_value() ? 1 : 0;
286
287 setup_call_counts.push_back(setup_count);
288 app_logic_call_counts.push_back(app_logic_count);
289 teardown_call_counts.push_back(teardown_count);
290
291 // Track multi-phase statistics
292 bool has_setup = setup_count > 0;
293 bool has_app_logic = app_logic_count > 0;
294 bool has_teardown = teardown_count > 0;
295 int phases_with_calls = (has_setup ? 1 : 0) + (has_app_logic ? 1 : 0) + (has_teardown ? 1 : 0);
296
297 if (phases_with_calls >= 2) {
298 multi_phase_stats.txs_with_multiple_phases++;
299 }
300 if (has_setup && has_app_logic && !has_teardown) {
301 multi_phase_stats.txs_with_setup_and_app_logic++;
302 }
303 if (has_setup && has_teardown && !has_app_logic) {
304 multi_phase_stats.txs_with_setup_and_teardown++;
305 }
306 if (has_app_logic && has_teardown && !has_setup) {
307 multi_phase_stats.txs_with_app_logic_and_teardown++;
308 }
309 if (has_setup && has_app_logic && has_teardown) {
310 multi_phase_stats.txs_with_all_three_phases++;
311 }
312
313 // Process each input program and build bytecode
314 for (auto& fuzzer_data : tx_data.input_programs) {
315 total_input_programs++;
316
317 try {
318 // Build bytecode using ControlFlow
319 ControlFlow control_flow(fuzzer_data.instruction_blocks);
320 for (const auto& cfg_instruction : fuzzer_data.cfg_instructions) {
321 control_flow.process_cfg_instruction(cfg_instruction);
322 }
323 auto bytecode = control_flow.build_bytecode(fuzzer_data.return_options);
324
325 // Count opcodes in the bytecode
326 count_opcodes(bytecode, total_opcode_counts);
327 } catch (const std::exception&) {
328 // Skip invalid bytecode generation
329 continue;
330 }
331 }
332 }
333
334 // Print summary
335 std::cout << "\nFiles processed: " << files_processed << "\n";
336 std::cout << "Files failed: " << files_failed << "\n";
337 std::cout << "Total input programs: " << total_input_programs << "\n";
338
339 // Print opcode histogram
340 print_opcode_histogram(total_opcode_counts);
341
342 // Print enqueued calls statistics
343 Stats setup_stats = compute_stats(setup_call_counts);
344 Stats app_logic_stats = compute_stats(app_logic_call_counts);
345 Stats teardown_stats = compute_stats(teardown_call_counts);
346 print_enqueued_calls_stats(setup_stats, app_logic_stats, teardown_stats, multi_phase_stats);
347
348 return 0;
349}
std::shared_ptr< Napi::ThreadSafeFunction > bytecode
std::string opcode_name(WireOpCode opcode)
std::string histogram_bar(size_t count, size_t max_count, size_t max_width=40)
int main(int argc, char **argv)
void print_enqueued_calls_stats(const Stats &setup, const Stats &app_logic, const Stats &teardown, const MultiPhaseStats &multi_phase)
Stats compute_stats(const std::vector< size_t > &values)
void print_opcode_histogram(const std::map< WireOpCode, size_t > &opcode_counts)
void count_opcodes(const std::vector< uint8_t > &bytecode, std::map< WireOpCode, size_t > &opcode_counts)
FF a
FF b
uint8_t const size_t length
Definition data_store.hpp:9
uint8_t buffer[RANDOM_BUFFER_SIZE]
Definition engine.cpp:34
Instruction instruction
Instruction deserialize_instruction(std::span< const uint8_t > bytecode, size_t pos)
Parsing of an instruction in the supplied bytecode at byte position pos. This checks that the WireOpC...
Inner sum(Cont< Inner, Args... > const &in)
Definition container.hpp:70
constexpr decltype(auto) get(::tuplet::tuple< T... > &&t) noexcept
Definition tuple.hpp:13
std::vector< FuzzerData > input_programs
std::map< size_t, size_t > histogram
std::vector< PublicCallRequestWithCalldata > setup_enqueued_calls
Definition avm_io.hpp:337
std::optional< PublicCallRequestWithCalldata > teardown_enqueued_call
Definition avm_io.hpp:339
std::vector< PublicCallRequestWithCalldata > app_logic_enqueued_calls
Definition avm_io.hpp:338