From fa7fc6f98e5bc01b01e29dfc5becca37bba2ac31 Mon Sep 17 00:00:00 2001 From: MirkoCalvi Date: Tue, 3 Jun 2025 17:57:10 +0200 Subject: [PATCH 1/9] progresses on OneOp tests with new codegen --- build.zig | 75 ++++++++++--------- .../op_softmax.zig | 7 +- src/IR_codegen/main.zig | 1 + src/IR_codegen/utils.zig | 31 ++++++++ src/IR_graph/TODO | 25 +++++++ src/IR_graph/op_union/op_union.zig | 25 ++++++- src/IR_graph/op_union/operators/op_gemm.zig | 3 +- src/IR_graph/op_union/operators/op_slice.zig | 5 +- .../op_union/operators/op_softmax.zig | 8 +- .../Python-ONNX/available_operations.txt | 6 +- tests/CodeGen/oneOpModelGenerator.zig | 19 ++--- 11 files changed, 144 insertions(+), 61 deletions(-) diff --git a/build.zig b/build.zig index 359d9060..e5862b1a 100644 --- a/build.zig +++ b/build.zig @@ -35,6 +35,10 @@ pub fn build(b: *std.Build) void { IR_mod.addImport("zant", zant_mod); IR_mod.addImport("codegen", codeGen_mod); + const IR_codeGen_mod = b.createModule(.{ .root_source_file = b.path("src/IR_codegen/IR_codegen.zig") }); + IR_codeGen_mod.addImport("zant", zant_mod); + IR_codeGen_mod.addImport("IR_zant", IR_mod); + const Img2Tens_mod = b.createModule(.{ .root_source_file = b.path("src/ImageToTensor/imageToTensor.zig") }); Img2Tens_mod.addImport("zant", zant_mod); Img2Tens_mod.addImport("codegen", codeGen_mod); @@ -105,19 +109,6 @@ pub fn build(b: *std.Build) void { const comm_option = b.option(bool, "comm", "Codegen with comments") orelse false; const dynamic_option = b.option(bool, "dynamic", "Dynamic allocation") orelse false; - // Define codegen options - const codegen_options = b.addOptions(); // Model name option - codegen_options.addOption([]const u8, "model", model_name_option); - codegen_options.addOption([]const u8, "model_path", model_path_option); - codegen_options.addOption([]const u8, "generated_path", generated_path_option); - codegen_options.addOption([]const u8, "user_tests", user_tests_option); - codegen_options.addOption(bool, "log", log_option); 
- codegen_options.addOption([]const u8, "shape", shape_option); - codegen_options.addOption([]const u8, "type", input_type_option); - codegen_options.addOption([]const u8, "output_type", output_type_option); - codegen_options.addOption(bool, "comm", comm_option); - codegen_options.addOption(bool, "dynamic", dynamic_option); - //\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\ // Name and path of the model @@ -197,6 +188,18 @@ pub fn build(b: *std.Build) void { IR_codegen_step.dependOn(&IR_codegen_cmd.step); // ************************************************CODEGEN EXECUTABLE************************************************ + // Define codegen options + const codegen_options = b.addOptions(); // Model name option + codegen_options.addOption([]const u8, "model", model_name_option); + codegen_options.addOption([]const u8, "model_path", model_path_option); + codegen_options.addOption([]const u8, "generated_path", generated_path_option); + codegen_options.addOption([]const u8, "user_tests", user_tests_option); + codegen_options.addOption(bool, "log", log_option); + codegen_options.addOption([]const u8, "shape", shape_option); + codegen_options.addOption([]const u8, "type", input_type_option); + codegen_options.addOption([]const u8, "output_type", output_type_option); + codegen_options.addOption(bool, "comm", comm_option); + codegen_options.addOption(bool, "dynamic", dynamic_option); // Define the main executable with target architecture and optimization settings. 
const codeGen_exe = b.addExecutable(.{ @@ -308,15 +311,15 @@ pub fn build(b: *std.Build) void { }); oneop_codegen_exe.root_module.addImport("zant", zant_mod); - codeGen_mod.addOptions("codegen_options", codegen_options); - oneop_codegen_exe.root_module.addImport("codegen", codeGen_mod); + IR_codeGen_mod.addOptions("codegen_options", IRC_options); + oneop_codegen_exe.root_module.addImport("IR_codegen", IR_codeGen_mod); oneop_codegen_exe.linkLibC(); const run_oneop_codegen_exe = b.addRunArtifact(oneop_codegen_exe); const step_test_oneOp_codegen = b.step("test-codegen-gen", "Run generated library tests"); step_test_oneOp_codegen.dependOn(&run_oneop_codegen_exe.step); - // ************************************************ test_all_oneOp ************************************************ + // ************************************************ ONEOP TESTING ************************************************ // Setup test_all_oneOp const test_all_oneOp = b.addTest(.{ @@ -327,8 +330,8 @@ pub fn build(b: *std.Build) void { }); test_all_oneOp.root_module.addImport("zant", zant_mod); - codeGen_mod.addOptions("codegen_options", codegen_options); - test_all_oneOp.root_module.addImport("codegen", codeGen_mod); + IR_codeGen_mod.addOptions("codegen_options", IRC_options); + test_all_oneOp.root_module.addImport("codegen", IR_codeGen_mod); test_all_oneOp.linkLibC(); const run_test_all_oneOp = b.addRunArtifact(test_all_oneOp); @@ -345,25 +348,25 @@ pub fn build(b: *std.Build) void { // ************************************************ // Write Op Test - const write_op_test = b.addExecutable(.{ - .name = "test_write_op", - .root_source_file = b.path("tests/IR_graph/test_write_op.zig"), - .target = target, - .optimize = optimize, - }); - - write_op_test.root_module.addImport("zant", zant_mod); - write_op_test.root_module.addImport("codegen", codeGen_mod); - write_op_test.root_module.addImport("IR_zant", IR_mod); - write_op_test.linkLibC(); - - const run_write_op_test = 
b.addRunArtifact(write_op_test); - if (b.args) |args| { - run_write_op_test.addArgs(args); - } + // const write_op_test = b.addExecutable(.{ + // .name = "test_write_op", + // .root_source_file = b.path("tests/IR_graph/test_write_op.zig"), + // .target = target, + // .optimize = optimize, + // }); + + // write_op_test.root_module.addImport("zant", zant_mod); + // write_op_test.root_module.addImport("codegen", codeGen_mod); + // write_op_test.root_module.addImport("IR_zant", IR_mod); + // write_op_test.linkLibC(); + + // const run_write_op_test = b.addRunArtifact(write_op_test); + // if (b.args) |args| { + // run_write_op_test.addArgs(args); + // } - const write_op_step = b.step("run-test-write-op", "Run the write_op test on a model"); - write_op_step.dependOn(&run_write_op_test.step); + // const write_op_step = b.step("run-test-write-op", "Run the write_op test on a model"); + // write_op_step.dependOn(&run_write_op_test.step); // ************************************************ // Benchmark diff --git a/src/Core/Tensor/TensorMath/lib_activation_function_math/op_softmax.zig b/src/Core/Tensor/TensorMath/lib_activation_function_math/op_softmax.zig index 5de1a0d5..710f174b 100644 --- a/src/Core/Tensor/TensorMath/lib_activation_function_math/op_softmax.zig +++ b/src/Core/Tensor/TensorMath/lib_activation_function_math/op_softmax.zig @@ -28,8 +28,10 @@ pub fn softmax(comptime T: anytype, tensor: *Tensor(T)) !Tensor(T) { } pub inline fn lean_softmax(comptime T: anytype, input: *Tensor(T), output: *Tensor(T)) !void { - const rows = input.shape[0]; - const cols = input.shape[1]; + const n_dims = input.shape.len; + + const rows = input.shape[n_dims - 2]; + const cols = input.shape[n_dims - 1]; var max_val: T = undefined; var sum_of_exp: T = 0.0; @@ -63,6 +65,7 @@ pub inline fn lean_softmax(comptime T: anytype, input: *Tensor(T), output: *Tens } pub fn softmax_backward(comptime T: anytype, dL_dX: *Tensor(T), softmax_output: *Tensor(T)) !void { + //checks if (dL_dX.size <= 0) 
return TensorError.ZeroSizeTensor; if (dL_dX.size != softmax_output.size) return TensorMathError.InputTensorDifferentSize; diff --git a/src/IR_codegen/main.zig b/src/IR_codegen/main.zig index 09f32fad..ef80df4f 100644 --- a/src/IR_codegen/main.zig +++ b/src/IR_codegen/main.zig @@ -8,6 +8,7 @@ const onnx = zant.onnx; const codegen_options = @import("codegen_options"); const codeGen_tests = @import("tests_writer.zig"); +// called by "zig build IR_codegen" optionals:" -Dlog -Dmodel="name" -D ..." see build.zig" pub fn main() !void { std.debug.print("\n\ncodegenOptions: ", .{}); std.debug.print("\n model:{s} ", .{codegen_options.IR_model}); diff --git a/src/IR_codegen/utils.zig b/src/IR_codegen/utils.zig index 8c531771..e9925371 100644 --- a/src/IR_codegen/utils.zig +++ b/src/IR_codegen/utils.zig @@ -373,3 +373,34 @@ pub fn parseI64RawData(raw_data: []const u8) ![]i64 { return result; } + +// ----------------- FILE MANAGEMENT ----------------- +// Copy file from src to dst +pub fn copyFile(src_path: []const u8, dst_path: []const u8) !void { + var src_file = try std.fs.cwd().openFile(src_path, .{}); + defer src_file.close(); + + var dst_file = try std.fs.cwd().createFile(dst_path, .{}); + defer dst_file.close(); + + // Use a buffer to copy in chunks + var buf: [4096]u8 = undefined; + while (true) { + const bytes_read = try src_file.read(&buf); + if (bytes_read == 0) break; + _ = try dst_file.write(buf[0..bytes_read]); + } +} + +// Read the user_tests json file and return a list of test cases +pub fn loadUserTests(comptime T: type, user_tests_path: []const u8) !std.json.Parsed([]tests.UserTest(T)) { + const user_tests_file = try std.fs.cwd().openFile(user_tests_path, .{}); + defer user_tests_file.close(); + + const user_tests_content: []const u8 = try user_tests_file.readToEndAlloc(allocator, 1024 * 1024); + defer allocator.free(user_tests_content); + + const parsed_user_tests = try std.json.parseFromSlice([]tests.UserTest(T), allocator, user_tests_content, .{}); + + 
return parsed_user_tests; +} diff --git a/src/IR_graph/TODO b/src/IR_graph/TODO index f81b55f1..4ae402c5 100644 --- a/src/IR_graph/TODO +++ b/src/IR_graph/TODO @@ -1,6 +1,31 @@ +IR_zant - for each ZantNode: - getInputs() - getOutputs() - once done above complete write_op_info() +- implement mean node( first check Merge MirkoGraphZant -> GraphZant ) +- implement slice node( first check Merge MirkoGraphZant -> GraphZant ) +- implement squeeze node( first check Merge MirkoGraphZant -> GraphZant ) +IR_CODEGEN - dynamicAllocation +- when running : `zig build IR_codegen -Dlog` and `zig build test-generated-lib` you obtain + [test_model] (info): + User tests are disabled for this model + It is related to the import of "codegen" inside *** GENERATED LIBRARY TESTS *** in build.zig + +- check that the passed that the parameters passed to IR_codegen.generate_from_*() are correct, string format, no wite spaces, + generated_path ending with "/", ecc... +- aggregate build steps where possible, too many . Write documentation about it, describin what does each build command +- error on Split op when launching "zig build test-codegen-gen -Dlog", now Split is excluded from "available_operations" + .......... file created, path:generated/oneOpModels/Split/static_parameters.zig + info: + .......... 
file created, path:generated/oneOpModels/Split/lib_Split.zig + thread 26012 panic: reached unreachable code + /home/mirko/Documents/zig/zig-linux-x86_64-0.14.0/lib/std/posix.zig:1267:23: 0x109187e in write (oneop_codegen) + .FAULT => unreachable, + +- codegenerating "T" as the input type of the lean math op is not possible anymore + +OTHER : +- check missing op between: python generator, available_operations.txt, IR_graph operators \ No newline at end of file diff --git a/src/IR_graph/op_union/op_union.zig b/src/IR_graph/op_union/op_union.zig index 2df27ef1..91ec26b3 100644 --- a/src/IR_graph/op_union/op_union.zig +++ b/src/IR_graph/op_union/op_union.zig @@ -16,8 +16,10 @@ pub const Op_union = union(enum) { div: operators.Div, elu: operators.Elu, flatten: operators.Flatten, + floor: operators.Floor, gather: operators.Gather, gemm: operators.Gemm, + gelu: operators.Gelu, identity: operators.Identity, leakyRelu: operators.LeakyRelu, matMul: operators.MatMul, @@ -33,6 +35,7 @@ pub const Op_union = union(enum) { slice: operators.Slice, softmax: operators.Softmax, split: operators.Split, + sqrt: operators.Sqrt, sub: operators.Sub, tanh: operators.Tanh, transpose: operators.Transpose, @@ -60,10 +63,14 @@ pub const Op_union = union(enum) { return Op_union{ .elu = try operators.Elu.init(nodeProto) }; } else if (std.mem.eql(u8, op_type, "Flatten")) { return Op_union{ .flatten = try operators.Flatten.init(nodeProto) }; + } else if (std.mem.eql(u8, op_type, "Floor")) { + return Op_union{ .floor = try operators.Floor.init(nodeProto) }; } else if (std.mem.eql(u8, op_type, "Gather")) { return Op_union{ .gather = try operators.Gather.init(nodeProto) }; } else if (std.mem.eql(u8, op_type, "Gemm")) { return Op_union{ .gemm = try operators.Gemm.init(nodeProto) }; + } else if (std.mem.eql(u8, op_type, "Gelu")) { + return Op_union{ .gelu = try operators.Gelu.init(nodeProto) }; } else if (std.mem.eql(u8, op_type, "Identity")) { return Op_union{ .identity = try 
operators.Identity.init(nodeProto) }; } else if (std.mem.eql(u8, op_type, "LeakyRelu")) { @@ -94,6 +101,8 @@ pub const Op_union = union(enum) { return Op_union{ .softmax = try operators.Softmax.init(nodeProto) }; } else if (std.mem.eql(u8, op_type, "Split")) { return Op_union{ .split = try operators.Split.init(nodeProto) }; + } else if (std.mem.eql(u8, op_type, "Sqrt")) { + return Op_union{ .sqrt = try operators.Sqrt.init(nodeProto) }; } else if (std.mem.eql(u8, op_type, "Sub")) { return Op_union{ .sub = try operators.Sub.init(nodeProto) }; } else if (std.mem.eql(u8, op_type, "Tanh")) { @@ -119,8 +128,10 @@ pub const Op_union = union(enum) { .div => |ptr| return ptr.get_output_shape(), .elu => |ptr| return ptr.get_output_shape(), .flatten => |ptr| return ptr.get_output_shape(), + .floor => |ptr| ptr.get_output_shape(), .gather => |ptr| return ptr.get_output_shape(), .gemm => |ptr| return ptr.get_output_shape(), + .gelu => |ptr| return ptr.get_output_shape(), .identity => |ptr| return ptr.get_output_shape(), .leakyRelu => |ptr| return ptr.get_output_shape(), .matMul => |ptr| return ptr.get_output_shape(), @@ -136,6 +147,7 @@ pub const Op_union = union(enum) { .slice => |ptr| return ptr.get_output_shape(), .softmax => |ptr| return ptr.get_output_shape(), .split => |ptr| return ptr.get_output_shape(), + .sqrt => |ptr| return ptr.get_output_shape(), .sub => |ptr| return ptr.get_output_shape(), .tanh => |ptr| return ptr.get_output_shape(), .transpose => |ptr| return ptr.get_output_shape(), @@ -158,8 +170,10 @@ pub const Op_union = union(enum) { .div => |ptr| ptr.get_output_tensor(), .elu => |ptr| ptr.get_output_tensor(), .flatten => |ptr| ptr.get_output_tensor(), + .floor => |ptr| ptr.get_output_tensor(), .gather => |ptr| ptr.get_output_tensor(), .gemm => |ptr| ptr.get_output_tensor(), + .gelu => |ptr| ptr.get_output_tensor(), .identity => |ptr| ptr.get_output_tensor(), .leakyRelu => |ptr| ptr.get_output_tensor(), .matMul => |ptr| ptr.get_output_tensor(), @@ -175,6 
+189,7 @@ pub const Op_union = union(enum) { .slice => |ptr| ptr.get_output_tensor(), .softmax => |ptr| ptr.get_output_tensor(), .split => |ptr| ptr.get_output_tensor(), + .sqrt => |ptr| ptr.get_output_tensor(), .sub => |ptr| ptr.get_output_tensor(), .tanh => |ptr| ptr.get_output_tensor(), .transpose => |ptr| ptr.get_output_tensor(), @@ -197,8 +212,10 @@ pub const Op_union = union(enum) { .div => |ptr| try ptr.write_op(writer), .elu => |ptr| try ptr.write_op(writer), .flatten => |ptr| try ptr.write_op(writer), + .floor => |ptr| try ptr.write_op(writer), .gather => |ptr| try ptr.write_op(writer), .gemm => |ptr| try ptr.write_op(writer), + .gelu => |ptr| try ptr.write_op(writer), .identity => |ptr| try ptr.write_op(writer), .leakyRelu => |ptr| try ptr.write_op(writer), .matMul => |ptr| try ptr.write_op(writer), @@ -211,9 +228,10 @@ pub const Op_union = union(enum) { .resize => |ptr| try ptr.write_op(writer), .shape => |ptr| try ptr.write_op(writer), .sigmoid => |ptr| try ptr.write_op(writer), - // .slice => |ptr| try ptr.write_op(writer), //not present, see Slice node + .slice => |ptr| try ptr.write_op(writer), .softmax => |ptr| try ptr.write_op(writer), - // .split => |ptr| try ptr.write_op(writer), //not working! error: .FAULT => unreachable, + .split => |ptr| try ptr.write_op(writer), //not working! 
error: .FAULT => unreachable, + .sqrt => |ptr| try ptr.write_op(writer), .sub => |ptr| try ptr.write_op(writer), .tanh => |ptr| try ptr.write_op(writer), .transpose => |ptr| try ptr.write_op(writer), @@ -236,8 +254,10 @@ pub const Op_union = union(enum) { .div => |ptr| ptr.print(), .elu => |ptr| ptr.print(), .flatten => |ptr| ptr.print(), + .floor => |ptr| ptr.print(), .gather => |ptr| ptr.print(), .gemm => |ptr| ptr.print(), + .gelu => |ptr| ptr.print(), .identity => |ptr| ptr.print(), .leakyRelu => |ptr| ptr.print(), .matMul => |ptr| ptr.print(), @@ -253,6 +273,7 @@ pub const Op_union = union(enum) { .slice => |ptr| ptr.print(), .softmax => |ptr| ptr.print(), .split => |ptr| ptr.print(), + .sqrt => |ptr| ptr.print(), .sub => |ptr| ptr.print(), .tanh => |ptr| ptr.print(), .transpose => |ptr| ptr.print(), diff --git a/src/IR_graph/op_union/operators/op_gemm.zig b/src/IR_graph/op_union/operators/op_gemm.zig index 91496e6a..0e8dcaac 100644 --- a/src/IR_graph/op_union/operators/op_gemm.zig +++ b/src/IR_graph/op_union/operators/op_gemm.zig @@ -133,8 +133,9 @@ pub const Gemm = struct { _ = try writer.print( \\ \\ - \\ tensMath.gemm_lean(T, {s}, {s}, {s}, {}, {}, {s}, {s}, &tensor_{s} ) + \\ tensMath.gemm_lean({s}, {s}, {s}, {s}, {}, {}, {s}, {s}, &tensor_{s} ) , .{ + self.output.ty.toString(), // T tensor_A_string, // Input tensor A tensor_B_string, // Input tensor B tensor_C_string, diff --git a/src/IR_graph/op_union/operators/op_slice.zig b/src/IR_graph/op_union/operators/op_slice.zig index 410f8e08..973cef38 100644 --- a/src/IR_graph/op_union/operators/op_slice.zig +++ b/src/IR_graph/op_union/operators/op_slice.zig @@ -62,7 +62,10 @@ pub const Slice = struct { return self.output; } - pub fn write_op() !void {} //TODO manuel + pub fn write_op(self: Slice, writer: std.fs.File.Writer) !void { + _ = writer; + _ = self; + } //TODO manuel pub fn compute_output_shape(self: Slice) []usize { var output_shape: []usize = undefined; diff --git 
a/src/IR_graph/op_union/operators/op_softmax.zig b/src/IR_graph/op_union/operators/op_softmax.zig index ac8b655c..b4902be9 100644 --- a/src/IR_graph/op_union/operators/op_softmax.zig +++ b/src/IR_graph/op_union/operators/op_softmax.zig @@ -69,13 +69,15 @@ pub const Softmax = struct { } _ = try writer.print( - \\ tensMath.softmax_tensor_lean( - \\ T, + \\ + \\ tensMath.softmax_lean( + \\ {s}, //Type \\ {s}, // input tensor \\ &tensor_{s} // output tensor - \\ ); + \\ ) \\ , .{ + self.output_Y.ty.toString(), tensor_input_string, try utils.getSanitizedName(self.output_Y.name), }); diff --git a/tests/CodeGen/Python-ONNX/available_operations.txt b/tests/CodeGen/Python-ONNX/available_operations.txt index 9c6a6a66..1755ef55 100644 --- a/tests/CodeGen/Python-ONNX/available_operations.txt +++ b/tests/CodeGen/Python-ONNX/available_operations.txt @@ -16,15 +16,11 @@ Identity LeakyRelu MatMul MaxPool -Mean Mul Relu Reshape Sigmoid -Slice -Split Sqrt Sub Tanh -Transpose -Squeeze \ No newline at end of file +Transpose \ No newline at end of file diff --git a/tests/CodeGen/oneOpModelGenerator.zig b/tests/CodeGen/oneOpModelGenerator.zig index 0d5a359f..6489b4d7 100644 --- a/tests/CodeGen/oneOpModelGenerator.zig +++ b/tests/CodeGen/oneOpModelGenerator.zig @@ -7,13 +7,15 @@ const pkgAllocator = zant.utils.allocator; const allocator = pkgAllocator.allocator; const onnx = zant.onnx; -const codeGen = @import("codegen"); +const IR_codeGen = @import("IR_codegen"); const tests_log = std.log.scoped(.test_oneOP); +// called by "zig build test-codegen-gen" optionals:" -Dlog -Dmodel="name" -D ..." 
see build.zig" pub fn main() !void { tests_log.info("One ONNX Operator Model Generator", .{}); + //collecting available operations from tests/CodeGen/Python-ONNX/available_operations.txt tests_log.info("\n opening available_operations...", .{}); const op_file = try std.fs.cwd().openFile("tests/CodeGen/Python-ONNX/available_operations.txt", .{}); defer op_file.close(); @@ -52,6 +54,7 @@ pub fn main() !void { try test_oneop_writer.writeAll("\n"); while (true) { + // Get the next line from the iterator. const maybe_line = lines_iter.next(); @@ -77,7 +80,7 @@ pub fn main() !void { //Printing the model: //DEBUG - model.print(); + //model.print(); tests_log.info("\n CODEGENERATING {s} ...", .{model_path}); @@ -88,22 +91,16 @@ pub fn main() !void { // CORE PART ------------------------------------------------------- // ONNX model parsing - try codeGen.globals.setGlobalAttributes(model); - - // Create the code for the model - try codeGen.skeleton.writeZigFile(trimmed_line, generated_path, model, false); - - // Create relative tests - try codeGen.tests.writeSlimTestFile(trimmed_line, generated_path); + try IR_codeGen.codegnenerateFromOnnx(trimmed_line, generated_path, model); - // Copy user test file into the generated test file + // Copy user test file into the generated test file, do not touch, this is not related to model codegen ! 
const dataset_test_model_path = try std.fmt.allocPrint(allocator, "datasets/oneOpModels/{s}_0_user_tests.json", .{trimmed_line}); defer allocator.free(dataset_test_model_path); const generated_test_model_path = try std.fmt.allocPrint(allocator, "generated/oneOpModels/{s}/user_tests.json", .{trimmed_line}); defer allocator.free(generated_test_model_path); - try codeGen.utils.copyFile(dataset_test_model_path, generated_test_model_path); + try IR_codeGen.utils.copyFile(dataset_test_model_path, generated_test_model_path); tests_log.info("Written user test for {s}", .{trimmed_line}); // Add relative one op test to global tests file From 35a0e7a1d72c7b12fac2016cea4bf7153e209f64 Mon Sep 17 00:00:00 2001 From: MirkoCalvi Date: Thu, 5 Jun 2025 11:58:08 +0200 Subject: [PATCH 2/9] intensive debugging --- build.zig | 2 ++ src/IR_codegen/IR_codegen.zig | 3 +++ src/IR_codegen/predict_writer.zig | 6 +++--- src/IR_codegen/utils.zig | 20 +++--------------- src/IR_graph/TODO | 8 ++++++- .../op_union/operators/op_averagePool.zig | 3 ++- src/IR_graph/op_union/operators/op_ceil.zig | 3 ++- src/IR_graph/op_union/operators/op_concat.zig | 21 ++++++++++++++----- src/IR_graph/op_union/operators/op_div.zig | 3 ++- .../op_union/operators/op_flatten.zig | 3 ++- src/IR_graph/op_union/operators/op_floor.zig | 3 ++- src/IR_graph/op_union/operators/op_gather.zig | 16 ++++++++++++-- src/IR_graph/op_union/operators/op_gelu.zig | 3 ++- .../op_union/operators/op_identity.zig | 3 ++- .../op_union/operators/op_leakyRelu.zig | 7 ++++--- src/IR_graph/op_union/operators/op_mul.zig | 3 ++- .../op_union/operators/op_sigmoid.zig | 3 ++- src/IR_graph/op_union/operators/op_sqrt.zig | 3 ++- src/IR_graph/op_union/operators/op_sub.zig | 7 +++++-- src/IR_graph/op_union/operators/op_tanh.zig | 5 +++-- .../op_union/operators/op_transpose.zig | 10 +++++---- .../Python-ONNX/available_operations.txt | 1 - tests/CodeGen/oneOpModelGenerator.zig | 3 +++ 23 files changed, 89 insertions(+), 50 deletions(-) diff --git 
a/build.zig b/build.zig index e5862b1a..d54ee780 100644 --- a/build.zig +++ b/build.zig @@ -108,6 +108,7 @@ pub fn build(b: *std.Build) void { const output_type_option = b.option([]const u8, "output_type", "Output type") orelse "f32"; const comm_option = b.option(bool, "comm", "Codegen with comments") orelse false; const dynamic_option = b.option(bool, "dynamic", "Dynamic allocation") orelse false; + const export_option = b.option(bool, "do_export", "codegen Exportable ") orelse false; //\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\ @@ -151,6 +152,7 @@ pub fn build(b: *std.Build) void { IRC_options.addOption([]const u8, "IR_generated_path", generated_path_option); IRC_options.addOption([]const u8, "IR_user_tests", user_tests_option); IRC_options.addOption(bool, "IR_log", log_option); + IRC_options.addOption(bool, "IR_do_export", export_option); IRC_options.addOption([]const u8, "IR_shape", shape_option); IRC_options.addOption([]const u8, "IR_type", input_type_option); IRC_options.addOption([]const u8, "IR_output_type", output_type_option); diff --git a/src/IR_codegen/IR_codegen.zig b/src/IR_codegen/IR_codegen.zig index ba71da5b..56e854db 100644 --- a/src/IR_codegen/IR_codegen.zig +++ b/src/IR_codegen/IR_codegen.zig @@ -17,6 +17,9 @@ const allocator = zant.utils.allocator.allocator; const ParametersWriter = @import("parameter_writer.zig"); const PredictWriter = @import("predict_writer.zig"); +// -- testing +pub const testWriter = @import("tests_writer.zig"); + // -- GLOBAL VARIABLES pub var tensorZantMap: *std.StringHashMap(TensorZant) = undefined; diff --git a/src/IR_codegen/predict_writer.zig b/src/IR_codegen/predict_writer.zig index 260e27ee..12d86356 100644 --- a/src/IR_codegen/predict_writer.zig +++ b/src/IR_codegen/predict_writer.zig @@ -42,7 +42,7 @@ pub fn write(generated_path: []const u8, model_name: []const u8, linearizedGraph // _ = linearizedGraph; // Generate prediction function code - try 
codeGenPredict.writePredict(writer, linearizedGraph, true); //do_export; + try codeGenPredict.writePredict(writer, linearizedGraph, codegen_options.IR_do_export); } /// Writes the required library imports to the generated Zig file for predict function. @@ -76,11 +76,11 @@ fn write_logFunction(writer: std.fs.File.Writer) !void { \\ \\var log_function: ?*const fn ([*c]u8) callconv(.C) void = null; \\ - \\pub export fn setLogFunction(func: ?*const fn ([*c]u8) callconv(.C) void) void {{ + \\pub {s} fn setLogFunction(func: ?*const fn ([*c]u8) callconv(.C) void) void {{ \\ log_function = func; \\}} \\ - , .{}); + , .{if (codegen_options.IR_do_export == true) "export" else ""}); } fn write_FBA(writer: std.fs.File.Writer) !void { diff --git a/src/IR_codegen/utils.zig b/src/IR_codegen/utils.zig index e9925371..f3c94782 100644 --- a/src/IR_codegen/utils.zig +++ b/src/IR_codegen/utils.zig @@ -6,9 +6,7 @@ const GraphProto = onnx.GraphProto; const NodeProto = onnx.NodeProto; const TensorProto = onnx.TensorProto; const allocator = zant.utils.allocator.allocator; -const codegen = @import("codegen"); -const globals = codegen.globals; -const tests = codegen.tests; +const testWriter = @import("tests_writer.zig"); // -------------------- GETTERS -------------------- @@ -115,18 +113,6 @@ pub inline fn getConstantTensorDims(nodeProto: *NodeProto) ![]const i64 { return if (nodeProto.attribute[0].t) |tensorProto| tensorProto.dims else error.ConstantTensorAttributeNotAvailable; } -/// This method search for the existance of a Tensor named "tensorName" inside the onnx model.graph.value_info array. -/// If founded return its shape, else returns null. 
-pub fn getTensorShape(tensorName: []const u8) ?[]i64 { - for (globals.onnxModel.graph.?.value_info) |vi| { - if (std.mem.eql(u8, vi.name.?, tensorName)) { - return vi.type.?.tensor_type.?.shape.?.shape; - } - } - - return null; -} - // ----------------- DATA TYPE management ------------- pub inline fn i64SliceToUsizeSlice(input: []const i64) ![]usize { @@ -393,14 +379,14 @@ pub fn copyFile(src_path: []const u8, dst_path: []const u8) !void { } // Read the user_tests json file and return a list of test cases -pub fn loadUserTests(comptime T: type, user_tests_path: []const u8) !std.json.Parsed([]tests.UserTest(T)) { +pub fn loadUserTests(comptime T: type, user_tests_path: []const u8) !std.json.Parsed([]testWriter.UserTest(T)) { const user_tests_file = try std.fs.cwd().openFile(user_tests_path, .{}); defer user_tests_file.close(); const user_tests_content: []const u8 = try user_tests_file.readToEndAlloc(allocator, 1024 * 1024); defer allocator.free(user_tests_content); - const parsed_user_tests = try std.json.parseFromSlice([]tests.UserTest(T), allocator, user_tests_content, .{}); + const parsed_user_tests = try std.json.parseFromSlice([]testWriter.UserTest(T), allocator, user_tests_content, .{}); return parsed_user_tests; } diff --git a/src/IR_graph/TODO b/src/IR_graph/TODO index 4ae402c5..e77f7d5f 100644 --- a/src/IR_graph/TODO +++ b/src/IR_graph/TODO @@ -26,6 +26,12 @@ IR_CODEGEN .FAULT => unreachable, - codegenerating "T" as the input type of the lean math op is not possible anymore +- use TensorZant.getNameSanitized() and not utils.getSanitizedName() +- remove "const utils = @import("codegen").utils;" from all op_*.zig and fix, +- do not codegenerate "const codegen = @import("codegen");" and use instead "const codegen = @import("codegen");" and fix by conseguence OTHER : -- check missing op between: python generator, available_operations.txt, IR_graph operators \ No newline at end of file +- check missing op between: python generator, available_operations.txt, 
IR_graph operators +- implement dynamic free in predict() +- Gathers leaks +- Gelu loops when called "zig build test-codegen" diff --git a/src/IR_graph/op_union/operators/op_averagePool.zig b/src/IR_graph/op_union/operators/op_averagePool.zig index 6bde2b5b..cbd7d824 100644 --- a/src/IR_graph/op_union/operators/op_averagePool.zig +++ b/src/IR_graph/op_union/operators/op_averagePool.zig @@ -158,7 +158,7 @@ pub const AveragePool = struct { \\ \\ \\ tensMath.onnx_averagepool_lean( - \\ T, + \\ {s}, \\ {s}, // Input \\ &tensor_{s}, // Output \\ {s}, // kernel_shape @@ -169,6 +169,7 @@ pub const AveragePool = struct { \\ {s}, // count_include_pad \\ ) , .{ + self.input_X.ty.toString(), tensor_X_string, // Input try utils.getSanitizedName(self.output_Y.name), // Output kernel_shape_string, // kernel_shape diff --git a/src/IR_graph/op_union/operators/op_ceil.zig b/src/IR_graph/op_union/operators/op_ceil.zig index 35236b9c..4b512d6a 100644 --- a/src/IR_graph/op_union/operators/op_ceil.zig +++ b/src/IR_graph/op_union/operators/op_ceil.zig @@ -65,8 +65,9 @@ pub const Ceil = struct { _ = try writer.print( \\ \\ - \\ tensMath.ceil_lean(T, {s}, &tensor_{s}) + \\ tensMath.ceil_lean({s}, {s}, &tensor_{s}) , .{ + self.input_X.ty.toString(), input_tensor_string, try utils.getSanitizedName(self.output_Y.name), }); diff --git a/src/IR_graph/op_union/operators/op_concat.zig b/src/IR_graph/op_union/operators/op_concat.zig index ed6c2cf2..1e5a7064 100644 --- a/src/IR_graph/op_union/operators/op_concat.zig +++ b/src/IR_graph/op_union/operators/op_concat.zig @@ -90,8 +90,13 @@ pub const Concat = struct { \\ mathHandler_log.warn("\\nWarning: Concatenating tensors with different ranks along axis 0\\n", .{{}}); \\ \\ // Create a list of tensors to concatenate - \\ var concat_tensor_list_{s} = [_]Tensor(T){{ - , .{try utils.getSanitizedName(self.concat_result.name)}); + \\ var concat_tensor_list_{s} = [_]Tensor({s}){{ + , + .{ + try utils.getSanitizedName(self.concat_result.name), 
//r_list_{s} + self.inputs.items[0].ty.toString(), //[_]Tensor({s}) + }, + ); for (self.inputs.items, 0..) |input, idx| { if (idx > 0) { @@ -132,8 +137,13 @@ pub const Concat = struct { _ = try writer.print( \\ \\ // Create a list of tensors to concatenate - \\ var concat_tensor_list_{s} = [_]Tensor(T){{ - , .{try utils.getSanitizedName(self.concat_result.name)}); + \\ var concat_tensor_list_{s} = [_]Tensor({s}){{ + , + .{ + try utils.getSanitizedName(self.concat_result.name), + self.inputs.items[0].ty.toString(), + }, + ); for (self.inputs.items, 0..) |input, idx| { if (idx > 0) { @@ -151,8 +161,9 @@ pub const Concat = struct { \\}}; \\ \\ // Perform concatenation - \\ tensMath.concatenate_lean(T, &allocator, &concat_tensor_list_{s}, {}, &tensor_{s} ) + \\ tensMath.concatenate_lean({s}, &allocator, &concat_tensor_list_{s}, {}, &tensor_{s} ) , .{ + self.inputs.items[0].ty.toString(), try utils.getSanitizedName(self.concat_result.name), self.axis, try utils.getSanitizedName(self.concat_result.name), diff --git a/src/IR_graph/op_union/operators/op_div.zig b/src/IR_graph/op_union/operators/op_div.zig index 1e5f2fb8..5fb16d33 100644 --- a/src/IR_graph/op_union/operators/op_div.zig +++ b/src/IR_graph/op_union/operators/op_div.zig @@ -83,8 +83,9 @@ pub const Div = struct { _ = try writer.print( \\ - \\ tensMath.div_lean(T, {s}, ({s}), &tensor_{s}) + \\ tensMath.div_lean({s}, {s}, ({s}), &tensor_{s}) , .{ + self.input_A.ty.toString(), tensor_A_string, // Input tensor A tensor_B_string, // Input tensor B try utils.getSanitizedName(self.output_C.name), // Output tensor C diff --git a/src/IR_graph/op_union/operators/op_flatten.zig b/src/IR_graph/op_union/operators/op_flatten.zig index 62948125..08e21ea8 100644 --- a/src/IR_graph/op_union/operators/op_flatten.zig +++ b/src/IR_graph/op_union/operators/op_flatten.zig @@ -92,8 +92,9 @@ pub const Flatten = struct { _ = try writer.print( \\ \\ - \\ try tensMath.flatten(T, {s}, &tensor_{s}); + \\ tensMath.flatten_lean({s}, {s}, 
&tensor_{s}) , .{ + self.data.ty.toString(), input_string, output_name, }); diff --git a/src/IR_graph/op_union/operators/op_floor.zig b/src/IR_graph/op_union/operators/op_floor.zig index f115c19d..0e308b9d 100644 --- a/src/IR_graph/op_union/operators/op_floor.zig +++ b/src/IR_graph/op_union/operators/op_floor.zig @@ -73,8 +73,9 @@ pub const Floor = struct { _ = try writer.print( \\ \\ - \\ tensMath.floor_lean(T, {s}, &tensor_{s}) + \\ tensMath.floor_lean({s}, {s}, &tensor_{s}) , .{ + self.input_X.ty.toString(), input_tensor_string, try utils.getSanitizedName(self.output_Y.name), }); diff --git a/src/IR_graph/op_union/operators/op_gather.zig b/src/IR_graph/op_union/operators/op_gather.zig index 7c740fdf..5a857967 100644 --- a/src/IR_graph/op_union/operators/op_gather.zig +++ b/src/IR_graph/op_union/operators/op_gather.zig @@ -106,16 +106,28 @@ pub const Gather = struct { }); } + _ = try writer.print( + \\ + \\ var tensor_usize_{s} = Tensor(usize).fromArray(&allocator, utils.sliceToUsizeSlice({s}.data), {s}.shape) catch return; + \\ defer tensor_usize_{s}.deinit(); + , .{ + try utils.getSanitizedName(self.input_B.name), //tensor_usize_{s} + tensor_B_string, + tensor_B_string, + try utils.getSanitizedName(self.input_B.name), //tensor_usize_{s} + }); + // Output C const output_name = try utils.getSanitizedName(self.output_C.name); _ = try writer.print( \\ \\ - \\ try tensMath.lean_gather(T, {s}, {s}, {}, &tensor_{s}); + \\ tensMath.gather_lean({s}, {s}, &tensor_usize_{s}, {}, &tensor_{s},) , .{ + self.input_A.ty.toString(), tensor_A_string, - tensor_B_string, + try utils.getSanitizedName(self.input_B.name), self.axis, output_name, }); diff --git a/src/IR_graph/op_union/operators/op_gelu.zig b/src/IR_graph/op_union/operators/op_gelu.zig index 1c5162e2..c59c7e44 100644 --- a/src/IR_graph/op_union/operators/op_gelu.zig +++ b/src/IR_graph/op_union/operators/op_gelu.zig @@ -77,8 +77,9 @@ pub const Gelu = struct { _ = try writer.print( \\ - \\ tensMath.gelu_lean(T, {s}, 
"{s}", &tensor_{s}) + \\ tensMath.gelu_lean({s}, {s}, "{s}", &tensor_{s}) , .{ + self.input_X.ty.toString(), input_tensor_string, self.approximate, try utils.getSanitizedName(self.output_Y.name), diff --git a/src/IR_graph/op_union/operators/op_identity.zig b/src/IR_graph/op_union/operators/op_identity.zig index 942c3027..9d33b165 100644 --- a/src/IR_graph/op_union/operators/op_identity.zig +++ b/src/IR_graph/op_union/operators/op_identity.zig @@ -66,8 +66,9 @@ pub const Identity = struct { _ = try writer.print( \\ \\ - \\ tensMath.identity_lean(T, {s}, &tensor_{s}) + \\ tensMath.identity_lean({s}, {s}, &tensor_{s}) , .{ + self.input.ty.toString(), input_tensor_string, try utils.getSanitizedName(self.output.name), }); diff --git a/src/IR_graph/op_union/operators/op_leakyRelu.zig b/src/IR_graph/op_union/operators/op_leakyRelu.zig index 99d0a18c..8910b69b 100644 --- a/src/IR_graph/op_union/operators/op_leakyRelu.zig +++ b/src/IR_graph/op_union/operators/op_leakyRelu.zig @@ -19,9 +19,9 @@ const utils = @import("codegen").utils; // https://onnx.ai/onnx/operators/onnx__LeakyRelu.html#l-onnx-doc-leakyrelu // INPUTS: -// - A (heterogeneous) - T: input tensor. +// - X (heterogeneous) - T: input tensor. // OUTPUTS: -// - C (heterogeneous) - T: output tensor. +// - Y (heterogeneous) - T: output tensor. // ATTRIBUTES: // - alpha (float) - coefficent of leakage. Default is 0.01. 
@@ -75,8 +75,9 @@ pub const LeakyRelu = struct { _ = try writer.print( \\ - \\ tensMath.leakyReLU_lean(T, {s}, {d}, &tensor_{s}) + \\ tensMath.leakyReLU_lean({s}, {s}, {d}, &tensor_{s}) , .{ + self.input_X.ty.toString(), input_tensor_string, self.alpha, try utils.getSanitizedName(self.output_Y.name), diff --git a/src/IR_graph/op_union/operators/op_mul.zig b/src/IR_graph/op_union/operators/op_mul.zig index 68a21299..7ab81cd0 100644 --- a/src/IR_graph/op_union/operators/op_mul.zig +++ b/src/IR_graph/op_union/operators/op_mul.zig @@ -86,8 +86,9 @@ pub const Mul = struct { _ = try writer.print( \\ \\ - \\ tensMath.mul_lean(T, {s}, ({s}), &tensor_{s}) + \\ tensMath.mul_lean({s}, {s}, ({s}), &tensor_{s}) , .{ + self.input_A.ty.toString(), tensor_A_string, // Input tensor A tensor_B_string, // Input tensor B try utils.getSanitizedName(self.output_C.name), // Output tensor C diff --git a/src/IR_graph/op_union/operators/op_sigmoid.zig b/src/IR_graph/op_union/operators/op_sigmoid.zig index cde71c93..90345a0a 100644 --- a/src/IR_graph/op_union/operators/op_sigmoid.zig +++ b/src/IR_graph/op_union/operators/op_sigmoid.zig @@ -64,12 +64,13 @@ pub const Sigmoid = struct { _ = try writer.print( \\ \\ tensMath.sigmoid_lean( - \\ T, + \\ {s}, \\ {s}, \\ &tensor_{s}, \\ ) , .{ + self.input_X.ty.toString(), tensor_X_string, try utils.getSanitizedName(self.output_Y.name), }, diff --git a/src/IR_graph/op_union/operators/op_sqrt.zig b/src/IR_graph/op_union/operators/op_sqrt.zig index 074f6595..09ca32b0 100644 --- a/src/IR_graph/op_union/operators/op_sqrt.zig +++ b/src/IR_graph/op_union/operators/op_sqrt.zig @@ -73,8 +73,9 @@ pub const Sqrt = struct { _ = try writer.print( \\ \\ - \\ tensMath.sqrt_lean(T, {s}, &tensor_{s}) + \\ tensMath.sqrt_lean({s}, {s}, &tensor_{s}) , .{ + self.input_X.ty.toString(), input_tensor_string, try utils.getSanitizedName(self.output_Y.name), }); diff --git a/src/IR_graph/op_union/operators/op_sub.zig b/src/IR_graph/op_union/operators/op_sub.zig index 
c746a99b..e9cfcf1d 100644 --- a/src/IR_graph/op_union/operators/op_sub.zig +++ b/src/IR_graph/op_union/operators/op_sub.zig @@ -95,12 +95,15 @@ pub const Sub = struct { _ = try writer.print( \\ tensMath.sub_tensors_lean( - \\ T, + \\ {s}, // input type + \\ {s}, // output type \\ {s}, // input A \\ {s}, // input B \\ &tensor_{s} // output Y - \\ ); + \\ ) , .{ + self.input_A.ty.toString(), + self.output_Y.ty.toString(), tensor_A_string, tensor_B_string, try utils.getSanitizedName(self.output_Y.name), diff --git a/src/IR_graph/op_union/operators/op_tanh.zig b/src/IR_graph/op_union/operators/op_tanh.zig index 48c928e8..76ea30ad 100644 --- a/src/IR_graph/op_union/operators/op_tanh.zig +++ b/src/IR_graph/op_union/operators/op_tanh.zig @@ -76,11 +76,12 @@ pub const Tanh = struct { // --- Write the Tanh op _ = try writer.print( \\ tensMath.tanh_lean( - \\ T, + \\ {s}, \\ {s}, // input tensor \\ &tensor_{s} // output tensor - \\ ); + \\ ) , .{ + self.input_X.ty.toString(), tensor_X_string, try utils.getSanitizedName(self.output_Y.name), }); diff --git a/src/IR_graph/op_union/operators/op_transpose.zig b/src/IR_graph/op_union/operators/op_transpose.zig index 85ac0e19..0cc8949b 100644 --- a/src/IR_graph/op_union/operators/op_transpose.zig +++ b/src/IR_graph/op_union/operators/op_transpose.zig @@ -99,13 +99,15 @@ pub const Transpose = struct { // --- Write transpose op _ = try writer.print( - \\ tensMath.transpose_lean( - \\ T, + \\ tensMath.transpose_onnx_lean( + \\ {s}, //input type \\ {s}, // input tensor \\ {s}, // perm array - \\ &tensor_{s} // output tensor - \\ ); + \\ &tensor_{s}, // output + \\ allocator, + \\ ) , .{ + self.input_X.ty.toString(), tensor_X_string, perm_string, try utils.getSanitizedName(self.output_Y.name), diff --git a/tests/CodeGen/Python-ONNX/available_operations.txt b/tests/CodeGen/Python-ONNX/available_operations.txt index 1755ef55..49a1c273 100644 --- a/tests/CodeGen/Python-ONNX/available_operations.txt +++ 
b/tests/CodeGen/Python-ONNX/available_operations.txt @@ -11,7 +11,6 @@ Flatten Floor Gather Gemm -Gelu Identity LeakyRelu MatMul diff --git a/tests/CodeGen/oneOpModelGenerator.zig b/tests/CodeGen/oneOpModelGenerator.zig index 6489b4d7..ef3c09e6 100644 --- a/tests/CodeGen/oneOpModelGenerator.zig +++ b/tests/CodeGen/oneOpModelGenerator.zig @@ -93,6 +93,9 @@ pub fn main() !void { // ONNX model parsing try IR_codeGen.codegnenerateFromOnnx(trimmed_line, generated_path, model); + // Create relative tests + try IR_codeGen.testWriter.writeSlimTestFile(trimmed_line, generated_path); + // Copy user test file into the generated test file, do not touch, this is not related to model codegen ! const dataset_test_model_path = try std.fmt.allocPrint(allocator, "datasets/oneOpModels/{s}_0_user_tests.json", .{trimmed_line}); defer allocator.free(dataset_test_model_path); From a45707d7c45887e7253c54eeab9c45068548e678 Mon Sep 17 00:00:00 2001 From: MirkoCalvi Date: Thu, 5 Jun 2025 12:00:05 +0200 Subject: [PATCH 3/9] read the IR_graph TODO --- src/IR_graph/TODO | 1 + 1 file changed, 1 insertion(+) diff --git a/src/IR_graph/TODO b/src/IR_graph/TODO index e77f7d5f..6baf4740 100644 --- a/src/IR_graph/TODO +++ b/src/IR_graph/TODO @@ -35,3 +35,4 @@ OTHER : - implement dynamic free in predict() - Gathers leaks - Gelu loops when called "zig build test-codegen" +- add comments on available_operation.txt, something like ?op_name is ignored From 916b5ba703210d60f16ad002c49ff34e3cf3c772 Mon Sep 17 00:00:00 2001 From: MirkoCalvi Date: Thu, 5 Jun 2025 14:42:13 +0200 Subject: [PATCH 4/9] gather fixed --- src/IR_codegen/utils.zig | 4 ++-- src/IR_graph/op_union/operators/op_gather.zig | 18 +++++++++++++----- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/src/IR_codegen/utils.zig b/src/IR_codegen/utils.zig index f3c94782..018012dc 100644 --- a/src/IR_codegen/utils.zig +++ b/src/IR_codegen/utils.zig @@ -171,7 +171,7 @@ pub inline fn toUsize(comptime T: type, value: T) !usize { return 
@intCast(value); } -pub inline fn sliceToUsizeSlice(slice: anytype) []usize { +pub inline fn sliceToUsizeSlice(this_allocator: std.mem.Allocator, slice: anytype) []usize { const T = @TypeOf(slice); const info = @typeInfo(T); @@ -180,7 +180,7 @@ pub inline fn sliceToUsizeSlice(slice: anytype) []usize { const child = info.pointer.child; const child_info = @typeInfo(child); - var output = allocator.alloc(usize, slice.len) catch @panic("Out of memory in sliceToUsizeSlice"); + var output = this_allocator.alloc(usize, slice.len) catch @panic("Out of memory in sliceToUsizeSlice"); const maxUsize = std.math.maxInt(usize); for (slice, 0..) |value, index| { diff --git a/src/IR_graph/op_union/operators/op_gather.zig b/src/IR_graph/op_union/operators/op_gather.zig index 5a857967..6f42c237 100644 --- a/src/IR_graph/op_union/operators/op_gather.zig +++ b/src/IR_graph/op_union/operators/op_gather.zig @@ -107,13 +107,15 @@ pub const Gather = struct { } _ = try writer.print( - \\ - \\ var tensor_usize_{s} = Tensor(usize).fromArray(&allocator, utils.sliceToUsizeSlice({s}.data), {s}.shape) catch return; + \\ const array_usize_{s}= utils.sliceToUsizeSlice(allocator, {s}.data); + \\ defer allocator.free(array_usize_{s); + \\ var tensor_usize_{s} = Tensor(usize).fromArray(&allocator, array_usize_{s}, {s}.shape) catch return; \\ defer tensor_usize_{s}.deinit(); , .{ + try utils.getSanitizedName(self.input_B.name), //array_usize_{s} + tensor_B_string, //{s}.data try utils.getSanitizedName(self.input_B.name), //tensor_usize_{s} - tensor_B_string, - tensor_B_string, + try utils.getSanitizedName(self.input_B.name), //array_usize_{s} try utils.getSanitizedName(self.input_B.name), //tensor_usize_{s} }); @@ -123,7 +125,13 @@ pub const Gather = struct { _ = try writer.print( \\ \\ - \\ tensMath.gather_lean({s}, {s}, &tensor_usize_{s}, {}, &tensor_{s},) + \\ tensMath.gather_lean( + \\ {s}, // input type + \\ {s}, // input tensor + \\ &tensor_usize_{s}, + \\ {}, + \\ &tensor_{s}, + \\ ) , .{ 
self.input_A.ty.toString(), tensor_A_string, From 8474c52b0f3236a506ae2971cde511a9a0b4516e Mon Sep 17 00:00:00 2001 From: MirkoCalvi Date: Thu, 5 Jun 2025 15:00:55 +0200 Subject: [PATCH 5/9] gather fixed again --- src/IR_graph/op_union/operators/op_gather.zig | 22 ++++++++++++++----- 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/src/IR_graph/op_union/operators/op_gather.zig b/src/IR_graph/op_union/operators/op_gather.zig index 6f42c237..a17abee9 100644 --- a/src/IR_graph/op_union/operators/op_gather.zig +++ b/src/IR_graph/op_union/operators/op_gather.zig @@ -107,16 +107,26 @@ pub const Gather = struct { } _ = try writer.print( + \\ + \\ \\ const array_usize_{s}= utils.sliceToUsizeSlice(allocator, {s}.data); - \\ defer allocator.free(array_usize_{s); + \\ defer allocator.free(array_usize_{s}); + , .{ + try self.input_B.getNameSanitized(), //array_usize_{s} + tensor_B_string, //{s}.data + try self.input_B.getNameSanitized(), //defer allocator.free(array_usize_{s); + }); + + _ = try writer.print( + \\ + \\ \\ var tensor_usize_{s} = Tensor(usize).fromArray(&allocator, array_usize_{s}, {s}.shape) catch return; \\ defer tensor_usize_{s}.deinit(); , .{ - try utils.getSanitizedName(self.input_B.name), //array_usize_{s} - tensor_B_string, //{s}.data - try utils.getSanitizedName(self.input_B.name), //tensor_usize_{s} - try utils.getSanitizedName(self.input_B.name), //array_usize_{s} - try utils.getSanitizedName(self.input_B.name), //tensor_usize_{s} + try self.input_B.getNameSanitized(), //tensor_usize_{s} + try self.input_B.getNameSanitized(), //array_usize_{s} + tensor_B_string, //{s}.shape + try self.input_B.getNameSanitized(), //defer tensor_usize_{s}.deinit(); }); // Output C From 9d69bdd04b7193eee55427991a51a08e650c668c Mon Sep 17 00:00:00 2001 From: MirkoCalvi Date: Thu, 5 Jun 2025 15:14:08 +0200 Subject: [PATCH 6/9] all op in available_operation pass on op tests --- tests/CodeGen/oneOpModelGenerator.zig | 20 ++++++++--------- 
tests/CodeGen/test_model.slim.template.zig | 17 ++++++-------- tests/CodeGen/test_model.template.zig | 26 ++++++++++------------ tests/IR_graph/test_all_write_op.zig | 2 +- 4 files changed, 29 insertions(+), 36 deletions(-) diff --git a/tests/CodeGen/oneOpModelGenerator.zig b/tests/CodeGen/oneOpModelGenerator.zig index ef3c09e6..ec235a17 100644 --- a/tests/CodeGen/oneOpModelGenerator.zig +++ b/tests/CodeGen/oneOpModelGenerator.zig @@ -9,17 +9,15 @@ const allocator = pkgAllocator.allocator; const onnx = zant.onnx; const IR_codeGen = @import("IR_codegen"); -const tests_log = std.log.scoped(.test_oneOP); - // called by "zig build test-codegen-gen" optionals:" -Dlog -Dmodel="name" -D ..." see build.zig" pub fn main() !void { - tests_log.info("One ONNX Operator Model Generator", .{}); + std.debug.print("One ONNX Operator Model Generator", .{}); //collecting available operations from tests/CodeGen/Python-ONNX/available_operations.txt - tests_log.info("\n opening available_operations...", .{}); + std.debug.print("\n opening available_operations...", .{}); const op_file = try std.fs.cwd().openFile("tests/CodeGen/Python-ONNX/available_operations.txt", .{}); defer op_file.close(); - tests_log.info(" done", .{}); + std.debug.print(" done", .{}); const file_size = try op_file.getEndPos(); const buffer = try allocator.alloc(u8, @intCast(file_size)); @@ -58,8 +56,8 @@ pub fn main() !void { // Get the next line from the iterator. const maybe_line = lines_iter.next(); - if (maybe_line) |ml| tests_log.info("maybe_line: {any}\n", .{ml}) else { - tests_log.info("maybe_line: null -----> break\n", .{}); + if (maybe_line) |ml| std.debug.print("maybe_line: {any}\n", .{ml}) else { + std.debug.print("maybe_line: null -----> break\n", .{}); break; } @@ -67,13 +65,13 @@ pub fn main() !void { // Trim whitespace from the line. 
const trimmed_line = std.mem.trim(u8, raw_line, " \t\r\n"); if (trimmed_line.len > 0) { - tests_log.info("Operation: {s}\n", .{trimmed_line}); + std.debug.print(" ############ Operation: {s} ############\n", .{trimmed_line}); } // Construct the model file path: "Phython-ONNX/{op}_0.onnx" const model_path = try std.fmt.allocPrint(allocator, "datasets/oneOpModels/{s}_0.onnx", .{trimmed_line}); defer allocator.free(model_path); - tests_log.info("model_path : {s}", .{model_path}); + std.debug.print("model_path : {s}", .{model_path}); // Load the model. var model = try onnx.parseFromFile(allocator, model_path); @@ -82,7 +80,7 @@ pub fn main() !void { //DEBUG //model.print(); - tests_log.info("\n CODEGENERATING {s} ...", .{model_path}); + std.debug.print("\n CODEGENERATING {s} ...", .{model_path}); // Create the generated model directory if not present const generated_path = try std.fmt.allocPrint(allocator, "generated/oneOpModels/{s}/", .{trimmed_line}); @@ -104,7 +102,7 @@ pub fn main() !void { defer allocator.free(generated_test_model_path); try IR_codeGen.utils.copyFile(dataset_test_model_path, generated_test_model_path); - tests_log.info("Written user test for {s}", .{trimmed_line}); + std.debug.print("Written user test for {s}", .{trimmed_line}); // Add relative one op test to global tests file try test_oneop_writer.print("\t _ = @import(\"{s}/test_{s}.zig\"); \n", .{ trimmed_line, trimmed_line }); diff --git a/tests/CodeGen/test_model.slim.template.zig b/tests/CodeGen/test_model.slim.template.zig index 1d37f2d8..a758b3b5 100644 --- a/tests/CodeGen/test_model.slim.template.zig +++ b/tests/CodeGen/test_model.slim.template.zig @@ -6,8 +6,6 @@ const Tensor = zant.core.tensor.Tensor; const pkgAllocator = zant.utils.allocator; const allocator = pkgAllocator.allocator; -const tests_log = std.log.scoped(.test_utils); - const model = @import("model_options.zig"); const ITERATION_COUNT: u32 = 100; @@ -15,7 +13,7 @@ const ITERATION_COUNT: u32 = 100; test "Static Library - 
Random data Prediction Test" { std.testing.log_level = .info; - tests_log.info("\n test: Static Library - Model: {s} - Random data Prediction Test\n", .{model.name}); + std.debug.print("\ntest: Static Library - Model: {s} - Random data Prediction Test -------------------------\n", .{model.name}); var input_shape = model.input_shape; @@ -50,13 +48,13 @@ test "Static Library - Random data Prediction Test" { &result, ); } - tests_log.info("\nRan 100 fuzzy tests on model \"{s}\", done without errors:\n", .{model.name}); + std.debug.print("\nRan 100 fuzzy tests on model \"{s}\", done without errors:\n", .{model.name}); } test "Static Library - Inputs Prediction Test" { std.testing.log_level = .info; - tests_log.info("\n test: Codegen one-op model: \"{s}\" compare with Pre-Generated results.\n", .{model.name}); + std.debug.print("\ntest: Codegen one-op model: \"{s}\" compare with Pre-Generated results. -------------------------\n", .{model.name}); var input_shape = model.input_shape; @@ -69,17 +67,17 @@ test "Static Library - Inputs Prediction Test" { const user_tests_path = try std.fmt.allocPrint(allocator, "generated/oneOpModels/{s}/user_tests.json", .{model.name}); defer allocator.free(user_tests_path); - tests_log.info("{s}", .{user_tests_path}); + std.debug.print("{s}", .{user_tests_path}); const parsed_user_tests = try utils.loadUserTests(model.data_type, user_tests_path); defer parsed_user_tests.deinit(); const user_tests = parsed_user_tests.value; - tests_log.info("\nUser tests loaded.\n", .{}); + std.debug.print("\nUser tests loaded.\n", .{}); for (user_tests) |user_test| { - tests_log.info("\n\tRunning user test: {s}\n\n", .{user_test.name}); + std.debug.print("\n\tRunning user test: {s}\n\n", .{user_test.name}); try std.testing.expectEqual(user_test.input.len, input_data_len); @@ -98,8 +96,7 @@ test "Static Library - Inputs Prediction Test" { const expected_output_value = expected_output; const approx_eq = std.math.approxEqAbs(model.data_type, 
expected_output_value, result_value, 0.001); if (!approx_eq) - tests_log.warn("Test failed for input: {d} expected: {} got: {}\n", .{ i, expected_output_value, result_value }); - + std.debug.print("Test failed for input: {d} expected: {} got: {}\n", .{ i, expected_output_value, result_value }); try std.testing.expect(approx_eq); } } diff --git a/tests/CodeGen/test_model.template.zig b/tests/CodeGen/test_model.template.zig index 914c4d42..f3e89e39 100644 --- a/tests/CodeGen/test_model.template.zig +++ b/tests/CodeGen/test_model.template.zig @@ -6,13 +6,11 @@ const Tensor = zant.core.tensor.Tensor; const pkgAllocator = zant.utils.allocator; const allocator = pkgAllocator.allocator; -const tests_log = std.log.scoped(.test_model); - const model = @import("model_options.zig"); test "Static Library - Random data Prediction Test" { std.testing.log_level = .info; - tests_log.info("\n test: Static Library - {s} Random data Prediction Test\n", .{model.name}); + std.debug.print("\ntest: Static Library - {s} Random data Prediction Test -------------------\n", .{model.name}); var input_shape = model.input_shape; @@ -42,7 +40,7 @@ test "Static Library - Random data Prediction Test" { const LogFn = fn ([*c]u8) callconv(.C) void; const logFn: LogFn = struct { fn log(msg: [*c]u8) callconv(.C) void { - tests_log.debug("{s}", .{msg}); + std.debug.print("{s}", .{msg}); } }.log; @@ -57,13 +55,13 @@ test "Static Library - Random data Prediction Test" { &result, ); - tests_log.info("\nPrediction done without errors:\n", .{}); + std.debug.print("\nPrediction done without errors:\n", .{}); } test "Static Library - Wrong Input Shape" { std.testing.log_level = .info; - tests_log.info("\n test: Static Library - {s} Wrong Input Shape\n", .{model.name}); + std.debug.print("\ntest: Static Library - {s} Wrong Input Shape -------------------\n", .{model.name}); // Test with wrong input shape @@ -113,7 +111,7 @@ test "Static Library - Wrong Input Shape" { test "Static Library - Empty Input" { 
std.testing.log_level = .info; - tests_log.info("\n test: Static Library - {s} Empty Input\n", .{model.name}); + std.debug.print("\ntest: Static Library - {s} Empty Input -------------------\n", .{model.name}); // Test with empty input var input_data = [_]model.data_type{}; @@ -131,7 +129,7 @@ test "Static Library - Empty Input" { test "Static Library - Wrong Number of Dimensions" { std.testing.log_level = .info; - tests_log.info("\n test: Static Library - {s} Wrong Number of Dimensions\n", .{model.name}); + std.debug.print("\ntest: Static Library - {s} Wrong Number of Dimensions -------------------\n", .{model.name}); const model_input_shape = model.input_shape; @@ -165,10 +163,10 @@ test "Static Library - Wrong Number of Dimensions" { test "Static Library - User data Prediction Test" { std.testing.log_level = .info; - tests_log.info("\n test: Static Library - {s} User data Prediction Test\n", .{model.name}); + std.debug.print("\ntest: Static Library - {s} User data Prediction Test -------------------\n", .{model.name}); if (!model.enable_user_tests) { - tests_log.info("\nUser tests are disabled for this model\n", .{}); + std.debug.print("\nUser tests are disabled for this model\n", .{}); return; } @@ -176,7 +174,7 @@ test "Static Library - User data Prediction Test" { const LogFn = fn ([*c]u8) callconv(.C) void; const logFn: LogFn = struct { fn log(msg: [*c]u8) callconv(.C) void { - tests_log.debug("{s}", .{msg}); + std.debug.print("{s}", .{msg}); } }.log; @@ -195,10 +193,10 @@ test "Static Library - User data Prediction Test" { const user_tests = parsed_user_tests.value; - tests_log.debug("\nUser tests loaded.\n", .{}); + std.debug.print("\nUser tests loaded.\n", .{}); for (user_tests) |user_test| { - tests_log.debug("\n\tRunning user test: {s}\n\n", .{user_test.name}); + std.debug.print("\n\tRunning user test: {s}\n\n", .{user_test.name}); try std.testing.expectEqual(user_test.input.len, input_data_len); @@ -245,7 +243,7 @@ test "Static Library - User data 
Prediction Test" { try std.testing.expectEqual(expected_output_value, result_value); } } else { - tests_log.debug("Unsupported test type: {s}\n", .{user_test.type}); + std.debug.print("Unsupported test type: {s}\n", .{user_test.type}); try std.testing.expect(false); } } diff --git a/tests/IR_graph/test_all_write_op.zig b/tests/IR_graph/test_all_write_op.zig index 2b2bbdb5..f317afd3 100644 --- a/tests/IR_graph/test_all_write_op.zig +++ b/tests/IR_graph/test_all_write_op.zig @@ -59,7 +59,7 @@ test "Test write_op on all oneOp models" { // Trim whitespace from the line. const trimmed_line = std.mem.trim(u8, raw_line, " \t\r\n"); if (trimmed_line.len > 0) { - std.debug.print("Operation: {s}\n", .{trimmed_line}); + std.debug.print(" ############ Operation: {s} ############ \n", .{trimmed_line}); } const model_name = trimmed_line; From 9cad14ea2fb3a576dc6e5c67d1dd72d4914d3451 Mon Sep 17 00:00:00 2001 From: MirkoCalvi Date: Sat, 7 Jun 2025 15:23:47 +0200 Subject: [PATCH 7/9] refactro, mnist-8 codegen working --- build.zig | 187 +- src/CodeGen/codegen.zig | 9 - src/CodeGen/globals.zig | 427 -- src/CodeGen/main.zig | 57 - src/CodeGen/math_handler.zig | 3666 ----------------- src/CodeGen/parameters.zig | 203 - src/CodeGen/predict.zig | 491 --- src/CodeGen/shape_handler.zig | 1818 -------- src/CodeGen/skeleton.zig | 138 - src/CodeGen/{utils.zig => utils_old.zig} | 0 src/{ => IR_zant}/IR_codegen/IR_codegen.zig | 4 +- src/{ => IR_zant}/IR_codegen/main.zig | 13 +- .../IR_codegen/parameter_writer.zig | 0 .../IR_codegen/parameters/parameters.zig | 13 +- .../IR_codegen/predict/predict.zig | 19 +- .../IR_codegen/predict_writer.zig | 16 +- src/{ => IR_zant}/IR_codegen/tests_writer.zig | 13 +- src/{ => IR_zant}/IR_codegen/utils.zig | 6 +- src/{ => IR_zant}/IR_graph/Context.md | 0 .../HOW_TO_ADD_MATHEMATICAL_OPERATIONS.md | 0 src/{ => IR_zant}/IR_graph/IR_graph.zig | 0 src/{ => IR_zant}/IR_graph/TODO | 12 +- src/{ => IR_zant}/IR_graph/graphZant.zig | 0 src/{ => 
IR_zant}/IR_graph/nodeZant.zig | 0 src/{ => IR_zant}/IR_graph/op_template.md | 0 .../IR_graph/op_union/op_union.zig | 0 .../IR_graph/op_union/operators/op_add.zig | 1 - .../op_union/operators/op_averagePool.zig | 15 +- .../operators/op_batchNormalization.zig | 26 +- .../IR_graph/op_union/operators/op_ceil.zig | 15 +- .../IR_graph/op_union/operators/op_concat.zig | 17 +- .../op_union/operators/op_constant.zig | 11 +- .../IR_graph/op_union/operators/op_conv.zig | 25 +- .../IR_graph/op_union/operators/op_div.zig | 19 +- .../IR_graph/op_union/operators/op_elu.zig | 17 +- .../op_union/operators/op_flatten.zig | 18 +- .../IR_graph/op_union/operators/op_floor.zig | 20 +- .../IR_graph/op_union/operators/op_gather.zig | 23 +- .../IR_graph/op_union/operators/op_gelu.zig | 21 +- .../IR_graph/op_union/operators/op_gemm.zig | 22 +- .../op_union/operators/op_identity.zig | 17 +- .../op_union/operators/op_leakyRelu.zig | 18 +- .../IR_graph/op_union/operators/op_matMul.zig | 22 +- .../op_union/operators/op_maxPool.zig | 20 +- .../IR_graph/op_union/operators/op_mul.zig | 20 +- .../IR_graph/op_union/operators/op_neg.zig | 18 +- .../IR_graph/op_union/operators/op_oneHot.zig | 24 +- .../op_union/operators/op_reduceMean.zig | 20 +- .../IR_graph/op_union/operators/op_relu.zig | 18 +- .../op_union/operators/op_reshape.zig | 20 +- .../IR_graph/op_union/operators/op_resize.zig | 24 +- .../IR_graph/op_union/operators/op_shape.zig | 18 +- .../op_union/operators/op_sigmoid.zig | 18 +- .../IR_graph/op_union/operators/op_slice.zig | 27 +- .../op_union/operators/op_softmax.zig | 19 +- .../IR_graph/op_union/operators/op_split.zig | 21 +- .../IR_graph/op_union/operators/op_sqrt.zig | 20 +- .../IR_graph/op_union/operators/op_sub.zig | 22 +- .../IR_graph/op_union/operators/op_tanh.zig | 19 +- .../op_union/operators/op_transpose.zig | 19 +- .../op_union/operators/op_unsqueeze.zig | 21 +- .../op_union/operators/op_useless.zig | 11 +- .../IR_graph/op_union/operators/operators.zig | 0 src/{ => 
IR_zant}/IR_graph/tensorZant.zig | 0 src/{ => IR_zant}/IR_graph/utils.zig | 0 src/IR_zant/IR_zant.zig | 5 + .../Python-ONNX/available_operations.txt | 2 +- tests/CodeGen/test_model.slim.template.zig | 2 +- 68 files changed, 499 insertions(+), 7288 deletions(-) delete mode 100644 src/CodeGen/codegen.zig delete mode 100644 src/CodeGen/globals.zig delete mode 100644 src/CodeGen/main.zig delete mode 100644 src/CodeGen/math_handler.zig delete mode 100644 src/CodeGen/parameters.zig delete mode 100644 src/CodeGen/predict.zig delete mode 100644 src/CodeGen/shape_handler.zig delete mode 100644 src/CodeGen/skeleton.zig rename src/CodeGen/{utils.zig => utils_old.zig} (100%) rename src/{ => IR_zant}/IR_codegen/IR_codegen.zig (94%) rename src/{ => IR_zant}/IR_codegen/main.zig (84%) rename src/{ => IR_zant}/IR_codegen/parameter_writer.zig (100%) rename src/{ => IR_zant}/IR_codegen/parameters/parameters.zig (94%) rename src/{ => IR_zant}/IR_codegen/predict/predict.zig (96%) rename src/{ => IR_zant}/IR_codegen/predict_writer.zig (90%) rename src/{ => IR_zant}/IR_codegen/tests_writer.zig (92%) rename src/{ => IR_zant}/IR_codegen/utils.zig (98%) rename src/{ => IR_zant}/IR_graph/Context.md (100%) rename src/{ => IR_zant}/IR_graph/HOW_TO_ADD_MATHEMATICAL_OPERATIONS.md (100%) rename src/{ => IR_zant}/IR_graph/IR_graph.zig (100%) rename src/{ => IR_zant}/IR_graph/TODO (75%) rename src/{ => IR_zant}/IR_graph/graphZant.zig (100%) rename src/{ => IR_zant}/IR_graph/nodeZant.zig (100%) rename src/{ => IR_zant}/IR_graph/op_template.md (100%) rename src/{ => IR_zant}/IR_graph/op_union/op_union.zig (100%) rename src/{ => IR_zant}/IR_graph/op_union/operators/op_add.zig (98%) rename src/{ => IR_zant}/IR_graph/op_union/operators/op_averagePool.zig (92%) rename src/{ => IR_zant}/IR_graph/op_union/operators/op_batchNormalization.zig (88%) rename src/{ => IR_zant}/IR_graph/op_union/operators/op_ceil.zig (79%) rename src/{ => IR_zant}/IR_graph/op_union/operators/op_concat.zig (91%) rename src/{ => 
IR_zant}/IR_graph/op_union/operators/op_constant.zig (96%) rename src/{ => IR_zant}/IR_graph/op_union/operators/op_conv.zig (91%) rename src/{ => IR_zant}/IR_graph/op_union/operators/op_div.zig (79%) rename src/{ => IR_zant}/IR_graph/op_union/operators/op_elu.zig (82%) rename src/{ => IR_zant}/IR_graph/op_union/operators/op_flatten.zig (81%) rename src/{ => IR_zant}/IR_graph/op_union/operators/op_floor.zig (79%) rename src/{ => IR_zant}/IR_graph/op_union/operators/op_gather.zig (84%) rename src/{ => IR_zant}/IR_graph/op_union/operators/op_gelu.zig (79%) rename src/{ => IR_zant}/IR_graph/op_union/operators/op_gemm.zig (87%) rename src/{ => IR_zant}/IR_graph/op_union/operators/op_identity.zig (80%) rename src/{ => IR_zant}/IR_graph/op_union/operators/op_leakyRelu.zig (82%) rename src/{ => IR_zant}/IR_graph/op_union/operators/op_matMul.zig (86%) rename src/{ => IR_zant}/IR_graph/op_union/operators/op_maxPool.zig (91%) rename src/{ => IR_zant}/IR_graph/op_union/operators/op_mul.zig (81%) rename src/{ => IR_zant}/IR_graph/op_union/operators/op_neg.zig (79%) rename src/{ => IR_zant}/IR_graph/op_union/operators/op_oneHot.zig (85%) rename src/{ => IR_zant}/IR_graph/op_union/operators/op_reduceMean.zig (88%) rename src/{ => IR_zant}/IR_graph/op_union/operators/op_relu.zig (79%) rename src/{ => IR_zant}/IR_graph/op_union/operators/op_reshape.zig (90%) rename src/{ => IR_zant}/IR_graph/op_union/operators/op_resize.zig (91%) rename src/{ => IR_zant}/IR_graph/op_union/operators/op_shape.zig (85%) rename src/{ => IR_zant}/IR_graph/op_union/operators/op_sigmoid.zig (80%) rename src/{ => IR_zant}/IR_graph/op_union/operators/op_slice.zig (67%) rename src/{ => IR_zant}/IR_graph/op_union/operators/op_softmax.zig (77%) rename src/{ => IR_zant}/IR_graph/op_union/operators/op_split.zig (84%) rename src/{ => IR_zant}/IR_graph/op_union/operators/op_sqrt.zig (79%) rename src/{ => IR_zant}/IR_graph/op_union/operators/op_sub.zig (80%) rename src/{ => 
IR_zant}/IR_graph/op_union/operators/op_tanh.zig (78%) rename src/{ => IR_zant}/IR_graph/op_union/operators/op_transpose.zig (84%) rename src/{ => IR_zant}/IR_graph/op_union/operators/op_unsqueeze.zig (80%) rename src/{ => IR_zant}/IR_graph/op_union/operators/op_useless.zig (70%) rename src/{ => IR_zant}/IR_graph/op_union/operators/operators.zig (100%) rename src/{ => IR_zant}/IR_graph/tensorZant.zig (100%) rename src/{ => IR_zant}/IR_graph/utils.zig (100%) create mode 100644 src/IR_zant/IR_zant.zig diff --git a/build.zig b/build.zig index d54ee780..a7ac03f7 100644 --- a/build.zig +++ b/build.zig @@ -28,20 +28,11 @@ pub fn build(b: *std.Build) void { const zant_mod = b.createModule(.{ .root_source_file = b.path("src/zant.zig") }); zant_mod.addOptions("build_options", build_options); - const codeGen_mod = b.createModule(.{ .root_source_file = b.path("src/CodeGen/codegen.zig") }); - codeGen_mod.addImport("zant", zant_mod); - - const IR_mod = b.createModule(.{ .root_source_file = b.path("src/IR_graph/IR_graph.zig") }); - IR_mod.addImport("zant", zant_mod); - IR_mod.addImport("codegen", codeGen_mod); - - const IR_codeGen_mod = b.createModule(.{ .root_source_file = b.path("src/IR_codegen/IR_codegen.zig") }); - IR_codeGen_mod.addImport("zant", zant_mod); - IR_codeGen_mod.addImport("IR_zant", IR_mod); + const IR_zant_mod = b.createModule(.{ .root_source_file = b.path("src/IR_zant/IR_zant.zig") }); + IR_zant_mod.addImport("zant", zant_mod); const Img2Tens_mod = b.createModule(.{ .root_source_file = b.path("src/ImageToTensor/imageToTensor.zig") }); Img2Tens_mod.addImport("zant", zant_mod); - Img2Tens_mod.addImport("codegen", codeGen_mod); //************************************************UNIT TESTS************************************************ @@ -62,8 +53,7 @@ pub fn build(b: *std.Build) void { test_options.addOption([]const u8, "test_name", test_name); unit_tests.root_module.addImport("zant", zant_mod); - unit_tests.root_module.addImport("codegen", codeGen_mod); - 
unit_tests.root_module.addImport("IR_zant", IR_mod); + unit_tests.root_module.addImport("IR_zant", IR_zant_mod); unit_tests.linkLibC(); @@ -101,7 +91,7 @@ pub fn build(b: *std.Build) void { return; }; } - const user_tests_option = b.option([]const u8, "user_tests", "User tests path") orelse ""; + const user_tests_option = b.option([]const u8, "enable_user_tests", "User tests path") orelse ""; const log_option = b.option(bool, "log", "Run with log") orelse false; const shape_option = b.option([]const u8, "shape", "Input shape") orelse ""; const input_type_option = b.option([]const u8, "type", "Input type") orelse "f32"; @@ -112,39 +102,6 @@ pub fn build(b: *std.Build) void { //\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\ - // Name and path of the model - // const IR_model_name_option = b.option([]const u8, "model", "Model name") orelse "mnist-8"; - // const IR_model_path_option = b.option([]const u8, "model_path", "Model path") orelse std.fmt.allocPrint(b.allocator, "datasets/models/{s}/{s}.onnx", .{ model_name_option, model_name_option }) catch |err| { - // std.log.scoped(.build).warn("Error allocating model path: {}\n", .{err}); - // return; - // }; - // // Generated path - // var IR_generated_path_option = b.option([]const u8, "generated_path", "Generated path") orelse ""; - // if (IR_generated_path_option.len == 0) { - // IR_generated_path_option = std.fmt.allocPrint(b.allocator, "generated/{s}/", .{model_name_option}) catch |err| { - // std.log.scoped(.build).warn("Error allocating generated path: {}\n", .{err}); - // return; - // }; - // } else { - // if (!std.mem.endsWith(u8, IR_generated_path_option, "/")) { - // IR_generated_path_option = std.fmt.allocPrint(b.allocator, "{s}/", .{IR_generated_path_option}) catch |err| { - // std.log.scoped(.build).warn("Error normalizing path: {}\n", .{err}); - // return; - // }; - // } - // IR_generated_path_option = std.fmt.allocPrint(b.allocator, "{s}{s}/", .{ 
IR_generated_path_option, IR_model_path_option }) catch |err| { - // std.log.scoped(.build).warn("Error allocating generated path: {}\n", .{err}); - // return; - // }; - // } - // const IR_user_tests_option = b.option([]const u8, "user_tests", "User tests path") orelse ""; - // const IR_log_option = b.option(bool, "log", "Run with log") orelse false; - // const IR_shape_option = b.option([]const u8, "shape", "Input shape") orelse ""; - // const IR_input_type_option = b.option([]const u8, "type", "Input type") orelse "f32"; - // const IR_output_type_option = b.option([]const u8, "output_type", "Output type") orelse "f32"; - // const IR_comm_option = b.option(bool, "comm", "Codegen with comments") orelse false; - // const IR_dynamic_option = b.option(bool, "dynamic", "Dynamic allocation") orelse false; - // Define IR codegen options const IRC_options = b.addOptions(); // Model name option IRC_options.addOption([]const u8, "IR_model", model_name_option); @@ -159,12 +116,12 @@ pub fn build(b: *std.Build) void { IRC_options.addOption(bool, "IR_comm", comm_option); IRC_options.addOption(bool, "IR_dynamic", dynamic_option); - // ************************************************ CODEGEN IR ************************************************ - + // ************************************************ CODEGEN IR EXECUTABLE ************************************************ + // // Define the main executable with target architecture and optimization settings. const IR_codeGen_exe = b.addExecutable(.{ .name = "CodegenIR", - .root_source_file = b.path("src/IR_codegen/main.zig"), + .root_source_file = b.path("src/IR_zant/IR_codegen/main.zig"), .target = target, .optimize = optimize, }); @@ -173,7 +130,7 @@ pub fn build(b: *std.Build) void { // Add necessary imports for the executable. 
IR_codeGen_exe.root_module.addImport("zant", zant_mod); - IR_codeGen_exe.root_module.addImport("IR_zant", IR_mod); + IR_codeGen_exe.root_module.addImport("IR_zant", IR_zant_mod); //IR IR_codeGen_exe.root_module.addOptions("codegen_options", IRC_options); // Install the executable. @@ -189,46 +146,48 @@ pub fn build(b: *std.Build) void { const IR_codegen_step = b.step("IR_codegen", "code generation"); IR_codegen_step.dependOn(&IR_codegen_cmd.step); - // ************************************************CODEGEN EXECUTABLE************************************************ - // Define codegen options - const codegen_options = b.addOptions(); // Model name option - codegen_options.addOption([]const u8, "model", model_name_option); - codegen_options.addOption([]const u8, "model_path", model_path_option); - codegen_options.addOption([]const u8, "generated_path", generated_path_option); - codegen_options.addOption([]const u8, "user_tests", user_tests_option); - codegen_options.addOption(bool, "log", log_option); - codegen_options.addOption([]const u8, "shape", shape_option); - codegen_options.addOption([]const u8, "type", input_type_option); - codegen_options.addOption([]const u8, "output_type", output_type_option); - codegen_options.addOption(bool, "comm", comm_option); - codegen_options.addOption(bool, "dynamic", dynamic_option); + //\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\ - // Define the main executable with target architecture and optimization settings. 
- const codeGen_exe = b.addExecutable(.{ - .name = "Codegen", - .root_source_file = b.path("src/CodeGen/main.zig"), - .target = target, - .optimize = optimize, - }); + // // Define codegen options + // const codegen_options = b.addOptions(); // Model name option + // codegen_options.addOption([]const u8, "model", model_name_option); + // codegen_options.addOption([]const u8, "model_path", model_path_option); + // codegen_options.addOption([]const u8, "generated_path", generated_path_option); + // codegen_options.addOption([]const u8, "user_tests", user_tests_option); + // codegen_options.addOption(bool, "log", log_option); + // codegen_options.addOption([]const u8, "shape", shape_option); + // codegen_options.addOption([]const u8, "type", input_type_option); + // codegen_options.addOption([]const u8, "output_type", output_type_option); + // codegen_options.addOption(bool, "comm", comm_option); + // codegen_options.addOption(bool, "dynamic", dynamic_option); + + // // ************************************************CODEGEN EXECUTABLE************************************************ + // // Define the main executable with target architecture and optimization settings. + // const codeGen_exe = b.addExecutable(.{ + // .name = "Codegen", + // .root_source_file = b.path("src/CodeGen/main.zig"), + // .target = target, + // .optimize = optimize, + // }); - codeGen_exe.linkLibC(); + // codeGen_exe.linkLibC(); - // Add necessary imports for the executable. - codeGen_exe.root_module.addImport("zant", zant_mod); - codeGen_exe.root_module.addOptions("codegen_options", codegen_options); + // // Add necessary imports for the executable. + // codeGen_exe.root_module.addImport("zant", zant_mod); + // codeGen_exe.root_module.addOptions("codegen_options", codegen_options); - // Install the executable. - b.installArtifact(codeGen_exe); + // // Install the executable. + // b.installArtifact(codeGen_exe); - // Define the run command for the main executable. 
- const codegen_cmd = b.addRunArtifact(codeGen_exe); - if (b.args) |args| { - codegen_cmd.addArgs(args); - } + // // Define the run command for the main executable. + // const codegen_cmd = b.addRunArtifact(codeGen_exe); + // if (b.args) |args| { + // codegen_cmd.addArgs(args); + // } - // Create a build step to run the application. - const codegen_step = b.step("codegen", "code generation"); - codegen_step.dependOn(&codegen_cmd.step); + // // Create a build step to run the application. + // const codegen_step = b.step("codegen", "code generation"); + // codegen_step.dependOn(&codegen_cmd.step); // ************************************************ STATIC LIBRARY CREATION ************************************************ @@ -245,7 +204,7 @@ pub fn build(b: *std.Build) void { }); static_lib.linkLibC(); static_lib.root_module.addImport("zant", zant_mod); - static_lib.root_module.addImport("codegen", codeGen_mod); + static_lib.root_module.addImport("IR_zant", IR_zant_mod); //IR_codegen const install_lib_step = b.addInstallArtifact(static_lib, .{ .dest_dir = .{ .override = .{ .custom = model_name_option } } }); const lib_step = b.step("lib", "Compile tensor_math static library"); @@ -295,7 +254,7 @@ pub fn build(b: *std.Build) void { }); test_generated_lib.root_module.addImport("zant", zant_mod); - test_generated_lib.root_module.addImport("codegen", codeGen_mod); + test_generated_lib.root_module.addImport("IR_zant", IR_zant_mod); //codegen test_generated_lib.linkLibC(); const run_test_generated_lib = b.addRunArtifact(test_generated_lib); @@ -313,8 +272,8 @@ pub fn build(b: *std.Build) void { }); oneop_codegen_exe.root_module.addImport("zant", zant_mod); - IR_codeGen_mod.addOptions("codegen_options", IRC_options); - oneop_codegen_exe.root_module.addImport("IR_codegen", IR_codeGen_mod); + IR_zant_mod.addOptions("codegen_options", IRC_options); + oneop_codegen_exe.root_module.addImport("IR_zant", IR_zant_mod); //codegen oneop_codegen_exe.linkLibC(); const 
run_oneop_codegen_exe = b.addRunArtifact(oneop_codegen_exe); @@ -332,8 +291,8 @@ pub fn build(b: *std.Build) void { }); test_all_oneOp.root_module.addImport("zant", zant_mod); - IR_codeGen_mod.addOptions("codegen_options", IRC_options); - test_all_oneOp.root_module.addImport("codegen", IR_codeGen_mod); + IR_zant_mod.addOptions("codegen_options", IRC_options); + test_all_oneOp.root_module.addImport("IR_zant", IR_zant_mod); //codegen test_all_oneOp.linkLibC(); const run_test_all_oneOp = b.addRunArtifact(test_all_oneOp); @@ -347,29 +306,6 @@ pub fn build(b: *std.Build) void { const step_test_oneOp = b.step("test-codegen", "Run generated library tests"); step_test_oneOp.dependOn(&run_test_all_oneOp.step); - // ************************************************ - // Write Op Test - - // const write_op_test = b.addExecutable(.{ - // .name = "test_write_op", - // .root_source_file = b.path("tests/IR_graph/test_write_op.zig"), - // .target = target, - // .optimize = optimize, - // }); - - // write_op_test.root_module.addImport("zant", zant_mod); - // write_op_test.root_module.addImport("codegen", codeGen_mod); - // write_op_test.root_module.addImport("IR_zant", IR_mod); - // write_op_test.linkLibC(); - - // const run_write_op_test = b.addRunArtifact(write_op_test); - // if (b.args) |args| { - // run_write_op_test.addArgs(args); - // } - - // const write_op_step = b.step("run-test-write-op", "Run the write_op test on a model"); - // write_op_step.dependOn(&run_write_op_test.step); - // ************************************************ // Benchmark @@ -408,24 +344,7 @@ pub fn build(b: *std.Build) void { const step_test_onnx_parser = b.step("onnx-parser", "Run generated library tests"); step_test_onnx_parser.dependOn(&run_test_onnx_parser.step); - // ************************************************ WRITE OP TESTS ************************************************ - - // Test write_op on all oneOp models - const test_all_write_op = b.addTest(.{ - .name = "test_all_write_op", - 
.root_source_file = b.path("tests/IR_graph/test_all_write_op.zig"), - .target = target, - .optimize = optimize, - }); - - test_all_write_op.root_module.addImport("zant", zant_mod); - test_all_write_op.root_module.addImport("codegen", codeGen_mod); - test_all_write_op.root_module.addImport("IR_zant", IR_mod); - test_all_write_op.linkLibC(); - - const run_test_all_write_op = b.addRunArtifact(test_all_write_op); - const test_all_write_op_step = b.step("test-all-write-op", "Run write_op test on all oneOp models"); - test_all_write_op_step.dependOn(&run_test_all_write_op.step); + // ************************************************ MAIN EXECUTABLE (for profiling) ************************************************ // Path to the generated model options file (moved here) const model_options_path = std.fmt.allocPrint(b.allocator, "{s}model_options.zig", .{generated_path_option}) catch |err| { @@ -433,8 +352,6 @@ pub fn build(b: *std.Build) void { return; }; - // ************************************************ MAIN EXECUTABLE (for profiling) ************************************************ - const main_executable = b.addExecutable(.{ .name = "main_profiling_target", .target = target, @@ -447,7 +364,7 @@ pub fn build(b: *std.Build) void { .root_source_file = b.path(model_options_path), }); model_opts_mod.addImport("zant", zant_mod); - model_opts_mod.addImport("codegen", codeGen_mod); + model_opts_mod.addImport("IR_zant", IR_zant_mod); main_executable.root_module.addImport("model_opts", model_opts_mod); const install_main_exe_step = b.addInstallArtifact(main_executable, .{}); // Installa l'eseguibile diff --git a/src/CodeGen/codegen.zig b/src/CodeGen/codegen.zig deleted file mode 100644 index a92b5115..00000000 --- a/src/CodeGen/codegen.zig +++ /dev/null @@ -1,9 +0,0 @@ -pub const math_handler = @import("math_handler.zig"); -pub const shape_handler = @import("shape_handler.zig"); -pub const parameters = @import("parameters.zig"); -pub const predict = @import("predict.zig"); 
-pub const skeleton = @import("skeleton.zig"); -pub const globals = @import("globals.zig"); -pub const utils = @import("utils.zig"); -pub const tests = @import("tests.zig"); -pub const zant_codegen = @import("main.zig").zant_codegen; diff --git a/src/CodeGen/globals.zig b/src/CodeGen/globals.zig deleted file mode 100644 index 632d6d87..00000000 --- a/src/CodeGen/globals.zig +++ /dev/null @@ -1,427 +0,0 @@ -const std = @import("std"); -const zant = @import("zant"); -const Tensor = zant.tensor.Tensor; -const onnx = zant.onnx; -const ModelOnnx = onnx.ModelProto; -const DataType = onnx.DataType; -//--- proto -const TensorProto = onnx.TensorProto; -const NodeProto = onnx.NodeProto; -const GraphProto = onnx.GraphProto; -const AttributeProto = onnx.AttributeProto; -const allocator = zant.utils.allocator.allocator; - -//--- other -const codegen = @import("codegen.zig"); -const utils = codegen.utils; -const mathGen = codegen.math_handler; -const shapeGen = codegen.shape_handler; -const codegen_options = @import("codegen_options"); -const globals_log = std.log.scoped(.globals); - -pub var readyGraph: std.ArrayList(ReadyNode) = std.ArrayList(ReadyNode).init(allocator); -pub var tensorHashMap: std.StringHashMap(ReadyTensor) = std.StringHashMap(ReadyTensor).init(allocator); //key: TensorProto.name -// Map from tensor name to remaining use count in generated predict -pub var tensorUseCount: std.StringHashMap(usize) = std.StringHashMap(usize).init(allocator); - -pub var onnxModel: ModelOnnx = undefined; //initialized in setGlobalAttributes(), it is mandatory - -pub const io_struct = struct { - name: []const u8, - shape: []const i64, -}; - -pub var networkInput = io_struct{ - .name = "", - .shape = &[_]i64{}, -}; - -pub var networkOutput = io_struct{ - .name = "", - .shape = &[_]i64{}, -}; -// DataType of the network input tensor (derived from ONNX graph) -// String form of the network input element type (e.g. "f32", "u8", etc.) 
-pub var networkInputTypeString: []const u8 = ""; -// Add a global variable to store the actual DataType enum value -pub var networkInputDataType: DataType = .UNDEFINED; - -pub var inputType: type = f32; - -pub const TensorTag = enum { - INITIALIZER, - CONSTANT, - INPUT, - OUTPUT, - LINK, //with "LINK" I mean a tensor that is used to link two nodes, it means that is is the output of a node and the input of another -}; - -// Struct to represent a tensor that is ready for computation -pub const ReadyTensor = struct { - name: []const u8, - ready: bool, - shape: []const i64, - dtype: DataType = .UNDEFINED, - tensorProto: ?*TensorProto = null, - tag: TensorTag = TensorTag.LINK, - - pub fn createInitializer(tensorProto: *TensorProto) !ReadyTensor { - return ReadyTensor{ - .name = tensorProto.name.?, - .ready = true, - .shape = tensorProto.dims, - .tensorProto = tensorProto, - .tag = TensorTag.INITIALIZER, - }; - } - - pub fn createInput(name: []const u8) !ReadyTensor { - return ReadyTensor{ - .name = name, - .ready = true, - .shape = networkInput.shape, - .tensorProto = null, - .tag = TensorTag.INPUT, - }; - } - - pub fn createConstant(name: []const u8, tensorProto: *TensorProto) !ReadyTensor { - return ReadyTensor{ - .name = name, - .ready = true, - .shape = networkInput.shape, - .tensorProto = tensorProto, - .tag = TensorTag.CONSTANT, - }; - } - - pub fn createLink(name: []const u8) !ReadyTensor { - return ReadyTensor{ //default - .name = name, - .ready = false, - .shape = networkInput.shape, - .tensorProto = null, - .tag = TensorTag.LINK, - }; - } - - pub fn print(tensor: *ReadyTensor, detailed: bool) void { - globals_log.info("\n READY TENSOR : {s}", .{tensor.name}); - globals_log.info("\n status:{s}ready", .{if (!tensor.ready) " not " else " "}); - globals_log.info("\n tag: {any}", .{tensor.tag}); - globals_log.info("\n shape: {any}", .{tensor.shape}); - if (detailed) if (tensor.tensorProto) |tp| tp.print(" ") else globals_log.info("\n tensor.tensorProto :(null)", 
.{}); - } -}; - -// Struct representing a computational node in the ONNX model -pub const ReadyNode = struct { - nodeProto: *NodeProto, - inputs: std.ArrayList(?*ReadyTensor), - outputs: std.ArrayList(*ReadyTensor), - ready: bool, - - // Creates a ReadyNode by preparing its input and output tensors - pub fn create(nodeProto: *NodeProto) !ReadyNode { - // globals_log.info("\n\nReadyNode.create() --> {s}", .{nodeProto.name.?}); - var newReadyNode = ReadyNode{ - .nodeProto = nodeProto, - .inputs = std.ArrayList(?*ReadyTensor).init(allocator), - .outputs = std.ArrayList(*ReadyTensor).init(allocator), - .ready = false, - }; - - for (nodeProto.input) |input_name| { //for each input tensor in NodeProto - - //adding the readyTensor to the model - if (std.mem.eql(u8, input_name, "")) { - try newReadyNode.inputs.append(null); - } else { - try newReadyNode.inputs.append(if (tensorHashMap.getPtr(input_name)) |V_ptr| V_ptr else return error.keyNotAvailable); - } - } - for (nodeProto.output) |output_name| { //for each output tensor - - //adding the readyTensor to the model - try newReadyNode.outputs.append(if (tensorHashMap.getPtr(output_name)) |V_ptr| V_ptr else return error.keyNotAvailable); - // globals_log.info("\n added output {s} to node {s} ", .{ output_name, nodeProto.name.? 
}); - } - - // -- COMPUTING THE OUTPUT SHAPE -- - try shapeGen.compute_output_shape(&newReadyNode); - - return newReadyNode; - } - - pub fn print(node: *ReadyNode, detailed: bool) void { - globals_log.info("\n ------ READY NODE : ", .{}); - if (detailed) node.nodeProto.print(" ") else globals_log.info("\n {s} ", .{node.nodeProto.name.?}); - globals_log.info("\n ---inputs : ", .{}); - for (node.inputs.items) |in| if (in) |i| i.print(detailed) else globals_log.info("\n NULL INPUT", .{}); - globals_log.info("\n ---outputs : ", .{}); - for (node.outputs.items) |out| out.print(detailed); - } -}; - -pub fn setGlobalAttributes(model: ModelOnnx) !void { - //initializing global attributes - onnxModel = model; - - //ready graph - readyGraph.deinit(); - readyGraph = std.ArrayList(ReadyNode).init(allocator); - - //hash map - tensorHashMap.deinit(); - tensorHashMap = std.StringHashMap(ReadyTensor).init(allocator); - - //First convert the optional String of numbers divided by a comma into an array - const parsedInputshape: []const i64 = try utils.parseNumbers(codegen_options.shape); - - //setting the input - const inputs = model.graph.?.inputs; - networkInput.name = inputs[0].name.?; - // record input shape - networkInput.shape = inputs[0].type.?.tensor_type.?.shape.?.shape; - // Derive and store the input element type string (e.g., "f32", "u8") - const raw_et: u32 = inputs[0].type.?.tensor_type.?.elem_type; - const int_val = @as(i32, @intCast(raw_et)); - const input_dt = @as(DataType, @enumFromInt(int_val)); - // Store the calculated DataType globally - networkInputDataType = input_dt; - networkInputTypeString = try utils.getTypeString(input_dt); - - //setting the output - const outputs = model.graph.?.outputs; - globals_log.info("\n SETTING networkOutput \n name = {s} \n shape={any}", .{ outputs[0].name.?, outputs[0].type.?.tensor_type.?.shape.?.shape }); - networkOutput.name = outputs[0].name.?; - networkOutput.shape = outputs[0].type.?.tensor_type.?.shape.?.shape; - - // Use 
-Dshape if provided, otherwise keep the ONNX model's shape - if (parsedInputshape.len > 0) { - networkInput.shape = parsedInputshape; - } else if (networkInput.shape.len == 0) { - globals_log.warn("\n\n ERROR: \n Input shape is necessary to proceed! \n Ensure that the onnx model has one or compile with -Dshape=''''", .{}); - return error.NoInputShape; - } - - // Print the final input details AFTER potentially overriding shape - globals_log.info("\n FINAL networkInput \n name = {s} \n shape={any}", .{ networkInput.name, networkInput.shape }); - - //create the hashMap - try populateReadyTensorHashMap(model); - - //create the ReadyGraph - try populateReadyGraph(model); - // Initialize the tensor use counts (number of times each tensor is consumed) - tensorUseCount.deinit(); - tensorUseCount = std.StringHashMap(usize).init(allocator); - for (readyGraph.items) |*node| { - for (node.inputs.items) |input_opt| { - if (input_opt) |input| { - const name = input.name; - if (name.len > 0) { - const old_count = if (tensorUseCount.getPtr(name)) |ptr| ptr.* else 0; - try tensorUseCount.put(name, old_count + 1); - } - } - } - } - - globals_log.info("\n NODE: {s}", .{model.graph.?.nodes[0].output[0]}); -} - -// ----------------------- HASH MAP ----------------------- -// Populates tensorHashMap with the tensors used in the onnx graph, where the key is the name of the tensor -fn populateReadyTensorHashMap(model: ModelOnnx) !void { - const protoGraph = try if (model.graph) |graph| graph else error.GraphNotAvailable; - - //adding initializers to the hash map - for (protoGraph.initializers) |init_ptr| { - //create the readyTensor - var readyTensor: ReadyTensor = try ReadyTensor.createInitializer(init_ptr); - readyTensor.dtype = init_ptr.data_type; - //add the readyTensor to the HashMap - try tensorHashMap.put(readyTensor.name, readyTensor); - } - - //adding all the nodes inputs and outputs - for (protoGraph.nodes) |node| { //for each NodeProto in the GraphProto - for (node.input) 
|input_name| { - try addToTensorHashMap(input_name, node, protoGraph); - } - for (node.output) |output_name| { - try addToTensorHashMap(output_name, node, protoGraph); - } - } -} - -pub fn addToTensorHashMap(name: []const u8, nodeProto: *NodeProto, graph: *GraphProto) !void { - if (tensorHashMap.get(name) != null or std.mem.eql(u8, name, "")) { - return; - } else { - var readyTensor: ReadyTensor = undefined; - var tensor_dtype: DataType = .UNDEFINED; - - //if input - if (utils.isInput(name)) { - readyTensor = try ReadyTensor.createInput(name); - // Find dtype from graph inputs - // Attempt to read the data type from graph inputs - for (graph.inputs) |graph_input| { - if (std.mem.eql(u8, graph_input.name.?, name)) { - const raw_et: u32 = graph_input.type.?.tensor_type.?.elem_type; - const int_val_in = @as(i32, @intCast(raw_et)); - tensor_dtype = @as(DataType, @enumFromInt(int_val_in)); - break; - } - } - } - //if constant, pay attention, we add the Constatant only if it is a TENSOR (aka AttributeProto.t) - else if (std.mem.eql(u8, nodeProto.op_type, "Constant")) { - //add the readyTensor to the HashMap - if (nodeProto.attribute.len > 0 and nodeProto.attribute[0].type == onnx.AttributeType.TENSOR) { - const const_tensor_proto = nodeProto.attribute[0].t.?; - readyTensor = try ReadyTensor.createConstant(name, const_tensor_proto); - tensor_dtype = const_tensor_proto.data_type; - } else { - // Handle non-tensor constants if necessary, or assume LINK for now - readyTensor = try ReadyTensor.createLink(name); - // Try to find dtype from value_info for non-tensor constants if needed - // Try to infer dtype from value_info - for (graph.value_info) |vi| { - if (vi.name) |vi_name| { - if (std.mem.eql(u8, vi_name, name)) { - if (vi.type) |t| { - if (t.tensor_type) |tt| { - const raw_et_vi_link = tt.elem_type; - const int_val_vi_link = @as(i32, @intCast(raw_et_vi_link)); - tensor_dtype = @as(DataType, @enumFromInt(int_val_vi_link)); - break; - } - } - break; - } - } - } - } - } - 
//else default (LINK) - else { - readyTensor = try ReadyTensor.createLink(name); - // Find dtype from value_info for LINK tensors - var found_in_value_info = false; - // Also check value_info for LINK tensors - for (graph.value_info) |vi| { - // Check if vi.name matches and is not null - if (vi.name) |vi_name| { - if (std.mem.eql(u8, vi_name, name)) { - // Safely access type and tensor_type - if (vi.type) |t| { - if (t.tensor_type) |tt| { - const raw_et_vi_link = tt.elem_type; - const int_val_vi_link = @as(i32, @intCast(raw_et_vi_link)); - tensor_dtype = @as(DataType, @enumFromInt(int_val_vi_link)); - found_in_value_info = true; - // Found the type, exit the loop - break; - } - } - // Break if name matches, even if type info wasn't found/complete - break; - } - } - } - // Also check graph outputs if not found in value_info - if (!found_in_value_info) { - // Finally check graph outputs - for (graph.outputs) |graph_output| { - // Check if graph_output.name matches and is not null - if (graph_output.name) |output_name| { - if (std.mem.eql(u8, output_name, name)) { - // Safely access type and tensor_type - if (graph_output.type) |t| { - if (t.tensor_type) |tt| { - const raw_et_out = tt.elem_type; - const int_val_out = @as(i32, @intCast(raw_et_out)); - tensor_dtype = @as(DataType, @enumFromInt(int_val_out)); - // Found the type, exit the loop - break; - } - } - // Break if name matches, even if type info wasn't found/complete - break; - } - } - } - } - - // --- START HEURISTIC FALLBACK FOR SHAPE TENSORS --- - // If type is still undefined, check common shape tensor naming patterns - if (tensor_dtype == .UNDEFINED) { - if (std.mem.endsWith(u8, name, "_shape")) { - globals_log.info("\nINFO: Tensor '{s}' type is UNDEFINED. 
Defaulting to INT64 based on name pattern (likely a shape tensor).", .{name}); - tensor_dtype = .INT64; // Default to INT64 for likely shape tensors - } - } - // --- END HEURISTIC FALLBACK --- - } - - // --- START TYPE OVERRIDE FOR SPECIFIC OPS --- - if (std.mem.eql(u8, nodeProto.op_type, "DynamicQuantizeLinear")) { - if (nodeProto.output.len >= 3) { // Check if node has expected outputs - if (std.mem.eql(u8, name, nodeProto.output[0])) { - globals_log.info("\nINFO: Overriding dtype for DynamicQuantizeLinear output y '{s}' to UINT8.", .{name}); - tensor_dtype = .UINT8; // y output is u8 - } else if (std.mem.eql(u8, name, nodeProto.output[1])) { - globals_log.info("\nINFO: Overriding dtype for DynamicQuantizeLinear output y_scale '{s}' to FLOAT.", .{name}); - tensor_dtype = .FLOAT; // y_scale output is f32 - } else if (std.mem.eql(u8, name, nodeProto.output[2])) { - globals_log.info("\nINFO: Overriding dtype for DynamicQuantizeLinear output y_zero_point '{s}' to UINT8.", .{name}); - tensor_dtype = .UINT8; // y_zero_point output is u8 - } - } - } - // Add specific override for ConvInteger output type - else if (std.mem.eql(u8, nodeProto.op_type, "ConvInteger")) { - if (nodeProto.output.len > 0 and std.mem.eql(u8, name, nodeProto.output[0])) { - globals_log.info("\nINFO: Overriding dtype for ConvInteger output '{s}' to INT32.", .{name}); - tensor_dtype = .INT32; // ConvInteger output is always i32 - } - } - // Add overrides for other ops if needed (e.g., Cast might benefit too) - // --- END TYPE OVERRIDE FOR SPECIFIC OPS --- - - if (tensor_dtype == .UNDEFINED) { - globals_log.warn("\nWARNING: Could not determine dtype for tensor '{s}' (Node: {s}). 
Defaulting to FLOAT.", .{ name, nodeProto.name orelse "unnamed" }); - // Assign a default type instead of leaving it undefined - tensor_dtype = .FLOAT; - // Optionally return an error here if type is mandatory - // return error.DataTypeNotFoundForTensor; - } - - readyTensor.dtype = tensor_dtype; - //add the readyTensor to the HashMap - try tensorHashMap.put(name, readyTensor); - } -} - -// ----------------------- READY GRAPH ----------------------- -// Creates a graph representation with all nodes in a ready-to-compute state -fn populateReadyGraph(model: ModelOnnx) !void { - const graph = try if (model.graph) |graph| graph else error.GraphNotAvailable; - - for (graph.nodes) |node_ptr| { //for each NodeProto in the GraphProto - - try readyGraph.append(try ReadyNode.create(node_ptr)); - } -} -// Decrements the remaining use count for a tensor. Returns updated count (0 if none or unknown). -pub fn decrementUseCount(name: []const u8) usize { - if (tensorUseCount.getPtr(name)) |ptr| { - ptr.* -= 1; - return ptr.*; - } else { - return 0; - } -} diff --git a/src/CodeGen/main.zig b/src/CodeGen/main.zig deleted file mode 100644 index f56798c4..00000000 --- a/src/CodeGen/main.zig +++ /dev/null @@ -1,57 +0,0 @@ -const std = @import("std"); -const zant = @import("zant"); -const onnx = zant.onnx; -const Tensor = zant.core.tensor.Tensor; -const tensorMath = zant.core.tensor.math_standard; -const allocator = zant.utils.allocator.allocator; -const codeGen = @import("codegen.zig"); -const codeGen_utils = codeGen.utils; -const codeGen_init = codeGen.parameters; -const codeGen_mathHandl = codeGen.math_handler; -const codeGen_predict = codeGen.predict; -const codeGen_tests = codeGen.tests; - -const codegen_options = @import("codegen_options"); -const globals = codeGen.globals; - -pub fn main() !void { - var gpa = std.heap.GeneralPurposeAllocator(.{}){}; - defer _ = gpa.deinit(); - const gpa_allocator = gpa.allocator(); - - const model_name = codegen_options.model; - const model_path = 
codegen_options.model_path; - - var model = try onnx.parseFromFile(gpa_allocator, model_path); - defer model.deinit(gpa_allocator); - - model.print(); - - // Create the generated model directory if not present - const generated_path = codegen_options.generated_path; - //const generated_path = "src/codeGen/"; - try std.fs.cwd().makePath(generated_path); - - // ONNX model parsing - try globals.setGlobalAttributes(model); - - //DEBUG - //utils.printTensorHashMap(tensorHashMap); - - //DEBUG - //try utils.printOperations(model.graph.?); - - //DEBUG - //try utils.printNodeList(readyGraph); - - ////////////////////////////////////////// - - // Create the code for the model - try codeGen.skeleton.writeZigFile(model_name, generated_path, model, true); - - // Test the generated code - try codeGen_tests.writeTestFile(model_name, generated_path); - - //PRINTING DETAILS OF THE MODEL - try onnx.printModelDetails(&model); -} diff --git a/src/CodeGen/math_handler.zig b/src/CodeGen/math_handler.zig deleted file mode 100644 index ed6d4419..00000000 --- a/src/CodeGen/math_handler.zig +++ /dev/null @@ -1,3666 +0,0 @@ -const std = @import("std"); -const os = std.os; - -const zant = @import("zant"); - -const Tensor = zant.core.tensor.Tensor; -const tensorMath = zant.core.tensor.math_standard; -const onnx = zant.onnx; -const ModelOnnx = onnx.ModelProto; -const DataType = onnx.DataType; -const allocator = zant.utils.allocator.allocator; - -const mathHandler_log = std.log.scoped(.mathHandler); - -// --- proto libs -const TensorProto = onnx.TensorProto; -const NodeProto = onnx.NodeProto; -const GraphProto = onnx.GraphProto; -const AttributeType = onnx.AttributeType; - -// --- codeGen libs -const ReadyNode = @import("globals.zig").ReadyNode; -const ReadyTensor = @import("globals.zig").ReadyTensor; -const codegen = @import("codegen.zig"); -const utils = codegen.utils; -const parameters = codegen.parameters; -const codegen_options = @import("codegen_options"); -const globals = 
@import("globals.zig"); - -// ----------------------------------- MATH ----------------------------------- - -/// This method map and write the ONNX operations with the Zant LeanTensorMath mathods -/// Follow the link for details: https://onnx.ai/onnx/operators/?utm_source=chatgpt.com -pub fn write_math_op(writer: std.fs.File.Writer, node: *ReadyNode) !void { - // Dynamic allocation of intermediate output tensors if requested - if (codegen_options.dynamic) { - for (node.outputs.items) |output| { - const san_name = try utils.getSanitizedName(output.name); - const type_str = try utils.getTypeString(output.dtype); - const dims = output.shape; - // Emit shape constant for this output - _ = try writer.print(" var shape_{s} = [_]usize{{", .{san_name}); - for (dims, 0..) |dim, i| { - if (i != 0) _ = try writer.print(", ", .{}); - _ = try writer.print("{d}", .{dim}); - } - _ = try writer.print("}};", .{}); - // Allocate tensor on heap - _ = try writer.print(" var tensor_{s} = Tensor({s}).fromShape(&allocator, &shape_{s}) catch return;", .{ san_name, type_str, san_name }); - // Defer deinitialization ONLY if it's not the final network output - if (!std.mem.eql(u8, output.name, globals.networkOutput.name)) { - _ = try writer.print(" defer tensor_{s}.deinit();\n", .{san_name}); - } - } - } - if (codegen_options.comm) { - try write_op_info(writer, node); - } - if (codegen_options.log) { - try writer.print( - \\ - \\ - \\ if (log_function) |log| {{ - \\ log(@constCast(@ptrCast("Running {s} operation...\n"))); - \\ }} - , .{node.*.nodeProto.*.op_type}); - } - - if (std.mem.eql(u8, node.nodeProto.op_type, "Add")) { - try write_add(writer, node); - } else if (std.mem.eql(u8, node.nodeProto.op_type, "AveragePool")) { - try write_averagePool(writer, node); - } else if (std.mem.eql(u8, node.nodeProto.op_type, "BatchNormalization")) { - try write_BatchNormalization(writer, node); - } else if (std.mem.eql(u8, node.nodeProto.op_type, "Ceil")) { - try write_ceil(writer, node); - } else 
if (std.mem.eql(u8, node.nodeProto.op_type, "Clip")) { - try write_clip(writer, node); - } else if (std.mem.eql(u8, node.nodeProto.op_type, "Concat")) { - try write_concat(writer, node); - } else if (std.mem.eql(u8, node.nodeProto.op_type, "Constant")) { - try write_constant(writer, node); - } else if (std.mem.eql(u8, node.nodeProto.op_type, "Conv")) { - try write_conv(writer, node); - } else if (std.mem.eql(u8, node.nodeProto.op_type, "ConvInteger")) { - try write_convInteger(writer, node); - } else if (std.mem.eql(u8, node.nodeProto.op_type, "Div")) { - try write_div(writer, node); - } else if (std.mem.eql(u8, node.nodeProto.op_type, "DynamicQuantizeLinear")) { - try write_dynamicQuantizeLinear(writer, node); - } else if (std.mem.eql(u8, node.nodeProto.op_type, "Elu")) { - try write_elu(writer, node); - } else if (std.mem.eql(u8, node.nodeProto.op_type, "Flatten")) { - try write_flatten(writer, node); - } else if (std.mem.eql(u8, node.nodeProto.op_type, "Floor")) { - try write_floor(writer, node); - } else if (std.mem.eql(u8, node.nodeProto.op_type, "Squeeze")) { - try write_squeeze(writer, node); - } else if (std.mem.eql(u8, node.nodeProto.op_type, "Gather")) { - try write_gather(writer, node); - } else if (std.mem.eql(u8, node.nodeProto.op_type, "Gemm")) { - try write_gemm(writer, node); - } else if (std.mem.eql(u8, node.nodeProto.op_type, "Gelu")) { - try write_gelu(writer, node); - } else if (std.mem.eql(u8, node.nodeProto.op_type, "Identity")) { - try write_identity(writer, node); - } else if (std.mem.eql(u8, node.nodeProto.op_type, "LeakyRelu")) { - try write_leaky_relu(writer, node); - } else if (std.mem.eql(u8, node.nodeProto.op_type, "LogSoftmax")) { - try writer.writeAll("// Handle LogSoftmax\n"); - } else if (std.mem.eql(u8, node.nodeProto.op_type, "MatMul")) { - try write_matmul(writer, node); - } else if (std.mem.eql(u8, node.nodeProto.op_type, "MaxPool")) { - try write_maxPool(writer, node); - } else if (std.mem.eql(u8, node.nodeProto.op_type, 
"Mul")) { - try write_mul(writer, node); - } else if (std.mem.eql(u8, node.nodeProto.op_type, "Neg")) { - try write_neg(writer, node); - } else if (std.mem.eql(u8, node.nodeProto.op_type, "OneHot")) { - try write_oneHot(writer, node); - } else if (std.mem.eql(u8, node.nodeProto.op_type, "Pad")) { - try write_pads(writer, node); - } else if (std.mem.eql(u8, node.nodeProto.op_type, "ReduceMean")) { - try write_reduceMean(writer, node); - } else if (std.mem.eql(u8, node.nodeProto.op_type, "Relu")) { - try write_ReLU(writer, node); - } else if (std.mem.eql(u8, node.nodeProto.op_type, "Reshape")) { - try write_reshape(writer, node); - } else if (std.mem.eql(u8, node.nodeProto.op_type, "Resize")) { - try write_resize(writer, node); - } else if (std.mem.eql(u8, node.nodeProto.op_type, "Sigmoid")) { - try write_sigmoid(writer, node); - } else if (std.mem.eql(u8, node.nodeProto.op_type, "Softmax")) { - try write_softmax(writer, node); - } else if (std.mem.eql(u8, node.nodeProto.op_type, "Slice")) { - try write_slice(writer, node); - } else if (std.mem.eql(u8, node.nodeProto.op_type, "Split")) { - try write_split(writer, node); - } else if (std.mem.eql(u8, node.nodeProto.op_type, "Sqrt")) { - try write_sqrt(writer, node); - } else if (std.mem.eql(u8, node.nodeProto.op_type, "Sub")) { - try write_sub(writer, node); - } else if (std.mem.eql(u8, node.nodeProto.op_type, "Sum")) { - try write_sum(writer, node); - } else if (std.mem.eql(u8, node.nodeProto.op_type, "Tanh")) { - try write_tanh(writer, node); - } else if (std.mem.eql(u8, node.nodeProto.op_type, "Transpose")) { - try write_transpose(writer, node); - } else if (std.mem.eql(u8, node.nodeProto.op_type, "Shape")) { - try write_shape(writer, node); - } else if (std.mem.eql(u8, node.nodeProto.op_type, "Unsqueeze")) { - try write_unsqueeze(writer, node); - } else if (std.mem.eql(u8, node.nodeProto.op_type, "Mean")) { - try write_mean(writer, node); - } else if (std.mem.eql(u8, node.nodeProto.op_type, "Cast")) { - try 
write_cast(writer, node); - } else { - // Stub for unsupported operations: generate unreachable at runtime - _ = try writer.print( - \\ - \\ // Operation {s} not supported, inserting stub - \\ unreachable("Unsupported op: {s}"); - , .{ node.nodeProto.op_type, node.nodeProto.op_type }); - return; - } - - try writer.writeAll(" catch return;"); -} - -fn write_op_info(writer: std.fs.File.Writer, node: *ReadyNode) !void { - try writer.print( - \\ - \\ - \\ //forwarding operation : {s} - \\ //parameters: - \\ // inputs: - , .{node.*.nodeProto.*.op_type}); - - //write the inputs - for (node.inputs.items) |input| { - try writer.print( - \\ - \\ // -> {s} - , .{input.?.name}); - } - try writer.print( - \\ - \\ // outputs: - , .{}); - - //write the outputs - for (node.outputs.items) |output| { - try writer.print( - \\ - \\ // <- {s} - , .{output.name}); - } -} - -inline fn write_add(writer: std.fs.File.Writer, node: *ReadyNode) !void { - // https://onnx.ai/onnx/operators/onnx__Add.html - // INPUTS: - // - A (heterogeneous) - T: First operand. - // - B (heterogeneous) - T: Second operand. - // OUTPUTS: - // - C (heterogeneous) - T: Result, has same element type as two inputs. 
- - //----create tensor_A_string - var tensor_A_string: []u8 = undefined; - defer allocator.free(tensor_A_string); - - if (node.inputs.items[0].?.tag == globals.TensorTag.INITIALIZER) { - tensor_A_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", - try utils.getSanitizedName(node.inputs.items[0].?.name), - ")", - }); - } else { - tensor_A_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "@constCast(&tensor_", try utils.getSanitizedName(node.inputs.items[0].?.name), ")" }); - } - - //----create tensor_B_string - var tensor_B_string: []u8 = undefined; - defer allocator.free(tensor_B_string); - if (node.inputs.items[1].?.tag == globals.TensorTag.INITIALIZER) { - tensor_B_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", - try utils.getSanitizedName(node.inputs.items[1].?.name), - ")", - }); - } else { - tensor_B_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "@constCast(&tensor_", try utils.getSanitizedName(node.inputs.items[1].?.name), ")" }); - } - - _ = try writer.print( - \\ - \\ - \\ tensMath.sum_tensors_lean(T, T, {s}, {s}, &tensor_{s}) - , .{ - tensor_A_string, // Input tensor A - tensor_B_string, // Input tensor B - try utils.getSanitizedName(node.outputs.items[0].name), // Output tensor C - }); -} - -inline fn write_BatchNormalization(writer: std.fs.File.Writer, node: *ReadyNode) !void { - // https://onnx.ai/onnx/operators/onnx__BatchNormalization.html - // INPUTS: - // - X (heterogeneous) - T: Input data tensor from the previous operator; dimensions are in the form of (N x C x D1 x D2 … Dn), where N is the batch size, C is the number of channels. Statistics are computed for every channel of C over N and D1 to Dn dimensions. For image data, input dimensions become (N x C x H x W). The op also accepts single dimension input of size N in which case C is assumed to be 1 - // - scale (heterogeneous) - T1: Scale tensor of shape ©. 
- // - B (heterogeneous) - T1: Bias tensor of shape ©. - // - input_mean (heterogeneous) - T2: running (training) or estimated (testing) mean tensor of shape ©. - // - input_var (heterogeneous) - T2: running (training) or estimated (testing) variance tensor of shape ©. - // OUTPUT: - // - Y (heterogeneous) - T: The output tensor of the same shape as X - // ATTRIBUTES: - // - epsilon - FLOAT (default is '1e-05'): The epsilon value to use to avoid division by zero. - // - momentum - FLOAT (default is '0.9'): Factor used in computing the running mean and variance.e.g., running_mean = running_mean * momentum + mean * (1 - momentum). - // - training_mode - INT (default is '0'): If set to true, it indicates BatchNormalization is being used for training, and outputs 1 and 2 are to be computed. - - var epsilon: f32 = 1e-05; - var momentum: f32 = 0.9; - // var training_mode: bool = false; -> NOT USED, ALWAYS FALSE for Zant - - for (node.nodeProto.attribute) |attr| { - if (std.mem.indexOf(u8, attr.name, "epsilon")) |_| { - if (attr.type == AttributeType.FLOAT) epsilon = attr.f else return error.BatchNorm_epsilon_NotFloat; - } else if (std.mem.indexOf(u8, attr.name, "momentum")) |_| { - if (attr.type == AttributeType.FLOAT) momentum = attr.f else return error.BatchNorm_momentum_NotFloat; - } else if (std.mem.indexOf(u8, attr.name, "training_mode")) |_| { - if (attr.type == AttributeType.INT) if (attr.i != 0) return error.BatchNorm_training_NotAvailable; - } - } - - //----create tensor_X_string - var tensor_X_string: []u8 = undefined; - defer allocator.free(tensor_X_string); - - if (node.inputs.items[0].?.tag == globals.TensorTag.INITIALIZER) { - tensor_X_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", - try utils.getSanitizedName(node.inputs.items[0].?.name), - ")", - }); - } else { - tensor_X_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "@constCast(&tensor_", try utils.getSanitizedName(node.inputs.items[0].?.name), ")" 
}); - } - - //----create tensor_scale_string - var tensor_scale_string: []u8 = undefined; - defer allocator.free(tensor_scale_string); - - if (node.inputs.items[1].?.tag == globals.TensorTag.INITIALIZER) { - tensor_scale_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", - try utils.getSanitizedName(node.inputs.items[1].?.name), - ")", - }); - } else { - tensor_scale_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "@constCast(&tensor_", try utils.getSanitizedName(node.inputs.items[1].?.name), ")" }); - } - - //----create tensor_B_string - var tensor_B_string: []u8 = undefined; - defer allocator.free(tensor_B_string); - - if (node.inputs.items[2].?.tag == globals.TensorTag.INITIALIZER) { - tensor_B_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", - try utils.getSanitizedName(node.inputs.items[2].?.name), - ")", - }); - } else { - tensor_B_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "@constCast(&tensor_", try utils.getSanitizedName(node.inputs.items[2].?.name), ")" }); - } - - //----create tensor_input_mean_string - var tensor_input_mean_string: []u8 = undefined; - defer allocator.free(tensor_input_mean_string); - - if (node.inputs.items[3].?.tag == globals.TensorTag.INITIALIZER) { - tensor_input_mean_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", - try utils.getSanitizedName(node.inputs.items[3].?.name), - ")", - }); - } else { - tensor_input_mean_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "@constCast(&tensor_", try utils.getSanitizedName(node.inputs.items[3].?.name), ")" }); - } - - //----create tensor_input_var_string - var tensor_input_var_string: []u8 = undefined; - defer allocator.free(tensor_input_var_string); - - if (node.inputs.items[4].?.tag == globals.TensorTag.INITIALIZER) { - tensor_input_var_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", - try 
utils.getSanitizedName(node.inputs.items[4].?.name), - ")", - }); - } else { - tensor_input_var_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "@constCast(&tensor_", try utils.getSanitizedName(node.inputs.items[4].?.name), ")" }); - } - - // pub inline fn batchNormalization_lean( comptime T: anytype, comptime T1: anytype, comptime T2: anytype, input: *Tensor(T), scales: *Tensor(T1), B: *Tensor(T1), input_mean: Tensor(T2), input_var: Tensor(T2), epsilon: f32, momentum: f32, training_mode: bool, output: *Tensor(T)) - _ = try writer.print( - \\ - \\ - \\ tensMath.batchNormalization_lean( - \\ {s}, //type 0 - \\ {s}, //type 1 - \\ {s}, //type 2 - \\ {s}, //input - \\ {s}, //scales - \\ {s}, //B - \\ {s}, //input_mean - \\ {s}, //input_var - \\ {}, //epsilon - \\ {}, //momentum - \\ false, //training_mode - \\ &tensor_{s}, //output - \\ ) - , .{ - try getSafeTensorTypeString(node.inputs.items[0].?, node.nodeProto.name orelse "UnnamedBatchNormInput0"), // MODIFIED: Use helper for input X type - try getSafeTensorTypeString(node.inputs.items[1].?, node.nodeProto.name orelse "UnnamedBatchNormInput1"), // MODIFIED: Use helper for input scale type - try getSafeTensorTypeString(node.inputs.items[3].?, node.nodeProto.name orelse "UnnamedBatchNormInput3"), // MODIFIED: Use helper for input mean/var type (check ONNX spec for correct index if this is not mean's type) - tensor_X_string, - tensor_scale_string, - tensor_B_string, - tensor_input_mean_string, - tensor_input_var_string, - epsilon, - momentum, - try utils.getSanitizedName(node.outputs.items[0].name), - }); -} - -inline fn write_oneHot(writer: std.fs.File.Writer, node: *ReadyNode) !void { - // https://onnx.ai/onnx/operators/onnx__OneHot.html - // INPUTS: - // - indices (heterogeneous) - T1: Tensor of indices. - // - depth (heterogeneous) - T2: Scalar tensor for depth. - // - values (heterogeneous) - T3: Tensor of shape [off_value, on_value]. 
- // OUTPUT: - // - output (heterogeneous) - T3: Output tensor with one-hot encoding. - // ATTRIBUTES: - // - axis - INT (default is -1): Axis along which to add the one-hot dimension. - - var axis: i64 = -1; // Default axis per ONNX - for (node.nodeProto.attribute) |attr| { - if (std.mem.eql(u8, attr.name, "axis")) { - if (attr.type != AttributeType.INT) return error.InvalidAxisType; - axis = attr.i; - } - } - - //----create indices string - var indices_string: []u8 = undefined; - defer allocator.free(indices_string); - if (node.inputs.items[0].?.tag == globals.TensorTag.INITIALIZER) { - indices_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", - try utils.getSanitizedName(node.inputs.items[0].?.name), - ")", - }); - } else { - indices_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(&tensor_", - try utils.getSanitizedName(node.inputs.items[0].?.name), - ")", - }); - } - - //----create depth string - var depth_string: []u8 = undefined; - defer allocator.free(depth_string); - if (node.inputs.items[1].?.tag == globals.TensorTag.INITIALIZER) { - depth_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", - try utils.getSanitizedName(node.inputs.items[1].?.name), - ")", - }); - } else { - depth_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(&tensor_", - try utils.getSanitizedName(node.inputs.items[1].?.name), - ")", - }); - } - - //----create values string - var values_string: []u8 = undefined; - defer allocator.free(values_string); - if (node.inputs.items[2].?.tag == globals.TensorTag.INITIALIZER) { - values_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", - try utils.getSanitizedName(node.inputs.items[2].?.name), - ")", - }); - } else { - values_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(&tensor_", - try utils.getSanitizedName(node.inputs.items[2].?.name), - ")", - }); - } 
- - _ = try writer.print( - \\ - \\ - \\ tensMath.oneHot_lean( - \\ {s}, // T - \\ {s}, // indices - \\ {s}.data[0], // depth (scalare) - \\ {s}, // values - \\ {}, // axis - \\ &tensor_{s}, // output - \\ ) - , .{ - try utils.getTypeString(globals.tensorHashMap.getPtr(node.inputs.items[2].?.name).?.tensorProto.?.data_type), // T - indices_string, // indices - depth_string, // depth - values_string, // values - axis, // axis - try utils.getSanitizedName(node.outputs.items[0].name), // output - }); -} - -inline fn write_sub(writer: std.fs.File.Writer, node: *ReadyNode) !void { - // https://onnx.ai/onnx/operators/onnx__Sub.html - // INPUTS: - // - A (heterogeneous) - T: First operand. - // - B (heterogeneous) - T: Second operand. - // OUTPUTS: - // - C (heterogeneous) - T: Result, has same element type as two inputs. - - //----create tensor_A_string - var tensor_A_string: []u8 = undefined; - defer allocator.free(tensor_A_string); - - if (node.inputs.items[0].?.tag == globals.TensorTag.INITIALIZER) { - tensor_A_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", - try utils.getSanitizedName(node.inputs.items[0].?.name), - ")", - }); - } else { - tensor_A_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "@constCast(&tensor_", try utils.getSanitizedName(node.inputs.items[0].?.name), ")" }); - } - - //----create tensor_B_string - var tensor_B_string: []u8 = undefined; - defer allocator.free(tensor_B_string); - if (node.inputs.items[1].?.tag == globals.TensorTag.INITIALIZER) { - tensor_B_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", - try utils.getSanitizedName(node.inputs.items[1].?.name), - ")", - }); - } else { - tensor_B_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "@constCast(&tensor_", try utils.getSanitizedName(node.inputs.items[1].?.name), ")" }); - } - - _ = try writer.print( - \\ - \\ tensMath.sub_tensors_lean(T, T, {s}, ({s}), &tensor_{s}) - , .{ - 
tensor_A_string, // Input tensor A - tensor_B_string, // Input tensor B - try utils.getSanitizedName(node.outputs.items[0].name), // Output tensor C - }); -} - -inline fn write_conv(writer: std.fs.File.Writer, node: *ReadyNode) !void { - // https://onnx.ai/onnx/operators/onnx__Conv.html - // INPUTS: - // - X (heterogeneous) - T: Input data tensor - // - W (heterogeneous) - T: The weight tensor - // - B (optional, heterogeneous) - T: Optional 1D bias to be added to the convolution, has size of M. - // OUTPUTS: - // - Y (heterogeneous) - T: Output data tensor that contains the result of the convolution - // ATTRIBUTES: - // - auto_pad - STRING (default is 'NOTSET'): auto_pad must be either NOTSET, SAME_UPPER, SAME_LOWER or VALID. Where default value is NOTSET - // - dilations - INTS : dilation value along each spatial axis of the filter. If not present, the dilation defaults is 1 along each spatial axis. - // - group - INT (default is '1'): number of groups input channels and output channels are divided into - // - kernel_shape - INTS : The shape of the convolution kernel. If not present, should be inferred from input W - // - pads - INTS : Padding for the beginning and ending along each spatial axis, it can take any value greater than or equal to 0. - // - strides - INTS : Stride along each spatial axis. If not present, the stride defaults is 1 along each spatial axis. 
- - var auto_pad: []const u8 = "NOTSET"; - var dilations: ?[]i64 = null; - var group: i64 = 1; - var kernel_shape: ?[]i64 = null; - var pads: ?[]i64 = null; - var strides: ?[]i64 = null; //mandatory - - for (node.nodeProto.attribute) |attr| { - if (std.mem.indexOf(u8, attr.name, "auto_pad")) |_| { - if (attr.type == AttributeType.STRING) auto_pad = attr.s else return error.ConvAuto_padNotSTRING; - } else if (std.mem.indexOf(u8, attr.name, "dilations")) |_| { - if (attr.type == AttributeType.INTS) dilations = attr.ints else return error.ConvDilatationNoINTS; - } else if (std.mem.indexOf(u8, attr.name, "group")) |_| { - if (attr.type == AttributeType.INT) group = attr.i else return error.ConvGroupNotINT; - } else if (std.mem.indexOf(u8, attr.name, "kernel_shape")) |_| { - if (attr.type == AttributeType.INTS) kernel_shape = attr.ints else return error.ConvKernelShapeNotINTS; - } else if (std.mem.indexOf(u8, attr.name, "pads")) |_| { - if (attr.type == AttributeType.INTS) pads = attr.ints else return error.ConvPadsNotINTS; - } else if (std.mem.indexOf(u8, attr.name, "strides")) |_| { - if (attr.type == AttributeType.INTS) strides = attr.ints else return error.ConvStridesNotINTS; - } - } - - //----create tensor_X_string - var tensor_X_string: []u8 = undefined; - defer allocator.free(tensor_X_string); - - if (node.inputs.items[0].?.tag == globals.TensorTag.INITIALIZER) { - tensor_X_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", - try utils.getSanitizedName(node.inputs.items[0].?.name), - ")", - }); - } else { - tensor_X_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "@constCast(&tensor_", try utils.getSanitizedName(node.inputs.items[0].?.name), ")" }); - } - - //----create tensor_W_string - var tensor_W_string: []u8 = undefined; - defer allocator.free(tensor_W_string); - if (node.inputs.items[1].?.tag == globals.TensorTag.INITIALIZER) { - tensor_W_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - 
"@constCast(¶m_lib.tensor_", - try utils.getSanitizedName(node.inputs.items[1].?.name), - ")", - }); - } else { - tensor_W_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "@constCast(&tensor_", try utils.getSanitizedName(node.inputs.items[1].?.name), ")" }); - } - - //----create ?bias string - var bias_string: []u8 = undefined; - // Bias Tensor B is optional! verify the presence - if (node.inputs.items.len == 3) { - const B_name = try utils.getSanitizedName(node.inputs.items[2].?.name); - bias_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "@constCast(¶m_lib.tensor_", B_name, ")" }); - } else { - bias_string = try std.mem.concat(allocator, u8, &[_][]const u8{"null"}); - } - - //----create stride string (mandatory) - // TODO: implement default stride, see docs above - if (strides == null) return error.StrideNotFound; - const stride_string: []const u8 = try utils.i64SliceToUsizeArrayString(strides.?); - - //----create ?pads string - var pads_string: []const u8 = "null"; - if (pads != null) { - if (pads.?.len > 0) { // Check if the slice is actually non-empty - pads_string = try utils.i64SliceToUsizeArrayString(pads.?); - // Assuming no allocation needed to be freed, following write_conv - } else { - pads_string = "&[_]usize{}"; // Use explicit empty slice literal if input slice is empty - } - } // else pads_string remains "null" - - //----create ?dilatations string - var dilat_string: []const u8 = "null"; - if (dilations != null) { - if (dilations.?.len > 0) { - dilat_string = try utils.i64SliceToUsizeArrayString(dilations.?); - } else { - dilat_string = "&[_]usize{}"; - } - } // else dilat_string remains "null" - - // pub fn OnnxConvLean(comptime T: type, input: *Tensor(T), kernel: *Tensor(T), output: *Tensor(T), bias: ?*const Tensor(T), stride: []const usize, pads: ?[]const usize, dilations: ?[]const usize, group: ?usize, auto_pad: ?[]const u8) !void - _ = try writer.print( - \\ - \\ - \\ tensMath.conv_lean( - \\ T, //type - \\ {s}, //input - 
\\ {s}, //kernel - \\ &tensor_{s}, //output - \\ {s}, //bias - \\ {s}, //stride - \\ {s}, //pads - \\ {s}, //dilatations - \\ {}, //group - \\ "{s}", //auto_pad - \\ ) - , .{ - tensor_X_string, //Input - tensor_W_string, //Kernel - try utils.getSanitizedName(node.outputs.items[0].name), //Output - bias_string, //Bias - stride_string, //Strides - pads_string, //Pads - dilat_string, //Dilatations - group, //Group - auto_pad, //auto_pad - }); -} - -inline fn write_concat(writer: std.fs.File.Writer, node: *ReadyNode) !void { - // https://onnx.ai/onnx/operators/onnx__Concat.html - // INPUTS: - // - inputs (variadic, heterogeneous) - T: List of tensors for concatenation - // OUTPUTS: - // - concat_result (heterogeneous) - T: Concatenated tensor - // ATTRIBUTES: - // - axis (int, required): Which axis to concat on - - // Get the axis attribute - var axis: i64 = 0; - var axis_found = false; - - for (node.nodeProto.attribute) |attr| { - if (std.mem.eql(u8, attr.name, "axis")) { - if (attr.type == AttributeType.INT) { - axis = attr.i; - axis_found = true; - } else { - return error.ConcatAxisNotINT; - } - } - } - - if (!axis_found) { - return error.ConcatAxisNotFound; - } - - // Special case for axis 0 with different ranks - if (axis == 0) { - // Find if there are tensors with different ranks - var has_different_ranks = false; - const first_rank = node.inputs.items[0].?.shape.len; - - for (node.inputs.items[1..]) |input| { - if (input.?.shape.len != first_rank) { - has_different_ranks = true; - break; - } - } - - if (has_different_ranks) { - _ = try writer.print( - \\ - \\ // Special case for concatenation along axis 0 with different ranks - \\ // This requires custom handling as the standard concatenate function expects same rank - \\ mathHandler_log.warn("\\nWarning: Concatenating tensors with different ranks along axis 0\\n", .{{}}); - \\ - \\ // Create a list of tensors to concatenate - \\ var concat_tensor_list_{s} = [_]Tensor(T){{ - , .{try 
utils.getSanitizedName(node.outputs.items[0].name)}); - - for (node.inputs.items, 0..) |input, idx| { - if (idx > 0) { - _ = try writer.print(", ", .{}); - } - - var tensor_string: []u8 = undefined; - defer allocator.free(tensor_string); - if (input.?.tag == globals.TensorTag.INITIALIZER) { - tensor_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", - try utils.getSanitizedName(input.?.name), - ")", - }); - } else { - tensor_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "&tensor_", try utils.getSanitizedName(input.?.name) }); - } - _ = try writer.print("{s}", .{tensor_string}); - } - - _ = try writer.print( - \\}}; - \\ - \\ // Perform concatenation with special handling for different ranks - \\ try tensMath.concatenate_lean(T, &allocator, &concat_tensor_list_{s}, {},tensor_{s}) - , .{ - try utils.getSanitizedName(node.outputs.items[0].name), - axis, - try utils.getSanitizedName(node.outputs.items[0].name), - }); - - return; - } - } - - // Standard case: all tensors have the same rank - // Create a tensor list with all input tensors - _ = try writer.print( - \\ - \\ // Create a list of tensors to concatenate - \\ var concat_tensor_list_{s} = [_]Tensor(T){{ - , .{try utils.getSanitizedName(node.outputs.items[0].name)}); - - for (node.inputs.items, 0..) 
|input, idx| { - if (idx > 0) { - _ = try writer.print(", ", .{}); - } - - if (input.?.tag == globals.TensorTag.INITIALIZER) { - _ = try writer.print("param_lib.tensor_{s}", .{try utils.getSanitizedName(input.?.name)}); - } else { - _ = try writer.print("tensor_{s}", .{try utils.getSanitizedName(input.?.name)}); - } - } - - _ = try writer.print( - \\}}; - \\ - \\ // Perform concatenation - \\ tensMath.concatenate_lean(T, &allocator, &concat_tensor_list_{s}, {}, &tensor_{s} ) - , .{ - try utils.getSanitizedName(node.outputs.items[0].name), - axis, - try utils.getSanitizedName(node.outputs.items[0].name), - }); -} - -inline fn write_constant(writer: std.fs.File.Writer, node: *ReadyNode) !void { - // https://onnx.ai/onnx/operators/onnx__Constant.html - // Outputs: - // - output (heterogeneous) - T: Output tensor containing the same value of the provided tensor. - // Attributes - only one of these should be specified: - // - value (TENSOR): The value for the elements of the output tensor. - // - sparse_value (SPARSE_TENSOR): The value for the elements of the output tensor in sparse format. - // - value_float (FLOAT): The value for the sole element for the scalar, float32, output tensor. - // - value_floats (FLOATS): The values for the elements for the 1D, float32, output tensor. - // - value_int (INT): The value for the sole element for the scalar, int64, output tensor. - // - value_ints (INTS): The values for the elements for the 1D, int64, output tensor. - // - value_string (STRING): The value for the sole element for the scalar, UTF-8 string, output tensor. - // - value_strings (STRINGS): The values for the elements for the 1D, UTF-8 string, output tensor. 
- - const output_name = try utils.getSanitizedName(node.outputs.items[0].name); - - for (node.nodeProto.attribute) |attr| { - if (attr.type == onnx.AttributeType.TENSOR) { - try writer.print( - \\ - \\ // Constant tensor_{s} already declared and inizialized in predict.zig write_constantTensor() - , .{output_name}); - - return; - } else if (std.mem.eql(u8, attr.name, "value_float")) { - if (attr.type != AttributeType.FLOAT) return error.ConstantAttributeTypeMismatch; - - // Create a scalar tensor with a float value - try writer.print( - \\ - \\ // Initialize scalar float constant - \\ tensor_{s} = Tensor(T).initScalar(&allocator, {d}) catch return; - , .{ output_name, attr.f }); - return; - } else if (std.mem.eql(u8, attr.name, "value_floats")) { - if (attr.type != AttributeType.FLOATS) return error.ConstantAttributeTypeMismatch; - - // Create 1D tensor with float values - try writer.print( - \\ - \\ // Initialize 1D float array constant - \\ const data_{s} = [_]T{{ - , .{output_name}); - - // Write array elements - for (attr.floats, 0..) 
|val, i| { - if (i > 0) try writer.writeAll(", "); - try writer.print("{d}", .{val}); - } - - try writer.print( - \\ - \\ }}; - \\ tensor_{s} = Tensor(T).fromSlice(&allocator, &data_{s}, &[_]usize{{{d}}}) catch return; - , .{ output_name, output_name, attr.floats.len }); - return; - } else if (std.mem.eql(u8, attr.name, "value_int")) { - if (attr.type != AttributeType.INT) return error.ConstantAttributeTypeMismatch; - - // Create a scalar tensor with an int value - try writer.print( - \\ - \\ // Initialize scalar int constant - \\ tensor_{s} = Tensor(T).initScalar(&allocator, @as(T, @floatFromInt({d}))) catch return; - , .{ output_name, attr.i }); - return; - } else if (std.mem.eql(u8, attr.name, "value_ints")) { - if (attr.type != AttributeType.INTS) return error.ConstantAttributeTypeMismatch; - - // Create 1D tensor with int values - try writer.print( - \\ - \\ // Initialize 1D int array constant - \\ const data_{s} = [_]T{{ - , .{output_name}); - - // Write array elements - for (attr.ints, 0..) 
|val, i| { - if (i > 0) try writer.writeAll(", "); - try writer.print("@as(T, @floatFromInt({d}))", .{val}); - } - - try writer.print( - \\ - \\ }}; - \\ tensor_{s} = Tensor(T).fromSlice(&allocator, &data_{s}, &[_]usize{{{d}}}) catch return; - , .{ output_name, output_name, attr.ints.len }); - return; - } else if (std.mem.eql(u8, attr.name, "value_string")) { - if (attr.type != AttributeType.STRING) return error.ConstantAttributeTypeMismatch; - - // String constants are not directly supported in this numeric tensor library - try writer.print( - \\ - \\ // String constants are not directly supported in this numeric tensor library - \\ // For now, we'll create a placeholder tensor with a single value - \\ tensor_{s} = Tensor(T).initScalar(&allocator, 0) catch return; - \\ // The actual string value was: "{s}" - , .{ output_name, attr.s }); - return; - } else if (std.mem.eql(u8, attr.name, "value_strings")) { - if (attr.type != AttributeType.STRINGS) return error.ConstantAttributeTypeMismatch; - - // String array constants are not directly supported in this numeric tensor library - try writer.print( - \\ - \\ // String array constants are not directly supported in this numeric tensor library - \\ // For now, we'll create a placeholder tensor with zeros - \\ const data_{s} = [_]T{{ - , .{output_name}); - - // Create a placeholder array of zeros with the same length - for (attr.strings, 0..) 
|_, i| { - if (i > 0) try writer.writeAll(", "); - try writer.print("0", .{}); - } - - try writer.print( - \\ - \\ }}; - \\ tensor_{s} = Tensor(T).fromSlice(&allocator, &data_{s}, &[_]usize{{{d}}}) catch return; - \\ // Note: This is a placeholder for string values that cannot be directly represented - , .{ output_name, output_name, attr.strings.len }); - return; - } else if (std.mem.eql(u8, attr.name, "sparse_value")) { - // Sparse tensor constants require special handling - try writer.print( - \\ - \\ // Sparse tensor constants are not yet fully supported - \\ // Creating a placeholder tensor for sparse_value - \\ tensor_{s} = Tensor(T).initScalar(&allocator, 0) catch return; - \\ mathHandler_log.warn("Warning: sparse_value attribute used but not fully supported\\n", .{{}}); - , .{output_name}); - return; - } - } - - // If we get here, no valid constant value was found - try writer.writeAll( - \\ - \\ return error.ConstantValueNotFound; - ); -} - -inline fn write_div(writer: std.fs.File.Writer, node: *ReadyNode) !void { - // https://onnx.ai/onnx/operators/onnx__Div.html - // INPUTS: - // - A (heterogeneous) - T: First operand. - // - B (heterogeneous) - T: Second operand. - // OUTPUTS: - // - C (heterogeneous) - T: Result, has same element type as two inputs. 
- - //----create tensor_A_string - var tensor_A_string: []u8 = undefined; - defer allocator.free(tensor_A_string); - - if (node.inputs.items[0].?.tag == globals.TensorTag.INITIALIZER) { - tensor_A_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", - - try utils.getSanitizedName(node.inputs.items[0].?.name), - - ")", - }); - } else { - tensor_A_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "@constCast(&tensor_", try utils.getSanitizedName(node.inputs.items[0].?.name), ")" }); - } - - //----create tensor_B_string - var tensor_B_string: []u8 = undefined; - defer allocator.free(tensor_B_string); - if (node.inputs.items[1].?.tag == globals.TensorTag.INITIALIZER) { - tensor_B_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", - - try utils.getSanitizedName(node.inputs.items[1].?.name), - - ")", - }); - } else { - tensor_B_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "@constCast(&tensor_", try utils.getSanitizedName(node.inputs.items[1].?.name), ")" }); - } - - _ = try writer.print( - \\ - \\ tensMath.div_lean(T, {s}, ({s}), &tensor_{s}) - , .{ - tensor_A_string, // Input tensor A - tensor_B_string, // Input tensor B - try utils.getSanitizedName(node.outputs.items[0].name), // Output tensor C - }); -} - -inline fn write_gather(writer: std.fs.File.Writer, node: *ReadyNode) !void { - // https://onnx.ai/onnx/operators/onnx__Gather.html - // INPUTS: - // - data (heterogeneous) - T: Tensor of rank r >= 1. - // - indices (heterogeneous) - tensor(int64): Tensor of int64 indices, of any rank q. - // OUTPUTS: - // - output (heterogeneous) - T: Tensor of rank q + r - 1. - // ATTRIBUTES: - // - axis (int, default is 0): Which axis to gather on. Negative value means counting dimensions from the back. 
- - var axis: i64 = 0; - for (node.nodeProto.attribute) |attr| { - if (std.mem.eql(u8, attr.name, "axis")) { - if (attr.type == AttributeType.INT) axis = attr.i; - } - } - - // Create data tensor string - var data_tensor_string: []u8 = undefined; - defer allocator.free(data_tensor_string); - if (node.inputs.items[0].?.tag == globals.TensorTag.INITIALIZER) { - data_tensor_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", - try utils.getSanitizedName(node.inputs.items[0].?.name), - ")", - }); - } else { - data_tensor_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "&tensor_", try utils.getSanitizedName(node.inputs.items[0].?.name) }); - } - - // Create indices tensor string - const indices_name = try utils.getSanitizedName(node.inputs.items[1].?.name); - var indices_tensor_string: []u8 = undefined; - defer allocator.free(indices_tensor_string); - if (node.inputs.items[1].?.tag == globals.TensorTag.INITIALIZER) { - indices_tensor_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "param_lib.tensor_", - indices_name, - }); - } else { - indices_tensor_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "tensor_", - indices_name, - }); - } - - _ = try writer.print( - \\ - \\ - \\ //creating the indices Tensor(usize) - \\ - \\ const usize_slice_{s} = utils.sliceToUsizeSlice({s}.data); - \\ var usize_tensor_{s} = Tensor(usize).fromConstBuffer(&allocator, usize_slice_{s}, {s}.shape); - \\ defer allocator.free(usize_slice_{s}); - \\ - , .{ - indices_name, //usize_slice_ - indices_tensor_string, //tensor_ - indices_name, //usize_tensor_ - indices_name, //usize_slice_ - indices_tensor_string, //tensor_.shape - indices_name, //usize_slice_ for free - }); - - _ = try writer.print( - \\ - \\ - \\ tensMath.gather_lean( - \\ T, //type - \\ {s}, //data tensor - \\ &usize_tensor_{s}, //indices tensor - \\ {}, //axis - \\ &tensor_{s}, //output tensor - \\ ) - , .{ - data_tensor_string, // Input data tensor - 
indices_name, // Input indices tensor - axis, // Selected axis - try utils.getSanitizedName(node.outputs.items[0].name), // Output tensor - }); -} - -inline fn write_gemm(writer: std.fs.File.Writer, node: *ReadyNode) !void { - // https://onnx.ai/onnx/operators/onnx__Gemm.html - // INPUTS: - // - Input tensor A. The shape of A should be (M, K) if transA is 0, or (K, M) if transA is non-zero. - // - Input tensor B. The shape of B should be (K, N) if transB is 0, or (N, K) if transB is non-zero. - // - Optional input tensor C. If not specified, the computation is done as if C is a scalar 0. The shape of C should be unidirectional broadcastable to (M, N). - //OUTPUTS: - // - Output tensor of shape (M, N). - // ATTRIBUTES: - // - alpha. FLOAT (default is '1.0'): Scalar multiplier for the product of input tensors A * B. - // - beta - FLOAT (default is '1.0'): Scalar multiplier for input tensor C. - // - transA - INT (default is '0'): Whether A should be transposed - // - transB - INT (default is '0'): Whether B should be transposed - - var alpha: f32 = 1.0; - var beta: f32 = 1.0; - var transA: bool = false; - var transB: bool = false; - - for (node.nodeProto.attribute) |attr| { - if (std.mem.indexOf(u8, attr.name, "alpha")) |_| { - if (attr.type == AttributeType.FLOAT) alpha = attr.f else return error.GemmAphaNotFLOAT; - } else if (std.mem.indexOf(u8, attr.name, "beta")) |_| { - if (attr.type == AttributeType.FLOAT) beta = attr.f else return error.GemmBetaNotFLOAT; - } else if (std.mem.indexOf(u8, attr.name, "transA")) |_| { - if (attr.type == AttributeType.INT) transA = if (attr.i != 0) true else false else return error.GemmTransANotINT; - } else if (std.mem.indexOf(u8, attr.name, "transB")) |_| { - if (attr.type == AttributeType.INT) transB = if (attr.i != 0) true else false else return error.GemmTransBNotINT; - } - } - - //----create tensor_A_string - var tensor_A_string: []u8 = undefined; - defer allocator.free(tensor_A_string); - - if (node.inputs.items[0].?.tag == 
globals.TensorTag.INITIALIZER) { - tensor_A_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", - try utils.getSanitizedName(node.inputs.items[0].?.name), - ")", - }); - } else { - tensor_A_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "&tensor_", try utils.getSanitizedName(node.inputs.items[0].?.name) }); - } - - //----create tensor_B_string - var tensor_B_string: []u8 = undefined; - defer allocator.free(tensor_B_string); - if (node.inputs.items[1].?.tag == globals.TensorTag.INITIALIZER) { - tensor_B_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", - try utils.getSanitizedName(node.inputs.items[1].?.name), - ")", - }); - } else { - tensor_B_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "&tensor_", try utils.getSanitizedName(node.inputs.items[1].?.name) }); - } - - // Input Tensor C is optional! verify the presence - var tensor_C_string: []u8 = undefined; - if (node.inputs.items.len == 3) { - const sanitized_tensor_C = try utils.getSanitizedName(node.inputs.items[2].?.name); - tensor_C_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(&", - if (globals.tensorHashMap.getPtr(node.inputs.items[2].?.name).?.tag == globals.TensorTag.INITIALIZER) "param_lib." 
else "", - "tensor_", - sanitized_tensor_C, - ")", - }); - } else { - tensor_C_string = try std.mem.concat(allocator, u8, &[_][]const u8{" null"}); - } - - _ = try writer.print( - \\ - \\ - \\ tensMath.gemm_lean(T, {s}, {s}, {s}, {}, {}, {s}, {s}, &tensor_{s} ) - , .{ - tensor_A_string, // Input tensor A - tensor_B_string, // Input tensor B - tensor_C_string, - alpha, - beta, - if (transA) "true" else "false", - if (transB) "true" else "false", - try utils.getSanitizedName(node.outputs.items[0].name), // Output - }); -} - -inline fn write_matmul(writer: std.fs.File.Writer, node: *ReadyNode) !void { - // https://onnx.ai/onnx/operators/onnx__MatMul.html - // INPUTS: - // - A (heterogeneous) - T: First operand. - // - B (heterogeneous) - T: Second operand. - // OUTPUTS: - // - C (heterogeneous) - T: Result, has same element type as two inputs. - - //----create tensor_A_string - var tensor_A_string: []u8 = undefined; - defer allocator.free(tensor_A_string); - - if (node.inputs.items[0].?.tag == globals.TensorTag.INITIALIZER) { - tensor_A_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", - try utils.getSanitizedName(node.inputs.items[0].?.name), - ")", - }); - } else { - tensor_A_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "&tensor_", try utils.getSanitizedName(node.inputs.items[0].?.name) }); - } - - //----create tensor_B_string - var tensor_B_string: []u8 = undefined; - defer allocator.free(tensor_B_string); - if (node.inputs.items[1].?.tag == globals.TensorTag.INITIALIZER) { - tensor_B_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", - try utils.getSanitizedName(node.inputs.items[1].?.name), - ")", - }); - } else { - tensor_B_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "&tensor_", try utils.getSanitizedName(node.inputs.items[1].?.name) }); - } - - // Calculate b_width_bytes safely, handling potential null tensorProto - // Get type information for tensor B to 
estimate element size - const input_B_name = node.inputs.items[1].?.name; - const ready_tensor_B = globals.tensorHashMap.getPtr(input_B_name) orelse { - mathHandler_log.warn("Error: Tensor '{s}' not found in globals.tensorHashMap for MatMul.\n", .{input_B_name}); - return error.TensorNotFound; - }; - - var element_size_bytes: usize = 4; // Default to f32 size as fallback - if (ready_tensor_B.tensorProto) |tp| { - const data_type = tp.data_type; - // Determine size from DataType enum - element_size_bytes = switch (data_type) { - .FLOAT => @sizeOf(f32), - .FLOAT16 => @sizeOf(f16), - .INT64 => @sizeOf(i64), - .INT32 => @sizeOf(i32), - .INT8 => @sizeOf(i8), - .UINT8 => @sizeOf(u8), - // Add other supported types as needed - else => blk: { - mathHandler_log.warn("Warning: Unsupported DataType '{any}' for MatMul input B '{s}'. Assuming f32 size.\n", .{ data_type, input_B_name }); - break :blk 4; - }, - }; - } else { - // Fallback if tensorProto is null - log a warning - mathHandler_log.warn("Warning: TensorProto for MatMul input B '{s}' is null. 
Assuming f32 size for width calculation.\n", .{input_B_name}); - } - - const b_dims = node.inputs.items[1].?.shape.len; - if (b_dims == 0) { - mathHandler_log.warn("Error: MatMul input B '{s}' has zero dimensions.\n", .{input_B_name}); - return error.InvalidShape; // Avoid panic on empty shape - } - - const b_width_elements: usize = @intCast(node.inputs.items[1].?.shape[b_dims - 1]); - const b_width_bytes: usize = b_width_elements * element_size_bytes; - - if (b_width_bytes >= std.atomic.cache_line) { //B is large enough for the new mat mul to work; - _ = try writer.print( - \\ - \\ tensMath.blocked_mat_mul_lean(T, {s}, {s}, &tensor_{s}) - , .{ - tensor_A_string, // Input tensor A - tensor_B_string, // Input tensor B - try utils.getSanitizedName(node.outputs.items[0].name), // Output tensor C - }); - } else { //B is not large enough, so we keep the old but improved mat_mul - _ = try writer.print( - \\ - \\ tensMath.mat_mul_lean(T, {s}, {s}, &tensor_{s}) - , .{ - tensor_A_string, // Input tensor A - tensor_B_string, // Input tensor B - try utils.getSanitizedName(node.outputs.items[0].name), // Output tensor C - }); - } -} - -inline fn write_maxPool(writer: std.fs.File.Writer, node: *ReadyNode) !void { - //https://onnx.ai/onnx/operators/onnx__MaxPool.html - // INPUTS: - // - X (heterogeneous) - T: Input data tensor - // OUTPUTS: - // - Y (heterogeneous) - T: Output data tensor from average or max pooling across the input tensor. - // - (NOT IMPLEMENTED) Indices (optional, heterogeneous) - I: Indices tensor from max pooling across the input tensor. - // ATTRIBUTES: - // - auto_pad - STRING (default is 'NOTSET'): auto_pad must be either NOTSET, SAME_UPPER, SAME_LOWER or VALID - // - ceil_mode - INT (default is '0'): Whether to use ceil or floor (default) to compute the output shape - // - dilations - INTS : Dilation value along each spatial axis of filter. 
If not present, the dilation defaults to 1 along each spatial axis - // - kernel_shape - INTS (required) : The size of the kernel along each axis. - // - pads - INTS : Padding for the beginning and ending along each spatial axis, it can take any value greater than or equal to 0. - // - storage_order - INT (default is '0'): The storage order of the tensor. 0 is row major, and 1 is column major. This attribute is used only to convert an n-tuple index value into a single integer value for producing the second output. - // - strides - INTS : Stride along each spatial axis. If not present, the stride defaults to 1 along each spatial axis. - - var auto_pad: []const u8 = "NOTSET"; - - var ceil_mode: i64 = 0; - - var dilations: ?[]i64 = null; - - var kernel_shape: ?[]i64 = null; //mandatory - - var pads: ?[]i64 = null; - - var storage_order: i64 = 0; - - var strides: ?[]i64 = null; - - for (node.nodeProto.attribute) |attr| { - if (std.mem.indexOf(u8, attr.name, "auto_pad")) |_| { - if (attr.type == AttributeType.STRING) auto_pad = attr.s else return error.MaxPoolAuto_padNotSTRING; - } else if (std.mem.indexOf(u8, attr.name, "ceil_mode")) |_| { - if (attr.type == AttributeType.INT) ceil_mode = attr.i else return error.MaxPoolCeil_modeNotINT; - } else if (std.mem.indexOf(u8, attr.name, "dilations")) |_| { - if (attr.type == AttributeType.INTS) dilations = attr.ints else return error.MaxPoolDilatationNoINTS; - } else if (std.mem.indexOf(u8, attr.name, "kernel_shape")) |_| { - if (attr.type == AttributeType.INTS) kernel_shape = attr.ints else return error.MaxPoolKernelShapeNotINTS; - } else if (std.mem.indexOf(u8, attr.name, "pads")) |_| { - if (attr.type == AttributeType.INTS) pads = attr.ints else return error.MaxPoolPadsNotINTS; - } else if (std.mem.indexOf(u8, attr.name, "storage_order")) |_| { - if (attr.type == AttributeType.INT) storage_order = attr.i else return error.MaxPoolStorage_orderNotINT; - } else if (std.mem.indexOf(u8, attr.name, "strides")) |_| { - if 
(attr.type == AttributeType.INTS) strides = attr.ints else return error.MaxPoolStridesNotINTS; - } - } - - //----create tensor_X_string - var tensor_X_string: []u8 = undefined; - defer allocator.free(tensor_X_string); - - if (node.inputs.items[0].?.tag == globals.TensorTag.INITIALIZER) { - tensor_X_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", - try utils.getSanitizedName(node.inputs.items[0].?.name), - ")", - }); - } else { - tensor_X_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "&tensor_", try utils.getSanitizedName(node.inputs.items[0].?.name) }); - } - - //----create kernel_shape string - var kernel_shape_string: []const u8 = undefined; - if (kernel_shape != null) { - kernel_shape_string = try utils.i64SliceToUsizeArrayString(kernel_shape.?); - } else { - return error.Kernel_shapeNotFound; - } - - //----create strides string - var strides_string: []const u8 = undefined; - if (strides != null) { - strides_string = try utils.i64SliceToUsizeArrayString(strides.?); - } else { - return error.StridesNotFound; - } - - //----create dilations string - var dilations_string: []const u8 = undefined; - if (dilations != null) { - dilations_string = try utils.i64SliceToUsizeArrayString(dilations.?); - } else { - dilations_string = try utils.i64SliceToUsizeArrayString(&[_]i64{ 1, 1, 1, 1 }); // TODO: It is hardcoded in 4D, not the most elegant solution - } - - //----create pads string - var pads_string: []const u8 = undefined; - if (pads != null) { - pads_string = try utils.i64SliceToUsizeArrayString(pads.?); - } else { - return error.PadsNotFound; - } - - _ = try writer.print( - \\ - \\ - \\ tensMath.onnx_maxpool_lean( - \\ T, - \\ {s}, //Input - \\ &tensor_{s}, //Output - \\ {s}, //kernel_shape - \\ {s}, //strides - \\ {s}, //dilations - \\ {s}, //pads - \\ tensMath.AutoPadType.{s}, //auto_pad - \\ ) - , .{ - tensor_X_string, //Input - try utils.getSanitizedName(node.outputs.items[0].name), //Output - 
kernel_shape_string, //kernel_shape - strides_string, //strides - dilations_string, //dilatations - pads_string, //pads - auto_pad, //auto_pad - }); -} - -inline fn write_averagePool(writer: std.fs.File.Writer, node: *ReadyNode) !void { - // https://onnx.ai/onnx/operators/onnx__AveragePool.html - // INPUTS: - // - X (heterogeneous) - T: Input data tensor - // OUTPUTS: - // - Y (heterogeneous) - T: Output data tensor from average pooling - // ATTRIBUTES: - // - auto_pad - STRING (default is 'NOTSET'): NOTSET, SAME_UPPER, SAME_LOWER, VALID - // - ceil_mode - INT (default is '0'): Whether to use ceil or floor - // - count_include_pad - INT (default is '0'): Whether to include padding in averaging - // - dilations - INTS: Dilation value along each spatial axis (default 1) - // - kernel_shape - INTS (required): Kernel size along each axis - // - pads - INTS: Padding for each spatial axis - // - strides - INTS: Stride along each spatial axis (default 1) - - mathHandler_log.debug("DEBUG: write_averagePool called for node: {s}\n", .{node.nodeProto.name orelse "unnamed"}); - - var auto_pad: []const u8 = "NOTSET"; - var ceil_mode: i64 = 0; - var count_include_pad: i64 = 0; - var dilations: ?[]i64 = null; - var kernel_shape: ?[]i64 = null; // Obbligatorio - var pads: ?[]i64 = null; - var strides: ?[]i64 = null; - - // Leggi gli attributi - for (node.nodeProto.attribute) |attr| { - if (std.mem.indexOf(u8, attr.name, "auto_pad")) |_| { - if (attr.type == AttributeType.STRING) auto_pad = attr.s else return error.AveragePoolAutoPadNotSTRING; - } else if (std.mem.indexOf(u8, attr.name, "ceil_mode")) |_| { - if (attr.type == AttributeType.INT) ceil_mode = attr.i else return error.MaxPoolCeil_modeNotINT; - } else if (std.mem.indexOf(u8, attr.name, "count_include_pad")) |_| { - if (attr.type == AttributeType.INT) count_include_pad = attr.i else return error.AveragePoolCountIncludePadNotINT; - } else if (std.mem.indexOf(u8, attr.name, "dilations")) |_| { - if (attr.type == 
AttributeType.INTS) dilations = attr.ints else return error.AveragePoolDilationsNotINTS; - } else if (std.mem.indexOf(u8, attr.name, "kernel_shape")) |_| { - if (attr.type == AttributeType.INTS) kernel_shape = attr.ints else return error.AveragePoolKernelShapeNotINTS; - } else if (std.mem.indexOf(u8, attr.name, "pads")) |_| { - if (attr.type == AttributeType.INTS) pads = attr.ints else return error.AveragePoolPadsNotINTS; - } else if (std.mem.indexOf(u8, attr.name, "strides")) |_| { - if (attr.type == AttributeType.INTS) strides = attr.ints else return error.AveragePoolStridesNotINTS; - } - } - - // Crea tensor_X_string per l'input - var tensor_X_string: []u8 = undefined; - defer allocator.free(tensor_X_string); - - if (node.inputs.items[0].?.tag == globals.TensorTag.INITIALIZER) { - tensor_X_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", - try utils.getSanitizedName(node.inputs.items[0].?.name), - ")", - }); - } else { - tensor_X_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "&tensor_", - try utils.getSanitizedName(node.inputs.items[0].?.name), - }); - } - - // Crea stringa per kernel_shape - var kernel_shape_string: []const u8 = undefined; - if (kernel_shape != null) { - kernel_shape_string = try utils.i64SliceToUsizeArrayString(kernel_shape.?); - } else { - return error.Kernel_shapeNotFound; - } - - // Crea stringa per strides - var strides_string: []const u8 = undefined; - if (strides != null) { - strides_string = try utils.i64SliceToUsizeArrayString(strides.?); - } else { - return error.StridesNotFound; - } - - // Crea stringa per dilations - var dilations_string: []const u8 = undefined; - if (dilations != null) { - dilations_string = try utils.i64SliceToUsizeArrayString(dilations.?); - } else { - dilations_string = try utils.i64SliceToUsizeArrayString(&[_]i64{ 1, 1, 1, 1 }); // TODO: Hardcoded in 4D, not the most elegant solution - } - - // Crea stringa per pads - var pads_string: []const u8 = 
undefined; - if (pads != null) { - pads_string = try utils.i64SliceToUsizeArrayString(pads.?); - } else { - return error.PadsNotFound; - } - - // Scrivi la chiamata a onnx_averagepool_lean - _ = try writer.print( - \\ - \\ - \\ tensMath.onnx_averagepool_lean( - \\ T, - \\ {s}, // Input - \\ &tensor_{s}, // Output - \\ {s}, // kernel_shape - \\ {s}, // strides - \\ {s}, // dilations - \\ {s}, // pads - \\ tensMath.AutoPadType.{s}, // auto_pad - \\ {s}, // count_include_pad - \\ ) - , .{ - tensor_X_string, // Input - try utils.getSanitizedName(node.outputs.items[0].name), // Output - kernel_shape_string, // kernel_shape - strides_string, // strides - dilations_string, // dilations - pads_string, // pads - auto_pad, // auto_pad - if (count_include_pad == 1) "true" else "false", // count_include_pad - }); -} - -inline fn write_mul(writer: std.fs.File.Writer, node: *ReadyNode) !void { - // https://onnx.ai/onnx/operators/onnx__Mul.html - // INPUTS: - // - A (heterogeneous) - T: First operand. - // - B (heterogeneous) - T: Second operand. - // OUTPUTS: - // - C (heterogeneous) - T: Result, has same element type as two inputs. 
- - //----create tensor_A_string - var tensor_A_string: []u8 = undefined; - defer allocator.free(tensor_A_string); - if (node.inputs.items[0].?.tag == globals.TensorTag.INITIALIZER) { - tensor_A_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", - try utils.getSanitizedName(node.inputs.items[0].?.name), - ")", - }); - } else { - tensor_A_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "&tensor_", try utils.getSanitizedName(node.inputs.items[0].?.name) }); - } - - //----create tensor_B_string - var tensor_B_string: []u8 = undefined; - defer allocator.free(tensor_B_string); - if (node.inputs.items[1].?.tag == globals.TensorTag.INITIALIZER) { - tensor_B_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", - try utils.getSanitizedName(node.inputs.items[1].?.name), - ")", - }); - } else { - tensor_B_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "@constCast(&tensor_", try utils.getSanitizedName(node.inputs.items[1].?.name), ")" }); - } - - _ = try writer.print( - \\ - \\ - \\ tensMath.mul_lean(T, {s}, ({s}), &tensor_{s}) - , .{ - tensor_A_string, // Input tensor A - tensor_B_string, // Input tensor B - try utils.getSanitizedName(node.outputs.items[0].name), // Output tensor C - }); -} - -inline fn write_reduceMean(writer: std.fs.File.Writer, node: *ReadyNode) !void { - // https://onnx.ai/onnx/operators/onnx__ReduceMean.html - // INPUTS: - // - data (heterogeneous) - T: An input tensor. - // - axes (optional, heterogeneous) - tensor(int64): A list of integers, along which to reduce. The default is to reduce over all the dimensions of the input tensor if 'keepdims' is true. - // OUTPUTS: - // - reduced (heterogeneous) - T: Reduced output tensor. - // ATTRIBUTES: - // - keepdims (int, default is 1): Keep the reduced dimension or not, default 1 means keep the reduced dimension. - // - noop_with_empty_axes (int, default is 0): Defines behavior if 'axes' is empty. 
Default behavior is to reduce all axes. - - // Get attributes - var keepdims: bool = true; - var noop_with_empty_axes: bool = false; - var axes_attr: ?[]i64 = null; - - for (node.nodeProto.attribute) |attr| { - if (std.mem.eql(u8, attr.name, "keepdims")) { - if (attr.type == AttributeType.INT) keepdims = attr.i != 0; - } else if (std.mem.eql(u8, attr.name, "noop_with_empty_axes")) { - if (attr.type == AttributeType.INT) noop_with_empty_axes = attr.i != 0; - } else if (std.mem.eql(u8, attr.name, "axes")) { - if (attr.type == AttributeType.INTS) axes_attr = attr.ints; - } - } - - // Create input tensor string - var input_tensor_string: []u8 = undefined; - defer allocator.free(input_tensor_string); - - if (node.inputs.items[0].?.tag == globals.TensorTag.INITIALIZER) { - input_tensor_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", - try utils.getSanitizedName(node.inputs.items[0].?.name), - ")", - }); - } else { - input_tensor_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "&tensor_", try utils.getSanitizedName(node.inputs.items[0].?.name) }); - } - - // Handle axes - either from attribute, input tensor, or as null - var axes_str: []const u8 = "null"; - var needs_free = false; - - // First check if axes is defined as an attribute - if (axes_attr != null) { - // Create a static array from the axes attribute - const axes_array_name = try std.fmt.allocPrint(allocator, "axes_{s}", .{try utils.getSanitizedName(node.outputs.items[0].name)}); - defer allocator.free(axes_array_name); - - try writer.print( - \\ - \\ // Define axes array from attribute - \\ const {s} = [_]i64{{ - , .{axes_array_name}); - - for (axes_attr.?, 0..) 
|axis, i| { - if (i > 0) try writer.writeAll(", "); - try writer.print("{d}", .{axis}); - } - - try writer.print( - \\}}; - \\ - , .{}); - - axes_str = try std.fmt.allocPrint(allocator, "&{s}", .{axes_array_name}); - needs_free = true; - } - // If not found in attributes, check if provided as an input tensor - else if (node.inputs.items.len > 1) { - // Get axes from second input - const axes_name = try utils.getSanitizedName(node.inputs.items[1].?.name); - - if (node.inputs.items[1].?.tag == globals.TensorTag.INITIALIZER) { - // For initializer tensors, we need to extract the data directly - axes_str = try std.fmt.allocPrint(allocator, "(@ptrCast([*]const i64, param_lib.tensor_{s}.data.ptr))[0..param_lib.tensor_{s}.size]", .{ axes_name, axes_name }); - } else { - // For regular tensors - axes_str = try std.fmt.allocPrint(allocator, "(@ptrCast([*]const i64, tensor_{s}.data.ptr))[0..tensor_{s}.size]", .{ axes_name, axes_name }); - } - needs_free = true; - } - defer if (needs_free) allocator.free(axes_str); - - _ = try writer.print( - \\ - \\ tensMath.reduce_mean_lean( - \\ T, // type - \\ {s}, // input tensor - \\ &tensor_{s}, // output tensor - \\ {s}, // axes - \\ {s}, // keepdims - \\ {s} // noop_with_empty_axes - \\ ) - , .{ - input_tensor_string, - try utils.getSanitizedName(node.outputs.items[0].name), - axes_str, - if (keepdims) "true" else "false", - if (noop_with_empty_axes) "true" else "false", - }); -} - -inline fn write_ReLU(writer: std.fs.File.Writer, node: *ReadyNode) !void { - //node.inputs.items[0].? 
-> input - //node.outputs.items[0] -> output - - //----create tensor_A_string - var tensor_A_string: []u8 = undefined; - defer allocator.free(tensor_A_string); - if (node.inputs.items[0].?.tag == globals.TensorTag.INITIALIZER) { - tensor_A_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", - try utils.getSanitizedName(node.inputs.items[0].?.name), - ")", - }); - } else { - tensor_A_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "&tensor_", try utils.getSanitizedName(node.inputs.items[0].?.name) }); - } - - _ = try writer.print( - \\ - \\ - \\ tensMath.ReLU_lean(T, {s}, &tensor_{s}) - , .{ - tensor_A_string, - try utils.getSanitizedName(node.outputs.items[0].name), - }); -} - -inline fn write_elu(writer: std.fs.File.Writer, node: *ReadyNode) !void { - // https://onnx.ai/onnx/operators/onnx__Elu.html - // INPUTS: - // - X (heterogeneous) - T: Input tensor - // OUTPUTS: - // - Y (heterogeneous) - T: Output tensor - // ATTRIBUTES: - // - alpha - FLOAT (default is '1.0'): Coefficient of ELU operator - - var input_tensor_string: []u8 = undefined; - defer allocator.free(input_tensor_string); - if (node.inputs.items[0].?.tag == globals.TensorTag.INITIALIZER) { - input_tensor_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", - try utils.getSanitizedName(node.inputs.items[0].?.name), - ")", - }); - } else { - input_tensor_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(&tensor_", - try utils.getSanitizedName(node.inputs.items[0].?.name), - ")", - }); - } - - const tensor_type = try utils.getTypeString(globals.tensorHashMap.getPtr(node.inputs.items[0].?.name).?.tensorProto.?.data_type); - - // alpha attribute - var alpha: f32 = 1.0; - for (node.nodeProto.attribute) |attr| { - if (std.mem.eql(u8, attr.name, "alpha")) { - if (attr.type != AttributeType.FLOAT) { - return error.InvalidAttributeType; - } - alpha = attr.f; - } - } - - _ = try writer.print( - \\ - \\ 
tensMath.elu_lean( - \\ {s}, // type - \\ {s}, // input - \\ &tensor_{s}, // output - \\ {d} // alpha - \\ ) - , .{ - tensor_type, - input_tensor_string, - try utils.getSanitizedName(node.outputs.items[0].name), - alpha, - }); -} - -inline fn write_flatten(writer: std.fs.File.Writer, node: *ReadyNode) !void { - // https://onnx.ai/onnx/operators/onnx__Flatten.html - // INPUTS: - // - data (heterogeneous) - T: Input tensor of any shape. - // OUTPUTS: - // - output (heterogeneous) - T: Output tensor with shape [outer_dim, inner_dim]. - // ATTRIBUTES: - // - axis - INT (default is '1'): Indicate up to which input dimension should be flattened. - - //----create tensor_input_string - var tensor_input_string: []u8 = undefined; - defer allocator.free(tensor_input_string); - - if (node.inputs.items[0].?.tag == globals.TensorTag.INITIALIZER) { - tensor_input_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", - try utils.getSanitizedName(node.inputs.items[0].?.name), - ")", - }); - } else { - tensor_input_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(&tensor_", - try utils.getSanitizedName(node.inputs.items[0].?.name), - ")", - }); - } - - const tensor_type = try utils.getTypeString(globals.tensorHashMap.getPtr(node.inputs.items[0].?.name).?.tensorProto.?.data_type); - - _ = try writer.print( - \\ - \\ tensMath.flatten_lean( - \\ {s}, // type - \\ {s}, // input - \\ &tensor_{s}, // output - \\ ) - , .{ - tensor_type, - tensor_input_string, - try utils.getSanitizedName(node.outputs.items[0].name), - }); -} - -inline fn write_squeeze(writer: std.fs.File.Writer, node: *ReadyNode) !void { - - // Squeeze - 23 : https://onnx.ai/onnx/operators/onnx__Squeeze.html - // Inputs: - // - data (heterogeneous) - T: Tensors with at least max(dims) dimensions - // - axes (optional, heterogeneous) - tensor(int64): List of integers indicating the dimensions to squeeze - // Negative value means counting dimensions from the back - 
// Accepted range is [-r, r-1] where r = rank(data) - // Outputs: - // - squeezed (heterogeneous) - T: Reshaped tensor with same data as input - - var tensor_input_string: []u8 = undefined; - defer allocator.free(tensor_input_string); - - if (node.inputs.items[0].?.tag == globals.TensorTag.INITIALIZER) { - tensor_input_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", - try utils.getSanitizedName(node.inputs.items[0].?.name), - ")", - }); - } else { - tensor_input_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(&tensor_", - try utils.getSanitizedName(node.inputs.items[0].?.name), - ")", - }); - } - - var tensor_type: []const u8 = undefined; - const input_ready_tensor = globals.tensorHashMap.getPtr(node.inputs.items[0].?.name); - - if (input_ready_tensor) |rt| { - if (rt.tensorProto) |tp| { - tensor_type = try utils.getTypeString(tp.data_type); - } else { - // Fallback if tensorProto is null - mathHandler_log.warn("Warning: tensorProto is null for Squeeze input '{s}'. Falling back to f32 type.\n", .{node.inputs.items[0].?.name}); - tensor_type = "f32"; - } - } else { - // Fallback if ReadyTensor is not found in the map - mathHandler_log.warn("Warning: ReadyTensor not found for Squeeze input '{s}'. Falling back to f32 type.\n", .{node.inputs.items[0].?.name}); - tensor_type = "f32"; - } - - _ = try writer.print( - \\ - \\ tensMath.squeeze_lean( - \\ {s}, // type - \\ {s}, // input - \\ &tensor_{s}, // output - \\ ) - , .{ - tensor_type, - tensor_input_string, - try utils.getSanitizedName(node.outputs.items[0].name), - }); -} - -inline fn write_reshape(writer: std.fs.File.Writer, node: *ReadyNode) !void { - // https://onnx.ai/onnx/operators/onnx__Reshape.html - // Inputs: - // - data (T): An input tensor. - // - shape (tensor(int64)): Specifies the output shape. - // Attributes: - // - allowzero (int, default 0): DEPRECATED. If true (non-zero), the output shape can contain 0. 
- // - shape (ints): Alternative way to provide shape (used if input 'shape' is not provided). - // REMOVED: const T = node.outputs.items[0].tensorProto.?.dataType; // T is not needed by reshape_lean - - // Find allowzero attribute (deprecated but might exist) - var allowzer0: bool = false; - var shape_attribute: ?[]const i64 = null; - - for (node.nodeProto.attribute) |attr| { - if (std.mem.eql(u8, attr.name, "allowzero")) { - if (attr.type == AttributeType.INT) allowzer0 = attr.i != 0; - } else if (std.mem.eql(u8, attr.name, "shape")) { - if (attr.type == AttributeType.INTS) shape_attribute = attr.ints; - } - } - - // Input tensor string creation - const sanitized_input_name = try utils.getSanitizedName(node.inputs.items[0].?.name); - const input_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - if (globals.tensorHashMap.getPtr(node.inputs.items[0].?.name).?.tag == globals.TensorTag.INITIALIZER) "param_lib." else "", - "tensor_", - sanitized_input_name, - }); - defer allocator.free(input_string); - - // Shape slice generation logic - var shape_slice_code = std.ArrayList(u8).init(allocator); - defer shape_slice_code.deinit(); - const output_sanitized_name = try utils.getSanitizedName(node.outputs.items[0].name); - var shape_from_attr = false; // Track source of shape - - if (shape_attribute) |attr_shape| { - shape_from_attr = true; - // Shape from attribute - // Generate code like: const shape_slice_ = [_]isize{ val1, val2, ... }; - try shape_slice_code.writer().print("const shape_slice_{s} = [_]isize{{", .{output_sanitized_name}); - for (attr_shape, 0..) 
|val, i| { - try shape_slice_code.writer().print("{s}{}", .{ if (i > 0) ", " else "", val }); - } - try shape_slice_code.writer().print("}};", .{}); - } else { - // Shape from input tensor - if (node.inputs.items.len < 2) { - mathHandler_log.warn("ERROR: Reshape node '{s}' requires a 'shape' attribute or a second input tensor, but neither was found during code generation.", .{node.nodeProto.name orelse "-"}); - return error.ShapeNotFound; - } - const shape_input_tensor = node.inputs.items[1].?; - const sanitized_shape_name = try utils.getSanitizedName(shape_input_tensor.name); - const shape_tensor_name = try std.mem.concat(allocator, u8, &[_][]const u8{ - if (globals.tensorHashMap.getPtr(shape_input_tensor.name).?.tag == globals.TensorTag.INITIALIZER) "param_lib." else "", - "tensor_", - sanitized_shape_name, - }); - defer allocator.free(shape_tensor_name); - - // Generate code to convert tensor data to isize slice - try shape_slice_code.writer().print( - \\ // Convert shape tensor data to isize slice - \\ // Pass the local allocator to the utils function - \\ const shape_slice_{s} = utils.sliceToIsizeSlice(allocator, {s}.data); // Removed catch return - \\ defer allocator.free(shape_slice_{s}); // Free the runtime allocated slice - , .{ - output_sanitized_name, // Use output name for uniqueness - shape_tensor_name, - output_sanitized_name, - }); - } - - const input_ready_tensor = globals.tensorHashMap.getPtr(node.inputs.items[0].?.name) orelse return error.TensorNotFound; - const input_type_string = try utils.getTypeString(input_ready_tensor.dtype); - - // Pre-build complex arguments for the format string - const shape_slice_var_name = try std.fmt.allocPrint(allocator, "shape_slice_{s}", .{output_sanitized_name}); - defer allocator.free(shape_slice_var_name); - const shape_slice_arg = try std.fmt.allocPrint(allocator, "{s}{s}", .{ if (shape_from_attr) "&" else "", shape_slice_var_name }); - defer allocator.free(shape_slice_arg); - - const output_tensor_arg = try 
std.fmt.allocPrint(allocator, "&tensor_{s}", .{output_sanitized_name}); - defer allocator.free(output_tensor_arg); - - // Generate the final call using pre-built arguments - _ = try writer.print( - \\ - \\ - \\ // Reshape Operation for {s} - \\ {s} // Generated shape slice code - \\ - \\ tensMath.reshape_lean( - \\ {s}, // Use actual input tensor type - \\ @constCast(&{s}), - \\ {s}, // Pre-built shape slice argument - \\ {s}, // Format boolean correctly - \\ {s} // Pre-built output tensor argument - \\ ) - , .{ - node.nodeProto.name orelse "-", // Arg 1 for op name - shape_slice_code.items, // Arg 2 for shape code - input_type_string, // Arg 3 for input type - input_string, // Arg 4 for input tensor - shape_slice_arg, // Arg 5 for shape slice - if (allowzer0) "true" else "false", // Arg 6 for allowzero - output_tensor_arg, // Arg 7 for output tensor - }); -} - -inline fn write_sigmoid(writer: std.fs.File.Writer, node: *ReadyNode) !void { - //node.inputs.items[0].? -> input - //node.outputs.items[0] -> output - - //----create tensor_A_string - var tensor_A_string: []u8 = undefined; - defer allocator.free(tensor_A_string); - if (node.inputs.items[0].?.tag == globals.TensorTag.INITIALIZER) { - tensor_A_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", - try utils.getSanitizedName(node.inputs.items[0].?.name), - ")", - }); - } else { - tensor_A_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "&tensor_", try utils.getSanitizedName(node.inputs.items[0].?.name) }); - } - - _ = try writer.print( - \\ - \\ - \\ tensMath.sigmoid_lean(T, {s}, &tensor_{s}) - , .{ - tensor_A_string, - try utils.getSanitizedName(node.outputs.items[0].name), - }); -} - -const Converter = zant.utils.type_converter; - -/// Helper function to cast tensor data to i64 array -fn castTensorDataToI64Array(tensor_string: []const u8) ![]const u8 { - // Create a temporary array and initialize with the tensor data - return try std.fmt.allocPrint(allocator, 
- \\blk: {{ - \\ const data_slice = {s}; - \\ // Define the result array in one go, directly applying the conversion - \\ var temp_i64_arr = allocator.alloc(i64, data_slice.len) catch return; - \\ for (data_slice, 0..) |val, i| {{ - \\ temp_i64_arr[i] = if (@typeInfo(@TypeOf(val)) == .int) val else @intFromFloat(val); - \\ }} - \\ // The result array will be managed by the caller - \\ break :blk temp_i64_arr; - \\}} - , .{tensor_string}); -} - -inline fn write_slice(writer: std.fs.File.Writer, node: *ReadyNode) !void { - // https://onnx.ai/onnx/operators/onnx__Slice.html - // INPUTS: - // - input (heterogeneous) - T: Tensor of data to extract slices from. - // - starts (heterogeneous) - T1: 1-D tensor of starting indices of corresponding axis in `axes`. - // - ends (heterogeneous) - T1: 1-D tensor of ending indices (exclusive) of corresponding axis in `axes`. - // - axes (heterogeneous) - T1: 1-D tensor of axes that `starts` and `ends` apply to. - // - steps (heterogeneous) - T1: 1-D tensor of slice step of corresponding axis in `axes`. - // OUTPUTS: - // - output (heterogeneous) - T: Sliced data tensor. 
- - // First, get the sanitized names for all tensors - const input_name = try utils.getSanitizedName(node.inputs.items[0].?.name); - const starts_name = try utils.getSanitizedName(node.inputs.items[1].?.name); - const ends_name = try utils.getSanitizedName(node.inputs.items[2].?.name); - const output_name = try utils.getSanitizedName(node.outputs.items[0].name); - - // Create input tensor string - var input_tensor_string: []u8 = undefined; - defer allocator.free(input_tensor_string); - if (node.inputs.items[0].?.tag == globals.TensorTag.INITIALIZER) { - input_tensor_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "@constCast(¶m_lib.tensor_", input_name, ")" }); - } else { - input_tensor_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "@constCast(&tensor_", input_name, ")" }); - } - - // Create starts tensor string - var starts_tensor_string: []u8 = undefined; - defer allocator.free(starts_tensor_string); - if (node.inputs.items[1].?.tag == globals.TensorTag.INITIALIZER) { - starts_tensor_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "param_lib.tensor_", starts_name, ".data" }); - } else { - starts_tensor_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "tensor_", starts_name, ".data" }); - } - - // Create cast code for starts - const starts_i64_code = try castTensorDataToI64Array(starts_tensor_string); - defer allocator.free(starts_i64_code); - - // Create ends tensor string - var ends_tensor_string: []u8 = undefined; - defer allocator.free(ends_tensor_string); - if (node.inputs.items[2].?.tag == globals.TensorTag.INITIALIZER) { - ends_tensor_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "param_lib.tensor_", ends_name, ".data" }); - } else { - ends_tensor_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "tensor_", ends_name, ".data" }); - } - - // Create cast code for ends - const ends_i64_code = try castTensorDataToI64Array(ends_tensor_string); - defer allocator.free(ends_i64_code); - - // Handle 
optional axes and steps inputs - var axes_str: []const u8 = "null"; - var axes_i64_code: []const u8 = "null"; - var steps_str: []const u8 = "null"; - var steps_i64_code: []const u8 = "null"; - - if (node.inputs.items.len > 3) { - const axes_name = try utils.getSanitizedName(node.inputs.items[3].?.name); - if (node.inputs.items[3].?.tag == globals.TensorTag.INITIALIZER) { - axes_str = try std.fmt.allocPrint(allocator, "param_lib.tensor_{s}.data", .{axes_name}); - } else { - axes_str = try std.fmt.allocPrint(allocator, "tensor_{s}.data", .{axes_name}); - } - axes_i64_code = try castTensorDataToI64Array(axes_str); - defer if (axes_str.len > 4) allocator.free(axes_str); - } - - if (node.inputs.items.len > 4) { - const steps_name = try utils.getSanitizedName(node.inputs.items[4].?.name); - if (node.inputs.items[4].?.tag == globals.TensorTag.INITIALIZER) { - steps_str = try std.fmt.allocPrint(allocator, "param_lib.tensor_{s}.data", .{steps_name}); - } else { - steps_str = try std.fmt.allocPrint(allocator, "tensor_{s}.data", .{steps_name}); - } - steps_i64_code = try castTensorDataToI64Array(steps_str); - defer if (steps_str.len > 4) allocator.free(steps_str); - } - - // Generate defer code for axes and steps - var axes_defer_code: []const u8 = ""; - var steps_defer_code: []const u8 = ""; - var axes_var_code: []const u8 = "null"; - var steps_var_code: []const u8 = "null"; - var axes_decl_code: []const u8 = ""; - var steps_decl_code: []const u8 = ""; - - if (axes_str.len > 4) { - axes_defer_code = try std.fmt.allocPrint(allocator, "defer allocator.free(axes_arr_{s});", .{output_name}); - axes_var_code = try std.fmt.allocPrint(allocator, "axes_arr_{s}", .{output_name}); - axes_decl_code = try std.fmt.allocPrint(allocator, "const axes_arr_{s} = {s};", .{ output_name, axes_i64_code }); - } - - if (steps_str.len > 4) { - steps_defer_code = try std.fmt.allocPrint(allocator, "defer allocator.free(steps_arr_{s});", .{output_name}); - steps_var_code = try 
std.fmt.allocPrint(allocator, "steps_arr_{s}", .{output_name}); - steps_decl_code = try std.fmt.allocPrint(allocator, "const steps_arr_{s} = {s};", .{ output_name, steps_i64_code }); - } - - defer { - if (axes_defer_code.len > 0) allocator.free(axes_defer_code); - if (steps_defer_code.len > 0) allocator.free(steps_defer_code); - if (axes_var_code.len > 4) allocator.free(axes_var_code); - if (steps_var_code.len > 4) allocator.free(steps_var_code); - if (axes_decl_code.len > 0) allocator.free(axes_decl_code); - if (steps_decl_code.len > 0) allocator.free(steps_decl_code); - } - - _ = try writer.print( - \\ - \\ - \\ // Allocate arrays for slice operation - \\ const starts_arr_{s} = {s}; - \\ const ends_arr_{s} = {s}; - \\ {s} - \\ {s} - \\ defer allocator.free(starts_arr_{s}); - \\ defer allocator.free(ends_arr_{s}); - \\ {s} - \\ {s} - \\ - \\ tensMath.slice_onnx_lean( - \\ T, //type - \\ {s}, //input tensor - \\ starts_arr_{s}, //starts (casted to i64) - \\ ends_arr_{s}, //ends (casted to i64) - \\ {s}, //axes (casted to i64 if not null) - \\ {s}, //steps (casted to i64 if not null) - \\ &tensor_{s}, //output tensor - \\ ) - , .{ - // Variable names with unique suffixes - output_name, - starts_i64_code, - output_name, - ends_i64_code, - // Only declare axes and steps if they're used - if (axes_decl_code.len > 0) axes_decl_code else "// no axes needed", - if (steps_decl_code.len > 0) steps_decl_code else "// no steps needed", - output_name, - output_name, - if (axes_defer_code.len > 0) axes_defer_code else "// no axes to free", - if (steps_defer_code.len > 0) steps_defer_code else "// no steps to free", - input_tensor_string, - output_name, - output_name, - axes_var_code, - steps_var_code, - output_name, - }); - - // Free any allocated memory for the i64 code strings - if (axes_i64_code.len > 4 and axes_str.len > 4) allocator.free(axes_i64_code); - if (steps_i64_code.len > 4 and steps_str.len > 4) allocator.free(steps_i64_code); -} - -inline fn write_softmax(writer: 
std.fs.File.Writer, node: *ReadyNode) !void { - //node.inputs.items[0].? -> input - //node.outputs.items[0] -> output - - //----create tensor_A_string - var tensor_A_string: []u8 = undefined; - defer allocator.free(tensor_A_string); - if (node.inputs.items[0].?.tag == globals.TensorTag.INITIALIZER) { - tensor_A_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", - try utils.getSanitizedName(node.inputs.items[0].?.name), - ")", - }); - } else { - tensor_A_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "&tensor_", try utils.getSanitizedName(node.inputs.items[0].?.name) }); - } - - _ = try writer.print( - \\ - \\ - \\ tensMath.softmax_lean(T, {s}, &tensor_{s}) - , .{ - tensor_A_string, - try utils.getSanitizedName(node.outputs.items[0].name), - }); -} - -inline fn write_sum(writer: std.fs.File.Writer, node: *ReadyNode) !void { - // https://onnx.ai/onnx/operators/onnx__Sum.html - // INPUTS: - // - list of tensors - // OUTPUTS: - // - sum (heterogeneous) - T: Output tensor. - - //Writing the tensor list with all the inputs - _ = try writer.print( - \\ - \\ - \\ const my_tensor_list = [_]*const Tensor(T){{ - , .{}); - - for (node.inputs.items, 0..) |tens, idx| { - if (idx > 0) { - _ = try writer.print(", ", .{}); - } - - var new_tensor_string: []u8 = undefined; - const sanitized_tensor_name = try utils.getSanitizedName(tens.?.name); - - new_tensor_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - if (globals.tensorHashMap.getPtr(tens.?.name).?.tag == globals.TensorTag.INITIALIZER) "param_lib." 
else "", - "tensor_", - sanitized_tensor_name, - }); - - _ = try writer.print( - \\{s} - , .{try utils.getSanitizedName(new_tensor_string)}); - } - - _ = try writer.print("}}", .{}); - - _ = try writer.print( - \\ - \\ tensMath.sum_tensor_list_lean(T, T, &my_tensor_list, &tensor_{s}) - , .{try utils.getSanitizedName(node.outputs.items[0].name)}); -} - -inline fn write_shape(writer: std.fs.File.Writer, node: *ReadyNode) !void { - // https://onnx.ai/onnx/operators/onnx__Shape.html - // INPUTS: - // - data (heterogeneous) - T: An input tensor. - // OUTPUTS: - // - shape (heterogeneous) - T1: Shape of the input tensor - // ATTRIBUTES: - // - start - INT: First dimension to take - // - end - INT: Last dimension to take - - var start: ?i64 = null; - var end: ?i64 = null; - - for (node.nodeProto.attribute) |attr| { - if (std.mem.eql(u8, attr.name, "start")) { - if (attr.type == AttributeType.INT) start = attr.i; - } else if (std.mem.eql(u8, attr.name, "end")) { - if (attr.type == AttributeType.INT) end = attr.i; - } - } - - //----create tensor_A_string - var tensor_A_string: []u8 = undefined; - defer allocator.free(tensor_A_string); - if (node.inputs.items[0].?.tag == globals.TensorTag.INITIALIZER) { - tensor_A_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "¶m_lib.tensor_", try utils.getSanitizedName(node.inputs.items[0].?.name) }); - } else { - tensor_A_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "&tensor_", try utils.getSanitizedName(node.inputs.items[0].?.name) }); - } - - _ = try writer.print( - \\ - \\ tensMath.shape_onnx_lean( - \\ T, - \\ T, //type - \\ @constCast({s}), //input tensor - \\ {s}, //start - \\ {s}, //end - \\ &tensor_{s}, //output tensor, - \\ ) - , .{ - tensor_A_string, - if (start) |s| try std.fmt.allocPrint(allocator, "{}", .{s}) else "null", - if (end) |e| try std.fmt.allocPrint(allocator, "{}", .{e}) else "null", - try utils.getSanitizedName(node.outputs.items[0].name), - }); -} - -inline fn write_unsqueeze(writer: 
std.fs.File.Writer, node: *ReadyNode) !void { - // https://onnx.ai/onnx/operators/onnx__Unsqueeze.html - // INPUTS: - // - data (heterogeneous) - T: Original tensor - // - axes (optional) - tensor(int64): List of integers indicating the dimensions to be inserted. - // Negative value means counting dimensions from the back. - // OUTPUTS: - // - expanded (heterogeneous) - T: Reshaped tensor with same data as input. - // ATTRIBUTES (deprecated in opset 13): - // - axes - INTS: List of integers indicating the dimensions to be inserted. - - if (node.inputs.items[0]) |input_tensor| { - const input_name = try utils.getSanitizedName(input_tensor.name); - const output_name = try utils.getSanitizedName(node.outputs.items[0].name); - - // Create input tensor string - var input_tensor_string: []u8 = undefined; - defer allocator.free(input_tensor_string); - if (input_tensor.tag == globals.TensorTag.INITIALIZER) { - input_tensor_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "@constCast(¶m_lib.tensor_", input_name, ")" }); - } else { - input_tensor_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "@constCast(&tensor_", input_name, ")" }); - } - - // Determine if axes is provided as an input tensor or as an attribute - var axes_str: []const u8 = "null"; - var needs_free = false; - - if (node.inputs.items.len > 1) { - // Axes is provided as an input tensor (opset 13+) - const axes_tensor_name = try utils.getSanitizedName(node.inputs.items[1].?.name); - if (node.inputs.items[1].?.tag == globals.TensorTag.INITIALIZER) { - axes_str = try std.fmt.allocPrint(allocator, "@constCast(¶m_lib.tensor_{s})", .{axes_tensor_name}); - } else { - axes_str = try std.fmt.allocPrint(allocator, "&tensor_{s}", .{axes_tensor_name}); - } - needs_free = true; - } else { - // Axes is provided as an attribute (opset < 13) - for (node.nodeProto.attribute) |attr| { - if (std.mem.eql(u8, attr.name, "axes")) { - if (attr.type == AttributeType.INTS) { - axes_str = try 
utils.i64ToI64ArrayString(attr.ints); - needs_free = true; - break; - } - } - } - } - - defer if (needs_free) allocator.free(axes_str); - - // Generate code for the unsqueeze operation - try writer.print( - \\ - \\ tensMath.unsqueeze_lean( - \\ T, //type - \\ {s}, //input tensor - \\ {s}, //axes tensor - \\ &tensor_{s}, //output tensor - \\ ) - , .{ - input_tensor_string, //input tensor - axes_str, //axes tensor - output_name, //output tensor - }); - } else { - return error.InvalidInput; - } -} - -inline fn write_transpose(writer: std.fs.File.Writer, node: *ReadyNode) !void { - // https://onnx.ai/onnx/operators/onnx__Transpose.html - // INPUTS: - // - data (heterogeneous) - T: An input tensor. - // OUTPUTS: - // - transposed (heterogeneous) - T: Transposed output. - // ATTRIBUTES: - // - perm - INTS: A list of integers. By default, reverse the dimensions, - // otherwise permute the axes according to the values given. - - // Get the perm attribute if it exists - var perm_str: []const u8 = "null"; - for (node.nodeProto.attribute) |attr| { - if (std.mem.eql(u8, attr.name, "perm")) { - if (attr.type == AttributeType.INTS) { - perm_str = try utils.i64SliceToUsizeArrayString(attr.ints); - } - } - } - - //----create tensor_A_string - var tensor_A_string: []u8 = undefined; - defer allocator.free(tensor_A_string); - if (node.inputs.items[0].?.tag == globals.TensorTag.INITIALIZER) { - tensor_A_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "¶m_lib.tensor_", try utils.getSanitizedName(node.inputs.items[0].?.name) }); - } else { - tensor_A_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "&tensor_", try utils.getSanitizedName(node.inputs.items[0].?.name) }); - } - - _ = try writer.print( - \\ - \\ - \\ tensMath.transpose_onnx_lean( - \\ T, //type - \\ @constCast({s}), //input tensor - \\ {s}, //perm - \\ &tensor_{s}, //output tensor - \\ allocator, // pass the local allocator instance - \\ ) - , .{ - tensor_A_string, // Input tensor - perm_str, // 
Permutation array - try utils.getSanitizedName(node.outputs.items[0].name), // Output tensor - }); -} - -inline fn write_floor(writer: std.fs.File.Writer, node: *ReadyNode) !void { - // https://onnx.ai/onnx/operators/onnx__Floor.html - // INPUTS: - // - X (heterogeneous) - T: Input tensor - // OUTPUTS: - // - Y (heterogeneous) - T: Output tensor with floor of input elements (If x is integral, +0, -0, NaN, or infinite, x itself is returned) - - // Create input tensor string - var input_tensor_string: []u8 = undefined; - defer allocator.free(input_tensor_string); - - if (node.inputs.items[0].?.tag == globals.TensorTag.INITIALIZER) { - input_tensor_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", - try utils.getSanitizedName(node.inputs.items[0].?.name), - ")", - }); - } else { - input_tensor_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "&tensor_", try utils.getSanitizedName(node.inputs.items[0].?.name) }); - } - - _ = try writer.print( - \\ - \\ - \\ tensMath.floor_lean(T, {s}, &tensor_{s}) - , .{ - input_tensor_string, - try utils.getSanitizedName(node.outputs.items[0].name), - }); -} - -inline fn write_gelu(writer: std.fs.File.Writer, node: *ReadyNode) !void { - var approximate: []const u8 = "none"; - for (node.nodeProto.attribute) |attr| { - if (std.mem.eql(u8, attr.name, "approximate")) { - if (attr.type == AttributeType.STRING) approximate = attr.s; - } - } - - var input_tensor_string: []u8 = undefined; - defer allocator.free(input_tensor_string); - if (node.inputs.items[0].?.tag == globals.TensorTag.INITIALIZER) { - input_tensor_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", - try utils.getSanitizedName(node.inputs.items[0].?.name), - ")", - }); - } else { - input_tensor_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "&tensor_", try utils.getSanitizedName(node.inputs.items[0].?.name) }); - } - - _ = try writer.print( - \\ - \\ tensMath.gelu_lean(T, {s}, 
"{s}", &tensor_{s}) - , .{ - input_tensor_string, - approximate, - try utils.getSanitizedName(node.outputs.items[0].name), - }); -} - -inline fn write_tanh(writer: std.fs.File.Writer, node: *ReadyNode) !void { - // https://onnx.ai/onnx/operators/onnx__Tanh.html - // INPUTS: - // - X (heterogeneous) - T: Input tensor - // OUTPUTS: - // - Y (heterogeneous) - T: Output tensor with hyperbolic tangent of input elements - - // Create input tensor string - var input_tensor_string: []u8 = undefined; - defer allocator.free(input_tensor_string); - - if (node.inputs.items[0].?.tag == globals.TensorTag.INITIALIZER) { - input_tensor_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", - try utils.getSanitizedName(node.inputs.items[0].?.name), - ")", - }); - } else { - input_tensor_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "&tensor_", try utils.getSanitizedName(node.inputs.items[0].?.name) }); - } - - _ = try writer.print( - \\ - \\ - \\ tensMath.tanh_lean(T, {s}, &tensor_{s}) - , .{ - input_tensor_string, - try utils.getSanitizedName(node.outputs.items[0].name), - }); -} - -inline fn write_sqrt(writer: std.fs.File.Writer, node: *ReadyNode) !void { - var input_tensor_string: []u8 = undefined; - defer allocator.free(input_tensor_string); - - if (node.inputs.items[0].?.tag == globals.TensorTag.INITIALIZER) { - input_tensor_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", - try utils.getSanitizedName(node.inputs.items[0].?.name), - ")", - }); - } else { - input_tensor_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "&tensor_", try utils.getSanitizedName(node.inputs.items[0].?.name) }); - } - - _ = try writer.print( - \\ - \\ - \\ tensMath.sqrt_lean(T, {s}, &tensor_{s}) - , .{ - input_tensor_string, - try utils.getSanitizedName(node.outputs.items[0].name), - }); -} - -inline fn write_ceil(writer: std.fs.File.Writer, node: *ReadyNode) !void { - // 
https://onnx.ai/onnx/operators/onnx__Ceil.html - // INPUTS: - // - X (heterogeneous) - T: Input tensor - // OUTPUTS: - // - Y (heterogeneous) - T: Output tensor with ceiling of input elements - - // Create input tensor string - var input_tensor_string: []u8 = undefined; - defer allocator.free(input_tensor_string); - - if (node.inputs.items[0].?.tag == globals.TensorTag.INITIALIZER) { - input_tensor_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", - try utils.getSanitizedName(node.inputs.items[0].?.name), - ")", - }); - } else { - input_tensor_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "&tensor_", try utils.getSanitizedName(node.inputs.items[0].?.name) }); - } - - _ = try writer.print( - \\ - \\ - \\ tensMath.ceil_lean(T, {s}, &tensor_{s}) - , .{ - input_tensor_string, - try utils.getSanitizedName(node.outputs.items[0].name), - }); -} - -inline fn write_identity(writer: std.fs.File.Writer, node: *ReadyNode) !void { - // https://onnx.ai/onnx/operators/onnx__Identity.html - // INPUTS: - // - input (heterogeneous) - T: Input tensor - // OUTPUTS: - // - output (heterogeneous) - T: Tensor with same shape and contents as input - - // Create input tensor string - var input_tensor_string: []u8 = undefined; - defer allocator.free(input_tensor_string); - - if (node.inputs.items[0].?.tag == globals.TensorTag.INITIALIZER) { - input_tensor_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", - try utils.getSanitizedName(node.inputs.items[0].?.name), - ")", - }); - } else { - input_tensor_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "&tensor_", try utils.getSanitizedName(node.inputs.items[0].?.name) }); - } - - _ = try writer.print( - \\ - \\ - \\ tensMath.identity_lean(T, {s}, &tensor_{s}) - , .{ - input_tensor_string, - try utils.getSanitizedName(node.outputs.items[0].name), - }); -} - -inline fn write_leaky_relu(writer: std.fs.File.Writer, node: *ReadyNode) !void { - // 
https://onnx.ai/onnx/operators/onnx__LeakyRelu.html - // INPUTS: - // - X (heterogeneous) - T: Input tensor - // OUTPUTS: - // - Y (heterogeneous) - T: Output tensor - // ATTRIBUTES: - // - alpha (float, default is 0.01): Coefficient of leakage - - // Get alpha attribute, default to 0.01 if not specified - var alpha: f32 = 0.01; - for (node.nodeProto.attribute) |attr| { - if (std.mem.eql(u8, attr.name, "alpha")) { - if (attr.type == AttributeType.FLOAT) alpha = attr.f; - } - } - - // Create input tensor string - var input_tensor_string: []u8 = undefined; - defer allocator.free(input_tensor_string); - - if (node.inputs.items[0].?.tag == globals.TensorTag.INITIALIZER) { - input_tensor_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", - try utils.getSanitizedName(node.inputs.items[0].?.name), - ")", - }); - } else { - input_tensor_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "&tensor_", try utils.getSanitizedName(node.inputs.items[0].?.name) }); - } - - _ = try writer.print( - \\ - \\ tensMath.leakyReLU_lean(T, {s}, {d}, &tensor_{s}) - , .{ - input_tensor_string, - alpha, - try utils.getSanitizedName(node.outputs.items[0].name), - }); -} - -inline fn write_split(writer: std.fs.File.Writer, node: *ReadyNode) !void { - // https://onnx.ai/onnx/operators/onnx__Split.html - // INPUTS: - // - input (heterogeneous) - T: The tensor to split - // - split (optional, heterogeneous) - tensor(int64): Optional tensor specifying the size of each split - // OUTPUTS: - // - outputs (variadic, heterogeneous) - T: One or more outputs forming splits of the input - // ATTRIBUTES: - // - axis (int, default is 0): Which axis to split on - // - split (list of ints, deprecated): Length of each output. 
This attribute is deprecated in favor of the 'split' input - - // Get axis attribute (default is 0) - var axis: i64 = 0; - var split_sizes_attr: ?[]i64 = null; - - for (node.nodeProto.attribute) |attr| { - if (std.mem.eql(u8, attr.name, "axis")) { - if (attr.type == AttributeType.INT) axis = attr.i; - } else if (std.mem.eql(u8, attr.name, "split")) { - if (attr.type == AttributeType.INTS) split_sizes_attr = attr.ints; - } - } - - // Create input tensor string - var input_tensor_string: []u8 = undefined; - defer allocator.free(input_tensor_string); - - if (node.inputs.items[0].?.tag == globals.TensorTag.INITIALIZER) { - input_tensor_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", - try utils.getSanitizedName(node.inputs.items[0].?.name), - ")", - }); - } else { - input_tensor_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "&tensor_", try utils.getSanitizedName(node.inputs.items[0].?.name) }); - } - - // Handle split sizes - either from input tensor or attribute - var split_sizes_str: []const u8 = "null"; - var needs_free = false; - - if (node.inputs.items.len > 1 and node.inputs.items[1].?.tensorProto != null) { - // Split sizes from input tensor (opset 13+) - const output_name = try utils.getSanitizedName(node.outputs.items[0].name); - - // Extract split sizes from the input tensor - try writer.print( - \\ - \\ // Extract split sizes from the input tensor - , .{}); - - // For initializers, access directly from the parameter library - if (node.inputs.items[1].?.tag == globals.TensorTag.INITIALIZER) { - try writer.print( - \\ - \\ const split_sizes_tensor_{0s} = param_lib.tensor_{1s}; - \\ var split_sizes_{0s} = allocator.alloc(usize, split_sizes_tensor_{0s}.size) catch @panic("Out of memory"); - \\ defer allocator.free(split_sizes_{0s}); - \\ - \\ // Convert int64 data to usize - \\ for (split_sizes_tensor_{0s}.data, 0..) 
|val, i| {{ - \\ split_sizes_{0s}[i] = @as(usize, @intFromFloat(val)); - \\ }} - , .{ output_name, try utils.getSanitizedName(node.inputs.items[1].?.name) }); - } else { - try writer.print( - \\ - \\ const split_sizes_tensor_{0s} = tensor_{1s}; - \\ var split_sizes_{0s} = allocator.alloc(usize, split_sizes_tensor_{0s}.size) catch @panic("Out of memory"); - \\ defer allocator.free(split_sizes_{0s}); - \\ - \\ // Convert int64 data to usize - \\ for (split_sizes_tensor_{0s}.data, 0..) |val, i| {{ - \\ split_sizes_{0s}[i] = @as(usize, @intFromFloat(val)); - \\ }} - , .{ output_name, try utils.getSanitizedName(node.inputs.items[1].?.name) }); - } - - split_sizes_str = try std.fmt.allocPrint(allocator, "split_sizes_{s}", .{output_name}); - needs_free = true; - } else if (split_sizes_attr != null) { - // Split sizes from attribute (deprecated but still supported) - const split_array_name = try std.fmt.allocPrint(allocator, "split_sizes_{s}", .{try utils.getSanitizedName(node.outputs.items[0].name)}); - defer allocator.free(split_array_name); - - try writer.print( - \\ - \\ // Define split sizes array - \\ const {s} = [_]i64{{ - , .{split_array_name}); - - for (split_sizes_attr.?, 0..) |size, i| { - if (i > 0) try writer.writeAll(", "); - try writer.print("{d}", .{size}); - } - - try writer.print( - \\}}; - \\ - , .{}); - - split_sizes_str = try std.fmt.allocPrint(allocator, "&{s}", .{split_array_name}); - needs_free = true; - } - defer if (needs_free) allocator.free(split_sizes_str); - - // Create a different approach that works with the expected types - try writer.print( - \\ - \\ // Create array for output tensor pointers to store final results - \\ var output_ptrs_{s} = [_]*Tensor(T){{ - , .{try utils.getSanitizedName(node.outputs.items[0].name)}); - - for (node.outputs.items, 0..) 
|output, i| { - if (i > 0) try writer.writeAll(", "); - try writer.print("&tensor_{s}", .{try utils.getSanitizedName(output.name)}); - } - - try writer.print( - \\}}; - \\ - \\ // Create temporary tensors that split_lean can operate on - \\ var temp_tensors_{0s} = allocator.alloc(Tensor(T), {1d}) catch @panic("Out of memory"); - \\ defer {{ - \\ for (temp_tensors_{0s}) |*t| t.deinit(); - \\ allocator.free(temp_tensors_{0s}); - \\ }} - \\ - \\ // Initialize the temporary tensors - \\ for (temp_tensors_{0s}) |*t| {{ - \\ t.* = Tensor(T).init(&allocator) catch @panic("Failed to initialize tensor"); - \\ }} - , .{ try utils.getSanitizedName(node.outputs.items[0].name), node.outputs.items.len }); - - // Convert split sizes to usize if provided - if (!std.mem.eql(u8, split_sizes_str, "null")) { - try writer.print( - \\ - \\ // Call split_lean with the extracted split sizes - \\ tensMath.split_lean(T, {2s}, {3d}, {1s}, &temp_tensors_{0s}) catch unreachable; - , .{ try utils.getSanitizedName(node.outputs.items[0].name), split_sizes_str, input_tensor_string, axis }); - } else { - // Get the proper axis value string - const axis_str = if (axis < 0) - try std.fmt.allocPrint(allocator, "@intCast((@as(i64, @intCast({s}.shape.len)) + {d}) %% @as(i64, @intCast({s}.shape.len)))", .{ input_tensor_string, axis, input_tensor_string }) - else - try std.fmt.allocPrint(allocator, "{d}", .{axis}); - defer allocator.free(axis_str); - - try writer.print( - \\ - \\ // Create default split size array for evenly dividing the tensor - \\ const dim_size = {0s}.shape[{1s}]; - \\ const num_splits = {2d}; - \\ if (dim_size % num_splits != 0) @panic("Cannot evenly split dimension"); - \\ const split_size = dim_size / num_splits; - \\ - \\ const default_split_sizes_{4s} = allocator.alloc(usize, num_splits) catch @panic("Out of memory"); - \\ defer allocator.free(default_split_sizes_{4s}); - \\ for (default_split_sizes_{4s}) |*split_size_item| {{ - \\ split_size_item.* = split_size; - \\ }} - \\ - \\ 
// Call split_lean with default split sizes - \\ tensMath.split_lean(T, {0s}, {3d}, default_split_sizes_{4s}, &temp_tensors_{4s}) catch unreachable; - , .{ input_tensor_string, axis_str, node.outputs.items.len, axis, try utils.getSanitizedName(node.outputs.items[0].name) }); - } - - // Now copy the data from temp_tensors to the output tensors - try writer.print( - \\ - \\ // Copy data to existing output tensor arrays - \\ for (temp_tensors_{0s}, 0..) |*src, i| {{ - \\ // Copy data directly to the existing array - \\ const size_to_copy = @min(src.size, output_ptrs_{0s}[i].size); - \\ if (size_to_copy > 0) {{ - \\ @memcpy(output_ptrs_{0s}[i].data[0..size_to_copy], src.data[0..size_to_copy]); - \\ }} - \\ - \\ // Shape is pre-allocated statically, just update if needed - \\ const shape_size_to_copy = @min(src.shape.len, output_ptrs_{0s}[i].shape.len); - \\ if (shape_size_to_copy > 0) {{ - \\ @memcpy(output_ptrs_{0s}[i].shape[0..shape_size_to_copy], src.shape[0..shape_size_to_copy]); - \\ }} - \\ - \\ // Update the size - \\ output_ptrs_{0s}[i].size = src.size; - \\ }} - , .{try utils.getSanitizedName(node.outputs.items[0].name)}); - - // End with a function that returns an error union - try writer.writeAll( - \\ - \\ // Final dummy operation that returns an error union - \\ _ = @import("std").fmt.bufPrint(&[_]u8{}, "", .{}) - ); -} - -inline fn write_resize(writer: std.fs.File.Writer, node: *ReadyNode) !void { - // https://onnx.ai/onnx/operators/onnx__Resize.html - // INPUTS: - // - X (heterogeneous) - T: Input tensor - // - roi (optional) - T2: ROI (region of interest) tensor - // - scales (optional, heterogeneous) - tensor(float): The scale array along each dimension - // - sizes (optional, heterogeneous) - tensor(int64): Target size of the output tensor - // OUTPUTS: - // - Y (heterogeneous) - T: Resized output tensor - // ATTRIBUTES: - // - antialias - INT (default is '0') - // - axes - INTS - // - coordinate_transformation_mode - STRING (default is 'half_pixel') 
- // - cubic_coeff_a - FLOAT (default is '-0.75') - // - exclude_outside - INT (default is '0') - // - extrapolation_value - FLOAT (default is '0.0') - // - keep_aspect_ratio_policy - STRING (default is 'stretch') - // - mode - STRING (default is 'nearest') - // - nearest_mode - STRING (default is 'round_prefer_floor') - // - - //----create tensor_X_string - var tensor_X_string: []u8 = undefined; - defer allocator.free(tensor_X_string); - - if (node.inputs.items[0].?.tag == globals.TensorTag.INITIALIZER) { - tensor_X_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", - try utils.getSanitizedName(node.inputs.items[0].?.name), - ")", - }); - } else { - tensor_X_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "&tensor_", try utils.getSanitizedName(node.inputs.items[0].?.name) }); - } - - // ---- optional inputs - var tensor_roi_string: []const u8 = try allocator.dupe(u8, "null"); - defer { - if (node.inputs.items.len >= 2 and node.inputs.items[1] != null) { - allocator.free(tensor_roi_string); - } - } - var data_scales_string: []const u8 = try allocator.dupe(u8, "null"); - defer { - if (node.inputs.items.len >= 3 and node.inputs.items[2] != null) { - allocator.free(data_scales_string); - } - } - var data_sizes_string: []const u8 = try allocator.dupe(u8, "null"); - defer { - if (node.inputs.items.len >= 4 and node.inputs.items[3] != null) { - allocator.free(data_sizes_string); - } - } - - if (node.inputs.items.len >= 2 and node.inputs.items[1] != null) { //----create tensor_roi_string - if (node.inputs.items[1].?.tag == globals.TensorTag.INITIALIZER) { - tensor_roi_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", - try utils.getSanitizedName(node.inputs.items[1].?.name), - ")", - }); - } else { - tensor_roi_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "&tensor_", try utils.getSanitizedName(node.inputs.items[1].?.name) }); - } - } - - if (node.inputs.items.len >= 3 
and node.inputs.items[2] != null) { //----create tensor_scales_string - if (node.inputs.items[2].?.tag == globals.TensorTag.INITIALIZER) { - data_scales_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "param_lib.tensor_", - try utils.getSanitizedName(node.inputs.items[2].?.name), - ".data", - }); - } else { - data_scales_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "tensor_", try utils.getSanitizedName(node.inputs.items[2].?.name), ".data" }); - } - } - - if (node.inputs.items.len >= 4 and node.inputs.items[3] != null) { //----create tensor_sizes_string - if (node.inputs.items[3].?.tag == globals.TensorTag.INITIALIZER) { - data_sizes_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "param_lib.tensor_", - try utils.getSanitizedName(node.inputs.items[3].?.name), - ".data", - }); - } else { - data_sizes_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "tensor_", try utils.getSanitizedName(node.inputs.items[3].?.name), ".data" }); - } - } - - // ---- gasthering ATTRIBUTES from protoNode - var antialias: i64 = 0; - var axes: []i64 = &[_]i64{}; - defer allocator.free(axes); - var coordinate_transformation_mode: []const u8 = try allocator.dupe(u8, "half_pixel"); - - var cubic_coeff_a: f64 = -0.75; - var exclude_outside: i64 = 0; - var extrapolation_value: f64 = 0.0; - var keep_aspect_ratio_policy: []const u8 = try allocator.dupe(u8, "stretch"); - defer allocator.free(keep_aspect_ratio_policy); - var mode: []const u8 = try allocator.dupe(u8, "nearest"); - - var nearest_mode: []const u8 = try allocator.dupe(u8, "round_prefer_floor"); - defer allocator.free(nearest_mode); - - for (node.nodeProto.attribute) |attr| { - if (std.mem.indexOf(u8, attr.name, "antialias")) |_| { - if (attr.type == AttributeType.INT) antialias = attr.i else return error.ResizeAnitialiasNotINT; - } else if (std.mem.indexOf(u8, attr.name, "axes")) |_| { - if (attr.type == AttributeType.INTS) axes = attr.ints else return error.ResizeAxesNotINTS; - } else 
if (std.mem.indexOf(u8, attr.name, "coordinate_transformation_mode")) |_| { - if (attr.type == AttributeType.STRING) coordinate_transformation_mode = attr.s else return error.Resize_coordinate_transformation_mode_NotSTRING; - } else if (std.mem.indexOf(u8, attr.name, "cubic_coeff_a")) |_| { - if (attr.type == AttributeType.FLOAT) cubic_coeff_a = attr.f else return error.Resize_cubic_coeff_a_NotFLOAT; - } else if (std.mem.indexOf(u8, attr.name, "exclude_outside")) |_| { - if (attr.type == AttributeType.INT) exclude_outside = attr.i else return error.Resize_exclude_outside_NotINT; - } else if (std.mem.indexOf(u8, attr.name, "extrapolation_value")) |_| { - if (attr.type == AttributeType.FLOAT) extrapolation_value = attr.f else return error.Resize_extrapolation_value_NotFLOAT; - } else if (std.mem.indexOf(u8, attr.name, "keep_aspect_ratio_policy")) |_| { - if (attr.type == AttributeType.STRING) keep_aspect_ratio_policy = attr.s else return error.Resize_keep_aspect_ratio_policy_NotSTRING; - } else if (std.mem.indexOf(u8, attr.name, "mode")) |_| { - if (attr.type == AttributeType.STRING) mode = attr.s else return error.Resize_mode_NotSTRING; - } else if (std.mem.indexOf(u8, attr.name, "nearest_mode")) |_| { - if (attr.type == AttributeType.STRING) nearest_mode = attr.s else return error.Resize_nearest_mode_NotSTRING; - } - } - - // ---- CREATING ATTRIBUTES strings - const axes_string = try utils.i64SliceToUsizeArrayString(axes); - _ = axes_string; - - //pub fn rezise_lean(comptime T: type, t: *Tensor(T), comptime mode: []const u8, scales: ?[]const f32, sizes: ?[]const usize, coordinate_transformation_mode: []const u8, output_tensor: *Tensor(T)) !void { - _ = try writer.print( - \\ - \\ tensMath.resize_lean( - \\ T, - \\ {s}, //*Tensor(T) - \\ "{s}", //mode - \\ {s}, //scales: ?[]const f32 - \\ {s}, //sizes: ?[]const usize - \\ "{s}", //coordinate_transformation_mode: []const u8 - \\ &tensor_{s}, //output_tensor: *Tensor(T) - \\ ) - , - .{ - tensor_X_string, // input - 
mode, - data_scales_string, - data_sizes_string, - coordinate_transformation_mode, - try utils.getSanitizedName(node.outputs.items[0].name), //output - }, - ); -} - -inline fn write_neg(writer: std.fs.File.Writer, node: *ReadyNode) !void { - // https://onnx.ai/onnx/operators/onnx__Neg.html - // INPUTS: - // - X (heterogeneous) - T: Input tensor - // OUTPUTS: - // - Y (heterogeneous) - T: Output tensor with flipped elements - - // Create input tensor string - var input_tensor_string: []u8 = undefined; - defer allocator.free(input_tensor_string); - - if (node.inputs.items[0].?.tag == globals.TensorTag.INITIALIZER) { - input_tensor_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", - try utils.getSanitizedName(node.inputs.items[0].?.name), - ")", - }); - } else { - input_tensor_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "&tensor_", try utils.getSanitizedName(node.inputs.items[0].?.name) }); - } - - _ = try writer.print( - \\ - \\ - \\ tensMath.neg_lean(T, {s}, &tensor_{s}) - , .{ - input_tensor_string, - try utils.getSanitizedName(node.outputs.items[0].name), - }); -} - -inline fn write_mean(writer: std.fs.File.Writer, node: *ReadyNode) !void { - // https://onnx.ai/onnx/operators/onnx__Mean.html - // INPUTS: - // - Variadic input tensors (data_0, data_1, ...). All inputs must have the same data type. - // OUTPUTS: - // - Output tensor with shape determined by broadcasting the input shapes. 
- // ATTRIBUTES: - // - None - - if (node.inputs.items.len == 0) { - return error.EmptyInputList; - } - - // Costruisci l'array degli input - var input_strings = std.ArrayList([]u8).init(allocator); - defer { - for (input_strings.items) |str| allocator.free(str); - input_strings.deinit(); - } - - for (node.inputs.items) |input| { - var input_str: []u8 = undefined; - if (input.?.tag == globals.TensorTag.INITIALIZER) { - input_str = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", - try utils.getSanitizedName(input.?.name), - ")", - }); - } else { - input_str = try std.mem.concat(allocator, u8, &[_][]const u8{ - "&tensor_", - try utils.getSanitizedName(input.?.name), - }); - } - try input_strings.append(input_str); - } - - // Costruisci la stringa dell'array degli input - var inputs_array_str = std.ArrayList(u8).init(allocator); - defer inputs_array_str.deinit(); - try inputs_array_str.writer().writeAll("[_]*Tensor(f32){ "); - for (input_strings.items, 0..) |input_str, i| { - if (i > 0) try inputs_array_str.writer().writeAll(", "); - try inputs_array_str.writer().writeAll(input_str); - } - try inputs_array_str.writer().writeAll(" }"); - - // Scrivi la chiamata a tensMath.mean_lean - const output_name = try utils.getSanitizedName(node.outputs.items[0].name); - _ = try writer.print( - \\ - \\ - \\ var inputs_{s} = {s}; - \\ tensMath.mean_lean(f32, &inputs_{s}, &tensor_{s}) - , .{ - output_name, // Nome della variabile temporanea degli input - inputs_array_str.items, // Array dei puntatori ai tensori di input - output_name, // Nome del tensore di output - output_name, // Nome della variabile temporanea per il riferimento all'array - }); -} - -inline fn write_pads(writer: std.fs.File.Writer, node: *ReadyNode) !void { - // https://onnx.ai/onnx/operators/onnx__Pad.html - // INPUTS: - // - data (T): Input tensor. - // - pads (tensor(int64)): Tensor of integers indicating the number of padding elements. 
- // Shape [2 * num_axes], format [x1_begin, x2_begin, ..., x1_end, x2_end,...] - // - constant_value (optional, T): Scalar constant value to use for constant mode. Defaults to 0. - // - axes (optional, tensor(int64)): Axes to pad. If not provided, all axes are padded. - // OUTPUTS: - // - output (T): Tensor after padding. - // ATTRIBUTES: - // - mode (STRING, default is 'constant'): Supported modes: constant, reflect, edge, wrap. - - // Get mode attribute - var mode_str: []const u8 = "constant"; // Default - for (node.nodeProto.attribute) |attr| { - if (std.mem.eql(u8, attr.name, "mode")) { - if (attr.type == AttributeType.STRING) mode_str = attr.s; - break; - } - } - // Convert mode string to PadMode enum - var pad_mode_enum: []const u8 = undefined; - if (std.ascii.eqlIgnoreCase(mode_str, "constant")) { - pad_mode_enum = "tensMath.PadMode.constant"; - } else if (std.ascii.eqlIgnoreCase(mode_str, "reflect")) { - pad_mode_enum = "tensMath.PadMode.reflect"; - } else if (std.ascii.eqlIgnoreCase(mode_str, "edge")) { - pad_mode_enum = "tensMath.PadMode.edge"; - } else if (std.ascii.eqlIgnoreCase(mode_str, "wrap")) { - pad_mode_enum = "tensMath.PadMode.wrap"; - } else { - return error.UnsupportedMode; - } - - // --- Get Input Strings --- - - // Input 0: data - const data_name = try utils.getSanitizedName(node.inputs.items[0].?.name); - const data_tensor_string = try std.fmt.allocPrint(allocator, "{s}tensor_{s}{s}", .{ if (node.inputs.items[0].?.tag == globals.TensorTag.INITIALIZER) "@constCast(¶m_lib." 
else "&", data_name, if (node.inputs.items[0].?.tag == globals.TensorTag.INITIALIZER) ")" else "" }); - defer allocator.free(data_tensor_string); - - // Input 1: pads (must be int64 constant) - if (node.inputs.items.len < 2 or node.inputs.items[1] == null or node.inputs.items[1].?.tag != globals.TensorTag.INITIALIZER) { - return error.PadsInputInvalid; - } - const pads_name = try utils.getSanitizedName(node.inputs.items[1].?.name); - const pads_data_string = try std.fmt.allocPrint(allocator, "param_lib.tensor_{s}.data", .{pads_name}); - defer allocator.free(pads_data_string); - - // Input 2: constant_value (optional) - var constant_value_str: []const u8 = "null"; - var constant_value_alloc: ?[]u8 = null; - if (node.inputs.items.len > 2 and node.inputs.items[2] != null) { - const const_val_name = try utils.getSanitizedName(node.inputs.items[2].?.name); - // Constant value should be a scalar, access data[0] - if (node.inputs.items[2].?.tag == globals.TensorTag.INITIALIZER) { - constant_value_alloc = try std.fmt.allocPrint(allocator, "param_lib.tensor_{s}.data[0]", .{const_val_name}); - } else { - constant_value_alloc = try std.fmt.allocPrint(allocator, "tensor_{s}.data[0]", .{const_val_name}); - } - constant_value_str = constant_value_alloc.?; - } - defer if (constant_value_alloc != null) allocator.free(constant_value_alloc.?); - - // Input 3: axes (optional, int64 or int32) - var axes_data_str: []const u8 = "null"; - var axes_alloc: ?[]u8 = null; - var axes_code_to_generate: ?[]u8 = null; // Holds the "utils.sliceToIsizeSlice(...)" string - var axes_var_name_arg: []const u8 = "null"; // Holds the argument for pads_lean ("null" or "axes_isize_...") - var axes_var_name_allocated = false; - var axes_code_allocated = false; - - if (node.inputs.items.len > 3 and node.inputs.items[3] != null) { - const axes_name = try utils.getSanitizedName(node.inputs.items[3].?.name); - const output_name_tmp = try utils.getSanitizedName(node.outputs.items[0].name); // Needed for var 
name - - if (node.inputs.items[3].?.tag == globals.TensorTag.INITIALIZER) { - axes_alloc = try std.fmt.allocPrint(allocator, "param_lib.tensor_{s}.data", .{axes_name}); - } else { - axes_alloc = try std.fmt.allocPrint(allocator, "tensor_{s}.data", .{axes_name}); - } - axes_data_str = axes_alloc.?; - // Generate code to convert axes data to isize slice - axes_code_to_generate = try std.fmt.allocPrint(allocator, "utils.sliceToIsizeSlice({s})", .{axes_data_str}); - axes_code_allocated = true; - axes_var_name_arg = try std.fmt.allocPrint(allocator, "axes_isize_{s}", .{output_name_tmp}); // Generate the variable name to pass - axes_var_name_allocated = true; - } - defer if (axes_alloc != null) allocator.free(axes_alloc.?); - defer if (axes_code_allocated) allocator.free(axes_code_to_generate.?); - defer if (axes_var_name_allocated) allocator.free(axes_var_name_arg); - - // Output tensor - const output_name = try utils.getSanitizedName(node.outputs.items[0].name); - - // Conditionally create the isize slice for axes if needed - if (axes_code_to_generate != null) { - _ = try writer.print( - \\ const {s} = {s}; - \\ defer allocator.free({s}); - , .{ axes_var_name_arg, axes_code_to_generate.?, axes_var_name_arg }); - } - - _ = try writer.print( - \\ - \\ tensMath.pads_lean( - \\ T, // type - \\ {s}, // data - \\ {s}, // pads (int64 slice) - \\ {s}, // mode - \\ {s}, // constant_value - \\ {s}, // axes (isize slice) - \\ &tensor_{s} // output - \\ ) - , .{ - data_tensor_string, - pads_data_string, - pad_mode_enum, - constant_value_str, - axes_var_name_arg, // Use the correct variable name or "null" - output_name, - }); -} - -inline fn write_clip(writer: std.fs.File.Writer, node: *ReadyNode) !void { - // https://onnx.ai/onnx/operators/onnx__Clip.html - // INPUTS: - // - input (heterogeneous) - T: Input tensor whose elements to be clipped. - // - min (optional, heterogeneous) - T: Minimum value, must be a scalar. 
- // - max (optional, heterogeneous) - T: Maximum value, must be a scalar. - // OUTPUTS: - // - output (heterogeneous) - T: Output tensor with clipped values. - - // Get sanitized names - const input_name = try utils.getSanitizedName(node.inputs.items[0].?.name); - const output_name = try utils.getSanitizedName(node.outputs.items[0].name); - - // Create input tensor string - var input_tensor_string: []u8 = undefined; - defer allocator.free(input_tensor_string); - if (node.inputs.items[0].?.tag == globals.TensorTag.INITIALIZER) { - input_tensor_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "@constCast(¶m_lib.tensor_", input_name, ")" }); - } else { - input_tensor_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "&tensor_", input_name }); - } - - // Create optional min tensor string - var min_tensor_string: []const u8 = "null"; - var min_alloc: ?[]u8 = null; - if (node.inputs.items.len > 1 and node.inputs.items[1] != null) { - const min_name = try utils.getSanitizedName(node.inputs.items[1].?.name); - if (node.inputs.items[1].?.tag == globals.TensorTag.INITIALIZER) { - min_alloc = try std.fmt.allocPrint(allocator, "@constCast(¶m_lib.tensor_{s})", .{min_name}); - } else { - min_alloc = try std.fmt.allocPrint(allocator, "&tensor_{s}", .{min_name}); - } - min_tensor_string = min_alloc.?; - } - defer if (min_alloc != null) allocator.free(min_alloc.?); - - // Create optional max tensor string - var max_tensor_string: []const u8 = "null"; - var max_alloc: ?[]u8 = null; - if (node.inputs.items.len > 2 and node.inputs.items[2] != null) { - const max_name = try utils.getSanitizedName(node.inputs.items[2].?.name); - if (node.inputs.items[2].?.tag == globals.TensorTag.INITIALIZER) { - max_alloc = try std.fmt.allocPrint(allocator, "@constCast(¶m_lib.tensor_{s})", .{max_name}); - } else { - max_alloc = try std.fmt.allocPrint(allocator, "&tensor_{s}", .{max_name}); - } - max_tensor_string = max_alloc.?; - } - defer if (max_alloc != null) 
allocator.free(max_alloc.?); - - // Write the lean_clip function call - _ = try writer.print( - \\ - \\ - \\ tensMath.clip_lean( - \\ T, // type - \\ {s}, // input tensor - \\ {s}, // min tensor (optional) - \\ {s}, // max tensor (optional) - \\ &tensor_{s} // output tensor - \\ ) - , .{ - input_tensor_string, - min_tensor_string, - max_tensor_string, - output_name, - }); -} - -inline fn write_dynamicQuantizeLinear(writer: std.fs.File.Writer, node: *ReadyNode) !void { - // https://onnx.ai/onnx/operators/onnx_aionnx_preview_training__DynamicQuantizeLinear.html - // INPUTS: - // - x (heterogeneous) - T1: Input tensor - // OUTPUTS: - // - y (heterogeneous) - T2: Quantized output tensor - // - y_scale (heterogeneous) - tensor(float): Output scale. It's a scalar. - // - y_zero_point (heterogeneous) - T2: Output zero point. It's a scalar. - - // Ensure correct number of inputs and outputs - if (node.inputs.items.len != 1) return error.InvalidInputCount; // Expects 1 input - if (node.outputs.items.len != 3) return error.InvalidOutputCount; // Expects 3 outputs - - // Get sanitized names - const input_x_name = try utils.getSanitizedName(node.inputs.items[0].?.name); - const output_y_name = try utils.getSanitizedName(node.outputs.items[0].name); - const output_scale_name = try utils.getSanitizedName(node.outputs.items[1].name); - const output_zp_name = try utils.getSanitizedName(node.outputs.items[2].name); - - // Create input tensor string (needs const cast as lean function expects *const) - var input_x_string: []u8 = undefined; - defer allocator.free(input_x_string); - if (node.inputs.items[0].?.tag == globals.TensorTag.INITIALIZER) { - input_x_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", input_x_name, ")", - }); - } else { - input_x_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(&tensor_", input_x_name, ")", - }); - } - - // Output tensors are always non-const variables in the generated code - const 
output_y_string = try std.fmt.allocPrint(allocator, "&tensor_{s}", .{output_y_name}); - defer allocator.free(output_y_string); - const output_scale_string = try std.fmt.allocPrint(allocator, "&tensor_{s}", .{output_scale_name}); - defer allocator.free(output_scale_string); - const output_zp_string = try std.fmt.allocPrint(allocator, "&tensor_{s}", .{output_zp_name}); - defer allocator.free(output_zp_string); - - // Write the lean function call - _ = try writer.print( - \\ tensMath.dynamicQuantizeLinear_lean( - \\ {s}, // x: *const Tensor(f32) - \\ {s}, // y: *Tensor(u8) - \\ {s}, // y_scale: *Tensor(f32) - \\ {s} // y_zero_point: *Tensor(u8) - \\ ) - , .{ - input_x_string, - output_y_string, - output_scale_string, - output_zp_string, - }); -} - -inline fn write_cast(writer: std.fs.File.Writer, node: *ReadyNode) !void { - // https://onnx.ai/onnx/operators/onnx__Cast.html - // INPUTS: - // - input (heterogeneous) - T1: Input tensor to be cast. - // OUTPUTS: - // - output (heterogeneous) - T2: Output tensor with the same shape as input and specified type. - // ATTRIBUTES: - // - to (INT, required): The data type to cast to. 
- - // Get the target type from the attribute - var target_type: DataType = undefined; - var target_type_found = false; - for (node.nodeProto.attribute) |attr| { - if (std.mem.eql(u8, attr.name, "to")) { - if (attr.type == AttributeType.INT) { - target_type = @enumFromInt(attr.i); - target_type_found = true; - break; - } else { - return error.CastToAttributeNotINT; - } - } - } - - if (!target_type_found) { - return error.CastToAttributeNotFound; - } - - // --- Safely get source type --- - var source_type: DataType = .UNDEFINED; - const input_ready_tensor_ptr = globals.tensorHashMap.getPtr(node.inputs.items[0].?.name); - - if (input_ready_tensor_ptr) |rt_ptr| { - // Prioritize ReadyTensor.dtype if it's valid - if (rt_ptr.dtype != DataType.UNDEFINED) { // Check if dtype is set - source_type = rt_ptr.dtype; - } else if (rt_ptr.tensorProto) |tp| { - // Fallback to tensorProto if dtype is not set - source_type = tp.data_type; - } else { - mathHandler_log.warn("Error: Could not determine source type for Cast input '{s}' from either dtype or tensorProto\n", .{node.inputs.items[0].?.name}); - return error.DataTypeNotFound; // Or another appropriate error - } - } else { - mathHandler_log.warn("Error: Cast input tensor '{s}' not found in map\n", .{node.inputs.items[0].?.name}); - return error.TensorNotFound; // Or another appropriate error - } - - if (source_type == DataType.UNDEFINED) { - mathHandler_log.warn("Error: Determined source type for Cast input '{s}' is UNDEFINED\n", .{node.inputs.items[0].?.name}); - return error.DataTypeNotFound; - } - // --- End safe source type retrieval --- - - const source_type_string = try utils.getTypeString(source_type); - const target_type_string = try utils.getTypeString(target_type); - - // Create input tensor string - var input_tensor_string: []u8 = undefined; - defer allocator.free(input_tensor_string); - - if (node.inputs.items[0].?.tag == globals.TensorTag.INITIALIZER) { - input_tensor_string = try std.mem.concat(allocator, u8, 
&[_][]const u8{ - "@constCast(¶m_lib.tensor_", - try utils.getSanitizedName(node.inputs.items[0].?.name), - ")", - }); - } else { - input_tensor_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "@constCast(&tensor_", try utils.getSanitizedName(node.inputs.items[0].?.name), ")" }); - } - - _ = try writer.print( - \\ - \\ - \\ @setEvalBranchQuota(10000); - \\ tensMath.cast_lean( - \\ {s}, // Source type T1 - \\ {s}, // Target type T2 - \\ {s}, // Input tensor (*const Tensor(T1)) - \\ &tensor_{s}, // Output tensor (*Tensor(T2)) - \\ zant.onnx.DataType.{s} // Target DataType enum - \\ ) - , .{ - source_type_string, // Pass source type string - target_type_string, // Pass target type string - input_tensor_string, - try utils.getSanitizedName(node.outputs.items[0].name), - @tagName(target_type), // Pass the DataType enum value as the 5th arg - }); -} - -inline fn write_convInteger(writer: std.fs.File.Writer, node: *ReadyNode) !void { - // INPUTS: - // - x: Input tensor (u8 or i8) - // - w: Weight tensor (u8 or i8) - // - x_zero_point: Zero point for input tensor x (optional, u8 or i8) - // - w_zero_point: Zero point for weight tensor w (optional, u8 or i8) - // OUTPUTS: - // - y: Output tensor (i32) - // ATTRIBUTES: - // - auto_pad, dilations, group, kernel_shape, pads, strides (similar to Conv) - - var auto_pad: []const u8 = "NOTSET"; - var dilations: ?[]i64 = null; - var group: i64 = 1; - var kernel_shape: ?[]i64 = null; - var pads: ?[]i64 = null; - var strides: ?[]i64 = null; - - for (node.nodeProto.attribute) |attr| { - if (std.mem.indexOf(u8, attr.name, "auto_pad")) |_| { - if (attr.type == AttributeType.STRING) auto_pad = attr.s else return error.ConvAuto_padNotSTRING; - } else if (std.mem.indexOf(u8, attr.name, "dilations")) |_| { - if (attr.type == AttributeType.INTS) dilations = attr.ints else return error.ConvDilatationNoINTS; - } else if (std.mem.indexOf(u8, attr.name, "group")) |_| { - if (attr.type == AttributeType.INT) group = attr.i else return 
error.ConvGroupNotINT; - } else if (std.mem.indexOf(u8, attr.name, "kernel_shape")) |_| { - if (attr.type == AttributeType.INTS) kernel_shape = attr.ints else return error.ConvKernelShapeNotINTS; - } else if (std.mem.indexOf(u8, attr.name, "pads")) |_| { - if (attr.type == AttributeType.INTS) pads = attr.ints else return error.ConvPadsNotINTS; - } else if (std.mem.indexOf(u8, attr.name, "strides")) |_| { - if (attr.type == AttributeType.INTS) strides = attr.ints else return error.ConvStridesNotINTS; - } - } - - //----create tensor_x_string - var tensor_x_string: []u8 = undefined; - defer allocator.free(tensor_x_string); - if (node.inputs.items[0].?.tag == globals.TensorTag.INITIALIZER) { - tensor_x_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", - try utils.getSanitizedName(node.inputs.items[0].?.name), - ")", - }); - } else { - tensor_x_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "@constCast(&tensor_", try utils.getSanitizedName(node.inputs.items[0].?.name), ")" }); - } - - //----create tensor_w_string - var tensor_w_string: []u8 = undefined; - defer allocator.free(tensor_w_string); - if (node.inputs.items[1].?.tag == globals.TensorTag.INITIALIZER) { - tensor_w_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", - try utils.getSanitizedName(node.inputs.items[1].?.name), - ")", - }); - } else { - tensor_w_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "@constCast(&tensor_", try utils.getSanitizedName(node.inputs.items[1].?.name), ")" }); - } - - //----create ?x_zero_point string - var x_zp_string: []u8 = undefined; - var free_x_zp = false; - if (node.inputs.items.len > 2 and node.inputs.items[2] != null) { // Index 2 might be x_zero_point - const x_zp_name = try utils.getSanitizedName(node.inputs.items[2].?.name); - if (node.inputs.items[2].?.tag == globals.TensorTag.INITIALIZER) { - x_zp_string = try std.mem.concat(allocator, u8, &[_][]const u8{ 
"@constCast(¶m_lib.tensor_", x_zp_name, ")" }); - } else { - x_zp_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "@constCast(&tensor_", x_zp_name, ")" }); - } - free_x_zp = true; - } else { - x_zp_string = try allocator.dupe(u8, "null"); - free_x_zp = true; - } - defer if (free_x_zp) allocator.free(x_zp_string); - - //----create ?w_zero_point string - var w_zp_string: []u8 = undefined; - var free_w_zp = false; - if (node.inputs.items.len > 3 and node.inputs.items[3] != null) { // Index 3 might be w_zero_point - const w_zp_name = try utils.getSanitizedName(node.inputs.items[3].?.name); - if (node.inputs.items[3].?.tag == globals.TensorTag.INITIALIZER) { - w_zp_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "@constCast(¶m_lib.tensor_", w_zp_name, ")" }); - } else { - w_zp_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "@constCast(&tensor_", w_zp_name, ")" }); - } - free_w_zp = true; - } else { - w_zp_string = try allocator.dupe(u8, "null"); - free_w_zp = true; - } - defer if (free_w_zp) allocator.free(w_zp_string); - - //----create stride string (mandatory) - if (strides == null) return error.StrideNotFound; - const stride_string: []const u8 = try utils.i64SliceToUsizeArrayString(strides.?); - - //----create ?pads string - var pads_string: []const u8 = "null"; - if (pads != null) { - if (pads.?.len > 0) { // Check if the slice is actually non-empty - pads_string = try utils.i64SliceToUsizeArrayString(pads.?); - // Assuming no allocation needed to be freed, following write_conv - } else { - pads_string = "&[_]usize{}"; // Use explicit empty slice literal if input slice is empty - } - } // else pads_string remains "null" - - //----create ?dilations string - var dilat_string: []const u8 = "null"; - if (dilations != null) { - if (dilations.?.len > 0) { - dilat_string = try utils.i64SliceToUsizeArrayString(dilations.?); - } else { - dilat_string = "&[_]usize{}"; - } - } // else dilat_string remains "null" - - // Get the specific data 
types for input and weight tensors - const input_x_type = globals.tensorHashMap.get(node.inputs.items[0].?.name).?.dtype; - const input_w_type = globals.tensorHashMap.get(node.inputs.items[1].?.name).?.dtype; - - const type_str_x = try utils.getTypeString(input_x_type); - const type_str_w = try utils.getTypeString(input_w_type); - - _ = try writer.print( - \\ - \\ - \\ tensMath.convInteger_lean( - \\ {s}, // T1: Input data type (u8 or i8) - \\ {s}, // T2: Weight data type (u8 or i8) - \\ {s}, // x - \\ {s}, // w - \\ {s}, // x_zero_point - \\ {s}, // w_zero_point - \\ &tensor_{s}, // y (Output is always i32) - \\ {s}, // stride - \\ {s}, // pads - \\ {s}, // dilations - \\ {}, // group - \\ "{s}", // auto_pad - \\ ) - , .{ - type_str_x, // T1 type string - type_str_w, // T2 type string - tensor_x_string, // x - tensor_w_string, // w - x_zp_string, // x_zero_point - w_zp_string, // w_zero_point - try utils.getSanitizedName(node.outputs.items[0].name), // y - stride_string, // Strides - pads_string, // Pads - dilat_string, // Dilations - group, // Group - auto_pad, // auto_pad - }); -} - -// Helper function to safely get tensor type string -fn getSafeTensorTypeString(input_node_item: *globals.ReadyTensor, parent_node_name: []const u8) ![]const u8 { - const input_name = input_node_item.name; // Name is not optional on ReadyTensor - const tensor_global = input_node_item; // tensor_global is the input_node_item itself - - // Prioritize ReadyTensor.dtype if available and valid - if (tensor_global.dtype != .UNDEFINED) { - return utils.getTypeString(tensor_global.dtype); - } - - // Fallback to tensorProto if dtype is not available/valid - const onnx_tensor_proto = tensor_global.tensorProto orelse { - std.log.err( - \\Error in node '{s}': tensorProto is null AND ReadyTensor.dtype is UNDEFINED for input tensor '{s}'. - \\Tensor details: ready={}, tag={s}, shape={any}, dtype={s}. - \\This means type information is missing for this tensor. 
- , .{ - parent_node_name, - input_name, - tensor_global.ready, - @tagName(tensor_global.tag), - tensor_global.shape, - @tagName(tensor_global.dtype), // Also log the dtype - }); - return error.CodegenMissingTypeInformation; // New, more specific error (ensure this is defined) - }; - - return utils.getTypeString(onnx_tensor_proto.data_type); -} - -inline fn write_batch_norm(writer: std.fs.File.Writer, node: *ReadyNode) !void { - // https://onnx.ai/onnx/operators/onnx__BatchNormalization.html - // INPUTS: - // - X (heterogeneous) - T: Input data tensor from the previous operator; dimensions are in the form of (N x C x D1 x D2 … Dn), where N is the batch size, C is the number of channels. Statistics are computed for every channel of C over N and D1 to Dn dimensions. For image data, input dimensions become (N x C x H x W). The op also accepts single dimension input of size N in which case C is assumed to be 1 - // - scale (heterogeneous) - T1: Scale tensor of shape ©. - // - B (heterogeneous) - T1: Bias tensor of shape ©. - // - input_mean (heterogeneous) - T2: running (training) or estimated (testing) mean tensor of shape ©. - // - input_var (heterogeneous) - T2: running (training) or estimated (testing) variance tensor of shape ©. - // OUTPUT: - // - Y (heterogeneous) - T: The output tensor of the same shape as X - // ATTRIBUTES: - // - epsilon - FLOAT (default is '1e-05'): The epsilon value to use to avoid division by zero. - // - momentum - FLOAT (default is '0.9'): Factor used in computing the running mean and variance.e.g., running_mean = running_mean * momentum + mean * (1 - momentum). - // - training_mode - INT (default is '0'): If set to true, it indicates BatchNormalization is being used for training, and outputs 1 and 2 are to be computed. 
- - var epsilon: f32 = 1e-05; - var momentum: f32 = 0.9; - // var training_mode: bool = false; -> NOT USED, ALWAYS FALSE for Zant - - for (node.nodeProto.attribute) |attr| { - if (std.mem.indexOf(u8, attr.name, "epsilon")) |_| { - if (attr.type == AttributeType.FLOAT) epsilon = attr.f else return error.BatchNorm_epsilon_NotFloat; - } else if (std.mem.indexOf(u8, attr.name, "momentum")) |_| { - if (attr.type == AttributeType.FLOAT) momentum = attr.f else return error.BatchNorm_momentum_NotFloat; - } else if (std.mem.indexOf(u8, attr.name, "training_mode")) |_| { - if (attr.type == AttributeType.INT) if (attr.i != 0) return error.BatchNorm_training_NotAvailable; - } - } - - //----create tensor_X_string - var tensor_X_string: []u8 = undefined; - defer allocator.free(tensor_X_string); - - if (node.inputs.items[0].?.tag == globals.TensorTag.INITIALIZER) { - tensor_X_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", - try utils.getSanitizedName(node.inputs.items[0].?.name), - ")", - }); - } else { - tensor_X_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "@constCast(&tensor_", try utils.getSanitizedName(node.inputs.items[0].?.name), ")" }); - } - - //----create tensor_scale_string - var tensor_scale_string: []u8 = undefined; - defer allocator.free(tensor_scale_string); - - if (node.inputs.items[1].?.tag == globals.TensorTag.INITIALIZER) { - tensor_scale_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", - try utils.getSanitizedName(node.inputs.items[1].?.name), - ")", - }); - } else { - tensor_scale_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "@constCast(&tensor_", try utils.getSanitizedName(node.inputs.items[1].?.name), ")" }); - } - - //----create tensor_scale_string - var tensor_B_string: []u8 = undefined; - defer allocator.free(tensor_B_string); - - if (node.inputs.items[2].?.tag == globals.TensorTag.INITIALIZER) { - tensor_B_string = try 
std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", - try utils.getSanitizedName(node.inputs.items[2].?.name), - ")", - }); - } else { - tensor_B_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "@constCast(&tensor_", try utils.getSanitizedName(node.inputs.items[2].?.name), ")" }); - } - - //----create tensor_input_mean_string - var tensor_input_mean_string: []u8 = undefined; - defer allocator.free(tensor_input_mean_string); - - if (node.inputs.items[3].?.tag == globals.TensorTag.INITIALIZER) { - tensor_input_mean_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", - try utils.getSanitizedName(node.inputs.items[3].?.name), - ")", - }); - } else { - tensor_input_mean_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "@constCast(&tensor_", try utils.getSanitizedName(node.inputs.items[3].?.name), ")" }); - } - - //----create tensor_input_var_string - var tensor_input_var_string: []u8 = undefined; - defer allocator.free(tensor_input_var_string); - - if (node.inputs.items[4].?.tag == globals.TensorTag.INITIALIZER) { - tensor_input_var_string = try std.mem.concat(allocator, u8, &[_][]const u8{ - "@constCast(¶m_lib.tensor_", - try utils.getSanitizedName(node.inputs.items[4].?.name), - ")", - }); - } else { - tensor_input_var_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "@constCast(&tensor_", try utils.getSanitizedName(node.inputs.items[4].?.name), ")" }); - } - - // pub inline fn batchNormalization_lean( comptime T: anytype, comptime T1: anytype, comptime T2: anytype, input: *Tensor(T), scales: *Tensor(T1), B: *Tensor(T1), input_mean: Tensor(T2), input_var: Tensor(T2), epsilon: f32, momentum: f32, training_mode: bool, output: *Tensor(T)) - _ = try writer.print( - \\ - \\ - \\ tensMath.batchNormalization_lean( - \\ {s}, //type 0 - \\ {s}, //type 1 - \\ {s}, //type 2 - \\ {s}, //input - \\ {s}, //scales - \\ {s}, //B - \\ {s}, //input_mean - \\ {s}, //input_var - \\ {}, //epsilon 
- \\ {}, //momentum - \\ false, //training_mode - \\ &tensor_{s}, //output - \\ ) - , .{ - try getSafeTensorTypeString(node.inputs.items[0].?, node.nodeProto.name orelse "UnnamedBatchNormInput0"), // MODIFIED: Use helper for input X type - try getSafeTensorTypeString(node.inputs.items[1].?, node.nodeProto.name orelse "UnnamedBatchNormInput1"), // MODIFIED: Use helper for input scale type - try getSafeTensorTypeString(node.inputs.items[3].?, node.nodeProto.name orelse "UnnamedBatchNormInput3"), // MODIFIED: Use helper for input mean/var type (check ONNX spec for correct index if this is not mean's type) - tensor_X_string, - tensor_scale_string, - tensor_B_string, - tensor_input_mean_string, - tensor_input_var_string, - epsilon, - momentum, - try utils.getSanitizedName(node.outputs.items[0].name), - }); -} diff --git a/src/CodeGen/parameters.zig b/src/CodeGen/parameters.zig deleted file mode 100644 index eb226c2e..00000000 --- a/src/CodeGen/parameters.zig +++ /dev/null @@ -1,203 +0,0 @@ -const std = @import("std"); -const zant = @import("zant"); -const codegen = @import("codegen.zig"); -const utils = codegen.utils; -const onnx = zant.onnx; -const ModelOnnx = onnx.ModelProto; -const TensorProto = onnx.TensorProto; -const DataType = onnx.DataType; -const globals = codegen.globals; - -/// Writes the Zig code required to initialize all tensor initializers in the ONNX model. -/// This function generates declarations and definitions for each tensor. -/// -/// - `writer`: The file writer to output generated code. -/// - `model`: The ONNX model containing tensor initializers. 
-pub inline fn write_parameters(writer: std.fs.File.Writer, model: ModelOnnx) !void { - - //importing the libraries - try write_libraries_parameters(writer); - - try writer.print( - \\ - \\ - \\ // --------------------------------------------------- - \\ // + Initializing Weights and Biases + - \\ // --------------------------------------------------- - , .{}); - - // Iterate over all initializers in the ONNX model and generate code - for (model.graph.?.initializers) |tensorProtoInitializer| { - const dataTypeString: []const u8 = try utils.getTypeString(tensorProtoInitializer.data_type); - const name: []const u8 = try utils.getSanitizedName(tensorProtoInitializer.name.?); - - try writer.print( - \\ - \\ - \\ // ----------- Initializing tensor_{s}; - , .{name}); - - // Generate the shape array for the tensor - try wrtiteTensorShape(writer, tensorProtoInitializer, name); - - // Generate the data array for the tensor - try writeArray(writer, tensorProtoInitializer, name); - - // Create the tensor instance - try writer.print( - \\ - \\pub const tensor_{s} = Tensor({s}).fromConstBuffer(&allocator, &array_{s}, &shape_tensor_{s}); - , .{ name, dataTypeString, name, name }); - } -} - -/// Writes the required library imports to the generated Zig file for input tensor. -/// -/// This function ensures that the necessary standard and package libraries are -/// imported into the generated Zig source file. -/// -/// # Parameters -/// - `writer`: A file writer used to write the import statements. -/// -/// # Errors -/// This function may return an error if writing to the file fails. -fn write_libraries_parameters(writer: std.fs.File.Writer) !void { - _ = try writer.print( - \\ - \\ const std = @import("std"); - \\ const zant = @import("zant"); - \\ const Tensor = zant.core.tensor.Tensor; - \\ const pkgAllocator = zant.utils.allocator; - \\ const allocator = pkgAllocator.allocator; - \\ - , .{}); -} - -/// Writes the shape array for a tensor initializer. 
-/// -/// - `writer`: The file writer to output generated code. -/// - `t`: The tensor initializer. -/// - `name`: The sanitized name of the tensor. -pub inline fn wrtiteTensorShape(writer: std.fs.File.Writer, t: *TensorProto, name: []const u8) !void { - try writer.print( - \\ - \\ - \\const shape_tensor_{s} : [{}]usize = [_]usize{{ - , .{ name, t.dims.len }); - - for (0..t.dims.len) |i| { - if (i > 0) try writer.print( - \\, - , .{}); - - try writer.print( - \\ {} - , .{t.dims[i]}); - } - - try writer.print( - \\}} ; - , .{}); -} - -/// Writes the array for a tensor initializer based on its data type. -/// -/// - `writer`: The file writer to output generated code. -/// - `t`: The tensor initializer. -/// - `name`: The sanitized name of the tensor. -pub inline fn writeArray(writer: std.fs.File.Writer, t: *TensorProto, name: []const u8) !void { - std.log.info("\n[writeArray] Processing tensor: {s}, DataType: {any}", .{ name, t.data_type }); - - const dataTypeString: []const u8 = try utils.getTypeString(t.data_type); - - var size: i64 = 1; - for (t.dims) |dims_i| { - size *= dims_i; - } - try writer.print( - \\ - \\const array_{s} : [{d}]{s} linksection(".rodata") = [_]{s}{{ - , .{ name, size, dataTypeString, dataTypeString }); - - // Select appropriate data storage format - if (t.float_data) |d| { - writeArrayData(writer, f32, d) catch return error.f32DataUnavailable; - } else if (t.int32_data) |d| { - writeArrayData(writer, i32, d) catch return error.i32DataUnavailable; - } else if (t.int64_data) |d| { - writeArrayData(writer, i64, d) catch return error.i64DataUnavailable; - } else if (t.double_data) |d| { - writeArrayData(writer, f64, d) catch return error.f64DataUnavailable; - } else if (t.uint64_data) |d| { - writeArrayData(writer, u64, d) catch return error.u64DataUnavailable; - } else if (t.uint16_data) |d| { - writeArrayData(writer, u16, d) catch return error.u16DataUnavailable; - } else if (t.raw_data) |raw| { - // Handle raw data based on data_type - switch 
(t.data_type) { - .FLOAT => try writeRawData(writer, f32, raw), - .FLOAT16 => try writeRawData(writer, f16, raw), - .INT32 => try writeRawData(writer, i32, raw), - .INT8 => try writeRawData(writer, i8, raw), - .INT64 => try writeRawData(writer, i64, raw), - .DOUBLE => try writeRawData(writer, f64, raw), - .UINT64 => try writeRawData(writer, u64, raw), - .UINT16 => try writeRawData(writer, u16, raw), - .UINT8 => try writeRawData(writer, u8, raw), - // TODO: Add other types as needed (e.g., FLOAT16, INT8, etc.) - else => { - std.log.info("\n[writeArray] Error: Unsupported raw data type {any} for tensor {s}", .{ t.data_type, name }); - std.log.err("Unsupported raw data type: {any}", .{t.data_type}); - return error.DataTypeNotAvailable; - }, - } - } else { - std.log.info("\n[writeArray] Error: No recognized data field (float_data, int_data, raw_data, etc.) found for tensor {s} with DataType {any}", .{ name, t.data_type }); - return error.DataTypeNotAvailable; - } - - try writer.print( - \\}} ; - , .{}); -} - -/// Writes an array of tensor data from a raw byte slice. -/// Reads values one by one respecting alignment. -fn writeRawData(writer: std.fs.File.Writer, comptime T: type, raw_data: []const u8) !void { - const elem_size = @sizeOf(T); - const num_elements = raw_data.len / elem_size; - - // Ensure raw_data length is a multiple of element size - if (raw_data.len % elem_size != 0) { - std.log.err("Raw data length {d} is not a multiple of element size {d} for type {any}", .{ raw_data.len, elem_size, T }); - return error.InvalidRawDataLength; - } - - for (0..num_elements) |i| { - const offset = i * elem_size; - const value = std.mem.bytesToValue(T, raw_data[offset .. offset + elem_size]); - - if (i > 0) try writer.print( - \\, - , .{}); - try writer.print( - \\ {} - , .{value}); - } -} - -/// Writes an array of tensor data. -/// -/// - `writer`: The file writer to output generated code. -/// - `T`: The type of data in the tensor. -/// - `data`: The data array. 
-pub inline fn writeArrayData(writer: std.fs.File.Writer, comptime T: type, data: []const T) !void { - for (0..data.len) |i| { - if (i > 0) try writer.print( - \\, - , .{}); - try writer.print( - \\ {} - , .{data[i]}); - } -} diff --git a/src/CodeGen/predict.zig b/src/CodeGen/predict.zig deleted file mode 100644 index aa79ee12..00000000 --- a/src/CodeGen/predict.zig +++ /dev/null @@ -1,491 +0,0 @@ -const std = @import("std"); -const zant = @import("zant"); -const Tensor = zant.core.tensor.Tensor; -const onnx = zant.onnx; -const ModelOnnx = onnx.ModelProto; -const DataType = onnx.DataType; -const TensorProto = onnx.TensorProto; -const NodeProto = onnx.NodeProto; -const GraphProto = onnx.GraphProto; -const AttributeProto = onnx.AttributeProto; -const allocator = zant.utils.allocator.allocator; - -const codegen = @import("codegen.zig"); -const utils = codegen.utils; -const mathGen = codegen.math_handler; -const codegen_options = @import("codegen_options"); - -const globals = codegen.globals; -const ReadyNode = globals.ReadyNode; -const ReadyTensor = globals.ReadyTensor; - -// Writes the computation function for predicting outputs -pub inline fn writePredict(writer: std.fs.File.Writer, do_export: bool) !void { - // Static initialization for output tensors if not using dynamic allocation - if (!codegen_options.dynamic) { - // declare all the outputs of each node of the network - try write_outputsInitialization(writer); - // method to reset the tensors values - try write_outputsResetMethod(writer); - } - - _ = try writer.print( - \\ - \\ - \\ - \\pub {s} fn predict( - \\ input: [*]T, - \\ input_shape: [*]u32, - \\ shape_len: u32, - \\ result: *[*]T, - \\) void {{ - , .{if (do_export == true) "export" else ""}); - - if (codegen_options.log) { - _ = try writer.print( - \\ - \\ - \\ if (log_function) |log| {{ - \\ log(@constCast(@ptrCast("Starting prediction...\n"))); - \\ }} - , .{}); - } - - if (!codegen_options.dynamic) { - _ = try writer.print( - \\ - \\ // Reset all 
output tensors to zero before each prediction - \\ resetOutputTensors(); - , .{}); - } - - try write_checks(writer); - - try write_predictInitialization(writer); - - try write_graphSerialization(writer); - - try writeReturn(writer); - - _ = try writer.print( - \\ - \\}} - , .{}); -} - -// Processes and writes the computation graph -inline fn write_graphSerialization(writer: std.fs.File.Writer) !void { - var iteration: usize = 0; - var lastNode: *ReadyNode = undefined; - while (true) { - const computableNodes: std.ArrayList(*ReadyNode) = try utils.getComputableNodes(&globals.readyGraph); - - //DEBUG - // for (globals.readyGraph.items) |*readyNode| readyNode.print(false); - try utils.printComputableNodes(computableNodes, true); - - if (computableNodes.items.len == 0) break; - //else set the last node as the network output - lastNode = computableNodes.items[computableNodes.items.len - 1]; - - for (computableNodes.items) |node_ptr| { - //writing the operation - try writeOperation(writer, node_ptr); - //set the output as ready - try utils.setOutputsReady(node_ptr, &globals.tensorHashMap); - // Deallocate intermediate tensors after last use when dynamic allocation is enabled - if (codegen_options.dynamic) { - for (node_ptr.inputs.items) |input_opt| { - if (input_opt) |input| { - const name = input.name; - const san = try utils.getSanitizedName(name); - const rem = globals.decrementUseCount(name); - if (rem == 0 and input.tag == globals.TensorTag.LINK) { - _ = try writer.print(" tensor_{s}.deinit();", .{san}); - } - } - } - } - } - iteration += 1; - } - - // If this is the output node, we don't need to check its outputs - if (std.mem.eql(u8, try utils.getSanitizedName(lastNode.nodeProto.name.?), "output")) { - return; - } - - //check if it is different from the one already parsed, if not present, set to lastNode - if (lastNode.outputs.items.len == 0) { - return error.NoOutputsFound; - } - if (std.mem.eql(u8, globals.networkOutput.name, "")) { - //setting te network output 
- globals.networkOutput.name = lastNode.outputs.items[0].name; - globals.networkOutput.shape = lastNode.outputs.items[0].shape; - } else { - //check the output tensor name is the same - if (!std.mem.eql(u8, globals.networkOutput.name, lastNode.outputs.items[0].name)) { - std.log.warn("\n\n ERROR!!\n DifferentOutputNames: \n {s} vs {s}\n LastNode:{s}\n\n", .{ globals.networkOutput.name, lastNode.outputs.items[0].name, lastNode.nodeProto.name.? }); - lastNode.print(true); - return error.DifferentOutputNames; - } - } -} - -// -------------------------------- WRITE OUTPUTS -------------------------------- - -// Initializes output tensors in the computation graph -fn write_outputsInitialization(writer: std.fs.File.Writer) !void { - try writer.print( - \\ - \\ - \\ // --------------------------------------------------- - \\ // + Initializing output Tensors + - \\ // --------------------------------------------------- - , .{}); - - for (globals.readyGraph.items) |*node| { - - //writing the outputs, OSS: two nodes shpuld never have the same output by definition, so we don't need to check for duplicates - for (node.outputs.items) |output| { - if (std.mem.eql(u8, node.nodeProto.op_type, "Constant") and node.inputs.items.len == 0) { //A node is constant if it only has one output and no inputs - if (node.outputs.items.len > 1) return error.MultipleOutputConstant else { - try write_constantTensor(writer, node); - //set the node and tensor to Ready - var mutableNode: *ReadyNode = @constCast(node); - mutableNode.ready = true; - } - } else { - if (@as(?*ReadyTensor, output) == null) return error.InvalidOutput; - const size = try write_OutputShape( - writer, - output, - node, - ); - try write_OutputTensor( - writer, - output, - size, - ); - } - } - } -} - -fn write_OutputShape(writer: std.fs.File.Writer, output: *ReadyTensor, node: *const ReadyNode) !i64 { - if (@as(?*ReadyTensor, output) == null) return error.InvalidOutput; - const original_shape = output.shape; - var size: i64 = 
1; - - // Check if it's a convolutional node - const op_type = node.nodeProto.op_type; - const is_conv = std.mem.eql(u8, op_type, "Conv") or std.mem.eql(u8, op_type, "ConvInteger"); - const is_cast = std.mem.eql(u8, op_type, "Cast"); // Check for Cast node - const is_add = std.mem.eql(u8, op_type, "Add"); // Check for Add node - - var shape_len_adj: usize = original_shape.len; - var needs_batch_dim: bool = false; - - // Determine if a batch dimension needs to be added - if (is_conv and (original_shape.len == 0 or original_shape[0] != 1)) { - needs_batch_dim = true; - shape_len_adj += 1; - } else if (is_conv and original_shape.len > 0 and original_shape[0] == 1) { - // Already has batch dim 1, no change needed for conv - } else if (is_cast and original_shape.len == 3) { // Add check for Cast with 3 dims - needs_batch_dim = true; - shape_len_adj += 1; - } else if (is_add and original_shape.len == 3) { // Add check for Add with 3 dims - needs_batch_dim = true; - shape_len_adj += 1; - } else { - // Not a conv/cast/add node needing adjustment, or already has batch dim - } - - try writer.print( - \\ - \\ - \\var shape_tensor_{s} : [{}]usize = [_]usize{{ - , .{ - try utils.getSanitizedName(output.name), - shape_len_adj, // Use adjusted length - }); - - var first_dim_written = false; - if (needs_batch_dim) { - try writer.print(" 1", .{}); // Add batch dimension of 1 - size *= 1; - first_dim_written = true; - } - - for (0..original_shape.len) |i| { - if (first_dim_written or i > 0) try writer.print(",", .{}); - try writer.print( - \\ {} - , .{original_shape[i]}); - size *= original_shape[i]; - first_dim_written = true; // Ensure comma is added after the first element (batch or original[0]) - } - - try writer.print( - \\}} ; - , .{}); - - return size; -} - -fn write_constantTensor(writer: std.fs.File.Writer, readyNode: *const ReadyNode) !void { - try writer.print( - \\ - \\ // ---- CONSTANT TENSOR ---- - , .{}); - - // Get the output tensor (constant nodes have exactly one 
output) - const output = readyNode.outputs.items[0]; - const sanitized_name = try utils.getSanitizedName(output.name); - - // Find the value attribute which contains the constant tensor - var value_attr: ?*AttributeProto = null; - for (readyNode.nodeProto.attribute) |attr| { - if (std.mem.eql(u8, attr.name, "value")) { - value_attr = attr; - break; - } - } - - if (value_attr == null or value_attr.?.t == null) return error.MissingConstantValue; - const tensor = value_attr.?.t.?; - - // Write shape array - try writer.print( - \\ - \\const shape_tensor_{s} : [{}]usize = [_]usize{{ - , .{ sanitized_name, output.shape.len }); - - for (0..output.shape.len) |i| { - if (i > 0) try writer.print(",", .{}); - try writer.print( - \\ {} - , .{output.shape[i]}); - } - - try writer.print( - \\}} ; - , .{}); - - // Write data array - var total_size: i64 = 1; - for (tensor.dims) |dim| { - total_size *= dim; - } - - //const dataTypeString = try utils.getTypeString(tensor.data_type); - const type_str_const = try utils.getTypeString(tensor.data_type); - try writer.print( - \\ - \\const array_{s} : [{d}]{s} = [_]{s}{{ - , .{ sanitized_name, total_size, type_str_const, type_str_const }); - - // Write the actual data values - if (tensor.float_data) |data| { - for (0..data.len) |i| { - if (i > 0) try writer.print(",", .{}); - try writer.print(" {d}", .{data[i]}); - } - } else if (tensor.int64_data) |data| { - for (0..data.len) |i| { - if (i > 0) try writer.print(",", .{}); - try writer.print(" {d}", .{data[i]}); - } - } else if (tensor.raw_data) |data| { - switch (tensor.data_type) { - .FLOAT => { - const float_data = @as([*]const f32, @alignCast(@ptrCast(data.ptr)))[0..@divExact(data.len, 4)]; - for (0..float_data.len) |i| { - if (i > 0) try writer.print(",", .{}); - try writer.print(" {d}", .{float_data[i]}); - } - }, - .INT64 => { - const int_data = @as([*]const i64, @alignCast(@ptrCast(data.ptr)))[0..@divExact(data.len, 8)]; - for (0..int_data.len) |i| { - if (i > 0) try 
writer.print(",", .{}); - try writer.print(" {d}", .{int_data[i]}); - } - }, - else => return error.UnsupportedDataType, - } - } else return error.NoDataAvailable; - - try writer.print( - \\ }}; - , .{}); - - // Write tensor initialization using fromArray - try writer.print( - \\ - \\const tensor_{s} = Tensor({s}).fromConstBuffer(&allocator, &array_{s}, &shape_tensor_{s}); - , .{ sanitized_name, type_str_const, sanitized_name, sanitized_name }); -} - -fn write_OutputTensor(writer: std.fs.File.Writer, output: *ReadyTensor, size: i64) !void { - const sanitized_name = try utils.getSanitizedName(output.name); - - // --- ADD CHECK FOR UNDEFINED TYPE --- - if (output.dtype == .UNDEFINED) { - std.log.warn("\n\nCODEGEN ERROR: Attempted to generate output tensor '{s}' but its data type is UNDEFINED. Check ONNX graph analysis in globals.zig.\n\n", .{output.name}); - return error.DataTypeNotAvailable; // Or a more specific error like CannotGenerateUndefinedType - } - // --- END CHECK --- - - const type_str = try utils.getTypeString(output.dtype); - if (codegen_options.dynamic) { - // Check if this is the final network output tensor - if (std.mem.eql(u8, output.name, globals.networkOutput.name)) { - // Network Output: Allocate but DO NOT defer free/deinit. Caller takes ownership. - _ = try writer.print( - \\ - \\ // Allocate final network output buffer (caller owns this memory) - \\ var array_{s} = allocator.alloc({s}, {d}) catch return; - \\ var tensor_{s} = Tensor({s}).fromArray(&allocator, array_{s}, &shape_tensor_{s}); - \\ // NOTE: No 'defer allocator.free(array_{s})' or 'defer tensor_{s}.deinit()' - \\ // The pointer returned by predict() must be freed by the caller. - , .{ sanitized_name, type_str, size, sanitized_name, type_str, sanitized_name }); - } else { - // Intermediate Tensor: Allocate AND defer free/deinit. 
- const code_str = try std.fmt.allocPrint(allocator, - \\ var array_{s} = allocator.alloc({s}, {d}) catch return; - \\ defer allocator.free(array_{s}); // Free intermediate array - \\ var tensor_{s} = Tensor({s}).fromArray(&allocator, array_{s}, &shape_tensor_{s}); - \\ defer tensor_{s}.deinit(); // Deinit intermediate tensor struct - , .{ sanitized_name, type_str, size, sanitized_name, type_str, sanitized_name }); - defer allocator.free(code_str); - try writer.writeAll(code_str); - } - } else { - // Static allocation: Use fromConstBuffer to allow mutation - try writer.print(" var array_{s}: [{d}]{s} = [_]{s}{{0}} ** {d};", .{ sanitized_name, size, type_str, type_str, size }); - try writer.print(" var tensor_{s} = Tensor({s}).fromConstBuffer(&fba, &array_{s}, &shape_tensor_{s});", .{ sanitized_name, type_str, sanitized_name, sanitized_name }); - } -} - -fn write_outputsResetMethod(writer: std.fs.File.Writer) !void { - try writer.print( - \\ - \\ - \\//Function to reset all output tensors to zero - \\fn resetOutputTensors() void {{ - , .{}); - - if (codegen_options.log) { - _ = try writer.print( - \\ - \\ if (log_function) |log| {{ - \\ log(@constCast(@ptrCast("Resetting output tensors...\n"))); - \\ }} - , .{}); - } - - for (globals.readyGraph.items) |*node| { - // Skip constant nodes - if (std.mem.eql(u8, node.nodeProto.op_type, "Constant") and node.inputs.items.len == 0) { - continue; - } - - for (node.outputs.items) |output| { - _ = try writer.print( - \\ - \\ @memset(array_{s}[0..], 0); - , .{try utils.getSanitizedName(output.name)}); - } - } - - if (codegen_options.log) { - _ = try writer.print( - \\ - \\ if (log_function) |log| {{ - \\ log(@constCast(@ptrCast("Output tensors reset.\n"))); - \\ }} - , .{}); - } - - try writer.print( - \\ - \\}} - , .{}); -} - -// -------------------------------- WRITE CHECKS -------------------------------- - -fn write_checks(writer: std.fs.File.Writer) !void { - // Autogen a check for the input shape as arg VS input shape as 
codegen option - - //check on the number of dims - _ = try writer.print( - \\ - \\ //checks on the input parameters - \\ if (shape_len == 0) return ; - \\ if(shape_len != {}) return ; - , .{globals.networkInput.shape.len}); - - //check on dims correspondance - for (globals.networkInput.shape, 0..) |dim, i| { - _ = try writer.print( - \\ - \\ if( input_shape[{}] != {}) return ; - , .{ i, dim }); - } -} - -// -------------------------------- WRITE PREDICT() -------------------------------- - -fn write_predictInitialization(writer: std.fs.File.Writer) !void { - _ = try writer.print( - \\ - \\ //computing the size of the input tensor - \\ var size: u32 = 1; - \\ for(0..shape_len) |dim_i| {{ - \\ size *= input_shape[dim_i]; - \\ }} - \\ - \\ //allocating space in memory for the data - \\ const data = allocator.alloc(T, size) catch return; - \\ defer allocator.free(data); - \\ for (0..size) |i| {{ - \\ data[i] = input[i]; // Copying input elements - \\ }} - \\ - \\ //converting the shape from [*]u32 to []usize - \\ const usized_shape: []usize = utils.u32ToUsize(allocator, input_shape, shape_len) catch return; - \\ var tensor_{s} = Tensor(T).fromShape(&allocator, @constCast(usized_shape)) catch return; - \\ defer allocator.free(usized_shape); - \\ defer tensor_{s}.deinit(); - \\ @memcpy(tensor_{s}.data, data); - , .{ - try utils.getSanitizedName(globals.networkInput.name), - try utils.getSanitizedName(globals.networkInput.name), - try utils.getSanitizedName(globals.networkInput.name), - }); -} - -fn writeOperation(writer: std.fs.File.Writer, readyNode: *ReadyNode) !void { - try mathGen.write_math_op(writer, readyNode); -} - -fn writeReturn(writer: std.fs.File.Writer) !void { - _ = try writer.print( - \\ - \\ result.* = tensor_{s}.data.ptr; - \\ - , .{try utils.getSanitizedName(globals.networkOutput.name)}); - - if (codegen_options.log) { - _ = try writer.print( - \\ - \\ if (log_function) |log| {{ - \\ log(@constCast(@ptrCast("Prediction completed.\n"))); - \\ }} - , 
.{}); - } -} diff --git a/src/CodeGen/shape_handler.zig b/src/CodeGen/shape_handler.zig deleted file mode 100644 index ff41d8b1..00000000 --- a/src/CodeGen/shape_handler.zig +++ /dev/null @@ -1,1818 +0,0 @@ -const std = @import("std"); -const os = std.os; - -const zant = @import("zant"); - -const Codegen_log = std.log.scoped(.shape_handler); - -const Tensor = zant.core.tensor.Tensor; -const tensorMath = zant.core.tensor.math_standard; -const onnx = zant.onnx; -const ModelOnnx = onnx.ModelProto; -const DataType = onnx.DataType; -const allocator = zant.utils.allocator.allocator; - -// --- proto libs -const TensorProto = onnx.TensorProto; -const NodeProto = onnx.NodeProto; -const GraphProto = onnx.GraphProto; -const AttributeType = onnx.AttributeType; - -// --- codeGen libs -const ReadyNode = @import("globals.zig").ReadyNode; -const ReadyTensor = @import("globals.zig").ReadyTensor; -const codegen = @import("codegen.zig"); -const utils = codegen.utils; -const codegen_options = @import("codegen_options"); -const globals = @import("globals.zig"); - -// ----------------------------------- SHAPE inference ----------------------------------- - -pub fn compute_output_shape(readyNode: *ReadyNode) !void { - // Ensure the node has a name for debugging purposes - if (readyNode.nodeProto.name == null) { - // Generate a name like "OpType_OutputName" - const op_type = readyNode.nodeProto.op_type; - const output_name = readyNode.outputs.items[0].name; // Directly assign since it's not optional - _ = try std.fmt.allocPrint(allocator, "{s}_{s}", .{ op_type, output_name }); // Keep allocation for potential local use or logging if needed, but don't assign. Free later if stored. - // Note: This allocated name needs to be managed if the NodeProto lifetime extends beyond this scope. - // Assuming the global allocator lives long enough or NodeProto is processed quickly. 
- } - - if (std.mem.eql(u8, readyNode.nodeProto.op_type, "Add")) { - //https://onnx.ai/onnx/operators/onnx__Add.html - try compute_Add_output_shape(readyNode); - } else if (std.mem.eql(u8, readyNode.nodeProto.op_type, "AveragePool")) { - try compute_averagePool_output_shape(readyNode); - } else if (std.mem.eql(u8, readyNode.nodeProto.op_type, "BatchNormalization")) { - //https://onnx.ai/onnx/operators/onnx__BatchNormalization.html - try compute_batchNormalization_output_shape(readyNode); - } else if (std.mem.eql(u8, readyNode.nodeProto.op_type, "Cast")) { - // https://onnx.ai/onnx/operators/onnx__Cast.html - try compute_cast_output_shape(readyNode); - } else if (std.mem.eql(u8, readyNode.nodeProto.op_type, "Ceil")) { - //https://onnx.ai/onnx/operators/onnx__Ceil.html - try compute_ceil_output_shape(readyNode); - } else if (std.mem.eql(u8, readyNode.nodeProto.op_type, "Clip")) { - //https://onnx.ai/onnx/operators/onnx__Clip.html - try compute_clip_output_shape(readyNode); - } else if (std.mem.eql(u8, readyNode.nodeProto.op_type, "Concat")) { - //https://onnx.ai/onnx/operators/onnx__Concat.html - try compute_concat_output_shape(readyNode); - } else if (std.mem.eql(u8, readyNode.nodeProto.op_type, "Constant")) { - //https://onnx.ai/onnx/operators/onnx__Constant.html - try compute_constant_output_shape(readyNode); - } else if (std.mem.eql(u8, readyNode.nodeProto.op_type, "Conv")) { - //https://onnx.ai/onnx/operators/onnx__Conv.html - try compute_conv_output_shape(readyNode); - } else if (std.mem.eql(u8, readyNode.nodeProto.op_type, "ConvInteger")) { - //https://onnx.ai/onnx/operators/onnx__ConvInteger.html - try compute_convInteger_output_shape(readyNode); - } else if (std.mem.eql(u8, readyNode.nodeProto.op_type, "Div")) { - //https://onnx.ai/onnx/operators/onnx__Div.html - try compute_Div_output_shape(readyNode); - } else if (std.mem.eql(u8, readyNode.nodeProto.op_type, "DynamicQuantizeLinear")) { - // 
https://onnx.ai/onnx/operators/onnx_aionnx_preview_training__DynamicQuantizeLinear.html - try compute_dynamicQuantizeLinear_output_shape(readyNode); - } else if (std.mem.eql(u8, readyNode.nodeProto.op_type, "Elu")) { - //https://onnx.ai/onnx/operators/onnx__Elu.html - try compute_elu_output_shape(readyNode); - } else if (std.mem.eql(u8, readyNode.nodeProto.op_type, "Flatten")) { - //https://onnx.ai/onnx/operators/onnx__Flatten.html - try compute_flatten_output_shape(readyNode); - } else if (std.mem.eql(u8, readyNode.nodeProto.op_type, "Floor")) { - //https://onnx.ai/onnx/operators/onnx__Floor.html - try compute_floor_output_shape(readyNode); - } else if (std.mem.eql(u8, readyNode.nodeProto.op_type, "Squeeze")) { - //https://onnx.ai/onnx/operators/onnx__Squeeze.html - try compute_squeeze_output_shape(readyNode); - } else if (std.mem.eql(u8, readyNode.nodeProto.op_type, "Gather")) { - try compute_gather_output_shape(readyNode); - } else if (std.mem.eql(u8, readyNode.nodeProto.op_type, "Gemm")) { - //https://onnx.ai/onnx/operators/onnx__Gemm.html - try compute_gemm_output_shape(readyNode); - } else if (std.mem.eql(u8, readyNode.nodeProto.op_type, "Gelu")) { - //https://onnx.ai/onnx/operators/onnx__Gelu.html - try compute_gelu_output_shape(readyNode); - } else if (std.mem.eql(u8, readyNode.nodeProto.op_type, "LeakyRelu")) { - try compute_leaky_relu_output_shape(readyNode); - } else if (std.mem.eql(u8, readyNode.nodeProto.op_type, "LogSoftmax")) { - try compute_longsoftmax_output_shape(readyNode); - } else if (std.mem.eql(u8, readyNode.nodeProto.op_type, "MatMul")) { - try compute_matmul_output_shape(readyNode); - } else if (std.mem.eql(u8, readyNode.nodeProto.op_type, "MaxPool")) { - try compute_maxPool_output_shape(readyNode); - } else if (std.mem.eql(u8, readyNode.nodeProto.op_type, "Mul")) { - //https://onnx.ai/onnx/operators/onnx__Mul.html - try compute_mul_output_shape(readyNode); - } else if (std.mem.eql(u8, readyNode.nodeProto.op_type, "Neg")) { - 
//https://onnx.ai/onnx/operators/onnx__Neg.html - try compute_neg_output_shape(readyNode); - } else if (std.mem.eql(u8, readyNode.nodeProto.op_type, "OneHot")) { - //https://onnx.ai/onnx/operators/onnx__OneHot.html - try compute_oneHot_output_shape(readyNode); - } else if (std.mem.eql(u8, readyNode.nodeProto.op_type, "Pad")) { - //https://onnx.ai/onnx/operators/onnx__Pad.html - try compute_pads_output_shape(readyNode); - } else if (std.mem.eql(u8, readyNode.nodeProto.op_type, "ReduceMean")) { - try compute_reducemean_output_shape(readyNode); - } else if (std.mem.eql(u8, readyNode.nodeProto.op_type, "Relu")) { - //https://onnx.ai/onnx/operators/onnx__Relu.html - try compute_ReLU_output_shape(readyNode); - } else if (std.mem.eql(u8, readyNode.nodeProto.op_type, "Reshape")) { - // https://onnx.ai/onnx/operators/onnx__Reshape.html - try compute_reshape_output_shape(readyNode); - } else if (std.mem.eql(u8, readyNode.nodeProto.op_type, "Resize")) { - try compute_resize_output_shape(readyNode); - } else if (std.mem.eql(u8, readyNode.nodeProto.op_type, "Shape")) { - try compute_shape_output_shape(readyNode); - } else if (std.mem.eql(u8, readyNode.nodeProto.op_type, "Sigmoid")) { - try compute_sigmoid_output_shape(readyNode); - } else if (std.mem.eql(u8, readyNode.nodeProto.op_type, "Softmax")) { - //https://onnx.ai/onnx/operators/onnx__Softmax.html - try compute_softmax_output_shape(readyNode); - } else if (std.mem.eql(u8, readyNode.nodeProto.op_type, "Slice")) { - try compute_slice_output_shape(readyNode); - } else if (std.mem.eql(u8, readyNode.nodeProto.op_type, "Split")) { - //https://onnx.ai/onnx/operators/onnx__Split.html - try compute_split_output_shape(readyNode); - } else if (std.mem.eql(u8, readyNode.nodeProto.op_type, "Sqrt")) { - try compute_sqrt_output_shape(readyNode); - } else if (std.mem.eql(u8, readyNode.nodeProto.op_type, "Sub")) { - //https://onnx.ai/onnx/operators/onnx__Sub.html - try compute_Sub_output_shape(readyNode); - } else if (std.mem.eql(u8, 
readyNode.nodeProto.op_type, "Tanh")) { - try compute_tanh_output_shape(readyNode); - } else if (std.mem.eql(u8, readyNode.nodeProto.op_type, "Transpose")) { - try compute_transpose_output_shape(readyNode); - } else if (std.mem.eql(u8, readyNode.nodeProto.op_type, "Unsqueeze")) { - try compute_unsqueeze_output_shape(readyNode); - } else if (std.mem.eql(u8, readyNode.nodeProto.op_type, "Identity")) { - //https://onnx.ai/onnx/operators/onnx__Identity.html - try compute_identity_output_shape(readyNode); - } else if (std.mem.eql(u8, readyNode.nodeProto.op_type, "Mean")) { - // https://onnx.ai/onnx/operators/onnx__Mean.html - try compute_mean_output_shape(readyNode); - } else { - Codegen_log.warn("\n\n ERROR! output shape computation for {s} is not available in codeGen_math_handler.compute_output_shape() \n\n", .{readyNode.nodeProto.op_type}); - return error.OperationNotSupported; - } -} - -// ---------------- SHAPE COMPUTATION METHODS ---------------- -inline fn compute_Add_output_shape(readyNode: *ReadyNode) !void { - var shape: []const i64 = undefined; - - if (utils.getTensorShape(readyNode.outputs.items[0].name)) |tensorShape| { - shape = tensorShape; - } else { - shape = readyNode.inputs.items[0].?.shape; - } - readyNode.outputs.items[0].shape = shape; -} - -inline fn compute_batchNormalization_output_shape(readyNode: *ReadyNode) !void { - var shape: []const i64 = undefined; - - if (utils.getTensorShape(readyNode.outputs.items[0].name)) |tensorShape| { - shape = tensorShape; - } else { - shape = try utils.usizeSliceToI64Slice(try tensorMath.get_batchNormalization_output_shape(try utils.i64SliceToUsizeSlice(readyNode.inputs.items[0].?.shape))); - } - readyNode.outputs.items[0].shape = shape; -} - -inline fn compute_cast_output_shape(readyNode: *ReadyNode) !void { - // Cast is an element-wise operation, output shape is identical to input shape - Codegen_log.info("\n====== compute_cast_output_shape node: {s}======", .{readyNode.nodeProto.name.?}); - var shape: []const 
i64 = undefined; - - if (utils.getTensorShape(readyNode.outputs.items[0].name)) |tensorShape| { - shape = tensorShape; - } else { - const input_shape = readyNode.inputs.items[0].?.shape; - Codegen_log.info("\n input_shape: []i64 = {any}", .{input_shape}); - // Cast operation preserves the input shape - shape = try allocator.dupe(i64, input_shape); - Codegen_log.info("\n output_shape: []i64 = {any}", .{shape}); - } - readyNode.outputs.items[0].shape = shape; -} - -inline fn compute_Sub_output_shape(readyNode: *ReadyNode) !void { - var shape: []const i64 = undefined; - - if (utils.getTensorShape(readyNode.outputs.items[0].name)) |tensorShape| { - shape = tensorShape; - } else { - shape = readyNode.inputs.items[0].?.shape; - } - readyNode.outputs.items[0].shape = shape; -} - -inline fn compute_constant_output_shape(readyNode: *ReadyNode) !void { - Codegen_log.info("\n====== compute_constant_output_shape node: {s}======", .{readyNode.nodeProto.name.?}); - - if (utils.getTensorShape(readyNode.outputs.items[0].name)) |tensorShape| { - readyNode.outputs.items[0].shape = tensorShape; - return; - } else { - // Check each possible attribute type for the Constant node - for (readyNode.nodeProto.attribute) |attr| { - if (std.mem.eql(u8, attr.name, "value")) { - // Handle tensor value - use existing utility - const shape = try utils.getConstantTensorDims(readyNode.nodeProto); - - // If the shape is empty (scalar in ONNX), use [1] instead - if (shape.len == 0) { - readyNode.outputs.items[0].shape = try allocator.dupe(i64, &[_]i64{1}); - } else { - readyNode.outputs.items[0].shape = shape; - } - - Codegen_log.info("\n output_shape from tensor: []i64 = {any}", .{readyNode.outputs.items[0].shape}); - return; - } else if (std.mem.eql(u8, attr.name, "value_float") or std.mem.eql(u8, attr.name, "value_int") or - std.mem.eql(u8, attr.name, "value_string")) - { - // These are scalar values - output shape is [1] - readyNode.outputs.items[0].shape = try allocator.dupe(i64, &[_]i64{1}); - 
Codegen_log.info("\n output_shape scalar: []i64 = {any}", .{readyNode.outputs.items[0].shape}); - return; - } else if (std.mem.eql(u8, attr.name, "value_floats") or std.mem.eql(u8, attr.name, "value_ints")) { - // These are 1D arrays - shape is [length] - var length: i64 = 0; - if (attr.type == AttributeType.FLOATS) { - length = @intCast(attr.floats.len); - } else if (attr.type == AttributeType.INTS) { - length = @intCast(attr.ints.len); - } - readyNode.outputs.items[0].shape = try allocator.dupe(i64, &[_]i64{length}); - Codegen_log.info("\n output_shape 1D array: []i64 = {any}", .{readyNode.outputs.items[0].shape}); - return; - } else if (std.mem.eql(u8, attr.name, "value_strings")) { - // 1D array of strings - shape is [length] - const length: i64 = @intCast(attr.strings.len); - readyNode.outputs.items[0].shape = try allocator.dupe(i64, &[_]i64{length}); - Codegen_log.info("\n output_shape string array: []i64 = {any}", .{readyNode.outputs.items[0].shape}); - return; - } else if (std.mem.eql(u8, attr.name, "sparse_value")) { - // For sparse tensor, we need to handle it differently - Codegen_log.warn("\n Warning: Sparse tensor support is limited", .{}); - - // Use a placeholder shape for sparse tensors - assuming scalar for now - readyNode.outputs.items[0].shape = try allocator.dupe(i64, &[_]i64{1}); - Codegen_log.info("\n output_shape from sparse tensor (placeholder): []i64 = {any}", .{readyNode.outputs.items[0].shape}); - return; - } - } - } - - return error.ConstantValueNotFound; -} - -inline fn compute_ReLU_output_shape(readyNode: *ReadyNode) !void { - Codegen_log.info("\n====== compute_ReLU_output_shape node: {s}======", .{readyNode.nodeProto.name.?}); - Codegen_log.info("\n input_shape: []i64 = {any}", .{readyNode.inputs.items[0].?.shape}); - - var shape: []const i64 = undefined; - if (utils.getTensorShape(readyNode.outputs.items[0].name)) |tensorShape| { - shape = tensorShape; - } else { - shape = readyNode.inputs.items[0].?.shape; - } - 
readyNode.outputs.items[0].shape = shape; -} - -inline fn compute_reshape_output_shape(readyNode: *ReadyNode) !void { - Codegen_log.info("\n====== compute_reshape_output_shape node: {s}======", .{readyNode.nodeProto.name orelse "(unnamed)"}); - const input_rt: *globals.ReadyTensor = readyNode.inputs.items[0].?; - const input_shape_i64 = input_rt.shape; - Codegen_log.info("\n input_shape: []i64 = {any}", .{input_shape_i64}); - - var new_shape_spec: []const isize = undefined; // Use []const isize as required by get_reshape_output_shape - var shape_spec_found: bool = false; - var shape_input_needs_free = false; // Flag to track if we allocated new_shape_spec - var allow_zero: bool = false; - - // 1. Get allowzero attribute (default 0 -> false) - for (readyNode.nodeProto.attribute) |attr| { - if (std.mem.eql(u8, attr.name, "allowzero")) { - if (attr.type == AttributeType.INT and attr.i != 0) { - allow_zero = true; - } - break; // Found allowzero, no need to check other attributes for this - } - } - Codegen_log.debug("\n allowzero: {}", .{allow_zero}); - - // 2. Get the target shape spec (new_shape_spec) - // Try getting shape from the second input tensor first - if (readyNode.inputs.items.len > 1) { - const shape_input = readyNode.inputs.items[1].?; - if (shape_input.tensorProto != null and shape_input.tensorProto.?.int64_data != null) { - // Shape is in the tensorProto data (preferred) - new_shape_spec = shape_input.tensorProto.?.int64_data.?; - shape_spec_found = true; - Codegen_log.debug("\n new shape spec from input tensorProto: []i64 = {any}", .{new_shape_spec}); - } else if (shape_input.tensorProto != null and shape_input.tensorProto.?.int64_data == null) { - const proto = shape_input.tensorProto.?; - // Check data type - Reshape requires INT64 shape - if (proto.data_type != .INT64) { - Codegen_log.warn("ERROR: Reshape shape input tensorProto has incorrect data type: {any}. 
Expected INT64.", .{proto.data_type}); - return error.InvalidShapeDataType; - } - - // Try reading from raw_data if int64_data is null - if (proto.raw_data) |raw| { - Codegen_log.debug("\n Shape input tensorProto has raw_data ({} bytes), attempting to parse as i64...", .{raw.len}); - // Call a new utility function to parse raw_data - const parsed_shape = utils.parseI64RawData(raw) catch |err| { - Codegen_log.warn("\n ERROR: Failed to parse raw_data for shape tensor: {any}", .{err}); - return error.RawDataParseFailed; // Or specific error from parsing - }; - // Important: parsed_shape is allocated by the util func and needs freeing later. - // Convert []i64 to []const isize for new_shape_spec - var temp_shape_spec = try allocator.alloc(isize, parsed_shape.len); - for (parsed_shape, 0..) |dim, i| { - temp_shape_spec[i] = dim; - } - new_shape_spec = temp_shape_spec; // Assign the parsed shape - shape_spec_found = true; - shape_input_needs_free = true; // Mark that we allocated this spec - Codegen_log.debug("\n new shape spec parsed from raw_data: []isize = {any}", .{new_shape_spec}); - // We also need to free the intermediate parsed_shape ([]i64) - defer allocator.free(parsed_shape); - } else { - // Data type is INT64, but int64_data is null and raw_data is null/empty. - Codegen_log.warn("ERROR: Reshape shape input tensorProto is INT64 but contains no int64_data or raw_data.", .{}); - return error.ShapeDataMissing; - } - } else { - // If tensorProto is null, this input doesn't directly provide the shape data. - // shape_spec_found remains false, attributes will be checked. - Codegen_log.debug("\n Shape input tensorProto is null, will check attributes.", .{}); - } - } else { - // If no second input, try getting shape from the 'shape' attribute - // shape_spec_found remains false, attributes will be checked. 
- Codegen_log.debug("\n No second input for shape, will check attributes.", .{}); - } - - // If new_shape_spec is still null after checking input, check attributes - if (!shape_spec_found) { - var shape_attr: ?[]const i64 = null; - for (readyNode.nodeProto.attribute) |attr| { - if (std.mem.eql(u8, attr.name, "shape")) { - if (attr.type == AttributeType.INTS) { - shape_attr = attr.ints; - break; - } else { - Codegen_log.warn("ERROR: Reshape 'shape' attribute has unexpected type {}", .{attr.type}); - return error.InvalidAttributeType; - } - } - } - - if (shape_attr) |sa| { - var temp_shape_spec = try allocator.alloc(isize, sa.len); - for (sa, 0..) |dim, i| { - temp_shape_spec[i] = dim; - } - new_shape_spec = temp_shape_spec; - shape_input_needs_free = true; // Mark that we allocated this - shape_spec_found = true; - Codegen_log.debug("\n new shape spec from attribute: []isize = {any}", .{new_shape_spec}); - } else { - // Default behavior: use the inverse of the input shape - Codegen_log.debug("No shape tensor or attribute found, using inverse of input shape as default.", .{}); - - // Allocate space for the inverse shape - var temp_shape_spec = try allocator.alloc(isize, input_shape_i64.len); - - // Create inverse shape by reversing the dimensions - for (input_shape_i64, 0..) |dim, i| { - const inverse_index = input_shape_i64.len - 1 - i; - temp_shape_spec[inverse_index] = dim; - } - - new_shape_spec = temp_shape_spec; - shape_input_needs_free = true; // Mark that we allocated this - shape_spec_found = true; - Codegen_log.debug("\n using inverse shape as default: []isize = {any}", .{new_shape_spec}); - } - } else { - // Ensure cleanup if we allocated the shape spec FROM THE INPUT PATH - // The defer covers the attribute path if allocation happens there. - // NOTE: This defer placement might be tricky. Consider simplifying allocation management. 
- defer if (shape_input_needs_free) allocator.free(new_shape_spec); - } - - // If after all checks, shape_spec is still not found, something went wrong. - if (!shape_spec_found) { - Codegen_log.debug("Critical Error: Shape spec was not found after checking inputs and attributes.", .{}); - return error.ShapeNotFound; - } - - // 3. Convert input shape to usize - const input_shape_usize = try utils.i64SliceToUsizeSlice(input_shape_i64); - defer allocator.free(input_shape_usize); - Codegen_log.info("\n input_shape_usize: []usize = {any}", .{input_shape_usize}); - - // 4. Call the new shape calculation function - const output_shape_usize = try tensorMath.get_reshape_output_shape(input_shape_usize, new_shape_spec, allow_zero); - defer allocator.free(output_shape_usize); // Free the result from get_reshape_output_shape - Codegen_log.info("\n calculated output_shape_usize: []usize = {any}", .{output_shape_usize}); - - // 5. Convert result back to i64 - Codegen_log.debug("\n >>> DEBUG: output_shape_usize before conversion: {any}\n", .{output_shape_usize}); - const output_shape_i64 = try utils.usizeSliceToI64Slice(output_shape_usize); - // NOTE: utils.usizeSliceToI64Slice allocates, so the caller (or ReadyNode deinit) should free it. - - // 6. 
Assign the final shape - readyNode.outputs.items[0].shape = output_shape_i64; - Codegen_log.info("\n final output_shape: []i64 = {any}", .{readyNode.outputs.items[0].shape}); -} - -inline fn compute_softmax_output_shape(readyNode: *ReadyNode) !void { - Codegen_log.info("\n====== compute_softmax_output_shape node: {s}======", .{readyNode.nodeProto.name.?}); - Codegen_log.info("\n input_shape: []i64 = {any}", .{readyNode.inputs.items[0].?.shape}); - var shape: []const i64 = undefined; - - if (utils.getTensorShape(readyNode.outputs.items[0].name)) |tensorShape| { - shape = tensorShape; - } else { - shape = readyNode.inputs.items[0].?.shape; - } - readyNode.outputs.items[0].shape = shape; -} - -inline fn compute_gemm_output_shape(readyNode: *ReadyNode) !void { - Codegen_log.info("\n====== compute_gemm_output_shape node: {s}======", .{readyNode.nodeProto.name.?}); - Codegen_log.info("\n input_shape: []i64 = {any}", .{readyNode.inputs.items[0].?.shape}); - Codegen_log.debug("\n weight_shape: []i64 = {any}", .{readyNode.inputs.items[1].?.shape}); - Codegen_log.debug("\n bias_shape: []i64 = {any}", .{readyNode.inputs.items[2].?.shape}); - var shape: []const i64 = undefined; - - if (utils.getTensorShape(readyNode.outputs.items[0].name)) |tensorShape| { - shape = tensorShape; - } else { - shape = readyNode.inputs.items[2].?.shape; - } - - readyNode.outputs.items[0].shape = shape; -} - -inline fn compute_oneHot_output_shape(readyNode: *ReadyNode) !void { - std.debug.print("\n====== compute_oneHot_output_shape node: {s} ======\n", .{readyNode.nodeProto.name orelse "(unnamed)"}); - - var shape: []const i64 = undefined; - - if (utils.getTensorShape(readyNode.outputs.items[0].name)) |tensorShape| { - shape = tensorShape; - } else { - // Verifica che ci siano esattamente 3 input: indices, depth, values - if (readyNode.inputs.items.len != 3) { - std.debug.print("\n ERROR: OneHot expects exactly 3 inputs, got {d}\n", .{readyNode.inputs.items.len}); - return 
error.InvalidNumberOfInputs; - } - - const indices = readyNode.inputs.items[0].?; - const depth_tensor = readyNode.inputs.items[1].?; - const values = readyNode.inputs.items[2].?; - - std.debug.print("\n indices_shape: []i64 = {any}", .{indices.shape}); - std.debug.print("\n depth_shape: []i64 = {any}", .{depth_tensor.shape}); - std.debug.print("\n values_shape: []i64 = {any}", .{values.shape}); - - // Verifica che depth sia uno scalare (forma [] o [1]) - const depth_shape_i64 = depth_tensor.shape; - const effective_depth_shape_i64 = if (depth_shape_i64.len == 0) &[_]i64{1} else depth_shape_i64; - if (effective_depth_shape_i64.len > 1 or effective_depth_shape_i64[0] != 1) { - std.debug.print("\n ERROR: depth must be a scalar, got shape {any}\n", .{effective_depth_shape_i64}); - return error.InvalidDepthShape; - } - - // Verifica che values abbia forma [2] - const values_shape_i64 = values.shape; - const effective_values_shape_i64 = if (values_shape_i64.len == 0) &[_]i64{1} else values_shape_i64; - if (effective_values_shape_i64.len != 1 or effective_values_shape_i64[0] != 2) { - std.debug.print("\n ERROR: values must have shape [2], got shape {any}\n", .{effective_values_shape_i64}); - return error.InvalidValuesShape; - } - - // Estrai il valore di depth - var depth: i64 = undefined; - if (depth_tensor.tensorProto != null and depth_tensor.tensorProto.?.int64_data != null) { - depth = depth_tensor.tensorProto.?.int64_data.?[0]; - } else if (depth_tensor.tensorProto != null and depth_tensor.tensorProto.?.raw_data != null) { - const raw = depth_tensor.tensorProto.?.raw_data.?; - if (raw.len < @sizeOf(i64)) { - std.debug.print("\n ERROR: depth raw_data is too small to contain an i64\n", .{}); - return error.InvalidDepthData; - } - depth = std.mem.readInt(i64, raw[0..@sizeOf(i64)], .little); - } else { - std.debug.print("\n ERROR: depth tensorProto is missing valid data\n", .{}); - return error.DepthDataMissing; - } - - // Verifica che depth sia positivo - if (depth <= 
0) { - std.debug.print("\n ERROR: depth must be positive, got {d}\n", .{depth}); - return error.InvalidDepthValue; - } - - // Estrai l'attributo axis (default: -1) - var axis: i64 = -1; - for (readyNode.nodeProto.attribute) |attr| { - if (std.mem.eql(u8, attr.name, "axis")) { - if (attr.type != AttributeType.INT) { - std.debug.print("\n ERROR: axis attribute must be INT, got type {any}\n", .{attr.type}); - return error.InvalidAttributeType; - } - axis = attr.i; - break; - } - } - - const indices_shape_i64 = indices.shape; - const indices_shape_usize = try utils.i64SliceToUsizeSlice(indices_shape_i64); - defer allocator.free(indices_shape_usize); - - const output_shape_usize = try tensorMath.get_oneHot_output_shape(indices_shape_usize, depth, axis); - defer allocator.free(output_shape_usize); - - shape = try utils.usizeSliceToI64Slice(output_shape_usize); - } - - readyNode.outputs.items[0].shape = shape; - std.debug.print("\n output_shape: []i64 = {any}", .{readyNode.outputs.items[0].shape}); -} - -inline fn compute_mul_output_shape(readyNode: *ReadyNode) !void { - Codegen_log.info("\n====== compute_mul_output_shape node: {s} ======\n", .{readyNode.nodeProto.name.?}); - - var shape: []const i64 = undefined; - - if (utils.getTensorShape(readyNode.outputs.items[0].name)) |tensorShape| { - shape = tensorShape; - } else { - const input_a = readyNode.inputs.items[0]; - const input_b = readyNode.inputs.items[1]; - - Codegen_log.info("\n input_a_shape: []i64 = {any}", .{input_a.?.shape}); - Codegen_log.info("\n input_b_shape: []i64 = {any}", .{input_b.?.shape}); - - const shape_a_i64 = input_a.?.shape; - const shape_b_i64 = input_b.?.shape; - - // Handle empty shapes by treating them as {1} for broadcasting calculation - const effective_shape_a_i64 = if (shape_a_i64.len == 0) &[_]i64{1} else shape_a_i64; - const effective_shape_b_i64 = if (shape_b_i64.len == 0) &[_]i64{1} else shape_b_i64; - - // Convert effective shapes to usize - const shape_a_usize = try 
utils.i64SliceToUsizeSlice(effective_shape_a_i64); - const shape_b_usize = try utils.i64SliceToUsizeSlice(effective_shape_b_i64); - - // Use TensorMath to compute the output shape using effective shapes - const output_shape_usize = try tensorMath.get_mul_output_shape(shape_a_usize, shape_b_usize); - - // Defer freeing the intermediate usize slices *after* they've been used - if (shape_a_i64.len != 0) { - defer allocator.free(shape_a_usize); - } - if (shape_b_i64.len != 0) { - defer allocator.free(shape_b_usize); - } - // Defer freeing the result from get_mul_output_shape *after* it has been used for conversion - defer allocator.free(output_shape_usize); - - // Convert the result back to i64 - shape = try utils.usizeSliceToI64Slice(output_shape_usize); - // Note: The memory for 'shape' is now owned by the caller/ReadyNode management - } - - readyNode.outputs.items[0].shape = shape; - Codegen_log.info("\n output_shape: []i64 = {any}", .{readyNode.outputs.items[0].shape}); -} - -inline fn compute_conv_output_shape(readyNode: *ReadyNode) !void { - Codegen_log.info("\n====== compute_conv_output_shape node: {s}======", .{readyNode.nodeProto.name.?}); - var shape: []const i64 = undefined; - - if (utils.getTensorShape(readyNode.outputs.items[0].name)) |tensorShape| { - shape = tensorShape; - } else { - const input_shape: []const i64 = readyNode.inputs.items[0].?.shape; - const kernel_shape: []const i64 = readyNode.inputs.items[1].?.shape; - - var stride: ?[]i64 = null; - var dilation: ?[]i64 = null; - var auto_pad: []const u8 = "NOTSET"; - var pads: ?[]i64 = null; - for (readyNode.nodeProto.attribute) |attr| { - if (std.mem.eql(u8, attr.name, "strides")) { - if (attr.type == AttributeType.INTS) stride = attr.ints; - } else if (std.mem.eql(u8, attr.name, "dilations")) { - if (attr.type == AttributeType.INTS) dilation = attr.ints; - } else if (std.mem.eql(u8, attr.name, "auto_pad")) { - if (attr.type == AttributeType.STRING) auto_pad = attr.s; - } - if (std.mem.eql(u8, 
attr.name, "pads")) { - if (attr.type == AttributeType.INTS) pads = attr.ints; - } - } - - if (stride == null) return error.StridesNotFound; - if (dilation == null) return error.DilationsNotFound; - - Codegen_log.info("\n input_shape: []i64 = {any}", .{input_shape}); - Codegen_log.debug("\n kernel_shape: []i64 = {any}", .{kernel_shape}); - Codegen_log.debug("\n stride: []i64 = {any}", .{stride.?}); - //Codegen_log.debug("\n pads: []i64 = {any}", .{pads.?}); - shape = try utils.usizeSliceToI64Slice( - @constCast( - &try tensorMath.get_convolution_output_shape( - try utils.i64SliceToUsizeSlice(input_shape), - try utils.i64SliceToUsizeSlice(kernel_shape), - try utils.i64SliceToUsizeSlice(stride.?), - if (pads != null) try utils.i64SliceToUsizeSlice(pads.?) else null, - try utils.i64SliceToUsizeSlice(dilation.?), - auto_pad, - ), - ), - ); - } - readyNode.outputs.items[0].shape = shape; - Codegen_log.info("\n output_shape: []i64 = {any}", .{readyNode.outputs.items[0].shape}); -} - -inline fn compute_maxPool_output_shape(readyNode: *ReadyNode) !void { - Codegen_log.info("\n====== compute_maxPool_output_shape node: {s}======", .{readyNode.nodeProto.name.?}); - const input_shape: []const i64 = readyNode.inputs.items[0].?.shape; - var shape: []const i64 = undefined; - - if (utils.getTensorShape(readyNode.outputs.items[0].name)) |tensorShape| { - shape = tensorShape; - } else { - var kernel_shape: ?[]i64 = null; - var stride: ?[]i64 = null; - - for (readyNode.nodeProto.attribute) |attr| { - if (std.mem.eql(u8, attr.name, "kernel_shape")) { - if (attr.type == AttributeType.INTS) kernel_shape = attr.ints; - } else if (std.mem.eql(u8, attr.name, "strides")) { - if (attr.type == AttributeType.INTS) stride = attr.ints; - } - } - - if (kernel_shape == null) return error.KernelShapeNotFound; - if (stride == null) return error.StridesNotFound; - - Codegen_log.info("\n input_shape: []i64 = {any}", .{input_shape}); - Codegen_log.debug("\n kernel_shape: []i64 = {any}", 
.{kernel_shape.?}); - Codegen_log.debug("\n stride: []i64 = {any}", .{stride.?}); - - const kernel_2d = [2]usize{ @intCast(kernel_shape.?[0]), @intCast(kernel_shape.?[1]) }; - const stride_2d = [2]usize{ @intCast(stride.?[0]), @intCast(stride.?[1]) }; - - shape = try utils.usizeSliceToI64Slice( - @constCast( - &try tensorMath.get_pooling_output_shape( - try utils.i64SliceToUsizeSlice(input_shape), - kernel_2d, - stride_2d, - ), - ), - ); - } - readyNode.outputs.items[0].shape = shape; - Codegen_log.info("\n output_shape: []i64 = {any}", .{readyNode.outputs.items[0].shape}); -} - -inline fn compute_averagePool_output_shape(readyNode: *ReadyNode) !void { - // https://onnx.ai/onnx/operators/onnx__AveragePool.html - // Computes the output shape for an AveragePool node based on input shape and attributes. - const input_shape: []const i64 = readyNode.inputs.items[0].?.shape; - var output_shape: []const i64 = undefined; - - if (utils.getTensorShape(readyNode.outputs.items[0].name)) |tensorShape| { - output_shape = tensorShape; - } else { - var kernel_shape: ?[]i64 = null; - var stride: ?[]i64 = null; - var dilation: ?[]i64 = null; - var auto_pad: []const u8 = "NOTSET"; - var pads: ?[]i64 = null; - var ceil_mode: bool = false; - var count_include_pad: bool = false; - - // Extract attributes from node - for (readyNode.nodeProto.attribute) |attr| { - if (std.mem.eql(u8, attr.name, "kernel_shape")) { - if (attr.type == AttributeType.INTS) kernel_shape = attr.ints; - } else if (std.mem.eql(u8, attr.name, "strides")) { - if (attr.type == AttributeType.INTS) stride = attr.ints; - } else if (std.mem.eql(u8, attr.name, "dilations")) { - if (attr.type == AttributeType.INTS) dilation = attr.ints; - } else if (std.mem.eql(u8, attr.name, "auto_pad")) { - if (attr.type == AttributeType.STRING) auto_pad = attr.s; - } else if (std.mem.eql(u8, attr.name, "pads")) { - if (attr.type == AttributeType.INTS) pads = attr.ints; - } else if (std.mem.eql(u8, attr.name, "ceil_mode")) { - if 
(attr.type == AttributeType.INT) ceil_mode = attr.i != 0; - } else if (std.mem.eql(u8, attr.name, "count_include_pad")) { - if (attr.type == AttributeType.INT) count_include_pad = attr.i != 0; - } - } - - // Check mandatory attributes - if (kernel_shape == null) return error.KernelShapeNotFound; - if (stride == null) return error.StridesNotFound; - - // Create proper allocated slices for default values - var default_stride: []i64 = undefined; - var default_dilation: []i64 = undefined; - var default_pads: []i64 = undefined; - var should_free_stride = false; - var should_free_dilation = false; - var should_free_pads = false; - - if (stride == null) { - default_stride = try allocator.alloc(i64, 2); - default_stride[0] = 1; - default_stride[1] = 1; - stride = default_stride; - should_free_stride = true; - } - - if (dilation == null) { - default_dilation = try allocator.alloc(i64, 2); - default_dilation[0] = 1; - default_dilation[1] = 1; - dilation = default_dilation; - should_free_dilation = true; - } - - if (pads == null) { - default_pads = try allocator.alloc(i64, 4); - @memset(default_pads, 0); - pads = default_pads; - should_free_pads = true; - } - - defer { - if (should_free_stride) allocator.free(default_stride); - if (should_free_dilation) allocator.free(default_dilation); - if (should_free_pads) allocator.free(default_pads); - } - - // Convert AutoPadType from string - var auto_pad_type: tensorMath.AutoPadType = .NOTSET; - if (std.mem.eql(u8, auto_pad, "VALID")) { - auto_pad_type = .VALID; - } else if (std.mem.eql(u8, auto_pad, "SAME_UPPER")) { - auto_pad_type = .SAME_UPPER; - } else if (std.mem.eql(u8, auto_pad, "SAME_LOWER")) { - auto_pad_type = .SAME_LOWER; - } - - // Convert parameters to usize - const usize_input_shape = try utils.i64SliceToUsizeSlice(input_shape); - defer allocator.free(usize_input_shape); - - const usize_kernel_shape = try utils.i64SliceToUsizeSlice(kernel_shape.?); - defer allocator.free(usize_kernel_shape); - - const usize_stride = try 
utils.i64SliceToUsizeSlice(stride.?); - defer allocator.free(usize_stride); - - const usize_dilation = try utils.i64SliceToUsizeSlice(dilation.?); - defer allocator.free(usize_dilation); - - const usize_pads = try utils.i64SliceToUsizeSlice(pads.?); - defer allocator.free(usize_pads); - - // Call the AveragePool shape function - output_shape = try utils.usizeSliceToI64Slice(@constCast(try tensorMath.get_onnx_averagepool_output_shape(usize_input_shape, usize_kernel_shape, usize_stride, usize_dilation, usize_pads, auto_pad_type, ceil_mode))); - } - // Assign the output shape to the node - readyNode.outputs.items[0].shape = output_shape; - // Codegen_log.debug("\n output_shape: []i64 = {any}", .{readyNode.outputs.items[0].shape}); -} - -inline fn compute_reducemean_output_shape(readyNode: *ReadyNode) !void { - Codegen_log.info("\n====== compute_reducemean_output_shape node: {s}======", .{readyNode.nodeProto.name.?}); - var shape: []const i64 = undefined; - if (utils.getTensorShape(readyNode.outputs.items[0].name)) |tensorShape| { - shape = tensorShape; - } else { - const input_shape = try utils.i64SliceToUsizeSlice(readyNode.inputs.items[0].?.shape); - defer allocator.free(input_shape); - - // Get attributes - var keepdims: bool = true; - var noop_with_empty_axes: bool = false; - - for (readyNode.nodeProto.attribute) |attr| { - if (std.mem.eql(u8, attr.name, "keepdims")) { - if (attr.type == AttributeType.INT) keepdims = attr.i != 0; - } else if (std.mem.eql(u8, attr.name, "noop_with_empty_axes")) { - if (attr.type == AttributeType.INT) noop_with_empty_axes = attr.i != 0; - } - } - - // Get axes from second input if it exists - var axes: ?[]const i64 = null; - if (readyNode.inputs.items.len > 1 and - readyNode.inputs.items[1].?.tensorProto != null and - readyNode.inputs.items[1].?.tensorProto.?.int64_data != null) - { - axes = readyNode.inputs.items[1].?.tensorProto.?.int64_data.?; - } - - Codegen_log.info("\n input_shape: []usize = {any}", .{input_shape}); - 
Codegen_log.debug("\n axes: ?[]i64 = {any}", .{axes}); - Codegen_log.debug("\n keepdims: {}", .{keepdims}); - Codegen_log.debug("\n noop_with_empty_axes: {}", .{noop_with_empty_axes}); - - const output_shape = try tensorMath.get_reduce_mean_output_shape(input_shape, axes, keepdims, noop_with_empty_axes); - defer allocator.free(output_shape); - - shape = try utils.usizeSliceToI64Slice(output_shape); - Codegen_log.info("\n output_shape: []i64 = {any}", .{readyNode.outputs.items[0].shape}); - } - readyNode.outputs.items[0].shape = shape; -} -inline fn compute_slice_output_shape(readyNode: *ReadyNode) !void { - Codegen_log.info("\n====== compute_slice_output_shape node: {s}======", .{readyNode.nodeProto.name.?}); - var shape: []const i64 = undefined; - - if (utils.getTensorShape(readyNode.outputs.items[0].name)) |tensorShape| { - shape = tensorShape; - } else { - const input_shape = readyNode.inputs.items[0].?.shape; - const starts = readyNode.inputs.items[1].?.tensorProto.?.int64_data.?; - const ends = readyNode.inputs.items[2].?.tensorProto.?.int64_data.?; - - var axes: ?[]i64 = null; - var steps: ?[]i64 = null; - - // Get axes if provided (input 3) - if (readyNode.inputs.items.len > 3) { - axes = readyNode.inputs.items[3].?.tensorProto.?.int64_data.?; - } - - // Get steps if provided (input 4) - if (readyNode.inputs.items.len > 4) { - steps = readyNode.inputs.items[4].?.tensorProto.?.int64_data.?; - } - - Codegen_log.info("\n input_shape: []i64 = {any}", .{input_shape}); - Codegen_log.debug("\n starts: []i64 = {any}", .{starts}); - Codegen_log.debug("\n ends: []i64 = {any}", .{ends}); - Codegen_log.debug("\n axes: []i64 = {any}", .{axes}); - Codegen_log.debug("\n steps: []i64 = {any}", .{steps}); - - shape = try utils.usizeSliceToI64Slice(try tensorMath.get_slice_output_shape( - try utils.i64SliceToUsizeSlice(input_shape), - starts, - ends, - axes, - steps, - )); - } - - readyNode.outputs.items[0].shape = shape; - Codegen_log.info("\n output_shape: []i64 = {any}", 
.{readyNode.outputs.items[0].shape}); -} - -inline fn compute_shape_output_shape(readyNode: *ReadyNode) !void { - Codegen_log.info("\n====== compute_shape_output_shape node: {s}======", .{readyNode.nodeProto.name.?}); - var shape: []const i64 = undefined; - - if (utils.getTensorShape(readyNode.outputs.items[0].name)) |tensorShape| { - shape = tensorShape; - } else { - const input_shape = readyNode.inputs.items[0].?.shape; - - // Get start and end attributes if they exist - var start: ?i64 = null; - var end: ?i64 = null; - - for (readyNode.nodeProto.attribute) |attr| { - if (std.mem.eql(u8, attr.name, "start")) { - if (attr.type == AttributeType.INT) start = attr.i; - } else if (std.mem.eql(u8, attr.name, "end")) { - if (attr.type == AttributeType.INT) end = attr.i; - } - } - // Calculate output size - shape = try utils.usizeSliceToI64Slice(try tensorMath.get_shape_output_shape(try utils.i64SliceToUsizeSlice(input_shape), start, end)); - } - - // Shape operator always outputs a 1D tensor } - readyNode.outputs.items[0].shape = shape; - Codegen_log.info("\n output_shape: []i64 = {any}", .{readyNode.outputs.items[0].shape}); -} - -inline fn compute_gather_output_shape(readyNode: *ReadyNode) !void { - Codegen_log.info("\n====== compute_gather_output_shape node: {s}======", .{readyNode.nodeProto.name.?}); - var shape: []const i64 = undefined; - - if (utils.getTensorShape(readyNode.outputs.items[0].name)) |tensorShape| { - shape = tensorShape; - } else { - const data_shape = readyNode.inputs.items[0].?.shape; - const indices_shape = readyNode.inputs.items[1].?.shape; - - // Get axis attribute, default is 0 - var axis: i64 = 0; - for (readyNode.nodeProto.attribute) |attr| { - if (std.mem.eql(u8, attr.name, "axis")) { - if (attr.type == AttributeType.INT) axis = attr.i; - } - } - - // Handle negative axis - if (axis < 0) { - axis += @as(i64, @intCast(data_shape.len)); - } - - Codegen_log.debug("\n data_shape: []i64 = {any}", .{data_shape}); - Codegen_log.debug("\n 
indices_shape: []i64 = {any}", .{indices_shape}); - Codegen_log.debug("\n axis: {}", .{axis}); - - // Calculate output shape: - shape = try utils.usizeSliceToI64Slice(try tensorMath.get_gather_output_shape( - try utils.i64SliceToUsizeSlice(data_shape), - try utils.i64SliceToUsizeSlice(indices_shape), - axis, - )); - } - - readyNode.outputs.items[0].shape = shape; - Codegen_log.info("\n output_shape: []i64 = {any}", .{readyNode.outputs.items[0].shape}); -} - -inline fn compute_sigmoid_output_shape(readyNode: *ReadyNode) !void { - Codegen_log.info("\n====== compute_sigmoid_output_shape node: {s}======", .{readyNode.nodeProto.name.?}); - var shape: []const i64 = undefined; - - if (utils.getTensorShape(readyNode.outputs.items[0].name)) |tensorShape| { - shape = tensorShape; - } else { - const input_shape = readyNode.inputs.items[0].?.shape; - Codegen_log.info("\n input_shape: []i64 = {any}", .{input_shape}); - - shape = try utils.usizeSliceToI64Slice(try tensorMath.get_sigmoid_output_shape(try utils.i64SliceToUsizeSlice(input_shape))); - } - readyNode.outputs.items[0].shape = shape; - Codegen_log.info("\n output_shape: []i64 = {any}", .{readyNode.outputs.items[0].shape}); -} - -inline fn compute_transpose_output_shape(readyNode: *ReadyNode) !void { - var shape: []const i64 = undefined; - - if (utils.getTensorShape(readyNode.outputs.items[0].name)) |tensorShape| { - shape = tensorShape; - } else { - //get perm - var perm: ?[]i64 = null; - for (readyNode.nodeProto.attribute) |attr| { - if (std.mem.eql(u8, attr.name, "perm")) { - if (attr.type == AttributeType.INTS) { - perm = attr.ints; - } - } - } - const input_shape = try utils.i64SliceToUsizeSlice(readyNode.inputs.items[0].?.shape); - - if (perm) |p| { - shape = try utils.usizeSliceToI64Slice(try tensorMath.get_transpose_output_shape(input_shape, try utils.i64SliceToUsizeSlice(p))); - } else { - const perm_usize: ?[]const usize = null; - shape = try utils.usizeSliceToI64Slice(try 
tensorMath.get_transpose_output_shape(input_shape, perm_usize)); - } - } - readyNode.outputs.items[0].shape = shape; -} - -inline fn compute_unsqueeze_output_shape(readyNode: *ReadyNode) !void { - Codegen_log.info("\n====== compute_unsqueeze_output_shape node: {s}======", .{readyNode.nodeProto.name.?}); - var shape: []const i64 = undefined; - if (utils.getTensorShape(readyNode.outputs.items[0].name)) |tensorShape| { - shape = tensorShape; - } else { - const input_shape = readyNode.inputs.items[0].?.shape; - Codegen_log.info("\n input_shape: []i64 = {any}", .{input_shape}); - - // Get axes from attributes or from the second input tensor - var axes: ?[]const i64 = null; - - // First check if axes is provided as an input tensor (ONNX opset 13+) - if (readyNode.inputs.items.len > 1 and readyNode.inputs.items[1].?.tensorProto != null) { - axes = readyNode.inputs.items[1].?.tensorProto.?.int64_data.?; - Codegen_log.debug("\n axes from input tensor: []i64 = {any}", .{axes.?}); - } else { - // Otherwise, check for axes attribute (ONNX opset < 13) - for (readyNode.nodeProto.attribute) |attr| { - if (std.mem.eql(u8, attr.name, "axes")) { - if (attr.type == AttributeType.INTS) { - axes = attr.ints; - Codegen_log.debug("\n axes from attribute: []i64 = {any}", .{axes.?}); - break; - } - } - } - } - - if (axes == null) return error.UnsqueezeAxesNotFound; - - // Calculate output shape - shape = try utils.usizeSliceToI64Slice(try tensorMath.get_unsqueeze_output_shape( - try utils.i64SliceToUsizeSlice(input_shape), - axes.?, - )); - } - - readyNode.outputs.items[0].shape = shape; - Codegen_log.info("\n output_shape: []i64 = {any}", .{readyNode.outputs.items[0].shape}); -} - -pub fn compute_concat_output_shape(readyNode: *ReadyNode) !void { - // Codegen_log.debug("\n compute_concat_output_shape for node: {s}", .{readyNode.nodeProto.name.?}); - - // Get the axis attribute (required) - var axis: i64 = 0; - var axis_found = false; - - for (readyNode.nodeProto.attribute) |attr| { - if 
(std.mem.eql(u8, attr.name, "axis")) { - if (attr.type == AttributeType.INT) { - axis = attr.i; - axis_found = true; - } else { - return error.ConcatAxisNotINT; - } - } - } - - if (!axis_found) { - return error.ConcatAxisNotFound; - } - - // Codegen_log.debug("\n axis: {}", .{axis}); - // Codegen_log.debug("\n number of inputs: {}", .{readyNode.inputs.items.len}); - - // Ensure there's at least one input tensor - if (readyNode.inputs.items.len == 0) { - return error.ConcatNoInputs; - } - - // Print input shapes - for (readyNode.inputs.items, 0..) |input, i| { - if (input) |in| Codegen_log.debug("\n input[{}] shape: []i64 = {any}", .{ i, in.shape }) else Codegen_log.debug("\n input[{}] is null", .{i}); - } - - // Convert input shapes to usize for get_concatenate_output_shape - Codegen_log.debug("\n Converting input shapes to usize...", .{}); - var input_shapes = try allocator.alloc([]const usize, readyNode.inputs.items.len); - errdefer { - Codegen_log.warn("\n Error occurred, cleaning up input_shapes...", .{}); - for (input_shapes) |shape| { - allocator.free(shape); - } - allocator.free(input_shapes); - } - - for (readyNode.inputs.items, 0..) |input, i| { - Codegen_log.debug("\n Converting input[{}] shape to usize...", .{i}); - // Handle negative values by using 1 as a placeholder - var shape = try allocator.alloc(usize, input.?.shape.len); - for (input.?.shape, 0..) 
|dim, j| { - shape[j] = if (dim < 0) 1 else @intCast(dim); - } - input_shapes[i] = shape; - Codegen_log.debug("\n Converted shape: []usize = {any}", .{input_shapes[i]}); - } - - // Get output shape using the existing function - Codegen_log.debug("\n Calling get_concatenate_output_shape...", .{}); - const output_shape = try tensorMath.get_concatenate_output_shape(input_shapes, axis); - errdefer { - Codegen_log.warn("\n Error occurred, cleaning up output_shape...", .{}); - allocator.free(output_shape); - } - Codegen_log.debug("\n Got output shape: []usize = {any}", .{output_shape}); - - // Convert back to i64 for storing in readyNode - Codegen_log.debug("\n Converting output shape back to i64...", .{}); - readyNode.outputs.items[0].shape = try utils.usizeSliceToI64Slice(output_shape); - Codegen_log.debug("\n Final output shape: []i64 = {any}", .{readyNode.outputs.items[0].shape}); - - // Clean up - Codegen_log.debug("\n Cleaning up temporary allocations...", .{}); - for (input_shapes) |shape| { - allocator.free(shape); - } - allocator.free(input_shapes); - allocator.free(output_shape); - // Codegen_log.debug("\n Cleanup complete", .{}); -} - -inline fn compute_sqrt_output_shape(readyNode: *ReadyNode) !void { - const input = readyNode.inputs.items[0] orelse { - return error.InputTensorIsNull; - }; - - var shape: []const i64 = undefined; - - if (utils.getTensorShape(readyNode.outputs.items[0].name)) |tensorShape| { - shape = tensorShape; - } else { - const input_shape = input.shape; - Codegen_log.info("\n input_shape: []i64 = {any}", .{input_shape}); - - shape = try utils.usizeSliceToI64Slice(try tensorMath.get_sqrt_output_shape(try utils.i64SliceToUsizeSlice(input_shape))); - } - readyNode.outputs.items[0].shape = shape; -} - -inline fn compute_tanh_output_shape(readyNode: *ReadyNode) !void { - const input = readyNode.inputs.items[0] orelse { - return error.InputTensorIsNull; - }; - - var shape: []const i64 = undefined; - - if 
(utils.getTensorShape(readyNode.outputs.items[0].name)) |tensorShape| { - shape = tensorShape; - } else { - const input_shape = input.shape; - Codegen_log.info("\n input_shape: []i64 = {any}", .{input_shape}); - - shape = try utils.usizeSliceToI64Slice(try tensorMath.get_tanh_output_shape(try utils.i64SliceToUsizeSlice(input_shape))); - } - readyNode.outputs.items[0].shape = shape; -} - -inline fn compute_gelu_output_shape(readyNode: *ReadyNode) !void { - const input = readyNode.inputs.items[0] orelse { - return error.InputTensorIsNull; - }; - - var shape: []const i64 = undefined; - - if (utils.getTensorShape(readyNode.outputs.items[0].name)) |tensorShape| { - shape = tensorShape; - } else { - const input_shape = input.shape; - Codegen_log.info("\n input_shape: []i64 = {any}", .{input_shape}); - - shape = try utils.usizeSliceToI64Slice(try tensorMath.get_gelu_output_shape(try utils.i64SliceToUsizeSlice(input_shape))); - } - readyNode.outputs.items[0].shape = shape; -} - -inline fn compute_floor_output_shape(readyNode: *ReadyNode) !void { - const input = readyNode.inputs.items[0] orelse { - return error.InputTensorIsNull; - }; - - var shape: []const i64 = undefined; - - if (utils.getTensorShape(readyNode.outputs.items[0].name)) |tensorShape| { - shape = tensorShape; - } else { - const input_shape = input.shape; - std.debug.print("\n input_shape: []i64 = {any}", .{input_shape}); - - shape = try utils.usizeSliceToI64Slice(try tensorMath.get_floor_output_shape(try utils.i64SliceToUsizeSlice(input_shape))); - } - readyNode.outputs.items[0].shape = shape; -} - -inline fn compute_elu_output_shape(readyNode: *ReadyNode) !void { - const input = readyNode.inputs.items[0] orelse { - return error.InputTensorIsNull; - }; - - var shape: []const i64 = undefined; - - if (utils.getTensorShape(readyNode.outputs.items[0].name)) |tensorShape| { - shape = tensorShape; - } else { - const input_shape = input.shape; - Codegen_log.info("\n input_shape: []i64 = {any}", .{input_shape}); - - 
shape = try utils.usizeSliceToI64Slice(try tensorMath.get_elu_output_shape(try utils.i64SliceToUsizeSlice(input_shape))); - } - readyNode.outputs.items[0].shape = shape; -} - -inline fn compute_ceil_output_shape(readyNode: *ReadyNode) !void { - Codegen_log.info("\n====== compute_ceil_output_shape node: {s}======", .{readyNode.nodeProto.name.?}); - const input = readyNode.inputs.items[0] orelse { - return error.InputTensorIsNull; - }; - - var shape: []const i64 = undefined; - - if (utils.getTensorShape(readyNode.outputs.items[0].name)) |tensorShape| { - shape = tensorShape; - } else { - const input_shape = input.shape; - Codegen_log.info("\n input_shape: []i64 = {any}", .{input_shape}); - - const output_shape = try tensorMath.get_ceil_output_shape(try utils.i64SliceToUsizeSlice(input_shape)); - shape = try utils.usizeSliceToI64Slice(output_shape); - } - readyNode.outputs.items[0].shape = shape; -} - -inline fn compute_clip_output_shape(readyNode: *ReadyNode) !void { - Codegen_log.info("\n====== compute_ceil_output_shape node: {s}======", .{readyNode.nodeProto.name.?}); - const input = readyNode.inputs.items[0] orelse { - return error.InputTensorIsNull; - }; - - var shape: []const i64 = undefined; - - if (utils.getTensorShape(readyNode.outputs.items[0].name)) |tensorShape| { - shape = tensorShape; - } else { - const input_shape = input.shape; - Codegen_log.info("\n input_shape: []i64 = {any}", .{input_shape}); - - const output_shape = try tensorMath.get_ceil_output_shape(try utils.i64SliceToUsizeSlice(input_shape)); - shape = try utils.usizeSliceToI64Slice(output_shape); - } - readyNode.outputs.items[0].shape = shape; -} - -inline fn compute_identity_output_shape(readyNode: *ReadyNode) !void { - Codegen_log.info("\n====== compute_identity_output_shape node: {s}======", .{readyNode.nodeProto.name.?}); - var shape: []const i64 = undefined; - - if (utils.getTensorShape(readyNode.outputs.items[0].name)) |tensorShape| { - shape = tensorShape; - return; - } else { - const 
input_shape = readyNode.inputs.items[0].?.shape; - Codegen_log.info("\n input_shape: []i64 = {any}", .{input_shape}); - const output_shape = try tensorMath.get_identity_output_shape(try utils.i64SliceToUsizeSlice(input_shape)); - // Identity operation preserves the input shape - shape = try utils.usizeSliceToI64Slice(output_shape); - Codegen_log.info("\n output_shape: []i64 = {any}", .{shape}); - } - readyNode.outputs.items[0].shape = shape; -} - -inline fn compute_leaky_relu_output_shape(readyNode: *ReadyNode) !void { - Codegen_log.info("\n====== compute_leaky_relu_output_shape node: {s}======", .{readyNode.nodeProto.name.?}); - var shape: []const i64 = undefined; - if (utils.getTensorShape(readyNode.outputs.items[0].name)) |tensorShape| { - shape = tensorShape; - return; - } else { - const input_shape = readyNode.inputs.items[0].?.shape; - Codegen_log.info("\n input_shape: []i64 = {any}", .{input_shape}); - const output_shape = try tensorMath.get_leaky_relu_output_shape(try utils.i64SliceToUsizeSlice(input_shape)); - // LeakyReLU is an element-wise operation, output shape is identical to input shape - shape = try utils.usizeSliceToI64Slice(output_shape); - Codegen_log.info("\n output_shape: []i64 = {any}", .{shape}); - } - readyNode.outputs.items[0].shape = shape; -} - -inline fn compute_longsoftmax_output_shape(readyNode: *ReadyNode) !void { - Codegen_log.info("\n====== compute_longsoftmax_output_shape node: {s}======", .{readyNode.nodeProto.name.?}); - var shape: []const i64 = undefined; - if (utils.getTensorShape(readyNode.outputs.items[0].name)) |tensorShape| { - shape = tensorShape; - return; - } else { - const input_shape = readyNode.inputs.items[0].?.shape; - Codegen_log.info("\n input_shape: []i64 = {any}", .{input_shape}); - const output_shape = try tensorMath.get_longsoftmax_output_shape(try utils.i64SliceToUsizeSlice(input_shape)); - // LongSoftmax is an element-wise operation, output shape is identical to input shape - shape = try 
utils.usizeSliceToI64Slice(output_shape); - Codegen_log.info("\n output_shape: []i64 = {any}", .{shape}); - } - readyNode.outputs.items[0].shape = shape; -} - -inline fn compute_matmul_output_shape(readyNode: *ReadyNode) !void { - Codegen_log.info("\n====== compute_matmul_output_shape node: {s}======", .{readyNode.nodeProto.name.?}); - var shape: []const i64 = undefined; - if (utils.getTensorShape(readyNode.outputs.items[0].name)) |tensorShape| { - shape = tensorShape; - } else { - const input_shape_a = readyNode.inputs.items[0].?.shape; - const input_shape_b = readyNode.inputs.items[1].?.shape; - Codegen_log.info("\n input_shape_a: []i64 = {any}", .{input_shape_a}); - Codegen_log.info("\n input_shape_b: []i64 = {any}", .{input_shape_b}); - - const output_shape = try tensorMath.get_mat_mul_output_shape(try utils.i64SliceToUsizeSlice(input_shape_a), try utils.i64SliceToUsizeSlice(input_shape_b)); - // MatMul is an element-wise operation, output shape is identical to input shape - shape = try utils.usizeSliceToI64Slice(output_shape); - Codegen_log.info("\n output_shape: []i64 = {any}", .{shape}); - } - readyNode.outputs.items[0].shape = shape; -} - -inline fn compute_split_output_shape(readyNode: *ReadyNode) !void { - Codegen_log.info("\n====== compute_split_output_shape node: {s}======", .{readyNode.nodeProto.name.?}); - - if (utils.getTensorShape(readyNode.outputs.items[0].name)) |tensorShape| { - readyNode.outputs.items[0].shape = tensorShape; - } else { - const input_shape = readyNode.inputs.items[0].?.shape; - - // Get axis attribute (default is 0) - var axis: i64 = 0; - var split_sizes: ?[]i64 = null; - - // Extract attributes - for (readyNode.nodeProto.attribute) |attr| { - if (std.mem.eql(u8, attr.name, "axis")) { - if (attr.type == AttributeType.INT) axis = attr.i; - } else if (std.mem.eql(u8, attr.name, "split")) { - if (attr.type == AttributeType.INTS) split_sizes = attr.ints; - } - } - - // Check if split_sizes is provided as an input (ONNX opset 13+) - 
if (readyNode.inputs.items.len > 1 and - readyNode.inputs.items[1].?.tensorProto != null and - readyNode.inputs.items[1].?.tensorProto.?.int64_data != null) - { - split_sizes = readyNode.inputs.items[1].?.tensorProto.?.int64_data.?; - } - - Codegen_log.info("\n input_shape: []i64 = {any}", .{input_shape}); - Codegen_log.debug("\n axis: {}", .{axis}); - Codegen_log.debug("\n split_sizes: {any}", .{split_sizes}); - Codegen_log.debug("\n num_outputs: {}", .{readyNode.outputs.items.len}); - - // Convert i64 split_sizes to usize if provided - var usize_split_sizes: ?[]usize = null; - defer if (usize_split_sizes != null) allocator.free(usize_split_sizes.?); - - if (split_sizes) |sizes| { - usize_split_sizes = try allocator.alloc(usize, sizes.len); - for (sizes, 0..) |size, i| { - usize_split_sizes.?[i] = @intCast(size); - } - } - - // Convert input_shape to usize - const usize_input_shape = try utils.i64SliceToUsizeSlice(input_shape); - defer allocator.free(usize_input_shape); - - // Get output shapes using the utility function - const output_shapes = try tensorMath.get_split_output_shapes(usize_input_shape, axis, usize_split_sizes, readyNode.outputs.items.len // Pass the number of outputs - ); - defer { - for (output_shapes) |shape| { - allocator.free(shape); - } - allocator.free(output_shapes); - } - - // Ensure we have enough output tensors - if (readyNode.outputs.items.len != output_shapes.len) { - return error.MismatchedOutputCount; - } - - // Set the output shapes - for (output_shapes, 0..) 
|shape, i| { - readyNode.outputs.items[i].shape = try utils.usizeSliceToI64Slice(shape); - Codegen_log.info("\n output[{}] shape: []i64 = {any}", .{ i, readyNode.outputs.items[i].shape }); - } - } -} - -pub fn compute_resize_output_shape(readyNode: *ReadyNode) !void { - var shape: []const i64 = undefined; - if (utils.getTensorShape(readyNode.outputs.items[0].name)) |Shape| { - shape = Shape; - } else { - const input_shape = readyNode.inputs.items[0].?.shape; - var scales: ?[]const f32 = null; - var sizes: ?[]const i64 = null; - - const usize_input_shape = try utils.i64SliceToUsizeSlice(input_shape); - defer allocator.free(usize_input_shape); - - if (readyNode.inputs.items.len > 2 and readyNode.inputs.items[2].?.tensorProto != null) { - if (readyNode.inputs.items[2].?.tensorProto.?.float_data != null) { - scales = readyNode.inputs.items[2].?.tensorProto.?.float_data.?; - } - } - - if (readyNode.inputs.items.len > 3 and readyNode.inputs.items[3].?.tensorProto != null) { - if (readyNode.inputs.items[3].?.tensorProto.?.int64_data != null) { - sizes = readyNode.inputs.items[3].?.tensorProto.?.int64_data.?; - } - } - - const usize_sizes = try utils.i64SliceToUsizeSlice(sizes.?); - defer allocator.free(usize_sizes); - - const output_shape = try tensorMath.get_resize_output_shape(usize_input_shape, scales, usize_sizes); - - shape = try utils.usizeSliceToI64Slice(output_shape); - } - readyNode.outputs.items[0].shape = shape; -} - -pub fn compute_resize_output_shape_generic(comptime T: type, input_shape: []const T, scales: ?[]const f32, sizes: ?[]const T) ![]T { - // Make sure we support all parameter types - if (scales != null) { - // Calculate output shape based on scales - var output_shape = try allocator.alloc(T, input_shape.len); - - for (0..input_shape.len) |i| { - output_shape[i] = @intFromFloat(@as(f32, @floatFromInt(input_shape[i])) * scales.?[i]); - } - - return output_shape; - } - - if (sizes != null) { - // Use sizes directly - return sizes.?; - } - - // If 
neither scales nor sizes is provided, return the input shape - return input_shape; -} - -inline fn compute_neg_output_shape(readyNode: *ReadyNode) !void { - Codegen_log.info("\n====== compute_neg_output_shape node: {s}======", .{readyNode.nodeProto.name.?}); - var shape: []const i64 = undefined; - if (utils.getTensorShape(readyNode.outputs.items[0].name)) |tensorShape| { - shape = tensorShape; - } else { - Codegen_log.info("\n input_shape: []i64 = {any}", .{readyNode.inputs.items[0].?.shape}); - const input_shape = readyNode.inputs.items[0].?.shape; - - const usize_input_shape = try utils.i64SliceToUsizeSlice(input_shape); - defer allocator.free(usize_input_shape); - - const output_shape = try tensorMath.get_neg_output_shape(usize_input_shape); - defer allocator.free(output_shape); - - shape = try utils.usizeSliceToI64Slice(output_shape); - } - readyNode.outputs.items[0].shape = shape; -} - -inline fn compute_Div_output_shape(readyNode: *ReadyNode) !void { - // Codegen_log.info("\n====== compute_Div_output_shape node: {s}======", .{readyNode.nodeProto.name.?}); - // Codegen_log.info("\n input[0] shape: []i64 = {any}", .{readyNode.inputs.items[0].?.shape}); - // Codegen_log.info("\n input[1] shape: []i64 = {any}", .{readyNode.inputs.items[1].?.shape}); - - var shape: []const i64 = undefined; - - if (utils.getTensorShape(readyNode.outputs.items[0].name)) |tensorShape| { - Codegen_log.debug("\n Using shape from tensor: []i64 = {any}", .{tensorShape}); - // Use the shape with more dimensions between tensor shape and input shapes - const max_dim = @max(tensorShape.len, readyNode.inputs.items[0].?.shape.len, readyNode.inputs.items[1].?.shape.len); - if (tensorShape.len < max_dim) { - Codegen_log.debug("\n Tensor shape has fewer dimensions, using shape with {} dimensions", .{max_dim}); - // For element-wise operations, use input[0] shape and add first dimension from input[1] - if (readyNode.inputs.items[1].?.shape.len > readyNode.inputs.items[0].?.shape.len) { - var 
new_shape = try allocator.alloc(i64, readyNode.inputs.items[1].?.shape.len); - new_shape[0] = readyNode.inputs.items[1].?.shape[0]; // Use first dimension from input[1] - for (readyNode.inputs.items[0].?.shape, 0..) |dim, i| { - new_shape[i + 1] = dim; // Copy remaining dimensions from input[0] - } - shape = new_shape; - } else { - shape = readyNode.inputs.items[0].?.shape; - } - } else { - shape = tensorShape; - } - } else { - Codegen_log.debug("\n Using shape with more dimensions", .{}); - // For element-wise operations, use input[0] shape and add first dimension from input[1] - if (readyNode.inputs.items[1].?.shape.len > readyNode.inputs.items[0].?.shape.len) { - var new_shape = try allocator.alloc(i64, readyNode.inputs.items[1].?.shape.len); - new_shape[0] = readyNode.inputs.items[1].?.shape[0]; // Use first dimension from input[1] - for (readyNode.inputs.items[0].?.shape, 0..) |dim, i| { - new_shape[i + 1] = dim; // Copy remaining dimensions from input[0] - } - shape = new_shape; - } else { - shape = readyNode.inputs.items[0].?.shape; - } - } - readyNode.outputs.items[0].shape = shape; - // Codegen_log.info("\n Final output shape: []i64 = {any}", .{readyNode.outputs.items[0].shape}); -} - -inline fn compute_pads_output_shape(readyNode: *ReadyNode) !void { - Codegen_log.info("\n====== compute_pads_output_shape node: {s}=====", .{readyNode.nodeProto.name.?}); - - // Input 0: data - if (readyNode.inputs.items[0] == null) return error.InputTensorNotFound; - const data_shape_i64 = readyNode.inputs.items[0].?.shape; - Codegen_log.debug("\n data_shape: {any}", .{data_shape_i64}); - - // Input 1: pads (required, must be int64) - if (readyNode.inputs.items.len < 2 or readyNode.inputs.items[1] == null or readyNode.inputs.items[1].?.tensorProto == null or readyNode.inputs.items[1].?.tensorProto.?.int64_data == null) { - Codegen_log.warn("\nERROR: Pads input (index 1) is missing or not a constant int64 tensor.", .{}); - return error.PadsInputInvalid; - } - const 
pads_values_i64 = readyNode.inputs.items[1].?.tensorProto.?.int64_data.?; - Codegen_log.debug("\n pads_values: {any}", .{pads_values_i64}); - - // Input 2: constant_value (optional, shape not needed for output shape calculation) - - // Input 3: axes (optional, must be int64 or int32) - var axes_values_isize: ?[]const isize = null; - var axes_buffer: []isize = undefined; // Buffer for conversion - defer if (axes_values_isize != null) allocator.free(axes_buffer); - - if (readyNode.inputs.items.len > 3 and readyNode.inputs.items[3] != null and readyNode.inputs.items[3].?.tensorProto != null) { - const axes_proto = readyNode.inputs.items[3].?.tensorProto.?; - if (axes_proto.int64_data != null) { - const axes_i64 = axes_proto.int64_data.?; - axes_buffer = try allocator.alloc(isize, axes_i64.len); - for (axes_i64, 0..) |val, i| { - axes_buffer[i] = @intCast(val); - } - axes_values_isize = axes_buffer; - Codegen_log.debug("\n axes (from i64): {any}", .{axes_values_isize}); - } else if (axes_proto.int32_data != null) { - const axes_i32 = axes_proto.int32_data.?; - axes_buffer = try allocator.alloc(isize, axes_i32.len); - for (axes_i32, 0..) 
|val, i| { - axes_buffer[i] = @intCast(val); - } - axes_values_isize = axes_buffer; - Codegen_log.debug("\n axes (from i32): {any}", .{axes_values_isize}); - } else { - Codegen_log.warn("\nWARNING: Axes input (index 3) provided but is not int64 or int32 data.", .{}); - // Proceed without axes if the type is wrong - } - } else { - Codegen_log.debug("\n axes: not provided", .{}); - } - - // Convert data shape to usize - const data_shape_usize = try utils.i64SliceToUsizeSlice(data_shape_i64); - defer allocator.free(data_shape_usize); - - // Call the shape calculation function - const output_shape_usize = try tensorMath.get_pads_output_shape(allocator, data_shape_usize, pads_values_i64, axes_values_isize); - defer allocator.free(output_shape_usize); - - // Convert result back to i64 for storing in readyNode - readyNode.outputs.items[0].shape = try utils.usizeSliceToI64Slice(output_shape_usize); - Codegen_log.info("\n final output_shape: {any}", .{readyNode.outputs.items[0].shape}); -} - -inline fn compute_mean_output_shape(readyNode: *ReadyNode) !void { - Codegen_log.info("\n====== compute_mean_output_shape node: {s}======", .{readyNode.nodeProto.name.?}); - var shape: []const i64 = undefined; - if (utils.getTensorShape(readyNode.outputs.items[0].name)) |tensorShape| { - shape = tensorShape; - } else { - if (readyNode.inputs.items.len == 0) { - return error.EmptyInputList; - } - - var input_shapes = try allocator.alloc([]usize, readyNode.inputs.items.len); - defer allocator.free(input_shapes); - for (readyNode.inputs.items, 0..) 
|input, i| { - Codegen_log.info("\n input_{}_shape: []i64 = {any}", .{ i, input.?.shape }); - input_shapes[i] = try utils.i64SliceToUsizeSlice(input.?.shape); - } - - const output_shape_usize = try tensorMath.get_mean_output_shape(input_shapes); - shape = try utils.usizeSliceToI64Slice(@constCast(output_shape_usize)); - } - readyNode.outputs.items[0].shape = shape; - Codegen_log.info("\n output_shape: []i64 = {any}", .{readyNode.outputs.items[0].shape}); -} - -inline fn compute_flatten_output_shape(readyNode: *ReadyNode) !void { - Codegen_log.info("\n====== compute_flatten_output_shape node: {s}======", .{readyNode.nodeProto.name orelse "(unnamed)"}); - var shape: []const i64 = undefined; - - if (utils.getTensorShape(readyNode.outputs.items[0].name)) |tensorShape| { - shape = tensorShape; - } else { - if (readyNode.inputs.items.len == 0) { - return error.EmptyInputList; - } - const input_shape_i64 = readyNode.inputs.items[0].?.shape; - Codegen_log.info("\n input_shape: []i64 = {any}", .{input_shape_i64}); - - var axis: i64 = 1; // Default ONNX - for (readyNode.nodeProto.attribute) |attr| { - if (std.mem.eql(u8, attr.name, "axis")) { - if (attr.type != AttributeType.INT) { - Codegen_log.warn("\n ERROR: Flatten 'axis' attribute has unexpected type {}", .{attr.type}); - return error.InvalidAttributeType; - } - axis = attr.i; - break; - } - } - Codegen_log.debug("\n axis: {}", .{axis}); - - const input_shape_usize = try utils.i64SliceToUsizeSlice(input_shape_i64); - defer allocator.free(input_shape_usize); - Codegen_log.debug("\n input_shape_usize: []usize = {any}", .{input_shape_usize}); - - const output_shape_usize = try tensorMath.get_flatten_output_shape(input_shape_usize, @intCast(axis)); - //defer allocator.free(output_shape_usize); // Libera il risultato di get_flatten_output_shape - Codegen_log.debug("\n output_shape_usize: []usize = {any}", .{output_shape_usize}); - - shape = try utils.usizeSliceToI64Slice(@constCast(output_shape_usize)); - } - - 
readyNode.outputs.items[0].shape = shape; - Codegen_log.info("\n output_shape: []i64 = {any}", .{readyNode.outputs.items[0].shape}); -} - -inline fn compute_squeeze_output_shape(readyNode: *ReadyNode) !void { - Codegen_log.info("\n====== compute_squeeze_output_shape node: {s}======", .{readyNode.nodeProto.name orelse "(unnamed)"}); - var shape: []const i64 = undefined; - - if (utils.getTensorShape(readyNode.outputs.items[0].name)) |tensorShape| { - shape = tensorShape; - } else { - if (readyNode.inputs.items.len == 0) { - return error.EmptyInputList; - } - - const input_shape_i64 = readyNode.inputs.items[0].?.shape; - Codegen_log.info("\n input_shape: []i64 = {any}", .{input_shape_i64}); - - const input_shape_usize = try utils.i64SliceToUsizeSlice(input_shape_i64); - defer allocator.free(input_shape_usize); - Codegen_log.debug("\n input_shape_usize: []usize = {any}", .{input_shape_usize}); - - var axes_values_isize: ?[]const isize = null; - var axes_buffer: []isize = undefined; // Buffer for conversion - defer if (axes_values_isize != null) allocator.free(axes_buffer); - - if (readyNode.inputs.items.len > 2 and readyNode.inputs.items[2] != null and readyNode.inputs.items[2].?.tensorProto != null) { - const axes_proto = readyNode.inputs.items[2].?.tensorProto.?; - if (axes_proto.int64_data != null) { - const axes_i64 = axes_proto.int64_data.?; - axes_buffer = try allocator.alloc(isize, axes_i64.len); - for (axes_i64, 0..) |val, i| { - axes_buffer[i] = @intCast(val); - } - axes_values_isize = axes_buffer; - Codegen_log.debug("\n axes (from i64): {any}", .{axes_values_isize}); - } else if (axes_proto.int32_data != null) { - const axes_i32 = axes_proto.int32_data.?; - axes_buffer = try allocator.alloc(isize, axes_i32.len); - for (axes_i32, 0..) 
|val, i| { - axes_buffer[i] = @intCast(val); - } - axes_values_isize = axes_buffer; - Codegen_log.debug("\n axes (from i32): {any}", .{axes_values_isize}); - } else { - Codegen_log.warn("\nWARNING: Axes input (index 3) provided but is not int64 or int32 data.", .{}); - // Proceed without axes if the type is wrong - } - } else { - Codegen_log.debug("\n axes: not provided", .{}); - } - - const output_shape_usize = try tensorMath.get_squeeze_output_shape(input_shape_usize, axes_values_isize); - Codegen_log.debug("\n output_shape_usize: []usize = {any}", .{output_shape_usize}); - - shape = try utils.usizeSliceToI64Slice(@constCast(output_shape_usize)); - } - - readyNode.outputs.items[0].shape = shape; - Codegen_log.info("\n output_shape: []i64 = {any}", .{readyNode.outputs.items[0].shape}); -} - -inline fn compute_dynamicQuantizeLinear_output_shape(readyNode: *ReadyNode) !void { - Codegen_log.info("\n====== compute_dynamicQuantizeLinear_output_shape node: {s}======", .{readyNode.nodeProto.name.?}); - const input_shape = readyNode.inputs.items[0].?.shape; - Codegen_log.info("\n input_shape: []i64 = {any}", .{input_shape}); - - // Ensure the correct number of outputs - if (readyNode.outputs.items.len != 3) { - Codegen_log.debug("ERROR: DynamicQuantizeLinear expects 3 outputs, but got {}.", .{readyNode.outputs.items.len}); - return error.MismatchedOutputCount; - } - - // Convert input shape to usize - const usize_input_shape = try utils.i64SliceToUsizeSlice(input_shape); - defer allocator.free(usize_input_shape); - - // Get output shapes using the utility function - const output_shapes = try tensorMath.get_dynamicQuantizeLinear_output_shape(usize_input_shape); - defer { - for (output_shapes) |shape| { - allocator.free(shape); - } - allocator.free(output_shapes); - } - - // Assign shapes to output tensors - // Output 0: y (quantized data) - shape is same as input - readyNode.outputs.items[0].shape = try utils.usizeSliceToI64Slice(output_shapes[0]); - Codegen_log.debug("\n 
output[0] (y) shape: []i64 = {any}", .{readyNode.outputs.items[0].shape}); - - // Output 1: y_scale (scalar) - shape is {1} - readyNode.outputs.items[1].shape = try utils.usizeSliceToI64Slice(output_shapes[1]); - Codegen_log.debug("\n output[1] (y_scale) shape: []i64 = {any}", .{readyNode.outputs.items[1].shape}); - - // Output 2: y_zero_point (scalar) - shape is {1} - readyNode.outputs.items[2].shape = try utils.usizeSliceToI64Slice(output_shapes[2]); - Codegen_log.debug("\n output[2] (y_zero_point) shape: []i64 = {any}", .{readyNode.outputs.items[2].shape}); -} - -inline fn compute_convInteger_output_shape(readyNode: *ReadyNode) !void { - Codegen_log.info("\n====== compute_convInteger_output_shape node: {s}=====", .{readyNode.nodeProto.name.?}); - var shape: []const i64 = undefined; - - if (utils.getTensorShape(readyNode.outputs.items[0].name)) |tensorShape| { - shape = tensorShape; - } else { - // ConvInteger shape calculation is the same as Conv - const input_shape: []const i64 = readyNode.inputs.items[0].?.shape; - const kernel_shape: []const i64 = readyNode.inputs.items[1].?.shape; - - // Extract attributes similar to compute_conv_output_shape - var stride: ?[]i64 = null; - var dilation: ?[]i64 = null; - var auto_pad: []const u8 = "NOTSET"; - var pads: ?[]i64 = null; - for (readyNode.nodeProto.attribute) |attr| { - if (std.mem.eql(u8, attr.name, "strides")) { - if (attr.type == AttributeType.INTS) stride = attr.ints; - } else if (std.mem.eql(u8, attr.name, "dilations")) { - if (attr.type == AttributeType.INTS) dilation = attr.ints; - } else if (std.mem.eql(u8, attr.name, "auto_pad")) { - if (attr.type == AttributeType.STRING) auto_pad = attr.s; - } else if (std.mem.eql(u8, attr.name, "pads")) { - if (attr.type == AttributeType.INTS) pads = attr.ints; - } - } - - // Defaults if not found (as per ONNX spec) - const default_stride = [_]i64{ 1, 1 }; // Assuming 2D for now - const default_dilation = [_]i64{ 1, 1 }; - - const stride_ref = stride orelse 
&default_stride; - const dilation_ref = dilation orelse &default_dilation; - - Codegen_log.info("\n input_shape: []i64 = {any}", .{input_shape}); - Codegen_log.debug("\n kernel_shape: []i64 = {any}", .{kernel_shape}); - Codegen_log.debug("\n stride: []i64 = {any}", .{stride_ref}); - Codegen_log.debug("\n dilation: []i64 = {any}", .{dilation_ref}); - Codegen_log.debug("\n pads: ?[]i64 = {any}", .{pads}); - Codegen_log.debug("\n auto_pad: {s}", .{auto_pad}); - - // Convert shapes and attributes to usize slices for the math function - const input_shape_usize = try utils.i64SliceToUsizeSlice(input_shape); - defer allocator.free(input_shape_usize); - const kernel_shape_usize = try utils.i64SliceToUsizeSlice(kernel_shape); - defer allocator.free(kernel_shape_usize); - const stride_usize = try utils.i64SliceToUsizeSlice(stride_ref); - defer allocator.free(stride_usize); - const dilation_usize = try utils.i64SliceToUsizeSlice(dilation_ref); - defer allocator.free(dilation_usize); - - var pads_usize: ?[]usize = null; - var pads_alloc: []usize = undefined; // Keep track of allocation - if (pads) |p| { - pads_alloc = try utils.i64SliceToUsizeSlice(p); - pads_usize = pads_alloc; - } - defer if (pads_usize != null) allocator.free(pads_alloc); - - // Call the existing convolution shape calculation function - const output_shape_usize_array = try tensorMath.get_convolution_output_shape( - input_shape_usize, - kernel_shape_usize, - stride_usize, - pads_usize, - dilation_usize, - auto_pad, - ); - - // Convert the [4]usize array back to []const i64 slice - // Pass a slice directly from the const array. usizeSliceToI64Slice takes []const usize. 
- shape = try utils.usizeSliceToI64Slice(@constCast(&output_shape_usize_array)); - } - readyNode.outputs.items[0].shape = shape; - Codegen_log.info("\n output_shape: []i64 = {any}", .{readyNode.outputs.items[0].shape}); -} diff --git a/src/CodeGen/skeleton.zig b/src/CodeGen/skeleton.zig deleted file mode 100644 index 97c418d3..00000000 --- a/src/CodeGen/skeleton.zig +++ /dev/null @@ -1,138 +0,0 @@ -const std = @import("std"); -const zant = @import("zant"); - -const Tensor = zant.core.tensor.Tensor; -const tensorMath = zant.core.tensor.math_standard; -const onnx = zant.onnx; -const ModelOnnx = onnx.ModelProto; -const DataType = onnx.DataType; -const TensorProto = onnx.TensorProto; -const allocator = zant.utils.allocator.allocator; -const codegen = @import("codegen.zig"); -// Access global codegen state and utilities -const globals = codegen.globals; -const utils = codegen.utils; -const codeGenInitializers = codegen.parameters; -const coddeGenPredict = codegen.predict; -const codegen_options = @import("codegen_options"); - -/// Writes a Zig source file containing the generated code for an ONNX model. -/// -/// This function generates the necessary Zig code to initialize tensors and -/// define the prediction logic based on the given ONNX model. -/// -/// # Parameters -/// - `file`: The file where the generated Zig code will be written. -/// - `model`: The ONNX model from which to generate the Zig code. -/// -/// # Errors -/// This function may return an error if writing to the file fails. -pub fn writeZigFile(model_name: []const u8, model_path: []const u8, model: ModelOnnx, do_export: bool) !void { - - //initializing writer for lib_operation file - const lib_file_path = try std.fmt.allocPrint(allocator, "{s}lib_{s}.zig", .{ model_path, model_name }); - defer allocator.free(lib_file_path); - var lib_file = try std.fs.cwd().createFile(lib_file_path, .{}); - std.log.info("\n .......... 
file created, path:{s}", .{lib_file_path}); - defer lib_file.close(); - - const lib_writer = lib_file.writer(); - - //initializing writer for static_parameters file - const params_file_path = try std.fmt.allocPrint(allocator, "{s}static_parameters.zig", .{model_path}); - defer allocator.free(params_file_path); - var param_file = try std.fs.cwd().createFile(params_file_path, .{}); - std.log.info("\n .......... file created, path:{s}", .{params_file_path}); - defer param_file.close(); - - const param_writer = param_file.writer(); - - // Write the necessary library imports to the generated Zig file - try write_libraries(lib_writer); - - if (codegen_options.log) { - //log function setting - try write_logFunction(lib_writer); - } - - //Fixed Buffer Allocator - try write_FBA(lib_writer); - - try write_type_T(lib_writer); - - // Generate tensor initialization code in the static_parameters.zig file - try codeGenInitializers.write_parameters(param_writer, model); - - // Generate prediction function code - try coddeGenPredict.writePredict(lib_writer, do_export); -} - -/// Writes the required library imports to the generated Zig file for predict function. -/// -/// This function ensures that the necessary standard and package libraries are -/// imported into the generated Zig source file. -/// -/// # Parameters -/// - `writer`: A file writer used to write the import statements. -/// -/// # Errors -/// This function may return an error if writing to the file fails. 
-fn write_libraries(writer: std.fs.File.Writer) !void { - _ = try writer.print( - \\ - \\ const std = @import("std"); - \\ const zant = @import("zant"); - \\ const Tensor = zant.core.tensor.Tensor; - \\ const tensMath = zant.core.tensor.math_standard; - \\ const pkgAllocator = zant.utils.allocator; - \\ const allocator = pkgAllocator.allocator; - \\ const codegen = @import("codegen"); - \\ const utils = codegen.utils; - \\ const param_lib = @import("static_parameters.zig"); - \\ - , .{}); -} - -fn write_logFunction(writer: std.fs.File.Writer) !void { - _ = try writer.print( - \\ - \\var log_function: ?*const fn ([*c]u8) callconv(.C) void = null; - \\ - \\pub export fn setLogFunction(func: ?*const fn ([*c]u8) callconv(.C) void) void {{ - \\ log_function = func; - \\}} - \\ - , .{}); -} - -fn write_FBA(writer: std.fs.File.Writer) !void { - // Select allocator strategy based on flag - if (codegen_options.dynamic) { - // Use heap-based dynamic allocation - try writer.writeAll( - \\ - \\ - \\ // Dynamic allocation: RawCAllocator - ); - } else { - // Use fixed buffer allocator for static allocations - try writer.writeAll( - \\ - \\ - \\ // Static allocation: FixedBufferAllocator - \\ var buf: [4096 * 10]u8 = undefined; - \\ var fba_state = std.heap.FixedBufferAllocator.init(&buf); - \\ const fba = fba_state.allocator(); - \\ - ); - } -} - -fn write_type_T(writer: std.fs.File.Writer) !void { - // Emit the tensor element type derived from the ONNX model input - const type_str = try utils.getTypeString(globals.networkInputDataType); - _ = try writer.print( - \\ - \\ const T = {s}; - , .{type_str}); -} diff --git a/src/CodeGen/utils.zig b/src/CodeGen/utils_old.zig similarity index 100% rename from src/CodeGen/utils.zig rename to src/CodeGen/utils_old.zig diff --git a/src/IR_codegen/IR_codegen.zig b/src/IR_zant/IR_codegen/IR_codegen.zig similarity index 94% rename from src/IR_codegen/IR_codegen.zig rename to src/IR_zant/IR_codegen/IR_codegen.zig index 56e854db..9030f78d 
100644 --- a/src/IR_codegen/IR_codegen.zig +++ b/src/IR_zant/IR_codegen/IR_codegen.zig @@ -1,6 +1,6 @@ const std = @import("std"); const zant = @import("zant"); -const IR = @import("IR_zant"); +const IR = @import("../IR_zant.zig").IR_graph; // --- zant IR const GraphZant = IR.GraphZant; @@ -17,6 +17,8 @@ const allocator = zant.utils.allocator.allocator; const ParametersWriter = @import("parameter_writer.zig"); const PredictWriter = @import("predict_writer.zig"); +pub const codegen_options = @import("codegen_options"); + // -- testing pub const testWriter = @import("tests_writer.zig"); diff --git a/src/IR_codegen/main.zig b/src/IR_zant/IR_codegen/main.zig similarity index 84% rename from src/IR_codegen/main.zig rename to src/IR_zant/IR_codegen/main.zig index ef80df4f..30c97ad9 100644 --- a/src/IR_codegen/main.zig +++ b/src/IR_zant/IR_codegen/main.zig @@ -1,12 +1,13 @@ const std = @import("std"); const zant = @import("zant"); -const IR = @import("../IR_graph/IR_graph.zig"); -const codegen = @import("IR_codegen.zig"); - const onnx = zant.onnx; -const codegen_options = @import("codegen_options"); -const codeGen_tests = @import("tests_writer.zig"); +const IR_zant = @import("IR_zant"); +const IR_graph = IR_zant.IR_graph; +const IR_codegen = IR_zant.IR_codegen; +const codegen_options = IR_codegen.codegen_options; + +const codeGen_tests = IR_codegen.testWriter; // called by "zig build IR_codegen" optionals:" -Dlog -Dmodel="name" -D ..." 
see build.zig" pub fn main() !void { @@ -35,7 +36,7 @@ pub fn main() !void { //model.print(); - try codegen.codegnenerateFromOnnx(model_name, codegen_options.IR_generated_path, model); + try IR_codegen.codegnenerateFromOnnx(model_name, codegen_options.IR_generated_path, model); // Test the generated code try codeGen_tests.writeTestFile(model_name, codegen_options.IR_generated_path); diff --git a/src/IR_codegen/parameter_writer.zig b/src/IR_zant/IR_codegen/parameter_writer.zig similarity index 100% rename from src/IR_codegen/parameter_writer.zig rename to src/IR_zant/IR_codegen/parameter_writer.zig diff --git a/src/IR_codegen/parameters/parameters.zig b/src/IR_zant/IR_codegen/parameters/parameters.zig similarity index 94% rename from src/IR_codegen/parameters/parameters.zig rename to src/IR_zant/IR_codegen/parameters/parameters.zig index 7cf27a68..9d0a622b 100644 --- a/src/IR_codegen/parameters/parameters.zig +++ b/src/IR_zant/IR_codegen/parameters/parameters.zig @@ -1,15 +1,16 @@ const std = @import("std"); const zant = @import("zant"); -const IR = @import("IR_zant"); +const IR_zant = @import("../../IR_zant.zig"); -const IR_utils = IR.utils; +const IR_graph = IR_zant.IR_graph; +const IR_codegen = IR_zant.IR_codegen; -const IR_codegen = @import("../IR_codegen.zig"); // --- zant IR -const GraphZant = IR.GraphZant; -const TensorZant = IR.TensorZant; +const IR_utils = IR_graph.utils; +const GraphZant = IR_graph.GraphZant; +const TensorZant = IR_graph.TensorZant; -const tensorZantMap: *std.StringHashMap(TensorZant) = &IR.tensorZant_lib.tensorMap; +const tensorZantMap: *std.StringHashMap(TensorZant) = &IR_graph.tensorZant_lib.tensorMap; /// Writes the Zig code required to initialize all tensor initializers in the ONNX model. /// This function generates declarations and definitions for each tensor. 
diff --git a/src/IR_codegen/predict/predict.zig b/src/IR_zant/IR_codegen/predict/predict.zig similarity index 96% rename from src/IR_codegen/predict/predict.zig rename to src/IR_zant/IR_codegen/predict/predict.zig index 3c312f8d..cbfef50f 100644 --- a/src/IR_codegen/predict/predict.zig +++ b/src/IR_zant/IR_codegen/predict/predict.zig @@ -1,16 +1,17 @@ const std = @import("std"); const zant = @import("zant"); -const IR = @import("IR_zant"); -const IR_utils = IR.utils; +const IR_zant = @import("../../IR_zant.zig"); +const IR_graph = IR_zant.IR_graph; +const IR_codegen = IR_zant.IR_codegen; -const IR_codegen = @import("../IR_codegen.zig"); // --- zant IR -const GraphZant = IR.GraphZant; -const TensorZant = IR.TensorZant; -const NodeZant = IR.NodeZant; +const IR_utils = IR_graph.utils; +const GraphZant = IR_graph.GraphZant; +const TensorZant = IR_graph.TensorZant; +const NodeZant = IR_graph.NodeZant; -const tensorZantMap: *std.StringHashMap(TensorZant) = &IR.tensorZant_lib.tensorMap; +const tensorZantMap: *std.StringHashMap(TensorZant) = &IR_graph.tensorZant_lib.tensorMap; const allocator = std.heap.page_allocator; @@ -315,7 +316,7 @@ fn write_outputsInitialization(writer: std.fs.File.Writer) !void { // -------------------------------- WRITE PREDICT() -------------------------------- fn write_predictInitialization(writer: std.fs.File.Writer) !void { - const inputs: []TensorZant = try IR.utils.getInputs(tensorZantMap); + const inputs: []TensorZant = try IR_utils.getInputs(tensorZantMap); //checks if (inputs.len > 1) return error.MoreThanOneInput; @@ -405,7 +406,7 @@ fn write_TensorShape(writer: std.fs.File.Writer, tz: *TensorZant) !i64 { fn write_checks(writer: std.fs.File.Writer) !void { // Autogen a check for the input shape as arg VS input shape as codegen option - const inputs: []TensorZant = try IR.utils.getInputs(tensorZantMap); + const inputs: []TensorZant = try IR_utils.getInputs(tensorZantMap); //checks if (inputs.len > 1) return error.MoreThanOneInput; diff 
--git a/src/IR_codegen/predict_writer.zig b/src/IR_zant/IR_codegen/predict_writer.zig similarity index 90% rename from src/IR_codegen/predict_writer.zig rename to src/IR_zant/IR_codegen/predict_writer.zig index 12d86356..7a61851c 100644 --- a/src/IR_codegen/predict_writer.zig +++ b/src/IR_zant/IR_codegen/predict_writer.zig @@ -1,13 +1,17 @@ const std = @import("std"); const zant = @import("zant"); -const IR = @import("IR_zant"); +const IR_zant = @import("../IR_zant.zig"); + +const IR_graph = IR_zant.IR_graph; +const IR_codegen = IR_zant.IR_codegen; // --- zant IR -const GraphZant = IR.GraphZant; -const TensorZant = IR.TensorZant; -const NodeZant = IR.NodeZant; +const GraphZant = IR_graph.GraphZant; +const TensorZant = IR_graph.TensorZant; +const NodeZant = IR_graph.NodeZant; + // --- utils -pub const utils = @import("utils.zig"); +pub const utils = IR_codegen.utils; // --- onnx const onnx = zant.onnx; const ModelOnnx = onnx.ModelProto; @@ -64,7 +68,7 @@ fn write_libraries(writer: std.fs.File.Writer) !void { \\ const tensMath = zant.core.tensor.math_standard; \\ const pkgAllocator = zant.utils.allocator; \\ const allocator = pkgAllocator.allocator; - \\ const codegen = @import("codegen"); + \\ const codegen = @import("IR_zant").IR_codegen; \\ const utils = codegen.utils; \\ const param_lib = @import("static_parameters.zig"); \\ diff --git a/src/IR_codegen/tests_writer.zig b/src/IR_zant/IR_codegen/tests_writer.zig similarity index 92% rename from src/IR_codegen/tests_writer.zig rename to src/IR_zant/IR_codegen/tests_writer.zig index 5277fac4..d520e8d1 100644 --- a/src/IR_codegen/tests_writer.zig +++ b/src/IR_zant/IR_codegen/tests_writer.zig @@ -5,15 +5,18 @@ const ModelOnnx = onnx.ModelProto; const codegen_options = @import("codegen_options"); const allocator = zant.utils.allocator.allocator; -const IR = @import("IR_zant"); +const IR_zant = @import("../IR_zant.zig"); -const IR_utils = IR.utils; +const IR_graph = IR_zant.IR_graph; +const IR_codegen = 
IR_zant.IR_codegen; + +const IR_utils = IR_graph.utils; // --- zant IR -const GraphZant = IR.GraphZant; -const TensorZant = IR.TensorZant; +const GraphZant = IR_graph.GraphZant; +const TensorZant = IR_graph.TensorZant; -const tensorZantMap: *std.StringHashMap(TensorZant) = &IR.tensorZant_lib.tensorMap; +const tensorZantMap: *std.StringHashMap(TensorZant) = &IR_graph.tensorZant_lib.tensorMap; pub fn UserTest(comptime T: type) type { return struct { diff --git a/src/IR_codegen/utils.zig b/src/IR_zant/IR_codegen/utils.zig similarity index 98% rename from src/IR_codegen/utils.zig rename to src/IR_zant/IR_codegen/utils.zig index 018012dc..9c3b5f85 100644 --- a/src/IR_codegen/utils.zig +++ b/src/IR_zant/IR_codegen/utils.zig @@ -6,7 +6,11 @@ const GraphProto = onnx.GraphProto; const NodeProto = onnx.NodeProto; const TensorProto = onnx.TensorProto; const allocator = zant.utils.allocator.allocator; -const testWriter = @import("tests_writer.zig"); + +const IR_zant = @import("../IR_zant.zig"); +const IR_graph = IR_zant.IR_graph; +const IR_codegen = IR_zant.IR_codegen; +const testWriter = IR_codegen.testWriter; // -------------------- GETTERS -------------------- diff --git a/src/IR_graph/Context.md b/src/IR_zant/IR_graph/Context.md similarity index 100% rename from src/IR_graph/Context.md rename to src/IR_zant/IR_graph/Context.md diff --git a/src/IR_graph/HOW_TO_ADD_MATHEMATICAL_OPERATIONS.md b/src/IR_zant/IR_graph/HOW_TO_ADD_MATHEMATICAL_OPERATIONS.md similarity index 100% rename from src/IR_graph/HOW_TO_ADD_MATHEMATICAL_OPERATIONS.md rename to src/IR_zant/IR_graph/HOW_TO_ADD_MATHEMATICAL_OPERATIONS.md diff --git a/src/IR_graph/IR_graph.zig b/src/IR_zant/IR_graph/IR_graph.zig similarity index 100% rename from src/IR_graph/IR_graph.zig rename to src/IR_zant/IR_graph/IR_graph.zig diff --git a/src/IR_graph/TODO b/src/IR_zant/IR_graph/TODO similarity index 75% rename from src/IR_graph/TODO rename to src/IR_zant/IR_graph/TODO index 6baf4740..c7c8ea01 100644 --- 
a/src/IR_graph/TODO +++ b/src/IR_zant/IR_graph/TODO @@ -9,12 +9,9 @@ IR_zant IR_CODEGEN - dynamicAllocation -- when running : `zig build IR_codegen -Dlog` and `zig build test-generated-lib` you obtain - [test_model] (info): - User tests are disabled for this model - It is related to the import of "codegen" inside *** GENERATED LIBRARY TESTS *** in build.zig +- It is related to the import of "codegen" inside *** GENERATED LIBRARY TESTS *** in build.zig -- check that the passed that the parameters passed to IR_codegen.generate_from_*() are correct, string format, no wite spaces, +- EASY: check that the passed that the parameters passed to IR_codegen.generate_from_*() are correct, string format, no wite spaces, generated_path ending with "/", ecc... - aggregate build steps where possible, too many . Write documentation about it, describin what does each build command - error on Split op when launching "zig build test-codegen-gen -Dlog", now Split is excluded from "available_operations" @@ -24,11 +21,10 @@ IR_CODEGEN thread 26012 panic: reached unreachable code /home/mirko/Documents/zig/zig-linux-x86_64-0.14.0/lib/std/posix.zig:1267:23: 0x109187e in write (oneop_codegen) .FAULT => unreachable, - -- codegenerating "T" as the input type of the lean math op is not possible anymore + - use TensorZant.getNameSanitized() and not utils.getSanitizedName() - remove "const utils = @import("codegen").utils;" from all op_*.zig and fix, -- do not codegenerate "const codegen = @import("codegen");" and use instead "const codegen = @import("codegen");" and fix by conseguence +- do not codegenerate "const codegen = @import("codegen");" and use instead "const IR_codegen = @import("IR_codegen");" and fix by conseguence OTHER : - check missing op between: python generator, available_operations.txt, IR_graph operators diff --git a/src/IR_graph/graphZant.zig b/src/IR_zant/IR_graph/graphZant.zig similarity index 100% rename from src/IR_graph/graphZant.zig rename to 
src/IR_zant/IR_graph/graphZant.zig diff --git a/src/IR_graph/nodeZant.zig b/src/IR_zant/IR_graph/nodeZant.zig similarity index 100% rename from src/IR_graph/nodeZant.zig rename to src/IR_zant/IR_graph/nodeZant.zig diff --git a/src/IR_graph/op_template.md b/src/IR_zant/IR_graph/op_template.md similarity index 100% rename from src/IR_graph/op_template.md rename to src/IR_zant/IR_graph/op_template.md diff --git a/src/IR_graph/op_union/op_union.zig b/src/IR_zant/IR_graph/op_union/op_union.zig similarity index 100% rename from src/IR_graph/op_union/op_union.zig rename to src/IR_zant/IR_graph/op_union/op_union.zig diff --git a/src/IR_graph/op_union/operators/op_add.zig b/src/IR_zant/IR_graph/op_union/operators/op_add.zig similarity index 98% rename from src/IR_graph/op_union/operators/op_add.zig rename to src/IR_zant/IR_graph/op_union/operators/op_add.zig index 8e78435d..e965aa44 100644 --- a/src/IR_graph/op_union/operators/op_add.zig +++ b/src/IR_zant/IR_graph/op_union/operators/op_add.zig @@ -14,7 +14,6 @@ const tensorZant = @import("../../tensorZant.zig"); const TensorZant = tensorZant.TensorZant; const TensorCategory = tensorZant.TensorCategory; const IR_utils = @import("../../utils.zig"); //this is IR utils -const utils = @import("codegen").utils; // https://onnx.ai/onnx/operators/onnx__Add.html // INPUTS: diff --git a/src/IR_graph/op_union/operators/op_averagePool.zig b/src/IR_zant/IR_graph/op_union/operators/op_averagePool.zig similarity index 92% rename from src/IR_graph/op_union/operators/op_averagePool.zig rename to src/IR_zant/IR_graph/op_union/operators/op_averagePool.zig index cbd7d824..6ae0bcef 100644 --- a/src/IR_graph/op_union/operators/op_averagePool.zig +++ b/src/IR_zant/IR_graph/op_union/operators/op_averagePool.zig @@ -1,6 +1,7 @@ const std = @import("std"); const allocator = std.heap.page_allocator; const zant = @import("zant"); +const IR_zant = @import("../../../IR_zant.zig"); // --- onnx --- const onnx = zant.onnx; @@ -10,13 +11,13 @@ const 
NodeProto = onnx.NodeProto; const TensorProto = onnx.TensorProto; // --- zant --- -const tensorZant = @import("../../tensorZant.zig"); -const TensorZant = tensorZant.TensorZant; -const TensorCategory = tensorZant.TensorCategory; +const tensorZant_lib = IR_zant.IR_graph.tensorZant_lib; +const TensorZant = tensorZant_lib.TensorZant; +const TensorCategory = tensorZant_lib.TensorCategory; const tensorMath = zant.core.tensor.math_standard; -const utils = @import("codegen").utils; +const utils = IR_zant.IR_codegen.utils; // https://onnx.ai/onnx/operators/onnx__AveragePool.html // INPUTS: @@ -45,8 +46,8 @@ pub const AveragePool = struct { strides: ?[]i64, // default = null; pub fn init(nodeProto: *NodeProto) !AveragePool { - const input_X = if (tensorZant.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_X_notFound; - const output_Y = if (tensorZant.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; + const input_X = if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_X_notFound; + const output_Y = if (tensorZant_lib.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; var auto_pad: []const u8 = "NOTSET"; var ceil_mode: i64 = 0; @@ -75,7 +76,7 @@ pub const AveragePool = struct { } //set the output type: - if (output_Y.ty == tensorZant.TensorType.undefined) output_Y.ty = input_X.ty; + if (output_Y.ty == tensorZant_lib.TensorType.undefined) output_Y.ty = input_X.ty; return AveragePool{ .input_X = input_X, diff --git a/src/IR_graph/op_union/operators/op_batchNormalization.zig b/src/IR_zant/IR_graph/op_union/operators/op_batchNormalization.zig similarity index 88% rename from src/IR_graph/op_union/operators/op_batchNormalization.zig rename to src/IR_zant/IR_graph/op_union/operators/op_batchNormalization.zig index 2cf0327f..ac93ed98 100644 --- a/src/IR_graph/op_union/operators/op_batchNormalization.zig +++ 
b/src/IR_zant/IR_graph/op_union/operators/op_batchNormalization.zig @@ -1,7 +1,7 @@ const std = @import("std"); const allocator = std.heap.page_allocator; const zant = @import("zant"); -const tensorMath = zant.core.tensor.math_standard; +const IR_zant = @import("../../../IR_zant.zig"); // --- onnx --- const onnx = zant.onnx; @@ -11,11 +11,13 @@ const NodeProto = onnx.NodeProto; const TensorProto = onnx.TensorProto; // --- zant --- -const tensorZant = @import("../../tensorZant.zig"); -const TensorZant = tensorZant.TensorZant; -const TensorCategory = tensorZant.TensorCategory; +const tensorZant_lib = IR_zant.IR_graph.tensorZant_lib; +const TensorZant = tensorZant_lib.TensorZant; +const TensorCategory = tensorZant_lib.TensorCategory; + +const tensorMath = zant.core.tensor.math_standard; -const utils = @import("codegen").utils; +const utils = IR_zant.IR_codegen.utils; // https://onnx.ai/onnx/operators/onnx__BatchNormalization.html // INPUTS: @@ -44,12 +46,12 @@ pub const BatchNormalization = struct { training_mode: bool, //default = flase; pub fn init(nodeProto: *NodeProto) !BatchNormalization { - const input_X = if (tensorZant.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_X_notFound; - const scale = if (tensorZant.tensorMap.getPtr(nodeProto.input[1])) |ptr| ptr else return error.scale_notFound; - const B = if (tensorZant.tensorMap.getPtr(nodeProto.input[2])) |ptr| ptr else return error.B_notFound; - const input_mean = if (tensorZant.tensorMap.getPtr(nodeProto.input[3])) |ptr| ptr else return error.input_mean_notFound; - const input_var = if (tensorZant.tensorMap.getPtr(nodeProto.input[4])) |ptr| ptr else return error.input_var_notFound; - const output_Y = if (tensorZant.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; + const input_X = if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_X_notFound; + const scale = if 
(tensorZant_lib.tensorMap.getPtr(nodeProto.input[1])) |ptr| ptr else return error.scale_notFound; + const B = if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[2])) |ptr| ptr else return error.B_notFound; + const input_mean = if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[3])) |ptr| ptr else return error.input_mean_notFound; + const input_var = if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[4])) |ptr| ptr else return error.input_var_notFound; + const output_Y = if (tensorZant_lib.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; var epsilon: f32 = 1e-05; var momentum: f32 = 0.9; @@ -66,7 +68,7 @@ pub const BatchNormalization = struct { } //set the output type: - if (output_Y.ty == tensorZant.TensorType.undefined) output_Y.ty = input_X.ty; + if (output_Y.ty == tensorZant_lib.TensorType.undefined) output_Y.ty = input_X.ty; return BatchNormalization{ .input_X = input_X, diff --git a/src/IR_graph/op_union/operators/op_ceil.zig b/src/IR_zant/IR_graph/op_union/operators/op_ceil.zig similarity index 79% rename from src/IR_graph/op_union/operators/op_ceil.zig rename to src/IR_zant/IR_graph/op_union/operators/op_ceil.zig index 4b512d6a..d31c74ff 100644 --- a/src/IR_graph/op_union/operators/op_ceil.zig +++ b/src/IR_zant/IR_graph/op_union/operators/op_ceil.zig @@ -1,6 +1,7 @@ const std = @import("std"); const allocator = std.heap.page_allocator; const zant = @import("zant"); +const IR_zant = @import("../../../IR_zant.zig"); // --- onnx --- const onnx = zant.onnx; @@ -10,13 +11,13 @@ const NodeProto = onnx.NodeProto; const TensorProto = onnx.TensorProto; // --- zant --- -const tensorZant = @import("../../tensorZant.zig"); -const TensorZant = tensorZant.TensorZant; -const TensorCategory = tensorZant.TensorCategory; +const tensorZant_lib = IR_zant.IR_graph.tensorZant_lib; +const TensorZant = tensorZant_lib.TensorZant; +const TensorCategory = tensorZant_lib.TensorCategory; const tensorMath = zant.core.tensor.math_standard; -const 
utils = @import("codegen").utils; +const utils = IR_zant.IR_codegen.utils; // https://onnx.ai/onnx/operators/onnx__Ceil.html // INPUTS: @@ -28,11 +29,11 @@ pub const Ceil = struct { output_Y: *TensorZant, pub fn init(nodeProto: *NodeProto) !Ceil { - const input_X = if (tensorZant.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_X_notFound; - const output_Y = if (tensorZant.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; + const input_X = if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_X_notFound; + const output_Y = if (tensorZant_lib.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; //set the output type: - if (output_Y.ty == tensorZant.TensorType.undefined) output_Y.ty = input_X.ty; + if (output_Y.ty == tensorZant_lib.TensorType.undefined) output_Y.ty = input_X.ty; return Ceil{ .input_X = input_X, diff --git a/src/IR_graph/op_union/operators/op_concat.zig b/src/IR_zant/IR_graph/op_union/operators/op_concat.zig similarity index 91% rename from src/IR_graph/op_union/operators/op_concat.zig rename to src/IR_zant/IR_graph/op_union/operators/op_concat.zig index 1e5a7064..c2cfb877 100644 --- a/src/IR_graph/op_union/operators/op_concat.zig +++ b/src/IR_zant/IR_graph/op_union/operators/op_concat.zig @@ -1,7 +1,7 @@ const std = @import("std"); const allocator = std.heap.page_allocator; const zant = @import("zant"); -const tensorMath = zant.core.tensor.math_standard; +const IR_zant = @import("../../../IR_zant.zig"); // --- onnx --- const onnx = zant.onnx; @@ -11,12 +11,13 @@ const NodeProto = onnx.NodeProto; const TensorProto = onnx.TensorProto; // --- zant --- -const tensorZant = @import("../../tensorZant.zig"); -const TensorZant = tensorZant.TensorZant; -const TensorCategory = tensorZant.TensorCategory; +const tensorZant_lib = IR_zant.IR_graph.tensorZant_lib; +const TensorZant = tensorZant_lib.TensorZant; +const TensorCategory = 
tensorZant_lib.TensorCategory; -const utils = @import("codegen").utils; +const tensorMath = zant.core.tensor.math_standard; +const utils = IR_zant.IR_codegen.utils; // https://onnx.ai/onnx/operators/onnx__Concat.html // INPUTS: // - inputs (variadic, heterogeneous) - T: List of tensors for concatenation @@ -32,10 +33,10 @@ pub const Concat = struct { pub fn init(nodeProto: *NodeProto) !Concat { var inputs = std.ArrayList(*TensorZant).init(allocator); - const concat_result = if (tensorZant.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.concat_result_notFound; + const concat_result = if (tensorZant_lib.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.concat_result_notFound; for (nodeProto.input) |input| { - const ptr = if (tensorZant.tensorMap.getPtr(input)) |ptr| ptr else return error.concat_result_notFound; + const ptr = if (tensorZant_lib.tensorMap.getPtr(input)) |ptr| ptr else return error.concat_result_notFound; try inputs.append(ptr); } var axis: i64 = 1.0; @@ -50,7 +51,7 @@ pub const Concat = struct { } //set the output type: - if (concat_result.ty == tensorZant.TensorType.undefined) concat_result.ty = inputs.items[0].ty; + if (concat_result.ty == tensorZant_lib.TensorType.undefined) concat_result.ty = inputs.items[0].ty; return Concat{ .inputs = inputs, diff --git a/src/IR_graph/op_union/operators/op_constant.zig b/src/IR_zant/IR_graph/op_union/operators/op_constant.zig similarity index 96% rename from src/IR_graph/op_union/operators/op_constant.zig rename to src/IR_zant/IR_graph/op_union/operators/op_constant.zig index 65e05401..5880a34e 100644 --- a/src/IR_graph/op_union/operators/op_constant.zig +++ b/src/IR_zant/IR_graph/op_union/operators/op_constant.zig @@ -1,6 +1,7 @@ const std = @import("std"); const allocator = std.heap.page_allocator; const zant = @import("zant"); +const IR_zant = @import("../../../IR_zant.zig"); // --- onnx --- const onnx = zant.onnx; @@ -10,13 +11,13 @@ const NodeProto = onnx.NodeProto; const 
TensorProto = onnx.TensorProto; // --- zant --- -const tensorZant = @import("../../tensorZant.zig"); -const TensorZant = tensorZant.TensorZant; -const TensorCategory = tensorZant.TensorCategory; +const tensorZant_lib = IR_zant.IR_graph.tensorZant_lib; +const TensorZant = tensorZant_lib.TensorZant; +const TensorCategory = tensorZant_lib.TensorCategory; const tensorMath = zant.core.tensor.math_standard; -const utils = @import("codegen").utils; +const utils = IR_zant.IR_codegen.utils; // https://onnx.ai/onnx/operators/onnx__Constant.html // Outputs: @@ -44,7 +45,7 @@ pub const Constant = struct { value_strings: ?[][]const u8, pub fn init(nodeProto: *NodeProto) !Constant { - const output = if (tensorZant.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_notFound; + const output = if (tensorZant_lib.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_notFound; var value: ?*TensorZant = null; var sparse_value: ?*TensorZant = null; var value_float: ?f32 = null; diff --git a/src/IR_graph/op_union/operators/op_conv.zig b/src/IR_zant/IR_graph/op_union/operators/op_conv.zig similarity index 91% rename from src/IR_graph/op_union/operators/op_conv.zig rename to src/IR_zant/IR_graph/op_union/operators/op_conv.zig index b90ec7e0..32680501 100644 --- a/src/IR_graph/op_union/operators/op_conv.zig +++ b/src/IR_zant/IR_graph/op_union/operators/op_conv.zig @@ -1,8 +1,7 @@ const std = @import("std"); +const allocator = std.heap.page_allocator; const zant = @import("zant"); -const tensorMath = zant.core.tensor.math_standard; - -const allocator = zant.utils.allocator.allocator; +const IR_zant = @import("../../../IR_zant.zig"); // --- onnx --- const onnx = zant.onnx; @@ -12,11 +11,13 @@ const NodeProto = onnx.NodeProto; const TensorProto = onnx.TensorProto; // --- zant --- -const tensorZant = @import("../../tensorZant.zig"); -const TensorZant = tensorZant.TensorZant; -const TensorCategory = tensorZant.TensorCategory; +const tensorZant_lib = 
IR_zant.IR_graph.tensorZant_lib; +const TensorZant = tensorZant_lib.TensorZant; +const TensorCategory = tensorZant_lib.TensorCategory; + +const tensorMath = zant.core.tensor.math_standard; -const utils = @import("codegen").utils; +const utils = IR_zant.IR_codegen.utils; // https://onnx.ai/onnx/operators/onnx__Conv.html // INPUTS: @@ -47,10 +48,10 @@ pub const Conv = struct { strides: ?[]i64, pub fn init(nodeProto: *NodeProto) !Conv { - const input_X = if (tensorZant.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_X_notFound; - const input_W = if (tensorZant.tensorMap.getPtr(nodeProto.input[1])) |ptr| ptr else return error.input_W_notFound; - const input_B = if (nodeProto.input.len > 2) if (tensorZant.tensorMap.getPtr(nodeProto.input[2])) |ptr| ptr else return error.input_B_notFound else null; - const output_Y = if (tensorZant.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; + const input_X = if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_X_notFound; + const input_W = if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[1])) |ptr| ptr else return error.input_W_notFound; + const input_B = if (nodeProto.input.len > 2) if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[2])) |ptr| ptr else return error.input_B_notFound else null; + const output_Y = if (tensorZant_lib.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; var auto_pad: []const u8 = "NOTSET"; var dilations: ?[]i64 = null; @@ -76,7 +77,7 @@ pub const Conv = struct { } //set the output type: - if (output_Y.ty == tensorZant.TensorType.undefined) output_Y.ty = input_W.ty; + if (output_Y.ty == tensorZant_lib.TensorType.undefined) output_Y.ty = input_W.ty; return Conv{ .input_X = input_X, diff --git a/src/IR_graph/op_union/operators/op_div.zig b/src/IR_zant/IR_graph/op_union/operators/op_div.zig similarity index 79% rename from src/IR_graph/op_union/operators/op_div.zig rename 
to src/IR_zant/IR_graph/op_union/operators/op_div.zig index 5fb16d33..3e12374b 100644 --- a/src/IR_graph/op_union/operators/op_div.zig +++ b/src/IR_zant/IR_graph/op_union/operators/op_div.zig @@ -1,6 +1,7 @@ const std = @import("std"); const allocator = std.heap.page_allocator; const zant = @import("zant"); +const IR_zant = @import("../../../IR_zant.zig"); // --- onnx --- const onnx = zant.onnx; @@ -10,13 +11,13 @@ const NodeProto = onnx.NodeProto; const TensorProto = onnx.TensorProto; // --- zant --- -const tensorZant = @import("../../tensorZant.zig"); -const TensorZant = tensorZant.TensorZant; -const TensorCategory = tensorZant.TensorCategory; +const tensorZant_lib = IR_zant.IR_graph.tensorZant_lib; +const TensorZant = tensorZant_lib.TensorZant; +const TensorCategory = tensorZant_lib.TensorCategory; -const IR_utils = @import("../../utils.zig"); //this is IR utils -const utils = @import("codegen").utils; +const tensorMath = zant.core.tensor.math_standard; +const utils = IR_zant.IR_codegen.utils; // https://onnx.ai/onnx/operators/onnx__Div.html // INPUTS: // - A (heterogeneous) - T: First operand. 
@@ -29,12 +30,12 @@ pub const Div = struct { output_C: *TensorZant, pub fn init(nodeProto: *NodeProto) !Div { - const input_A = if (tensorZant.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_A_notFound; - const input_B = if (tensorZant.tensorMap.getPtr(nodeProto.input[1])) |ptr| ptr else return error.input_B_notFound; - const output_C = if (tensorZant.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_C_notFound; + const input_A = if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_A_notFound; + const input_B = if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[1])) |ptr| ptr else return error.input_B_notFound; + const output_C = if (tensorZant_lib.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_C_notFound; //set the output type: - if (output_C.ty == tensorZant.TensorType.undefined) output_C.ty = input_B.ty; + if (output_C.ty == tensorZant_lib.TensorType.undefined) output_C.ty = input_B.ty; return Div{ .input_A = input_A, diff --git a/src/IR_graph/op_union/operators/op_elu.zig b/src/IR_zant/IR_graph/op_union/operators/op_elu.zig similarity index 82% rename from src/IR_graph/op_union/operators/op_elu.zig rename to src/IR_zant/IR_graph/op_union/operators/op_elu.zig index 65d7ef4b..0dbf1af7 100644 --- a/src/IR_graph/op_union/operators/op_elu.zig +++ b/src/IR_zant/IR_graph/op_union/operators/op_elu.zig @@ -1,7 +1,7 @@ const std = @import("std"); const allocator = std.heap.page_allocator; const zant = @import("zant"); -const tensorMath = zant.core.tensor.math_standard; +const IR_zant = @import("../../../IR_zant.zig"); // --- onnx --- const onnx = zant.onnx; @@ -11,12 +11,13 @@ const NodeProto = onnx.NodeProto; const TensorProto = onnx.TensorProto; // --- zant --- -const tensorZant = @import("../../tensorZant.zig"); -const TensorZant = tensorZant.TensorZant; -const TensorCategory = tensorZant.TensorCategory; +const tensorZant_lib = 
IR_zant.IR_graph.tensorZant_lib; +const TensorZant = tensorZant_lib.TensorZant; +const TensorCategory = tensorZant_lib.TensorCategory; -const utils = @import("codegen").utils; +const tensorMath = zant.core.tensor.math_standard; +const utils = IR_zant.IR_codegen.utils; // https://onnx.ai/onnx/operators/onnx__Elu.html // INPUTS: // - X (heterogeneous) - T: Input tensor @@ -31,8 +32,8 @@ pub const Elu = struct { alpha: f32, // default = 1.0, pub fn init(nodeProto: *NodeProto) !Elu { - const input_X = if (tensorZant.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_X_notFound; - const output_Y = if (tensorZant.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; + const input_X = if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_X_notFound; + const output_Y = if (tensorZant_lib.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; var alpha: f32 = 1.0; @@ -46,7 +47,7 @@ pub const Elu = struct { } //set the output type: - if (output_Y.ty == tensorZant.TensorType.undefined) output_Y.ty = input_X.ty; + if (output_Y.ty == tensorZant_lib.TensorType.undefined) output_Y.ty = input_X.ty; return Elu{ .input_X = input_X, diff --git a/src/IR_graph/op_union/operators/op_flatten.zig b/src/IR_zant/IR_graph/op_union/operators/op_flatten.zig similarity index 81% rename from src/IR_graph/op_union/operators/op_flatten.zig rename to src/IR_zant/IR_graph/op_union/operators/op_flatten.zig index 08e21ea8..ab062ae9 100644 --- a/src/IR_graph/op_union/operators/op_flatten.zig +++ b/src/IR_zant/IR_graph/op_union/operators/op_flatten.zig @@ -1,6 +1,7 @@ const std = @import("std"); const allocator = std.heap.page_allocator; const zant = @import("zant"); +const IR_zant = @import("../../../IR_zant.zig"); // --- onnx --- const onnx = zant.onnx; @@ -10,10 +11,13 @@ const NodeProto = onnx.NodeProto; const TensorProto = onnx.TensorProto; // --- zant --- -const tensorZant = 
@import("../../tensorZant.zig"); -const TensorZant = tensorZant.TensorZant; +const tensorZant_lib = IR_zant.IR_graph.tensorZant_lib; +const TensorZant = tensorZant_lib.TensorZant; +const TensorCategory = tensorZant_lib.TensorCategory; + const tensorMath = zant.core.tensor.math_standard; -const utils = @import("codegen").utils; + +const utils = IR_zant.IR_codegen.utils; // https://onnx.ai/onnx/operators/onnx__Flatten.html // INPUTS: @@ -29,8 +33,8 @@ pub const Flatten = struct { axis: i64 = 1, // default = 1, pub fn init(nodeProto: *NodeProto) !Flatten { - const data = if (tensorZant.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_X_notFound; - const output = if (tensorZant.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; + const data = if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_X_notFound; + const output = if (tensorZant_lib.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; var axis: i64 = 1; for (nodeProto.attribute) |attr| { @@ -43,7 +47,7 @@ pub const Flatten = struct { } //set the output type: - if (output.ty == tensorZant.TensorType.undefined) output.ty = data.ty; + if (output.ty == tensorZant_lib.TensorType.undefined) output.ty = data.ty; return Flatten{ .data = data, @@ -73,7 +77,7 @@ pub const Flatten = struct { var input_string: []u8 = undefined; defer allocator.free(input_string); - if (self.data.tc == tensorZant.TensorCategory.INITIALIZER) { + if (self.data.tc == tensorZant_lib.TensorCategory.INITIALIZER) { input_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "@constCast(&param_lib.tensor_", try utils.getSanitizedName(self.data.name), diff --git a/src/IR_graph/op_union/operators/op_floor.zig b/src/IR_zant/IR_graph/op_union/operators/op_floor.zig similarity index 79% rename from src/IR_graph/op_union/operators/op_floor.zig rename to src/IR_zant/IR_graph/op_union/operators/op_floor.zig index 
0e308b9d..d3285fc9 100644 --- a/src/IR_graph/op_union/operators/op_floor.zig +++ b/src/IR_zant/IR_graph/op_union/operators/op_floor.zig @@ -1,7 +1,7 @@ const std = @import("std"); const allocator = std.heap.page_allocator; const zant = @import("zant"); -const tensorMath = zant.core.tensor.math_standard; +const IR_zant = @import("../../../IR_zant.zig"); // --- onnx --- const onnx = zant.onnx; @@ -11,11 +11,13 @@ const NodeProto = onnx.NodeProto; const TensorProto = onnx.TensorProto; // --- zant --- -const tensorZant = @import("../../tensorZant.zig"); -const TensorZant = tensorZant.TensorZant; -const TensorCategory = tensorZant.TensorCategory; -const IR_utils = @import("../../utils.zig"); //this is IR utils -const utils = @import("codegen").utils; +const tensorZant_lib = IR_zant.IR_graph.tensorZant_lib; +const TensorZant = tensorZant_lib.TensorZant; +const TensorCategory = tensorZant_lib.TensorCategory; + +const tensorMath = zant.core.tensor.math_standard; + +const utils = IR_zant.IR_codegen.utils; // https://onnx.ai/onnx/operators/onnx__Floor.html // INPUTS: @@ -28,11 +30,11 @@ pub const Floor = struct { output_Y: *TensorZant, pub fn init(nodeProto: *NodeProto) !Floor { - const input_X = if (tensorZant.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_X_notFound; - const output_Y = if (tensorZant.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; + const input_X = if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_X_notFound; + const output_Y = if (tensorZant_lib.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; //set the output type: - if (output_Y.ty == tensorZant.TensorType.undefined) output_Y.ty = input_X.ty; + if (output_Y.ty == tensorZant_lib.TensorType.undefined) output_Y.ty = input_X.ty; return Floor{ .input_X = input_X, diff --git a/src/IR_graph/op_union/operators/op_gather.zig 
b/src/IR_zant/IR_graph/op_union/operators/op_gather.zig similarity index 84% rename from src/IR_graph/op_union/operators/op_gather.zig rename to src/IR_zant/IR_graph/op_union/operators/op_gather.zig index a17abee9..6f4a609e 100644 --- a/src/IR_graph/op_union/operators/op_gather.zig +++ b/src/IR_zant/IR_graph/op_union/operators/op_gather.zig @@ -1,6 +1,7 @@ const std = @import("std"); const allocator = std.heap.page_allocator; const zant = @import("zant"); +const IR_zant = @import("../../../IR_zant.zig"); // --- onnx --- const onnx = zant.onnx; @@ -10,11 +11,13 @@ const NodeProto = onnx.NodeProto; const TensorProto = onnx.TensorProto; // --- zant --- -const tensorZant = @import("../../tensorZant.zig"); -const TensorZant = tensorZant.TensorZant; +const tensorZant_lib = IR_zant.IR_graph.tensorZant_lib; +const TensorZant = tensorZant_lib.TensorZant; +const TensorCategory = tensorZant_lib.TensorCategory; + const tensorMath = zant.core.tensor.math_standard; -const utils = @import("codegen").utils; -const TensorCategory = tensorZant.TensorCategory; + +const utils = IR_zant.IR_codegen.utils; // https://onnx.ai/onnx/operators/onnx__Gather.html // INPUTS: @@ -33,9 +36,9 @@ pub const Gather = struct { axis: i64 = 0, // default = 0, pub fn init(nodeProto: *NodeProto) !Gather { - const input_A = if (tensorZant.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_A_notFound; - const input_B = if (tensorZant.tensorMap.getPtr(nodeProto.input[1])) |ptr| ptr else return error.input_B_notFound; - const output_C = if (tensorZant.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_C_notFound; + const input_A = if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_A_notFound; + const input_B = if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[1])) |ptr| ptr else return error.input_B_notFound; + const output_C = if (tensorZant_lib.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return 
error.output_C_notFound; var axis: i64 = 0; for (nodeProto.attribute) |attr| { @@ -45,7 +48,7 @@ pub const Gather = struct { } //set the output type: - if (output_C.ty == tensorZant.TensorType.undefined) output_C.ty = input_A.ty; + if (output_C.ty == tensorZant_lib.TensorType.undefined) output_C.ty = input_A.ty; return Gather{ .input_A = input_A, @@ -77,7 +80,7 @@ pub const Gather = struct { // Input A (data) var tensor_A_string: []u8 = undefined; defer allocator.free(tensor_A_string); - if (self.input_A.tc == tensorZant.TensorCategory.INITIALIZER) { + if (self.input_A.tc == tensorZant_lib.TensorCategory.INITIALIZER) { tensor_A_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "@constCast(&param_lib.tensor_", try utils.getSanitizedName(self.input_A.name), @@ -93,7 +96,7 @@ pub const Gather = struct { // Input B (indices) var tensor_B_string: []u8 = undefined; defer allocator.free(tensor_B_string); - if (self.input_B.tc == tensorZant.TensorCategory.INITIALIZER) { + if (self.input_B.tc == tensorZant_lib.TensorCategory.INITIALIZER) { tensor_B_string = try std.mem.concat(allocator, u8, &[_][]const u8{ "@constCast(&param_lib.tensor_", try utils.getSanitizedName(self.input_B.name), diff --git a/src/IR_graph/op_union/operators/op_gelu.zig b/src/IR_zant/IR_graph/op_union/operators/op_gelu.zig similarity index 79% rename from src/IR_graph/op_union/operators/op_gelu.zig rename to src/IR_zant/IR_graph/op_union/operators/op_gelu.zig index c59c7e44..e40e2a62 100644 --- a/src/IR_graph/op_union/operators/op_gelu.zig +++ b/src/IR_zant/IR_graph/op_union/operators/op_gelu.zig @@ -1,7 +1,7 @@ const std = @import("std"); const allocator = std.heap.page_allocator; const zant = @import("zant"); -const tensorMath = zant.core.tensor.math_standard; +const IR_zant = @import("../../../IR_zant.zig"); // --- onnx --- const onnx = zant.onnx; @@ -11,11 +11,14 @@ const NodeProto = onnx.NodeProto; const TensorProto = onnx.TensorProto; // --- zant --- -const tensorZant = 
@import("../../tensorZant.zig"); -const TensorZant = tensorZant.TensorZant; -const TensorCategory = tensorZant.TensorCategory; -const IR_utils = @import("../../utils.zig"); //this is IR utils -const utils = @import("codegen").utils; +const tensorZant_lib = IR_zant.IR_graph.tensorZant_lib; +const TensorZant = tensorZant_lib.TensorZant; +const TensorCategory = tensorZant_lib.TensorCategory; + +const tensorMath = zant.core.tensor.math_standard; + +const utils = IR_zant.IR_codegen.utils; +const IR_utils = IR_zant.IR_graph.utils; //this is IR utils pub const Gelu = struct { input_X: *TensorZant, @@ -23,8 +26,8 @@ pub const Gelu = struct { approximate: []const u8, pub fn init(nodeProto: *NodeProto) !Gelu { - const input_X = if (tensorZant.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_X_notFound; - const output_Y = if (tensorZant.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; + const input_X = if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_X_notFound; + const output_Y = if (tensorZant_lib.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; var approximate: []const u8 = "none"; @@ -38,7 +41,7 @@ pub const Gelu = struct { } //set the output type: - if (output_Y.ty == tensorZant.TensorType.undefined) output_Y.ty = input_X.ty; + if (output_Y.ty == tensorZant_lib.TensorType.undefined) output_Y.ty = input_X.ty; return Gelu{ .input_X = input_X, diff --git a/src/IR_graph/op_union/operators/op_gemm.zig b/src/IR_zant/IR_graph/op_union/operators/op_gemm.zig similarity index 87% rename from src/IR_graph/op_union/operators/op_gemm.zig rename to src/IR_zant/IR_graph/op_union/operators/op_gemm.zig index 0e8dcaac..ade5ffac 100644 --- a/src/IR_graph/op_union/operators/op_gemm.zig +++ b/src/IR_zant/IR_graph/op_union/operators/op_gemm.zig @@ -1,7 +1,7 @@ const std = @import("std"); const allocator = std.heap.page_allocator; const zant = 
@import("zant"); -const tensorMath = zant.core.tensor.math_standard; +const IR_zant = @import("../../../IR_zant.zig"); // --- onnx --- const onnx = zant.onnx; @@ -11,11 +11,13 @@ const NodeProto = onnx.NodeProto; const TensorProto = onnx.TensorProto; // --- zant --- -const tensorZant = @import("../../tensorZant.zig"); -const TensorZant = tensorZant.TensorZant; -const TensorCategory = tensorZant.TensorCategory; +const tensorZant_lib = IR_zant.IR_graph.tensorZant_lib; +const TensorZant = tensorZant_lib.TensorZant; +const TensorCategory = tensorZant_lib.TensorCategory; + +const tensorMath = zant.core.tensor.math_standard; -const utils = @import("codegen").utils; +const utils = IR_zant.IR_codegen.utils; // https://onnx.ai/onnx/operators/onnx__Gemm.html // INPUTS: @@ -42,10 +44,10 @@ pub const Gemm = struct { transB: bool, // = false; pub fn init(nodeProto: *NodeProto) !Gemm { - const input_A = if (tensorZant.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_A_notFound; - const input_B = if (tensorZant.tensorMap.getPtr(nodeProto.input[1])) |ptr| ptr else return error.input_B_notFound; - const input_C = if (nodeProto.input.len > 2) if (tensorZant.tensorMap.getPtr(nodeProto.input[2])) |ptr| ptr else return error.input_C_notFound else null; - const output = if (tensorZant.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; + const input_A = if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_A_notFound; + const input_B = if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[1])) |ptr| ptr else return error.input_B_notFound; + const input_C = if (nodeProto.input.len > 2) if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[2])) |ptr| ptr else return error.input_C_notFound else null; + const output = if (tensorZant_lib.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; var alpha: f32 = 1.0; var beta: f32 = 1.0; @@ -65,7 +67,7 @@ pub const Gemm = 
struct { } //set the output type: - if (output.ty == tensorZant.TensorType.undefined) output.ty = input_A.ty; + if (output.ty == tensorZant_lib.TensorType.undefined) output.ty = input_A.ty; return Gemm{ .input_A = input_A, diff --git a/src/IR_graph/op_union/operators/op_identity.zig b/src/IR_zant/IR_graph/op_union/operators/op_identity.zig similarity index 80% rename from src/IR_graph/op_union/operators/op_identity.zig rename to src/IR_zant/IR_graph/op_union/operators/op_identity.zig index 9d33b165..da1f0b0e 100644 --- a/src/IR_graph/op_union/operators/op_identity.zig +++ b/src/IR_zant/IR_graph/op_union/operators/op_identity.zig @@ -1,7 +1,7 @@ const std = @import("std"); const allocator = std.heap.page_allocator; const zant = @import("zant"); -const tensorMath = zant.core.tensor.math_standard; +const IR_zant = @import("../../../IR_zant.zig"); // --- onnx --- const onnx = zant.onnx; @@ -11,12 +11,13 @@ const NodeProto = onnx.NodeProto; const TensorProto = onnx.TensorProto; // --- zant --- -const tensorZant = @import("../../tensorZant.zig"); -const TensorZant = tensorZant.TensorZant; -const TensorCategory = tensorZant.TensorCategory; +const tensorZant_lib = IR_zant.IR_graph.tensorZant_lib; +const TensorZant = tensorZant_lib.TensorZant; +const TensorCategory = tensorZant_lib.TensorCategory; -const utils = @import("codegen").utils; +const tensorMath = zant.core.tensor.math_standard; +const utils = IR_zant.IR_codegen.utils; // https://onnx.ai/onnx/operators/onnx__Identity.html#l-onnx-doc-identity // INPUTS: // - input (heterogeneous) - V: input tensor. 
@@ -28,11 +29,11 @@ pub const Identity = struct { output: *TensorZant, pub fn init(nodeProto: *NodeProto) !Identity { - const input = if (tensorZant.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_notFound; - const output = if (tensorZant.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_notFound; + const input = if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_notFound; + const output = if (tensorZant_lib.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_notFound; //set the output type: - if (output.ty == tensorZant.TensorType.undefined) output.ty = input.ty; + if (output.ty == tensorZant_lib.TensorType.undefined) output.ty = input.ty; return Identity{ .input = input, diff --git a/src/IR_graph/op_union/operators/op_leakyRelu.zig b/src/IR_zant/IR_graph/op_union/operators/op_leakyRelu.zig similarity index 82% rename from src/IR_graph/op_union/operators/op_leakyRelu.zig rename to src/IR_zant/IR_graph/op_union/operators/op_leakyRelu.zig index 8910b69b..f9617bb4 100644 --- a/src/IR_graph/op_union/operators/op_leakyRelu.zig +++ b/src/IR_zant/IR_graph/op_union/operators/op_leakyRelu.zig @@ -1,7 +1,7 @@ const std = @import("std"); const allocator = std.heap.page_allocator; const zant = @import("zant"); -const tensorMath = zant.core.tensor.math_standard; +const IR_zant = @import("../../../IR_zant.zig"); // --- onnx --- const onnx = zant.onnx; @@ -11,11 +11,13 @@ const NodeProto = onnx.NodeProto; const TensorProto = onnx.TensorProto; // --- zant --- -const tensorZant = @import("../../tensorZant.zig"); -const TensorZant = tensorZant.TensorZant; -const TensorCategory = tensorZant.TensorCategory; +const tensorZant_lib = IR_zant.IR_graph.tensorZant_lib; +const TensorZant = tensorZant_lib.TensorZant; +const TensorCategory = tensorZant_lib.TensorCategory; + +const tensorMath = zant.core.tensor.math_standard; -const utils = @import("codegen").utils; +const utils = 
IR_zant.IR_codegen.utils; // https://onnx.ai/onnx/operators/onnx__LeakyRelu.html#l-onnx-doc-leakyrelu // INPUTS: @@ -31,8 +33,8 @@ pub const LeakyRelu = struct { alpha: f32 = 0.01, // default value pub fn init(nodeProto: *NodeProto) !LeakyRelu { - const input_X = if (tensorZant.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_X_notFound; - const output_Y = if (tensorZant.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; + const input_X = if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_X_notFound; + const output_Y = if (tensorZant_lib.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; var alpha: f32 = 0.01; // default value for (nodeProto.attribute) |attr| { @@ -42,7 +44,7 @@ pub const LeakyRelu = struct { } //set the output type: - if (output_Y.ty == tensorZant.TensorType.undefined) output_Y.ty = input_X.ty; + if (output_Y.ty == tensorZant_lib.TensorType.undefined) output_Y.ty = input_X.ty; return LeakyRelu{ .input_X = input_X, diff --git a/src/IR_graph/op_union/operators/op_matMul.zig b/src/IR_zant/IR_graph/op_union/operators/op_matMul.zig similarity index 86% rename from src/IR_graph/op_union/operators/op_matMul.zig rename to src/IR_zant/IR_graph/op_union/operators/op_matMul.zig index 27c0c6df..54d0215b 100644 --- a/src/IR_graph/op_union/operators/op_matMul.zig +++ b/src/IR_zant/IR_graph/op_union/operators/op_matMul.zig @@ -1,8 +1,7 @@ const std = @import("std"); const allocator = std.heap.page_allocator; const zant = @import("zant"); -const tensorMath = zant.core.tensor.math_standard; -const mathHandler_log = std.log.scoped(.mathHandler); +const IR_zant = @import("../../../IR_zant.zig"); // --- onnx --- const onnx = zant.onnx; @@ -12,11 +11,14 @@ const NodeProto = onnx.NodeProto; const TensorProto = onnx.TensorProto; // --- zant --- -const tensorZant = @import("../../tensorZant.zig"); -const TensorZant = tensorZant.TensorZant; 
-const TensorCategory = tensorZant.TensorCategory; +const tensorZant_lib = IR_zant.IR_graph.tensorZant_lib; +const TensorZant = tensorZant_lib.TensorZant; +const TensorCategory = tensorZant_lib.TensorCategory; + +const tensorMath = zant.core.tensor.math_standard; +const mathHandler_log = std.log.scoped(.mathHandler); -const utils = @import("codegen").utils; +const utils = IR_zant.IR_codegen.utils; // https://onnx.ai/onnx/operators/onnx__MatMul.html#l-onnx-doc-matmul // INPUTS: @@ -30,12 +32,12 @@ pub const MatMul = struct { output_C: *TensorZant, pub fn init(nodeProto: *NodeProto) !MatMul { - const input_A = if (tensorZant.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_A_notFound; - const input_B = if (tensorZant.tensorMap.getPtr(nodeProto.input[1])) |ptr| ptr else return error.input_B_notFound; - const output_C = if (tensorZant.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_C_notFound; + const input_A = if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_A_notFound; + const input_B = if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[1])) |ptr| ptr else return error.input_B_notFound; + const output_C = if (tensorZant_lib.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_C_notFound; //set the output type: - if (output_C.ty == tensorZant.TensorType.undefined) output_C.ty = input_A.ty; + if (output_C.ty == tensorZant_lib.TensorType.undefined) output_C.ty = input_A.ty; return MatMul{ .input_A = input_A, diff --git a/src/IR_graph/op_union/operators/op_maxPool.zig b/src/IR_zant/IR_graph/op_union/operators/op_maxPool.zig similarity index 91% rename from src/IR_graph/op_union/operators/op_maxPool.zig rename to src/IR_zant/IR_graph/op_union/operators/op_maxPool.zig index 657b42b2..f17949c5 100644 --- a/src/IR_graph/op_union/operators/op_maxPool.zig +++ b/src/IR_zant/IR_graph/op_union/operators/op_maxPool.zig @@ -1,7 +1,7 @@ const std = @import("std"); const 
allocator = std.heap.page_allocator; const zant = @import("zant"); -const tensorMath = zant.core.tensor.math_standard; +const IR_zant = @import("../../../IR_zant.zig"); // --- onnx --- const onnx = zant.onnx; @@ -11,11 +11,13 @@ const NodeProto = onnx.NodeProto; const TensorProto = onnx.TensorProto; // --- zant --- -const tensorZant = @import("../../tensorZant.zig"); -const TensorZant = tensorZant.TensorZant; -const TensorCategory = tensorZant.TensorCategory; +const tensorZant_lib = IR_zant.IR_graph.tensorZant_lib; +const TensorZant = tensorZant_lib.TensorZant; +const TensorCategory = tensorZant_lib.TensorCategory; + +const tensorMath = zant.core.tensor.math_standard; -const utils = @import("codegen").utils; +const utils = IR_zant.IR_codegen.utils; //https://onnx.ai/onnx/operators/onnx__MaxPool.html // INPUTS: @@ -46,9 +48,9 @@ pub const MaxPool = struct { strides: ?[]i64, // default = null; pub fn init(nodeProto: *NodeProto) !MaxPool { - const input_X = if (tensorZant.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_X_notFound; - const output_Y = if (tensorZant.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; - const output_indices = if (nodeProto.output.len > 1) if (tensorZant.tensorMap.getPtr(nodeProto.output[1])) |ptr| ptr else return error.output_indices_notFound else null; + const input_X = if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_X_notFound; + const output_Y = if (tensorZant_lib.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; + const output_indices = if (nodeProto.output.len > 1) if (tensorZant_lib.tensorMap.getPtr(nodeProto.output[1])) |ptr| ptr else return error.output_indices_notFound else null; var auto_pad: []const u8 = "NOTSET"; var ceil_mode: i64 = 0; @@ -77,7 +79,7 @@ pub const MaxPool = struct { } //set the output type: - if (output_Y.ty == tensorZant.TensorType.undefined) output_Y.ty = input_X.ty; + if 
(output_Y.ty == tensorZant_lib.TensorType.undefined) output_Y.ty = input_X.ty; return MaxPool{ .input_X = input_X, diff --git a/src/IR_graph/op_union/operators/op_mul.zig b/src/IR_zant/IR_graph/op_union/operators/op_mul.zig similarity index 81% rename from src/IR_graph/op_union/operators/op_mul.zig rename to src/IR_zant/IR_graph/op_union/operators/op_mul.zig index 7ab81cd0..b554854d 100644 --- a/src/IR_graph/op_union/operators/op_mul.zig +++ b/src/IR_zant/IR_graph/op_union/operators/op_mul.zig @@ -1,7 +1,7 @@ const std = @import("std"); const allocator = std.heap.page_allocator; const zant = @import("zant"); -const tensorMath = zant.core.tensor.math_standard; +const IR_zant = @import("../../../IR_zant.zig"); // --- onnx --- const onnx = zant.onnx; @@ -11,11 +11,13 @@ const NodeProto = onnx.NodeProto; const TensorProto = onnx.TensorProto; // --- zant --- -const tensorZant = @import("../../tensorZant.zig"); -const TensorZant = tensorZant.TensorZant; -const TensorCategory = tensorZant.TensorCategory; +const tensorZant_lib = IR_zant.IR_graph.tensorZant_lib; +const TensorZant = tensorZant_lib.TensorZant; +const TensorCategory = tensorZant_lib.TensorCategory; + +const tensorMath = zant.core.tensor.math_standard; -const utils = @import("codegen").utils; +const utils = IR_zant.IR_codegen.utils; // https://onnx.ai/onnx/operators/onnx__Mul.html#l-onnx-doc-mul // INPUTS: @@ -30,12 +32,12 @@ pub const Mul = struct { output_C: *TensorZant, pub fn init(nodeProto: *NodeProto) !Mul { - const input_A = if (tensorZant.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_A_notFound; - const input_B = if (tensorZant.tensorMap.getPtr(nodeProto.input[1])) |ptr| ptr else return error.input_B_notFound; - const output_C = if (tensorZant.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_C_notFound; + const input_A = if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_A_notFound; + const input_B = if 
(tensorZant_lib.tensorMap.getPtr(nodeProto.input[1])) |ptr| ptr else return error.input_B_notFound; + const output_C = if (tensorZant_lib.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_C_notFound; //set the output type: - if (output_C.ty == tensorZant.TensorType.undefined) output_C.ty = input_A.ty; + if (output_C.ty == tensorZant_lib.TensorType.undefined) output_C.ty = input_A.ty; return Mul{ .input_A = input_A, diff --git a/src/IR_graph/op_union/operators/op_neg.zig b/src/IR_zant/IR_graph/op_union/operators/op_neg.zig similarity index 79% rename from src/IR_graph/op_union/operators/op_neg.zig rename to src/IR_zant/IR_graph/op_union/operators/op_neg.zig index 1e3ab7ec..a86a26a8 100644 --- a/src/IR_graph/op_union/operators/op_neg.zig +++ b/src/IR_zant/IR_graph/op_union/operators/op_neg.zig @@ -1,7 +1,7 @@ const std = @import("std"); const allocator = std.heap.page_allocator; const zant = @import("zant"); -const tensorMath = zant.core.tensor.math_standard; +const IR_zant = @import("../../../IR_zant.zig"); // --- onnx --- const onnx = zant.onnx; @@ -11,11 +11,13 @@ const NodeProto = onnx.NodeProto; const TensorProto = onnx.TensorProto; // --- zant --- -const tensorZant = @import("../../tensorZant.zig"); -const TensorZant = tensorZant.TensorZant; -const TensorCategory = tensorZant.TensorCategory; +const tensorZant_lib = IR_zant.IR_graph.tensorZant_lib; +const TensorZant = tensorZant_lib.TensorZant; +const TensorCategory = tensorZant_lib.TensorCategory; + +const tensorMath = zant.core.tensor.math_standard; -const utils = @import("codegen").utils; +const utils = IR_zant.IR_codegen.utils; // https://onnx.ai/onnx/operators/onnx__Neg.html#l-onnx-doc-neg // INPUTS: @@ -28,11 +30,11 @@ pub const Neg = struct { output_Y: *TensorZant, pub fn init(nodeProto: *NodeProto) !Neg { - const input_X = if (tensorZant.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_X_notFound; - const output_Y = if 
(tensorZant.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; + const input_X = if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_X_notFound; + const output_Y = if (tensorZant_lib.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; //set the output type: - if (output_Y.ty == tensorZant.TensorType.undefined) output_Y.ty = input_X.ty; + if (output_Y.ty == tensorZant_lib.TensorType.undefined) output_Y.ty = input_X.ty; return Neg{ .input_X = input_X, diff --git a/src/IR_graph/op_union/operators/op_oneHot.zig b/src/IR_zant/IR_graph/op_union/operators/op_oneHot.zig similarity index 85% rename from src/IR_graph/op_union/operators/op_oneHot.zig rename to src/IR_zant/IR_graph/op_union/operators/op_oneHot.zig index 4816d90e..e023bb5f 100644 --- a/src/IR_graph/op_union/operators/op_oneHot.zig +++ b/src/IR_zant/IR_graph/op_union/operators/op_oneHot.zig @@ -1,7 +1,7 @@ const std = @import("std"); const allocator = std.heap.page_allocator; const zant = @import("zant"); -const tensorMath = zant.core.tensor.math_standard; +const IR_zant = @import("../../../IR_zant.zig"); // --- onnx --- const onnx = zant.onnx; @@ -11,11 +11,13 @@ const NodeProto = onnx.NodeProto; const TensorProto = onnx.TensorProto; // --- zant --- -const tensorZant = @import("../../tensorZant.zig"); -const TensorZant = tensorZant.TensorZant; -const TensorCategory = tensorZant.TensorCategory; -const IR_utils = @import("../../utils.zig"); //this is IR utils -const utils = @import("codegen").utils; +const tensorZant_lib = IR_zant.IR_graph.tensorZant_lib; +const TensorZant = tensorZant_lib.TensorZant; +const TensorCategory = tensorZant_lib.TensorCategory; + +const tensorMath = zant.core.tensor.math_standard; + +const utils = IR_zant.IR_codegen.utils; // https://onnx.ai/onnx/operators/onnx__OneHot.html // INPUTS: @@ -35,10 +37,10 @@ pub const OneHot = struct { axis: ?i64, pub fn init(nodeProto: *NodeProto) 
!OneHot { - const indices = if (tensorZant.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_X_notFound; - const depth = if (tensorZant.tensorMap.getPtr(nodeProto.input[1])) |ptr| ptr else return error.depth_notFound; - const values = if (tensorZant.tensorMap.getPtr(nodeProto.input[2])) |ptr| ptr else return error.values_notFound; - const output = if (tensorZant.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; + const indices = if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_X_notFound; + const depth = if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[1])) |ptr| ptr else return error.depth_notFound; + const values = if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[2])) |ptr| ptr else return error.values_notFound; + const output = if (tensorZant_lib.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; var axis: i64 = -1; @@ -52,7 +54,7 @@ pub const OneHot = struct { } //set the output type: - if (output.ty == tensorZant.TensorType.undefined) output.ty = indices.ty; + if (output.ty == tensorZant_lib.TensorType.undefined) output.ty = indices.ty; return OneHot{ .indices = indices, diff --git a/src/IR_graph/op_union/operators/op_reduceMean.zig b/src/IR_zant/IR_graph/op_union/operators/op_reduceMean.zig similarity index 88% rename from src/IR_graph/op_union/operators/op_reduceMean.zig rename to src/IR_zant/IR_graph/op_union/operators/op_reduceMean.zig index 98b34ea6..21c26835 100644 --- a/src/IR_graph/op_union/operators/op_reduceMean.zig +++ b/src/IR_zant/IR_graph/op_union/operators/op_reduceMean.zig @@ -1,7 +1,7 @@ const std = @import("std"); const allocator = std.heap.page_allocator; const zant = @import("zant"); -const tensorMath = zant.core.tensor.math_standard; +const IR_zant = @import("../../../IR_zant.zig"); // --- onnx --- const onnx = zant.onnx; @@ -11,11 +11,13 @@ const NodeProto = onnx.NodeProto; const TensorProto = 
onnx.TensorProto; // --- zant --- -const tensorZant = @import("../../tensorZant.zig"); -const TensorZant = tensorZant.TensorZant; -const TensorCategory = tensorZant.TensorCategory; +const tensorZant_lib = IR_zant.IR_graph.tensorZant_lib; +const TensorZant = tensorZant_lib.TensorZant; +const TensorCategory = tensorZant_lib.TensorCategory; + +const tensorMath = zant.core.tensor.math_standard; -const utils = @import("codegen").utils; +const utils = IR_zant.IR_codegen.utils; // https://onnx.ai/onnx/operators/onnx__ReduceMean.html // INPUTS: @@ -35,9 +37,9 @@ pub const ReduceMean = struct { noop_with_empty_axes: bool, // defualt = false; pub fn init(nodeProto: *NodeProto) !ReduceMean { - const data = if (tensorZant.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_X_notFound; - const axes = if (nodeProto.input.len > 1) if (tensorZant.tensorMap.getPtr(nodeProto.input[1])) |ptr| ptr else return error.axes_notFound else null; - const reduced = if (tensorZant.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; + const data = if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_X_notFound; + const axes = if (nodeProto.input.len > 1) if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[1])) |ptr| ptr else return error.axes_notFound else null; + const reduced = if (tensorZant_lib.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; var keepdims: bool = true; var noop_with_empty_axes: bool = false; @@ -51,7 +53,7 @@ pub const ReduceMean = struct { } //set the output type: - if (reduced.ty == tensorZant.TensorType.undefined) reduced.ty = data.ty; + if (reduced.ty == tensorZant_lib.TensorType.undefined) reduced.ty = data.ty; return ReduceMean{ .data = data, diff --git a/src/IR_graph/op_union/operators/op_relu.zig b/src/IR_zant/IR_graph/op_union/operators/op_relu.zig similarity index 79% rename from src/IR_graph/op_union/operators/op_relu.zig rename to 
src/IR_zant/IR_graph/op_union/operators/op_relu.zig index 98560667..b6a46e41 100644 --- a/src/IR_graph/op_union/operators/op_relu.zig +++ b/src/IR_zant/IR_graph/op_union/operators/op_relu.zig @@ -1,7 +1,7 @@ const std = @import("std"); const allocator = std.heap.page_allocator; const zant = @import("zant"); -const tensorMath = zant.core.tensor.math_standard; +const IR_zant = @import("../../../IR_zant.zig"); // --- onnx --- const onnx = zant.onnx; @@ -11,11 +11,13 @@ const NodeProto = onnx.NodeProto; const TensorProto = onnx.TensorProto; // --- zant --- -const tensorZant = @import("../../tensorZant.zig"); -const TensorZant = tensorZant.TensorZant; -const TensorCategory = tensorZant.TensorCategory; +const tensorZant_lib = IR_zant.IR_graph.tensorZant_lib; +const TensorZant = tensorZant_lib.TensorZant; +const TensorCategory = tensorZant_lib.TensorCategory; + +const tensorMath = zant.core.tensor.math_standard; -const utils = @import("codegen").utils; +const utils = IR_zant.IR_codegen.utils; // https://onnx.ai/onnx/operators/onnx__Relu.html#l-onnx-doc-relu // INPUTS: @@ -28,11 +30,11 @@ pub const Relu = struct { output_Y: *TensorZant, pub fn init(nodeProto: *NodeProto) !Relu { - const input_X = if (tensorZant.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_X_notFound; - const output_Y = if (tensorZant.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; + const input_X = if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_X_notFound; + const output_Y = if (tensorZant_lib.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; //set the output type: - if (output_Y.ty == tensorZant.TensorType.undefined) output_Y.ty = input_X.ty; + if (output_Y.ty == tensorZant_lib.TensorType.undefined) output_Y.ty = input_X.ty; return Relu{ .input_X = input_X, diff --git a/src/IR_graph/op_union/operators/op_reshape.zig 
b/src/IR_zant/IR_graph/op_union/operators/op_reshape.zig similarity index 90% rename from src/IR_graph/op_union/operators/op_reshape.zig rename to src/IR_zant/IR_graph/op_union/operators/op_reshape.zig index 6663cce1..fb6d38f1 100644 --- a/src/IR_graph/op_union/operators/op_reshape.zig +++ b/src/IR_zant/IR_graph/op_union/operators/op_reshape.zig @@ -1,7 +1,7 @@ const std = @import("std"); const allocator = std.heap.page_allocator; const zant = @import("zant"); -const tensorMath = zant.core.tensor.math_standard; +const IR_zant = @import("../../../IR_zant.zig"); // --- onnx --- const onnx = zant.onnx; @@ -11,11 +11,13 @@ const NodeProto = onnx.NodeProto; const TensorProto = onnx.TensorProto; // --- zant --- -const tensorZant = @import("../../tensorZant.zig"); -const TensorZant = tensorZant.TensorZant; -const TensorCategory = tensorZant.TensorCategory; +const tensorZant_lib = IR_zant.IR_graph.tensorZant_lib; +const TensorZant = tensorZant_lib.TensorZant; +const TensorCategory = tensorZant_lib.TensorCategory; + +const tensorMath = zant.core.tensor.math_standard; -const utils = @import("codegen").utils; +const utils = IR_zant.IR_codegen.utils; // https://onnx.ai/onnx/operators/onnx__Reshape.html#l-onnx-doc-reshape // INPUTS: @@ -35,9 +37,9 @@ pub const Reshape = struct { shape_attribute: ?[]const i64, pub fn init(nodeProto: *NodeProto) !Reshape { - const data = if (tensorZant.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_X_notFound; - const shape = if (tensorZant.tensorMap.getPtr(nodeProto.input[1])) |ptr| ptr else return error.shape_notFound; - const reshaped = if (tensorZant.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; + const data = if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_X_notFound; + const shape = if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[1])) |ptr| ptr else return error.shape_notFound; + const reshaped = if 
(tensorZant_lib.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; var allowzer0: bool = false; var shape_attribute: ?[]const i64 = null; @@ -51,7 +53,7 @@ pub const Reshape = struct { } //set the output type: - if (reshaped.ty == tensorZant.TensorType.undefined) reshaped.ty = data.ty; + if (reshaped.ty == tensorZant_lib.TensorType.undefined) reshaped.ty = data.ty; return Reshape{ .data = data, diff --git a/src/IR_graph/op_union/operators/op_resize.zig b/src/IR_zant/IR_graph/op_union/operators/op_resize.zig similarity index 91% rename from src/IR_graph/op_union/operators/op_resize.zig rename to src/IR_zant/IR_graph/op_union/operators/op_resize.zig index 5a3e9d00..20355966 100644 --- a/src/IR_graph/op_union/operators/op_resize.zig +++ b/src/IR_zant/IR_graph/op_union/operators/op_resize.zig @@ -1,7 +1,7 @@ const std = @import("std"); const allocator = std.heap.page_allocator; const zant = @import("zant"); -const tensorMath = zant.core.tensor.math_standard; +const IR_zant = @import("../../../IR_zant.zig"); // --- onnx --- const onnx = zant.onnx; @@ -11,11 +11,13 @@ const NodeProto = onnx.NodeProto; const TensorProto = onnx.TensorProto; // --- zant --- -const tensorZant = @import("../../tensorZant.zig"); -const TensorZant = tensorZant.TensorZant; -const TensorCategory = tensorZant.TensorCategory; +const tensorZant_lib = IR_zant.IR_graph.tensorZant_lib; +const TensorZant = tensorZant_lib.TensorZant; +const TensorCategory = tensorZant_lib.TensorCategory; + +const tensorMath = zant.core.tensor.math_standard; -const utils = @import("codegen").utils; +const utils = IR_zant.IR_codegen.utils; // https://onnx.ai/onnx/operators/onnx__Resize.html // INPUTS: @@ -54,14 +56,14 @@ pub const Resize = struct { nearest_mode: []const u8 = "round_prefer_floor", pub fn init(nodeProto: *NodeProto) !Resize { - const input_X = if (tensorZant.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_X_notFound; + const input_X = if 
(tensorZant_lib.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_X_notFound; // ---- optional inputs - const input_roi: ?*TensorZant = if (nodeProto.input.len >= 2) if (tensorZant.tensorMap.getPtr(nodeProto.input[1])) |ptr| ptr else return error.input_X_notFound else null; - const input_scales: ?*TensorZant = if (nodeProto.input.len >= 3) if (tensorZant.tensorMap.getPtr(nodeProto.input[2])) |ptr| ptr else return error.input_roi_notFound else null; - const input_sizes: ?*TensorZant = if (nodeProto.input.len >= 4) if (tensorZant.tensorMap.getPtr(nodeProto.input[3])) |ptr| ptr else return error.input_sizes_notFound else null; + const input_roi: ?*TensorZant = if (nodeProto.input.len >= 2) if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[1])) |ptr| ptr else return error.input_X_notFound else null; + const input_scales: ?*TensorZant = if (nodeProto.input.len >= 3) if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[2])) |ptr| ptr else return error.input_roi_notFound else null; + const input_sizes: ?*TensorZant = if (nodeProto.input.len >= 4) if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[3])) |ptr| ptr else return error.input_sizes_notFound else null; - const output_Y = if (tensorZant.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; + const output_Y = if (tensorZant_lib.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; // ---- ATTRIBUTES from NodeProto var antialias: i64 = 0; @@ -97,7 +99,7 @@ pub const Resize = struct { } //set the output type: - if (output_Y.ty == tensorZant.TensorType.undefined) output_Y.ty = input_X.ty; + if (output_Y.ty == tensorZant_lib.TensorType.undefined) output_Y.ty = input_X.ty; return Resize{ .input_X = input_X, diff --git a/src/IR_graph/op_union/operators/op_shape.zig b/src/IR_zant/IR_graph/op_union/operators/op_shape.zig similarity index 85% rename from src/IR_graph/op_union/operators/op_shape.zig rename to 
src/IR_zant/IR_graph/op_union/operators/op_shape.zig index d94e5763..46cf2820 100644 --- a/src/IR_graph/op_union/operators/op_shape.zig +++ b/src/IR_zant/IR_graph/op_union/operators/op_shape.zig @@ -1,7 +1,7 @@ const std = @import("std"); const allocator = std.heap.page_allocator; const zant = @import("zant"); -const tensorMath = zant.core.tensor.math_standard; +const IR_zant = @import("../../../IR_zant.zig"); // --- onnx --- const onnx = zant.onnx; @@ -11,11 +11,13 @@ const NodeProto = onnx.NodeProto; const TensorProto = onnx.TensorProto; // --- zant --- -const tensorZant = @import("../../tensorZant.zig"); -const TensorZant = tensorZant.TensorZant; -const TensorCategory = tensorZant.TensorCategory; +const tensorZant_lib = IR_zant.IR_graph.tensorZant_lib; +const TensorZant = tensorZant_lib.TensorZant; +const TensorCategory = tensorZant_lib.TensorCategory; + +const tensorMath = zant.core.tensor.math_standard; -const utils = @import("codegen").utils; +const utils = IR_zant.IR_codegen.utils; // https://onnx.ai/onnx/operators/onnx__Shape.html // INPUTS: @@ -33,8 +35,8 @@ pub const Shape = struct { end: ?i64 = null, pub fn init(nodeProto: *NodeProto) !Shape { - const data = if (tensorZant.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.data_notFound; - const shape = if (tensorZant.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.shape_notFound; + const data = if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.data_notFound; + const shape = if (tensorZant_lib.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.shape_notFound; var start: ?i64 = null; var end: ?i64 = null; @@ -48,7 +50,7 @@ pub const Shape = struct { } //set the output type: - if (shape.ty == tensorZant.TensorType.undefined) shape.ty = data.ty; + if (shape.ty == tensorZant_lib.TensorType.undefined) shape.ty = data.ty; return Shape{ .data = data, diff --git a/src/IR_graph/op_union/operators/op_sigmoid.zig 
b/src/IR_zant/IR_graph/op_union/operators/op_sigmoid.zig similarity index 80% rename from src/IR_graph/op_union/operators/op_sigmoid.zig rename to src/IR_zant/IR_graph/op_union/operators/op_sigmoid.zig index 90345a0a..97b10d5e 100644 --- a/src/IR_graph/op_union/operators/op_sigmoid.zig +++ b/src/IR_zant/IR_graph/op_union/operators/op_sigmoid.zig @@ -1,7 +1,7 @@ const std = @import("std"); const allocator = std.heap.page_allocator; const zant = @import("zant"); -const tensorMath = zant.core.tensor.math_standard; +const IR_zant = @import("../../../IR_zant.zig"); // --- onnx --- const onnx = zant.onnx; @@ -11,11 +11,13 @@ const NodeProto = onnx.NodeProto; const TensorProto = onnx.TensorProto; // --- zant --- -const tensorZant = @import("../../tensorZant.zig"); -const TensorZant = tensorZant.TensorZant; -const TensorCategory = tensorZant.TensorCategory; +const tensorZant_lib = IR_zant.IR_graph.tensorZant_lib; +const TensorZant = tensorZant_lib.TensorZant; +const TensorCategory = tensorZant_lib.TensorCategory; + +const tensorMath = zant.core.tensor.math_standard; -const utils = @import("codegen").utils; +const utils = IR_zant.IR_codegen.utils; //https://onnx.ai/onnx/operators/onnx__Sigmoid.html // INPUTS: @@ -27,11 +29,11 @@ pub const Sigmoid = struct { output_Y: *TensorZant, pub fn init(nodeProto: *NodeProto) !Sigmoid { - const input_X = if (tensorZant.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_X_notFound; - const output_Y = if (tensorZant.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; + const input_X = if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_X_notFound; + const output_Y = if (tensorZant_lib.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; //set the output type: - if (output_Y.ty == tensorZant.TensorType.undefined) output_Y.ty = input_X.ty; + if (output_Y.ty == tensorZant_lib.TensorType.undefined) output_Y.ty = 
input_X.ty; return Sigmoid{ .input_X = input_X, diff --git a/src/IR_graph/op_union/operators/op_slice.zig b/src/IR_zant/IR_graph/op_union/operators/op_slice.zig similarity index 67% rename from src/IR_graph/op_union/operators/op_slice.zig rename to src/IR_zant/IR_graph/op_union/operators/op_slice.zig index 973cef38..7a3408bd 100644 --- a/src/IR_graph/op_union/operators/op_slice.zig +++ b/src/IR_zant/IR_graph/op_union/operators/op_slice.zig @@ -1,18 +1,23 @@ const std = @import("std"); const allocator = std.heap.page_allocator; const zant = @import("zant"); +const IR_zant = @import("../../../IR_zant.zig"); // --- onnx --- const onnx = zant.onnx; +const ModelProto = onnx.ModelProto; +const GraphProto = onnx.GraphProto; const NodeProto = onnx.NodeProto; const TensorProto = onnx.TensorProto; // --- zant --- -const tensorZant = @import("../../tensorZant.zig"); -const TensorZant = tensorZant.TensorZant; +const tensorZant_lib = IR_zant.IR_graph.tensorZant_lib; +const TensorZant = tensorZant_lib.TensorZant; +const TensorCategory = tensorZant_lib.TensorCategory; + const tensorMath = zant.core.tensor.math_standard; -const TensorCategory = tensorZant.TensorCategory; -const utils = @import("codegen").utils; + +const utils = IR_zant.IR_codegen.utils; // https://onnx.ai/onnx/operators/onnx__Slice.html // INPUTS: @@ -33,16 +38,16 @@ pub const Slice = struct { output: *TensorZant, pub fn init(nodeProto: *NodeProto) !Slice { - const input = if (tensorZant.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_X_notFound; - const starts = if (tensorZant.tensorMap.getPtr(nodeProto.input[1])) |ptr| ptr else return error.input_X_notFound; - const ends = if (tensorZant.tensorMap.getPtr(nodeProto.input[2])) |ptr| ptr else return error.input_X_notFound; - const output = if (tensorZant.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; + const input = if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return 
error.input_X_notFound; + const starts = if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[1])) |ptr| ptr else return error.input_X_notFound; + const ends = if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[2])) |ptr| ptr else return error.input_X_notFound; + const output = if (tensorZant_lib.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; // Optional inputs - const axes: ?*TensorZant = if (nodeProto.input.len >= 4) if (tensorZant.tensorMap.getPtr(nodeProto.input[3])) |ptr| ptr else return error.axes_notFound else null; - const steps: ?*TensorZant = if (nodeProto.input.len >= 4) if (tensorZant.tensorMap.getPtr(nodeProto.input[3])) |ptr| ptr else return error.steps_notFound else null; + const axes: ?*TensorZant = if (nodeProto.input.len >= 4) if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[3])) |ptr| ptr else return error.axes_notFound else null; + const steps: ?*TensorZant = if (nodeProto.input.len >= 4) if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[3])) |ptr| ptr else return error.steps_notFound else null; //set the output type: - if (output.ty == tensorZant.TensorType.undefined) output.ty = input.ty; + if (output.ty == tensorZant_lib.TensorType.undefined) output.ty = input.ty; return Slice{ .input = input, diff --git a/src/IR_graph/op_union/operators/op_softmax.zig b/src/IR_zant/IR_graph/op_union/operators/op_softmax.zig similarity index 77% rename from src/IR_graph/op_union/operators/op_softmax.zig rename to src/IR_zant/IR_graph/op_union/operators/op_softmax.zig index b4902be9..0c8fffa7 100644 --- a/src/IR_graph/op_union/operators/op_softmax.zig +++ b/src/IR_zant/IR_graph/op_union/operators/op_softmax.zig @@ -1,18 +1,23 @@ const std = @import("std"); const allocator = std.heap.page_allocator; const zant = @import("zant"); +const IR_zant = @import("../../../IR_zant.zig"); // --- onnx --- const onnx = zant.onnx; +const ModelProto = onnx.ModelProto; +const GraphProto = onnx.GraphProto; const NodeProto = 
onnx.NodeProto; const TensorProto = onnx.TensorProto; // --- zant --- -const tensorZant = @import("../../tensorZant.zig"); -const TensorZant = tensorZant.TensorZant; +const tensorZant_lib = IR_zant.IR_graph.tensorZant_lib; +const TensorZant = tensorZant_lib.TensorZant; +const TensorCategory = tensorZant_lib.TensorCategory; + const tensorMath = zant.core.tensor.math_standard; -const TensorCategory = tensorZant.TensorCategory; -const utils = @import("codegen").utils; + +const utils = IR_zant.IR_codegen.utils; //https://onnx.ai/onnx/operators/onnx__Softmax.html // INPUTS: @@ -24,11 +29,11 @@ pub const Softmax = struct { output_Y: *TensorZant, pub fn init(nodeProto: *NodeProto) !Softmax { - const input_X = if (tensorZant.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_X_notFound; - const output_Y = if (tensorZant.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; + const input_X = if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_X_notFound; + const output_Y = if (tensorZant_lib.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; //set the output type: - if (output_Y.ty == tensorZant.TensorType.undefined) output_Y.ty = input_X.ty; + if (output_Y.ty == tensorZant_lib.TensorType.undefined) output_Y.ty = input_X.ty; return Softmax{ .input_X = input_X, diff --git a/src/IR_graph/op_union/operators/op_split.zig b/src/IR_zant/IR_graph/op_union/operators/op_split.zig similarity index 84% rename from src/IR_graph/op_union/operators/op_split.zig rename to src/IR_zant/IR_graph/op_union/operators/op_split.zig index bf4832e5..29703ee6 100644 --- a/src/IR_graph/op_union/operators/op_split.zig +++ b/src/IR_zant/IR_graph/op_union/operators/op_split.zig @@ -1,18 +1,23 @@ const std = @import("std"); const allocator = std.heap.page_allocator; const zant = @import("zant"); +const IR_zant = @import("../../../IR_zant.zig"); // --- onnx --- const onnx = 
zant.onnx; +const ModelProto = onnx.ModelProto; +const GraphProto = onnx.GraphProto; const NodeProto = onnx.NodeProto; const TensorProto = onnx.TensorProto; // --- zant --- -const tensorZant = @import("../../tensorZant.zig"); -const TensorZant = tensorZant.TensorZant; +const tensorZant_lib = IR_zant.IR_graph.tensorZant_lib; +const TensorZant = tensorZant_lib.TensorZant; +const TensorCategory = tensorZant_lib.TensorCategory; + const tensorMath = zant.core.tensor.math_standard; -const TensorCategory = tensorZant.TensorCategory; -const utils = @import("codegen").utils; + +const utils = IR_zant.IR_codegen.utils; //https://onnx.ai/onnx/operators/onnx__Split.html // INPUTS: @@ -31,9 +36,9 @@ pub const Split = struct { axis: i64 = 0, // default = 0, pub fn init(nodeProto: *NodeProto) !Split { - const input = if (tensorZant.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_X_notFound; - const splitTensor = if (nodeProto.input.len > 1) if (tensorZant.tensorMap.getPtr(nodeProto.input[1])) |ptr| ptr else return error.axes_notFound else null; - const output_Y = if (tensorZant.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; + const input = if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_X_notFound; + const splitTensor = if (nodeProto.input.len > 1) if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[1])) |ptr| ptr else return error.axes_notFound else null; + const output_Y = if (tensorZant_lib.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; var axis: i64 = 0; @@ -44,7 +49,7 @@ pub const Split = struct { } //set the output type: - if (output_Y.ty == tensorZant.TensorType.undefined) output_Y.ty = input.ty; + if (output_Y.ty == tensorZant_lib.TensorType.undefined) output_Y.ty = input.ty; return Split{ .input = input, diff --git a/src/IR_graph/op_union/operators/op_sqrt.zig b/src/IR_zant/IR_graph/op_union/operators/op_sqrt.zig similarity index 
79% rename from src/IR_graph/op_union/operators/op_sqrt.zig rename to src/IR_zant/IR_graph/op_union/operators/op_sqrt.zig index 09ca32b0..1f2d44e2 100644 --- a/src/IR_graph/op_union/operators/op_sqrt.zig +++ b/src/IR_zant/IR_graph/op_union/operators/op_sqrt.zig @@ -1,7 +1,7 @@ const std = @import("std"); const allocator = std.heap.page_allocator; const zant = @import("zant"); -const tensorMath = zant.core.tensor.math_standard; +const IR_zant = @import("../../../IR_zant.zig"); // --- onnx --- const onnx = zant.onnx; @@ -11,11 +11,13 @@ const NodeProto = onnx.NodeProto; const TensorProto = onnx.TensorProto; // --- zant --- -const tensorZant = @import("../../tensorZant.zig"); -const TensorZant = tensorZant.TensorZant; -const TensorCategory = tensorZant.TensorCategory; -const IR_utils = @import("../../utils.zig"); //this is IR utils -const utils = @import("codegen").utils; +const tensorZant_lib = IR_zant.IR_graph.tensorZant_lib; +const TensorZant = tensorZant_lib.TensorZant; +const TensorCategory = tensorZant_lib.TensorCategory; + +const tensorMath = zant.core.tensor.math_standard; + +const utils = IR_zant.IR_codegen.utils; // https://onnx.ai/onnx/operators/onnx__Sqrt.html // INPUTS: @@ -28,11 +30,11 @@ pub const Sqrt = struct { output_Y: *TensorZant, pub fn init(nodeProto: *NodeProto) !Sqrt { - const input_X = if (tensorZant.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_X_notFound; - const output_Y = if (tensorZant.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; + const input_X = if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_X_notFound; + const output_Y = if (tensorZant_lib.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; //set the output type: - if (output_Y.ty == tensorZant.TensorType.undefined) output_Y.ty = input_X.ty; + if (output_Y.ty == tensorZant_lib.TensorType.undefined) output_Y.ty = input_X.ty; return Sqrt{ 
.input_X = input_X, diff --git a/src/IR_graph/op_union/operators/op_sub.zig b/src/IR_zant/IR_graph/op_union/operators/op_sub.zig similarity index 80% rename from src/IR_graph/op_union/operators/op_sub.zig rename to src/IR_zant/IR_graph/op_union/operators/op_sub.zig index e9cfcf1d..c249825e 100644 --- a/src/IR_graph/op_union/operators/op_sub.zig +++ b/src/IR_zant/IR_graph/op_union/operators/op_sub.zig @@ -1,6 +1,7 @@ const std = @import("std"); -const zant = @import("zant"); const allocator = std.heap.page_allocator; +const zant = @import("zant"); +const IR_zant = @import("../../../IR_zant.zig"); // --- onnx --- const onnx = zant.onnx; @@ -10,10 +11,13 @@ const NodeProto = onnx.NodeProto; const TensorProto = onnx.TensorProto; // --- zant --- -const tensorZant = @import("../../tensorZant.zig"); -const TensorZant = tensorZant.TensorZant; -const utils = @import("../../utils.zig"); -const TensorCategory = tensorZant.TensorCategory; +const tensorZant_lib = IR_zant.IR_graph.tensorZant_lib; +const TensorZant = tensorZant_lib.TensorZant; +const TensorCategory = tensorZant_lib.TensorCategory; + +const tensorMath = zant.core.tensor.math_standard; + +const utils = IR_zant.IR_codegen.utils; //https://onnx.ai/onnx/operators/onnx__Sub.html // INPUTS: @@ -27,12 +31,12 @@ pub const Sub = struct { output_Y: *TensorZant, pub fn init(nodeProto: *NodeProto) !Sub { - const input_A = if (tensorZant.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_A_notFound; - const input_B = if (tensorZant.tensorMap.getPtr(nodeProto.input[1])) |ptr| ptr else return error.input_B_notFound; - const output_Y = if (tensorZant.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; + const input_A = if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_A_notFound; + const input_B = if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[1])) |ptr| ptr else return error.input_B_notFound; + const output_Y = if 
(tensorZant_lib.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; //set the output type: - if (output_Y.ty == tensorZant.TensorType.undefined) output_Y.ty = input_A.ty; + if (output_Y.ty == tensorZant_lib.TensorType.undefined) output_Y.ty = input_A.ty; return Sub{ .input_A = input_A, diff --git a/src/IR_graph/op_union/operators/op_tanh.zig b/src/IR_zant/IR_graph/op_union/operators/op_tanh.zig similarity index 78% rename from src/IR_graph/op_union/operators/op_tanh.zig rename to src/IR_zant/IR_graph/op_union/operators/op_tanh.zig index 76ea30ad..1f79af48 100644 --- a/src/IR_graph/op_union/operators/op_tanh.zig +++ b/src/IR_zant/IR_graph/op_union/operators/op_tanh.zig @@ -1,18 +1,23 @@ const std = @import("std"); const allocator = std.heap.page_allocator; const zant = @import("zant"); +const IR_zant = @import("../../../IR_zant.zig"); // --- onnx --- const onnx = zant.onnx; +const ModelProto = onnx.ModelProto; +const GraphProto = onnx.GraphProto; const NodeProto = onnx.NodeProto; const TensorProto = onnx.TensorProto; // --- zant --- -const tensorZant = @import("../../tensorZant.zig"); -const TensorZant = tensorZant.TensorZant; +const tensorZant_lib = IR_zant.IR_graph.tensorZant_lib; +const TensorZant = tensorZant_lib.TensorZant; +const TensorCategory = tensorZant_lib.TensorCategory; + const tensorMath = zant.core.tensor.math_standard; -const TensorCategory = tensorZant.TensorCategory; -const utils = @import("codegen").utils; + +const utils = IR_zant.IR_codegen.utils; //https://onnx.ai/onnx/operators/onnx__Tanh.html // INPUTS: @@ -24,11 +29,11 @@ pub const Tanh = struct { output_Y: *TensorZant, pub fn init(nodeProto: *NodeProto) !Tanh { - const input_X = if (tensorZant.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_X_notFound; - const output_Y = if (tensorZant.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; + const input_X = if 
(tensorZant_lib.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_X_notFound; + const output_Y = if (tensorZant_lib.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; //set the output type: - if (output_Y.ty == tensorZant.TensorType.undefined) output_Y.ty = input_X.ty; + if (output_Y.ty == tensorZant_lib.TensorType.undefined) output_Y.ty = input_X.ty; return Tanh{ .input_X = input_X, diff --git a/src/IR_graph/op_union/operators/op_transpose.zig b/src/IR_zant/IR_graph/op_union/operators/op_transpose.zig similarity index 84% rename from src/IR_graph/op_union/operators/op_transpose.zig rename to src/IR_zant/IR_graph/op_union/operators/op_transpose.zig index 0cc8949b..4e90a013 100644 --- a/src/IR_graph/op_union/operators/op_transpose.zig +++ b/src/IR_zant/IR_graph/op_union/operators/op_transpose.zig @@ -1,7 +1,7 @@ const std = @import("std"); -const zant = @import("zant"); const allocator = std.heap.page_allocator; -const utils = @import("codegen").utils; +const zant = @import("zant"); +const IR_zant = @import("../../../IR_zant.zig"); // --- onnx --- const onnx = zant.onnx; @@ -11,10 +11,13 @@ const NodeProto = onnx.NodeProto; const TensorProto = onnx.TensorProto; // --- zant --- -const tensorZant = @import("../../tensorZant.zig"); -const TensorZant = tensorZant.TensorZant; +const tensorZant_lib = IR_zant.IR_graph.tensorZant_lib; +const TensorZant = tensorZant_lib.TensorZant; +const TensorCategory = tensorZant_lib.TensorCategory; + const tensorMath = zant.core.tensor.math_standard; -const TensorCategory = tensorZant.TensorCategory; + +const utils = IR_zant.IR_codegen.utils; //https://onnx.ai/onnx/operators/onnx__Transpose.html // INPUTS: @@ -30,8 +33,8 @@ pub const Transpose = struct { perm: []i64, pub fn init(nodeProto: *NodeProto) !Transpose { - const input_X = if (tensorZant.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_X_notFound; - const output_Y = if 
(tensorZant.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; + const input_X = if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_X_notFound; + const output_Y = if (tensorZant_lib.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; // Get the perm attribute if it exists var perm: []i64 = undefined; @@ -44,7 +47,7 @@ pub const Transpose = struct { } //set the output type: - if (output_Y.ty == tensorZant.TensorType.undefined) output_Y.ty = input_X.ty; + if (output_Y.ty == tensorZant_lib.TensorType.undefined) output_Y.ty = input_X.ty; return Transpose{ .input_X = input_X, diff --git a/src/IR_graph/op_union/operators/op_unsqueeze.zig b/src/IR_zant/IR_graph/op_union/operators/op_unsqueeze.zig similarity index 80% rename from src/IR_graph/op_union/operators/op_unsqueeze.zig rename to src/IR_zant/IR_graph/op_union/operators/op_unsqueeze.zig index 12e2a1b7..e47e4fe8 100644 --- a/src/IR_graph/op_union/operators/op_unsqueeze.zig +++ b/src/IR_zant/IR_graph/op_union/operators/op_unsqueeze.zig @@ -1,18 +1,23 @@ const std = @import("std"); const allocator = std.heap.page_allocator; const zant = @import("zant"); +const IR_zant = @import("../../../IR_zant.zig"); // --- onnx --- const onnx = zant.onnx; +const ModelProto = onnx.ModelProto; +const GraphProto = onnx.GraphProto; const NodeProto = onnx.NodeProto; const TensorProto = onnx.TensorProto; // --- zant --- -const tensorZant = @import("../../tensorZant.zig"); -const TensorZant = tensorZant.TensorZant; +const tensorZant_lib = IR_zant.IR_graph.tensorZant_lib; +const TensorZant = tensorZant_lib.TensorZant; +const TensorCategory = tensorZant_lib.TensorCategory; + const tensorMath = zant.core.tensor.math_standard; -const utils = @import("codegen").utils; -const TensorCategory = tensorZant.TensorCategory; + +const utils = IR_zant.IR_codegen.utils; //https://onnx.ai/onnx/operators/onnx__Unsqueeze.html // INPUTS: @@ -26,12 
+31,12 @@ pub const Unsqueeze = struct { output_Y: *TensorZant, pub fn init(nodeProto: *NodeProto) !Unsqueeze { - const input_X = if (tensorZant.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_X_notFound; - const input_axes = if (tensorZant.tensorMap.getPtr(nodeProto.input[1])) |ptr| ptr else return error.input_axes_notFound; - const output_Y = if (tensorZant.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; + const input_X = if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[0])) |ptr| ptr else return error.input_X_notFound; + const input_axes = if (tensorZant_lib.tensorMap.getPtr(nodeProto.input[1])) |ptr| ptr else return error.input_axes_notFound; + const output_Y = if (tensorZant_lib.tensorMap.getPtr(nodeProto.output[0])) |ptr| ptr else return error.output_Y_notFound; //set the output type: - if (output_Y.ty == tensorZant.TensorType.undefined) output_Y.ty = input_X.ty; + if (output_Y.ty == tensorZant_lib.TensorType.undefined) output_Y.ty = input_X.ty; return Unsqueeze{ .input_X = input_X, diff --git a/src/IR_graph/op_union/operators/op_useless.zig b/src/IR_zant/IR_graph/op_union/operators/op_useless.zig similarity index 70% rename from src/IR_graph/op_union/operators/op_useless.zig rename to src/IR_zant/IR_graph/op_union/operators/op_useless.zig index f8589717..928b3537 100644 --- a/src/IR_graph/op_union/operators/op_useless.zig +++ b/src/IR_zant/IR_graph/op_union/operators/op_useless.zig @@ -1,6 +1,7 @@ const std = @import("std"); -const zant = @import("zant"); const allocator = std.heap.page_allocator; +const zant = @import("zant"); +const IR_zant = @import("../../../IR_zant.zig"); // --- onnx --- const onnx = zant.onnx; @@ -10,9 +11,13 @@ const NodeProto = onnx.NodeProto; const TensorProto = onnx.TensorProto; // --- zant --- -const tensorZant = @import("../../tensorZant.zig"); -const TensorZant = tensorZant.TensorZant; +const tensorZant_lib = IR_zant.IR_graph.tensorZant_lib; +const TensorZant = 
tensorZant_lib.TensorZant; +const TensorCategory = tensorZant_lib.TensorCategory; + +const tensorMath = zant.core.tensor.math_standard; +const utils = IR_zant.IR_codegen.utils; pub const Useless = struct { pub fn init(nodeProto: *NodeProto) !Useless { _ = nodeProto; //"details" will be a onnx struct diff --git a/src/IR_graph/op_union/operators/operators.zig b/src/IR_zant/IR_graph/op_union/operators/operators.zig similarity index 100% rename from src/IR_graph/op_union/operators/operators.zig rename to src/IR_zant/IR_graph/op_union/operators/operators.zig diff --git a/src/IR_graph/tensorZant.zig b/src/IR_zant/IR_graph/tensorZant.zig similarity index 100% rename from src/IR_graph/tensorZant.zig rename to src/IR_zant/IR_graph/tensorZant.zig diff --git a/src/IR_graph/utils.zig b/src/IR_zant/IR_graph/utils.zig similarity index 100% rename from src/IR_graph/utils.zig rename to src/IR_zant/IR_graph/utils.zig diff --git a/src/IR_zant/IR_zant.zig b/src/IR_zant/IR_zant.zig new file mode 100644 index 00000000..49c03591 --- /dev/null +++ b/src/IR_zant/IR_zant.zig @@ -0,0 +1,5 @@ +const std = @import("std"); +const zant = @import("zant"); + +pub const IR_graph = @import("IR_graph/IR_graph.zig"); +pub const IR_codegen = @import("IR_codegen/IR_codegen.zig"); diff --git a/tests/CodeGen/Python-ONNX/available_operations.txt b/tests/CodeGen/Python-ONNX/available_operations.txt index 49a1c273..9f852a20 100644 --- a/tests/CodeGen/Python-ONNX/available_operations.txt +++ b/tests/CodeGen/Python-ONNX/available_operations.txt @@ -22,4 +22,4 @@ Sigmoid Sqrt Sub Tanh -Transpose \ No newline at end of file +Transpose \ No newline at end of file diff --git a/tests/CodeGen/test_model.slim.template.zig b/tests/CodeGen/test_model.slim.template.zig index a758b3b5..e4e2a508 100644 --- a/tests/CodeGen/test_model.slim.template.zig +++ b/tests/CodeGen/test_model.slim.template.zig @@ -1,6 +1,6 @@ const std = @import("std"); const zant = @import("zant"); -const codegen = @import("codegen"); +const 
codegen = @import("IR_codegen"); const utils = codegen.utils; const Tensor = zant.core.tensor.Tensor; const pkgAllocator = zant.utils.allocator; From 6eb5f61fc8f6ebdf0444dc780766836a882453a2 Mon Sep 17 00:00:00 2001 From: MirkoCalvi Date: Sat, 7 Jun 2025 15:29:18 +0200 Subject: [PATCH 8/9] all one op pass --- tests/CodeGen/oneOpModelGenerator.zig | 2 +- tests/CodeGen/test_model.slim.template.zig | 2 +- tests/CodeGen/test_model.template.zig | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/CodeGen/oneOpModelGenerator.zig b/tests/CodeGen/oneOpModelGenerator.zig index ec235a17..d79dad13 100644 --- a/tests/CodeGen/oneOpModelGenerator.zig +++ b/tests/CodeGen/oneOpModelGenerator.zig @@ -7,7 +7,7 @@ const pkgAllocator = zant.utils.allocator; const allocator = pkgAllocator.allocator; const onnx = zant.onnx; -const IR_codeGen = @import("IR_codegen"); +const IR_codeGen = @import("IR_zant").IR_codegen; // called by "zig build test-codegen-gen" optionals:" -Dlog -Dmodel="name" -D ..." 
see build.zig" pub fn main() !void { diff --git a/tests/CodeGen/test_model.slim.template.zig b/tests/CodeGen/test_model.slim.template.zig index e4e2a508..65e092bb 100644 --- a/tests/CodeGen/test_model.slim.template.zig +++ b/tests/CodeGen/test_model.slim.template.zig @@ -1,6 +1,6 @@ const std = @import("std"); const zant = @import("zant"); -const codegen = @import("IR_codegen"); +const codegen = @import("IR_zant").IR_codegen; const utils = codegen.utils; const Tensor = zant.core.tensor.Tensor; const pkgAllocator = zant.utils.allocator; diff --git a/tests/CodeGen/test_model.template.zig b/tests/CodeGen/test_model.template.zig index f3e89e39..3321c531 100644 --- a/tests/CodeGen/test_model.template.zig +++ b/tests/CodeGen/test_model.template.zig @@ -1,6 +1,6 @@ const std = @import("std"); const zant = @import("zant"); -const codegen = @import("codegen"); +const codegen = @import("IR_zant").IR_codegen; const utils = codegen.utils; const Tensor = zant.core.tensor.Tensor; const pkgAllocator = zant.utils.allocator; From ae8776506b59c4abef4454feb3d185ec562390f4 Mon Sep 17 00:00:00 2001 From: MirkoCalvi Date: Sat, 7 Jun 2025 15:59:08 +0200 Subject: [PATCH 9/9] debugging --- build.zig | 44 +-- src/CodeGen/tests.zig | 87 ----- src/CodeGen/utils_old.zig | 567 ------------------------------- src/IR_zant/IR_graph/TODO | 12 +- tests/IR_graph/graph_init.zig | 2 +- tests/IR_graph/linearization.zig | 2 +- tests/IR_graph/tensorZant.zig | 4 +- tests/IR_graph/utils.zig | 6 +- 8 files changed, 14 insertions(+), 710 deletions(-) delete mode 100644 src/CodeGen/tests.zig delete mode 100644 src/CodeGen/utils_old.zig diff --git a/build.zig b/build.zig index a7ac03f7..812566c3 100644 --- a/build.zig +++ b/build.zig @@ -146,48 +146,8 @@ pub fn build(b: *std.Build) void { const IR_codegen_step = b.step("IR_codegen", "code generation"); IR_codegen_step.dependOn(&IR_codegen_cmd.step); - //\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\ - - // // 
Define codegen options - // const codegen_options = b.addOptions(); // Model name option - // codegen_options.addOption([]const u8, "model", model_name_option); - // codegen_options.addOption([]const u8, "model_path", model_path_option); - // codegen_options.addOption([]const u8, "generated_path", generated_path_option); - // codegen_options.addOption([]const u8, "user_tests", user_tests_option); - // codegen_options.addOption(bool, "log", log_option); - // codegen_options.addOption([]const u8, "shape", shape_option); - // codegen_options.addOption([]const u8, "type", input_type_option); - // codegen_options.addOption([]const u8, "output_type", output_type_option); - // codegen_options.addOption(bool, "comm", comm_option); - // codegen_options.addOption(bool, "dynamic", dynamic_option); - - // // ************************************************CODEGEN EXECUTABLE************************************************ - // // Define the main executable with target architecture and optimization settings. - // const codeGen_exe = b.addExecutable(.{ - // .name = "Codegen", - // .root_source_file = b.path("src/CodeGen/main.zig"), - // .target = target, - // .optimize = optimize, - // }); - - // codeGen_exe.linkLibC(); - - // // Add necessary imports for the executable. - // codeGen_exe.root_module.addImport("zant", zant_mod); - // codeGen_exe.root_module.addOptions("codegen_options", codegen_options); - - // // Install the executable. - // b.installArtifact(codeGen_exe); - - // // Define the run command for the main executable. - // const codegen_cmd = b.addRunArtifact(codeGen_exe); - // if (b.args) |args| { - // codegen_cmd.addArgs(args); - // } - - // // Create a build step to run the application. 
- // const codegen_step = b.step("codegen", "code generation"); - // codegen_step.dependOn(&codegen_cmd.step); + //\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\// + ////\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\ // ************************************************ STATIC LIBRARY CREATION ************************************************ diff --git a/src/CodeGen/tests.zig b/src/CodeGen/tests.zig deleted file mode 100644 index e8f36786..00000000 --- a/src/CodeGen/tests.zig +++ /dev/null @@ -1,87 +0,0 @@ -const std = @import("std"); -const zant = @import("zant"); -const codegen = @import("codegen.zig"); -const globals = codegen.globals; -const utils = codegen.utils; -const onnx = zant.onnx; -const ModelOnnx = onnx.ModelProto; -const codegen_options = @import("codegen_options"); -const allocator = zant.utils.allocator.allocator; - -pub fn UserTest(comptime T: type) type { - return struct { - name: []u8, - type: []u8, - input: []T, - output: []T, - expected_class: usize, - }; -} - -fn writeModelOptionsFile(model_name: []const u8, model_path: []const u8) !void { - // Generate model_options.zig - - const model_options_path = try std.fmt.allocPrint(allocator, "{s}model_options.zig", .{model_path}); - const model_options_file = try std.fs.cwd().createFile(model_options_path, .{}); - defer model_options_file.close(); - - const writer = model_options_file.writer(); - - //////////// - - var output_data_len: i64 = 1; - - for (globals.networkOutput.shape) |dim| { - output_data_len *= dim; - } - - _ = try writer.print( - \\ - \\pub const lib = @import("lib_{s}.zig"); - \\pub const name = "{s}"; - \\pub const input_shape = [{d}]u32{any}; - \\pub const output_data_len = {d}; - \\pub const data_type = {s}; - \\pub const enable_user_tests : bool = {any}; - \\pub const user_tests_path = "{s}"; - , .{ - model_name, - model_name, - globals.networkInput.shape.len, - 
globals.networkInput.shape, - output_data_len, - codegen_options.type, - codegen_options.user_tests.len > 0, - try std.fmt.allocPrint(allocator, "{s}user_tests.json", .{model_path}), - }); - - //////////// -} - -pub fn writeTestFile(model_name: []const u8, model_path: []const u8) !void { - - // Copy test file template into the generated test file - const test_file_path = try std.fmt.allocPrint(allocator, "{s}test_{s}.zig", .{ model_path, model_name }); - - try utils.copyFile("tests/CodeGen/test_model.template.zig", test_file_path); - std.log.info("\n\nGenerated test file: {s}\n", .{test_file_path}); - - // Copy user test file into the generated test file - if (codegen_options.user_tests.len > 0) { - const provided_user_tests_path = codegen_options.user_tests; - const user_tests_path = try std.fmt.allocPrint(allocator, "{s}user_tests.json", .{model_path}); - try utils.copyFile(provided_user_tests_path, user_tests_path); - } - - try writeModelOptionsFile(model_name, model_path); -} - -pub fn writeSlimTestFile(model_name: []const u8, model_path: []const u8) !void { - // Copy test file template into the generated test file - const test_file_path = try std.fmt.allocPrint(allocator, "{s}test_{s}.zig", .{ model_path, model_name }); - - try utils.copyFile("tests/CodeGen/test_model.slim.template.zig", test_file_path); - std.log.info("\n\nGenerated test file: {s}\n", .{test_file_path}); - - try writeModelOptionsFile(model_name, model_path); -} diff --git a/src/CodeGen/utils_old.zig b/src/CodeGen/utils_old.zig deleted file mode 100644 index aff90f64..00000000 --- a/src/CodeGen/utils_old.zig +++ /dev/null @@ -1,567 +0,0 @@ -const std = @import("std"); -const zant = @import("zant"); -const onnx = zant.onnx; -const DataType = onnx.DataType; -const GraphProto = onnx.GraphProto; -const NodeProto = onnx.NodeProto; -const TensorProto = onnx.TensorProto; -const allocator = zant.utils.allocator.allocator; -const codegen = @import("codegen.zig"); -const globals = codegen.globals; 
-const tests = codegen.tests; -const ReadyNode = globals.ReadyNode; -const ReadyTensor = globals.ReadyTensor; - -// -------------------- GETTERS -------------------- - -//Given an element from DataType Enum in onnx.zig returns the equivalent zig type -pub inline fn getType(data_type: DataType) !type { - switch (data_type) { - .FLOAT => { - return f32; - }, - .UINT8 => { - return u8; - }, - .INT8 => { - return i8; - }, - .UINT16 => { - return u16; - }, - .INT16 => { - return i16; - }, - .INT32 => { - return i32; - }, - .INT64 => { - return i64; - }, - .FLOAT16 => { - return f16; - }, - .DOUBLE => { - return f64; - }, - .UNIT32 => { - return u32; - }, - .UINT64 => { - return u64; - }, - else => return error.DataTypeNotAvailable, - } -} - -//Given an element from DataType Enum in onnx.zig returns the equivalent string of a zig type -pub inline fn getTypeString(data_type: DataType) ![]const u8 { - switch (data_type) { - .FLOAT => { - return "f32"; - }, - .UINT8 => { - return "u8"; - }, - .INT8 => { - return "i8"; - }, - .UINT16 => { - return "u16"; - }, - .INT16 => { - return "i16"; - }, - .INT32 => { - return "i32"; - }, - .INT64 => { - return "i64"; - }, - .FLOAT16 => { - return "f16"; - }, - .DOUBLE => { - return "f64"; - }, - .UINT32 => { - return "u32"; - }, - .UINT64 => { - return "u64"; - }, - else => return error.DataTypeNotAvailable, - } -} - -//Returns the sanitized tensor's name, removes all non alphanumeric chars -pub inline fn getSanitizedName(name: []const u8) ![]const u8 { - var sanitized = try allocator.alloc(u8, name.len); - - for (name, 0..) 
|char, i| { - sanitized[i] = if (std.ascii.isAlphanumeric(char) or char == '_') - std.ascii.toLower(char) - else - '_'; - } - - //std.log.debug("\nfrom {s} to {s} ", .{ name, sanitized }); - - return sanitized; -} - -/// Returns a List of Ready nodes -/// A node is considered "computable" if all the node's input Tensors are set as ready -pub inline fn getComputableNodes(readyGraph: *std.ArrayList(ReadyNode)) !std.ArrayList(*ReadyNode) { - var set: std.ArrayList(*ReadyNode) = std.ArrayList(*ReadyNode).init(allocator); - var ready_input_counter: i8 = 0; - var null_input_counter: i8 = 0; - - for (readyGraph.items) |*node| { - if (!node.ready) { - for (node.inputs.items) |input| { - if (input == null) null_input_counter += 1 else if (input.?.ready) ready_input_counter += 1; - } - for (node.outputs.items) |output| { - if (output.ready) return error.OutputReadyTooEarly; - } - if (ready_input_counter + null_input_counter == node.inputs.items.len) { - try set.append(node); - //std.log.debug("\n --- {s} is computable", .{node.nodeProto.name.?}); - } - ready_input_counter = 0; - null_input_counter = 0; - } - } - - return set; -} - -pub inline fn getConstantTensorDims(nodeProto: *NodeProto) ![]const i64 { - //check the node is a Constant - if (std.mem.indexOf(u8, try getSanitizedName(nodeProto.op_type), "constant")) |_| {} else return error.NodeNotConstant; - - return if (nodeProto.attribute[0].t) |tensorProto| tensorProto.dims else error.ConstantTensorAttributeNotAvailable; -} - -/// This method search for the existance of a Tensor named "tensorName" inside the onnx model.graph.value_info array. -/// If founded return its shape, else returns null. 
-pub fn getTensorShape(tensorName: []const u8) ?[]i64 { - for (globals.onnxModel.graph.?.value_info) |vi| { - if (std.mem.eql(u8, vi.name.?, tensorName)) { - return vi.type.?.tensor_type.?.shape.?.shape; - } - } - - return null; -} -// -------------------- SETTERS -------------------- - -// Marks output tensors as ready for computation in all the graph -pub fn setOutputsReady(completedNode: *ReadyNode, tensorHashMap: *std.StringHashMap(ReadyTensor)) !void { - std.log.info("\n -----> set {s} outputs to ready", .{completedNode.nodeProto.name orelse "(unnamed)"}); - completedNode.ready = true; - for (completedNode.outputs.items) |ready_output_tensor| { //for each output tensor of the completed node - var mutablePtr: *ReadyTensor = if (tensorHashMap.getPtr(ready_output_tensor.name)) |V_ptr| V_ptr else return error.keyNotAvailable; - mutablePtr.ready = true; - std.log.info("\n {s} --> ready", .{mutablePtr.name}); - } -} - -// -------------------- BOOLEANS -------------------- - -// returns true if all the inputs are ready -pub inline fn areAllInputsReady(node: *ReadyNode) bool { - for (node.inputs.items) |input| { - if (!input.ready) return false; - } - return true; -} - -//returns true if all the inputs and all the outputs of a node are set as ready -pub inline fn isComputed(readyNode: *ReadyNode) !bool { - for (readyNode.inputs.items) |input| { - if (!input.ready) return false; - } - for (readyNode.outputs.items) |output| { - if (!output.ready) return false; - } - return true; -} - -//return true if the first parameter is an initializer -pub fn isInitializer(name: []const u8, initializers: []*TensorProto) bool { - for (initializers) |init| { - if (std.mem.eql(u8, init.name.?, name)) return true; - } - return false; -} - -//return true if the name is an input of the nn -pub fn isInput(name: []const u8) bool { - for (globals.onnxModel.graph.?.inputs) |input| { - if (std.mem.eql(u8, input.name.?, name)) return true; - } - return false; -} - -//return true if the name is 
an output of the nn -pub fn isOutput(name: []const u8) bool { - for (globals.onnxModel.graph.?.outputs) |output| { - if (std.mem.eql(u8, output.name.?, name)) return true; - } - return false; -} -// -------------------- PRINTERS -------------------- - -// Prints the list of nodes in the given computation graph. -// Outputs each node's name along with its input and output tensors and their readiness status. -pub fn printNodeList(graph: std.ArrayList(ReadyNode)) !void { - std.debug.print("\n-------------------------------------------------------------", .{}); - std.debug.print("\n+ READY GRAPH +", .{}); - std.debug.print("\n-------------------------------------------------------------\n", .{}); - for (graph.items) |node| { - std.log.info("\n ----- node: {s}", .{node.nodeProto.name.?}); - - std.log.info("\n inputs: ", .{}); - // Write the inputs - for (node.inputs.items) |input| { - std.log.info("\n ->{s} {s}", .{ input.name, if (input.ready) "--->ready" else "" }); - } - - std.log.info("\n outputs:", .{}); - // Write the outputs - for (node.outputs.items) |output| { - std.log.info("\n -> {s} {s}", .{ output.name, if (output.ready) "--->ready" else "" }); - } - } -} - -// Prints the list of nodes that are ready for computation. -// Outputs each node's name, operation type, inputs, and outputs along with their readiness status. -pub fn printComputableNodes(computableNodes: std.ArrayList(*ReadyNode), details: bool) !void { - std.debug.print("\n------------------------------------------------------------", .{}); - std.debug.print("\n+ COMPUTABLE NODES n:{} +", .{computableNodes.items.len}); - std.debug.print("\n------------------------------------------------------------\n", .{}); - - for (computableNodes.items) |node| { - node.print(details); - } -} - -// Prints the list of unique ONNX operations present in the given graph. -// Outputs each operation type only once. 
-pub fn printOperations(graph: *GraphProto) !void { - std.debug.print("\n", .{}); - std.debug.print("\n-------------------------------------------------", .{}); - std.debug.print("\n+ ONNX operations +", .{}); - std.debug.print("\n-------------------------------------------------\n", .{}); - - var op_set = std.StringHashMap(void).init(std.heap.page_allocator); - defer op_set.deinit(); - - for (graph.nodes) |node| { - try op_set.put(node.op_type, {}); - } - - var it = op_set.iterator(); - while (it.next()) |entry| { - std.log.debug("\n- {s}", .{entry.key_ptr.*}); - } - - std.debug.print("\n-------------------------------------------------\n", .{}); -} - -// Function to print all entries in the tensorHashMap -pub fn printTensorHashMap(map: std.StringHashMap(ReadyTensor)) void { - std.debug.print("\n-------------------------------------------------------------", .{}); - std.debug.print("\n+ READY HASHMAP +", .{}); - std.debug.print("\n-------------------------------------------------------------\n", .{}); - - var it = map.iterator(); - while (it.next()) |entry| { - const key = entry.key_ptr.*; - const tensor = entry.value_ptr.*; - std.log.info("\nTensor Name: {s}", .{key}); - std.log.info("\n Ready: {}", .{tensor.ready}); - std.log.info("\n Shape: [{any}]", .{tensor.shape}); - } -} - -// ----------------- DATA TYPE management ------------- - -pub inline fn i64SliceToUsizeSlice(input: []const i64) ![]usize { - var output = try allocator.alloc(usize, input.len); - - const maxUsize = std.math.maxInt(usize); - - for (input, 0..) |value, index| { - if (value < 0) { - return error.NegativeValue; - } - if (value > maxUsize) { - return error.ValueTooLarge; - } - output[index] = @intCast(value); - } - - return output; -} - -pub fn usizeSliceToI64Slice(input: []usize) ![]const i64 { - var output = try allocator.alloc(i64, input.len); - - for (input, 0..) 
|value, index| { - if (value > std.math.maxInt(i64)) { - return error.ValueTooLarge; - } - output[index] = @intCast(value); - } - - return output; -} - -/// Converts any integer value to usize with proper bounds checking -/// Returns error.NegativeValue if the input is negative (for signed types) -/// Returns error.ValueTooLarge if the input exceeds the maximum usize value -pub inline fn toUsize(comptime T: type, value: T) !usize { - // Ensure T is an integer type - comptime { - if (@typeInfo(T) != .Int) { - @compileError("toUsize only supports integer types"); - } - } - - // Check for negative values if T is signed - if (@typeInfo(T).Int.signedness == .signed and value < 0) { - return error.NegativeValue; - } - - // Check if value exceeds maximum usize - const maxUsize = std.math.maxInt(usize); - if (@as(u128, @intCast(if (@typeInfo(T).Int.signedness == .signed) @as(u128, @intCast(@max(0, value))) else @as(u128, @intCast(value)))) > maxUsize) { - return error.ValueTooLarge; - } - - return @intCast(value); -} - -pub inline fn sliceToUsizeSlice(slice: anytype) []usize { - const T = @TypeOf(slice); - const info = @typeInfo(T); - - switch (info) { - .pointer => { - const child = info.pointer.child; - const child_info = @typeInfo(child); - - var output = allocator.alloc(usize, slice.len) catch @panic("Out of memory in sliceToUsizeSlice"); - const maxUsize = std.math.maxInt(usize); - - for (slice, 0..) 
|value, index| { - if (child_info == .int) { - // Handle integer types - if (value < 0) { - if (value == -1) { - output[index] = std.math.maxInt(usize); - } else { - @panic("Invalid negative value in sliceToUsizeSlice (only -1 is allowed)"); - } - } else { - if (@as(u128, @intCast(value)) > maxUsize) { - @panic("Value too large in sliceToUsizeSlice"); - } - output[index] = @intCast(value); - } - } else if (child_info == .float) { - // Handle float types - if (value < 0) { - if (value == -1.0) { - output[index] = std.math.maxInt(usize); - } else { - @panic("Invalid negative value in sliceToUsizeSlice (only -1 is allowed)"); - } - } else { - if (value > @as(f64, @floatFromInt(maxUsize))) { - @panic("Value too large in sliceToUsizeSlice"); - } - output[index] = @intFromFloat(value); - } - } else { - @compileError("Unsupported element type for sliceToUsizeSlice: " ++ @typeName(child)); - } - } - - return output; - }, - else => { - @compileError("Unsupported type for sliceToUsizeSlice: " ++ @typeName(T)); - }, - } -} - -// Modify signature to accept allocator -pub inline fn sliceToIsizeSlice(alloc: std.mem.Allocator, slice: anytype) []isize { - const T = @TypeOf(slice); - const info = @typeInfo(T); - - switch (info) { - .pointer => { - const child = info.pointer.child; - const child_info = @typeInfo(child); - - // Use the passed allocator - var output = alloc.alloc(isize, slice.len) catch @panic("Out of memory in sliceToIsizeSlice"); - const maxIsize = std.math.maxInt(isize); - const minIsize = std.math.minInt(isize); - - for (slice, 0..) 
|value, index| { - if (child_info == .int) { - // Handle integer types - if (value < minIsize or value > maxIsize) { - @panic("Value out of isize range in sliceToIsizeSlice"); - } - output[index] = @intCast(value); - } else if (child_info == .float) { - // Handle float types - if (value < @as(f64, @floatFromInt(minIsize)) or value > @as(f64, @floatFromInt(maxIsize))) { - @panic("Value out of isize range in sliceToIsizeSlice"); - } - output[index] = @intFromFloat(value); - } else { - @compileError("Unsupported element type for sliceToIsizeSlice: " ++ @typeName(child)); - } - } - - return output; - }, - else => { - @compileError("Unsupported type for sliceToIsizeSlice: " ++ @typeName(T)); - }, - } -} - -pub fn i64ToI64ArrayString(values: []const i64) ![]const u8 { - var buffer: [20]u8 = undefined; - var res_string = try std.mem.concat(allocator, u8, &[_][]const u8{"&[_]i64{"}); - for (values, 0..) |val, i| { - if (i > 0) res_string = try std.mem.concat(allocator, u8, &[_][]const u8{ res_string, "," }); - const val_string = std.fmt.bufPrint(&buffer, "{}", .{val}) catch unreachable; - res_string = try std.mem.concat(allocator, u8, &[_][]const u8{ res_string, val_string }); - } - res_string = try std.mem.concat(allocator, u8, &[_][]const u8{ res_string, "}" }); - - return res_string; -} - -pub fn u32ToUsize(alloc: std.mem.Allocator, input: [*]u32, size: u32) ![]usize { - var output = try alloc.alloc(usize, size); - - const maxUsize = std.math.maxInt(usize); - - for (0..size) |i| { - if (input[i] < 0) { - return error.NegativeValue; - } - if (input[i] > maxUsize) { - return error.ValueTooLarge; - } - output[i] = @intCast(input[i]); - } - - return output; -} - -pub fn parseNumbers(input: []const u8) ![]i64 { - var list = std.ArrayList(i64).init(allocator); - errdefer list.deinit(); - - if (input.len == 0) return list.toOwnedSlice(); - - var it = std.mem.splitScalar(u8, input, ','); - while (it.next()) |num_str| { - const num = try std.fmt.parseInt(i64, num_str, 10); - try 
list.append(num); - } - - return list.toOwnedSlice(); -} - -pub fn i64SliceToUsizeArrayString(values: []const i64) ![]const u8 { - var list = std.ArrayList(u8).init(allocator); - defer list.deinit(); // Frees all memory - - try list.appendSlice("&[_]usize{"); - for (values, 0..) |val, i| { - if (i > 0) try list.append(','); - try list.writer().print("{}", .{val}); - } - try list.append('}'); - - return try list.toOwnedSlice(); // Caller must free this! -} - -// ----------------- FILE MANAGEMENT ----------------- -// Copy file from src to dst -pub fn copyFile(src_path: []const u8, dst_path: []const u8) !void { - var src_file = try std.fs.cwd().openFile(src_path, .{}); - defer src_file.close(); - - var dst_file = try std.fs.cwd().createFile(dst_path, .{}); - defer dst_file.close(); - - // Use a buffer to copy in chunks - var buf: [4096]u8 = undefined; - while (true) { - const bytes_read = try src_file.read(&buf); - if (bytes_read == 0) break; - _ = try dst_file.write(buf[0..bytes_read]); - } -} - -// Read the user_tests json file and return a list of test cases -pub fn loadUserTests(comptime T: type, user_tests_path: []const u8) !std.json.Parsed([]tests.UserTest(T)) { - const user_tests_file = try std.fs.cwd().openFile(user_tests_path, .{}); - defer user_tests_file.close(); - - const user_tests_content: []const u8 = try user_tests_file.readToEndAlloc(allocator, 1024 * 1024); - defer allocator.free(user_tests_content); - - const parsed_user_tests = try std.json.parseFromSlice([]tests.UserTest(T), allocator, user_tests_content, .{}); - - return parsed_user_tests; -} - -/// Parses a raw byte slice (expected to be little-endian) into an allocated slice of i64. 
-pub fn parseI64RawData(raw_data: []const u8) ![]i64 { - const element_size = @sizeOf(i64); - if (raw_data.len % element_size != 0) { - std.log.warn("ERROR: Raw data length ({}) is not a multiple of i64 size ({})\n", .{ raw_data.len, element_size }); - return error.InvalidRawDataSize; - } - - const num_elements = raw_data.len / element_size; - if (num_elements == 0) { - // Return an empty slice if raw_data is empty (and length is valid multiple of 0) - return try allocator.alloc(i64, 0); - } - - // Allocate the result slice. - const result = try allocator.alloc(i64, num_elements); - errdefer allocator.free(result); - - // Fallback: Use pointer casting to interpret raw bytes as i64 (assumes alignment and little-endian) - // Ensure alignment (optional, might panic on some archs if unaligned) - // if (@alignOf(i64) > @alignOf(u8) and @ptrToInt(raw_data.ptr) % @alignOf(i64) != 0) { - // std.log.warn("ERROR: Raw data pointer is not aligned for i64 read.\n", .{}); - // return error.UnalignedRawData; - // } - - // Cast the byte slice pointer to an i64 slice pointer - const i64_ptr: [*]const i64 = @ptrCast(@alignCast(raw_data.ptr)); - - // Copy the data from the cast pointer into the result slice - @memcpy(result, i64_ptr[0..num_elements]); - - return result; -} diff --git a/src/IR_zant/IR_graph/TODO b/src/IR_zant/IR_graph/TODO index c7c8ea01..66587216 100644 --- a/src/IR_zant/IR_graph/TODO +++ b/src/IR_zant/IR_graph/TODO @@ -3,14 +3,12 @@ IR_zant - getInputs() - getOutputs() - once done above complete write_op_info() -- implement mean node( first check Merge MirkoGraphZant -> GraphZant ) -- implement slice node( first check Merge MirkoGraphZant -> GraphZant ) -- implement squeeze node( first check Merge MirkoGraphZant -> GraphZant ) +- implement mean node( first check Merge GraphZantTransition -> GraphZant ) +- implement slice node( first check Merge GraphZantTransition -> GraphZant ) +- implement squeeze node( first check Merge GraphZantTransition -> GraphZant ) 
IR_CODEGEN - dynamicAllocation -- It is related to the import of "codegen" inside *** GENERATED LIBRARY TESTS *** in build.zig - - EASY: check that the passed that the parameters passed to IR_codegen.generate_from_*() are correct, string format, no wite spaces, generated_path ending with "/", ecc... - aggregate build steps where possible, too many . Write documentation about it, describin what does each build command @@ -23,8 +21,6 @@ IR_CODEGEN .FAULT => unreachable, - use TensorZant.getNameSanitized() and not utils.getSanitizedName() -- remove "const utils = @import("codegen").utils;" from all op_*.zig and fix, -- do not codegenerate "const codegen = @import("codegen");" and use instead "const IR_codegen = @import("IR_codegen");" and fix by conseguence OTHER : - check missing op between: python generator, available_operations.txt, IR_graph operators @@ -32,3 +28,5 @@ OTHER : - Gathers leaks - Gelu loops when called "zig build test-codegen" - add comments on available_operation.txt, something like ?op_name is ignored +- softmax is bugged and shape dependent, fix it. 
To see the bug launch `zig build IR_codegen -Dlog -Dmodel="debug_model" ` then ` zig build test-generated-lib -Dlog -Dmodel="debug_model"` +- diff --git a/tests/IR_graph/graph_init.zig b/tests/IR_graph/graph_init.zig index a447ba5f..2912ab40 100644 --- a/tests/IR_graph/graph_init.zig +++ b/tests/IR_graph/graph_init.zig @@ -17,7 +17,7 @@ test "parsing mnist-8 graphZant" { //model.print(); - var graphZant: IR_zant.GraphZant = try IR_zant.init(&model); + var graphZant: IR_zant.IR_graph.GraphZant = try IR_zant.IR_graph.init(&model); defer graphZant.deinit(); //USELESS SHIT FOR DEBUG diff --git a/tests/IR_graph/linearization.zig b/tests/IR_graph/linearization.zig index 52272d7f..34e5dfc3 100644 --- a/tests/IR_graph/linearization.zig +++ b/tests/IR_graph/linearization.zig @@ -26,7 +26,7 @@ test "linearizing mnist-8 " { //model.print(); - var graphZant: IR_zant.GraphZant = try IR_zant.init(&model); + var graphZant: IR_zant.IR_graph.GraphZant = try IR_zant.IR_graph.init(&model); defer graphZant.deinit(); const linearizedGraph = try graphZant.linearize(allocator); diff --git a/tests/IR_graph/tensorZant.zig b/tests/IR_graph/tensorZant.zig index b1a27fd2..689eba9d 100644 --- a/tests/IR_graph/tensorZant.zig +++ b/tests/IR_graph/tensorZant.zig @@ -4,7 +4,7 @@ const std = @import("std"); const zant = @import("zant"); const IR_zant = @import("IR_zant"); -const TensorZant = IR_zant.TensorZant; +const TensorZant = IR_zant.IR_graph.TensorZant; const allocator = zant.utils.allocator.allocator; const onnx = zant.onnx; @@ -12,7 +12,7 @@ const TensorProto = onnx.TensorProto; const Tensor = zant.core.tensor.Tensor; -const protoTensor2AnyTensor = IR_zant.utils.protoTensor2AnyTensor; +const protoTensor2AnyTensor = IR_zant.IR_graph.utils.protoTensor2AnyTensor; // Test for raw data not available test "protoTensor2AnyTensor: float32 parsing" { diff --git a/tests/IR_graph/utils.zig b/tests/IR_graph/utils.zig index 31c47627..bd62919c 100644 --- a/tests/IR_graph/utils.zig +++ 
b/tests/IR_graph/utils.zig @@ -18,15 +18,15 @@ test "getInitializers() mnist-8 and TensorZant.getters" { //model.print(); - var graphZant: IR_zant.GraphZant = try IR_zant.init(&model); + var graphZant: IR_zant.IR_graph.GraphZant = try IR_zant.IR_graph.init(&model); defer graphZant.deinit(); const linearizedGraph = try graphZant.linearize(allocator); defer linearizedGraph.deinit(); - var tensorMap = IR_zant.tensorZant_lib.tensorMap; + var tensorMap = IR_zant.IR_graph.tensorZant_lib.tensorMap; - const initializers = try IR_zant.utils.getInitializers(&tensorMap); + const initializers = try IR_zant.IR_graph.utils.getInitializers(&tensorMap); for (initializers) |*init| { std.debug.print("\nname: {s} ", .{init.name});