From 9c398c01214496caa9c724ea5d146896f0a3f27e Mon Sep 17 00:00:00 2001
From: Tian Jin <tjingrant@gmail.com>
Date: Mon, 24 Feb 2020 23:46:48 +0800
Subject: [PATCH 01/10] Support Optional Inputs (#94)

* 1. Combine variadicIn/Out with expectedNumOperands/Results to simplify import function arguments.
2. Generic improvements to code readability in gen_doc.py.

* Update ONNX Dialect doc.

* Remove redundant code in ImportNode.

* Prettify op_build_table.inc.

* 1. Remove irrelevant code in gen_doc.py

* Refactor code to be more readable.

* Further refactoring for readability improvements.

* Allow gemm to have an optional operand (bias term), and include an example of a declarative optimization pattern targeting gemm with the bias term omitted.

* Make shape inference/lowering of gemm op compatible with optional operand declaration.

* Apply canonicalization again after lowering from onnx -> std dialects.

* Make hasBias compatible with the situation of GemmNoBias op.

* Update doc.

* Add a canonicalization test.

* Remove special handler for importing Gemm op, as it's redundant now.
---
 doc/Dialects/onnx.md                          |  224 +-
 doc/gen_doc.py                                | 1041 ++--
 src/builder/frontend_dialect_transformer.cpp  |  137 +-
 src/builder/op_build_table.inc                |  633 ++-
 .../rewrite_patterns/math/gemm.inc            |   42 +-
 src/dialect/onnx/onnx_ops.cpp                 |  143 +-
 src/dialect/onnx/onnxop.inc                   | 4989 ++++++++---------
 src/main.cpp                                  |    4 +
 src/pass/onnx_combine.cpp                     |    5 +
 src/pass/onnx_combine.td                      |    6 +
 test/mlir/onnx/onnx_canonicalization.mlir     |   11 +
 11 files changed, 3431 insertions(+), 3804 deletions(-)

diff --git a/doc/Dialects/onnx.md b/doc/Dialects/onnx.md
index d1da4d6..95746f6 100644
--- a/doc/Dialects/onnx.md
+++ b/doc/Dialects/onnx.md
@@ -327,10 +327,10 @@ ONNX BatchNormalization operation
 #### Results:
 
 1. `Y`: memref of any type values or tensor of any type values
-1. `out_mean`: memref of any type values or tensor of any type values
-1. `out_var`: memref of any type values or tensor of any type values
-1. `saved_mean`: memref of any type values or tensor of any type values
-1. `saved_var`: memref of any type values or tensor of any type values
+1. `out_mean`: memref of any type values or tensor of any type values or none type
+1. `out_var`: memref of any type values or tensor of any type values or none type
+1. `saved_mean`: memref of any type values or tensor of any type values or none type
+1. `saved_var`: memref of any type values or tensor of any type values or none type
 
 ### onnx.BatchNormalizationTestMode (ONNXBatchNormalizationTestModeOp)
 ONNX BatchNormalization operation in test mode
@@ -375,12 +375,12 @@ ONNX BitShift operation
 
 
 "Bitwise shift operator performs element-wise operation. For each input element, if the"
-" attribute "direction" is "RIGHT", this operator moves its binary representation toward"
-" the right side so that the input value is effectively decreased. If the attribute "direction""
-" is "LEFT", bits of binary representation moves toward the left side, which results the"
+" attribute \"direction\" is \"RIGHT\", this operator moves its binary representation toward"
+" the right side so that the input value is effectively decreased. If the attribute \"direction\""
+" is \"LEFT\", bits of binary representation moves toward the left side, which results the"
 " increase of its actual value. The input X is the tensor to be shifted and another input"
-" Y specifies the amounts of shifting. For example, if "direction" is "Right", X is [1, 4],"
-" and S is [1, 1], the corresponding output Z would be [0, 2]. If "direction" is "LEFT" with"
+" Y specifies the amounts of shifting. For example, if \"direction\" is \"Right\", X is [1, 4],"
+" and S is [1, 1], the corresponding output Z would be [0, 2]. If \"direction\" is \"LEFT\" with"
 " X=[1, 2] and S=[1, 2], the corresponding output Y would be [2, 8]."
 " "
 " Because this operator supports Numpy-style broadcasting, X's and Y's shapes are"
@@ -413,15 +413,15 @@ ONNX Cast operation
 "the converted type. The 'to' argument must be one of the data types specified"
 "in the 'DataType' enum field in the TensorProto message."
 ""
-"Casting from string tensor in plain (e.g., "3.14" and "1000") and scientific numeric representations"
-"(e.g., "1e-5" and "1E8") to float types is supported. For example, converting string "100.5" to an integer may"
+"Casting from string tensor in plain (e.g., \"3.14\" and \"1000\") and scientific numeric representations"
+"(e.g., \"1e-5\" and \"1E8\") to float types is supported. For example, converting string \"100.5\" to an integer may"
 "result 100. There are some string literals reserved for special floating-point values;"
-""+INF" (and "INF"), "-INF", and "NaN" are positive infinity, negative infinity, and not-a-number, respectively."
-"Any string which can exactly match "+INF" in a case-insensitive way would be mapped to positive infinite. Similarly,"
-"this case-insensitive rule is applied to "INF" and "NaN". When casting from numeric tensors"
-"to string tensors, plain floating-point representation (such as "314.15926") would be used. "
-"Converting non-numerical-literal string such as "Hello World!" is an undefined behavior. Cases "
-"of converting string representing floating-point arithmetic value, such as "2.718", to INT is an undefined behavior."
+"\"+INF\" (and \"INF\"), \"-INF\", and \"NaN\" are positive infinity, negative infinity, and not-a-number, respectively."
+"Any string which can exactly match \"+INF\" in a case-insensitive way would be mapped to positive infinite. Similarly,"
+"this case-insensitive rule is applied to \"INF\" and \"NaN\". When casting from numeric tensors"
+"to string tensors, plain floating-point representation (such as \"314.15926\") would be used. "
+"Converting non-numerical-literal string such as \"Hello World!\" is an undefined behavior. Cases "
+"of converting string representing floating-point arithmetic value, such as \"2.718\", to INT is an undefined behavior."
 ""
 "Conversion from a numerical type to any numerical type is always allowed."
 "User must be aware of precision loss and value change caused by range difference between two types."
@@ -476,8 +476,8 @@ ONNX Clip operation
 #### Operands:
 
 1. `input`: memref of any type values or tensor of any type values
-1. `min`: memref of any type values or tensor of any type values
-1. `max`: memref of any type values or tensor of any type values
+1. `min`: memref of any type values or tensor of any type values or none type
+1. `max`: memref of any type values or tensor of any type values or none type
 
 #### Attributes:
 
@@ -618,8 +618,8 @@ ONNX ConvInteger operation
 
 1. `x`: memref of any type values or tensor of any type values
 1. `w`: memref of any type values or tensor of any type values
-1. `x_zero_point`: memref of any type values or tensor of any type values
-1. `w_zero_point`: memref of any type values or tensor of any type values
+1. `x_zero_point`: memref of any type values or tensor of any type values or none type
+1. `w_zero_point`: memref of any type values or tensor of any type values or none type
 
 #### Attributes:
 
@@ -678,7 +678,7 @@ ONNX Conv operation
 
 1. `X`: memref of any type values or tensor of any type values
 1. `W`: memref of any type values or tensor of any type values
-1. `B`: memref of any type values or tensor of any type values
+1. `B`: memref of any type values or tensor of any type values or none type
 
 #### Attributes:
 
@@ -720,7 +720,7 @@ ONNX ConvTranspose operation
 
 1. `X`: memref of any type values or tensor of any type values
 1. `W`: memref of any type values or tensor of any type values
-1. `B`: memref of any type values or tensor of any type values
+1. `B`: memref of any type values or tensor of any type values or none type
 
 #### Attributes:
 
@@ -884,7 +884,7 @@ ONNX DequantizeLinear operation
 
 1. `x`: memref of any type values or tensor of any type values
 1. `x_scale`: memref of any type values or tensor of any type values
-1. `x_zero_point`: memref of any type values or tensor of any type values
+1. `x_zero_point`: memref of any type values or tensor of any type values or none type
 
 #### Attributes:
 
@@ -964,7 +964,7 @@ ONNX Dropout operation
 #### Results:
 
 1. `output`: memref of any type values or tensor of any type values
-1. `mask`: memref of any type values or tensor of any type values
+1. `mask`: memref of any type values or tensor of any type values or none type
 
 ### onnx.DynamicQuantizeLinear (ONNXDynamicQuantizeLinearOp)
 ONNX DynamicQuantizeLinear operation
@@ -1297,9 +1297,9 @@ ONNX GRU operation
 1. `X`: memref of any type values or tensor of any type values
 1. `W`: memref of any type values or tensor of any type values
 1. `R`: memref of any type values or tensor of any type values
-1. `B`: memref of any type values or tensor of any type values
-1. `sequence_lens`: memref of any type values or tensor of any type values
-1. `initial_h`: memref of any type values or tensor of any type values
+1. `B`: memref of any type values or tensor of any type values or none type
+1. `sequence_lens`: memref of any type values or tensor of any type values or none type
+1. `initial_h`: memref of any type values or tensor of any type values or none type
 
 #### Attributes:
 
@@ -1315,8 +1315,8 @@ ONNX GRU operation
 
 #### Results:
 
-1. `Y`: memref of any type values or tensor of any type values
-1. `Y_h`: memref of any type values or tensor of any type values
+1. `Y`: memref of any type values or tensor of any type values or none type
+1. `Y_h`: memref of any type values or tensor of any type values or none type
 
 ### onnx.GatherElements (ONNXGatherElementsOp)
 ONNX GatherElements operation
@@ -1609,7 +1609,7 @@ ONNX Gemm operation
 
 1. `A`: memref of any type values or tensor of any type values
 1. `B`: memref of any type values or tensor of any type values
-1. `C`: memref of any type values or tensor of any type values
+1. `C`: memref of any type values or tensor of any type values or none type
 
 #### Attributes:
 
@@ -2013,11 +2013,11 @@ ONNX LSTM operation
 1. `X`: memref of any type values or tensor of any type values
 1. `W`: memref of any type values or tensor of any type values
 1. `R`: memref of any type values or tensor of any type values
-1. `B`: memref of any type values or tensor of any type values
-1. `sequence_lens`: memref of any type values or tensor of any type values
-1. `initial_h`: memref of any type values or tensor of any type values
-1. `initial_c`: memref of any type values or tensor of any type values
-1. `P`: memref of any type values or tensor of any type values
+1. `B`: memref of any type values or tensor of any type values or none type
+1. `sequence_lens`: memref of any type values or tensor of any type values or none type
+1. `initial_h`: memref of any type values or tensor of any type values or none type
+1. `initial_c`: memref of any type values or tensor of any type values or none type
+1. `P`: memref of any type values or tensor of any type values or none type
 
 #### Attributes:
 
@@ -2033,9 +2033,9 @@ ONNX LSTM operation
 
 #### Results:
 
-1. `Y`: memref of any type values or tensor of any type values
-1. `Y_h`: memref of any type values or tensor of any type values
-1. `Y_c`: memref of any type values or tensor of any type values
+1. `Y`: memref of any type values or tensor of any type values or none type
+1. `Y_h`: memref of any type values or tensor of any type values or none type
+1. `Y_c`: memref of any type values or tensor of any type values or none type
 
 ### onnx.LeakyRelu (ONNXLeakyReluOp)
 ONNX LeakyRelu operation
@@ -2160,24 +2160,24 @@ ONNX Loop operation
 ""
 "    Operator inputs defined as (max_trip_count, condition_var)."
 ""
-"    input ("", ""):"
+"    input (\"\", \"\"):"
 "        for (int i=0; ; ++i) {"
 "          cond = ... // Note this value is ignored, but is required in the body"
 "        }"
 ""
-"    input ("", cond) // Note this is analogous to a while loop"
+"    input (\"\", cond) // Note this is analogous to a while loop"
 "        bool cond = ...;"
 "        for (int i=0; cond; ++i) {"
 "          cond = ...;"
 "        }"
 ""
-"    input ("", 1) // Note this is analogous to a do-while loop"
+"    input (\"\", 1) // Note this is analogous to a do-while loop"
 "        bool cond = true"
 "        for (int i=0; cond; ++i) {"
 "          cond = ...;"
 "        }"
 ""
-"    input (trip_count, "") // Note this is analogous to a for loop"
+"    input (trip_count, \"\") // Note this is analogous to a for loop"
 "        int trip_count = ..."
 "        for (int i=0; i < trip_count; ++i) {"
 "          cond = ...; // ignored"
@@ -2203,15 +2203,15 @@ ONNX Loop operation
 "    }"
 ""
 "    graph body-net ("
-"      %i[INT32, scalar]           // iteration number"
-"      %keepgoing_in[BOOL, scalar] // incoming loop-termination-condition; not used"
-"      %b_in[INT32, scalar]        // incoming value of loop-carried-dependency b"
+"      %i[INT32, scalar]"
+"      %keepgoing[BOOL, scalar]"
+"      %b[INT32, scalar]"
 "    ) {"
-"      %my_local = Add(%a, %b_in)"
-"      %b_out = Sub(%a, %b_in) // outgoing value of loop-carried-dependency b"
-"      %keepgoing_out = Greater(%my_local, %b_out) // outgoing loop-termination-condition"
-"      %user_defined_val = Add(%b_in, %b_in) // scan-output value to be accumulated"
-"      return %keepgoing_out, %b_out, %user_defined_val"
+"      %my_local = Add(%a, %b)"
+"      %b_out = Sub(%a, %b)"
+"      %keepgoing_out = Greater(%my_local, %b_out)"
+"      %user_defined_vals = Add(%b, %b)"
+"      return %keepgoing_out, %b_out, %user_defined_vals"
 "    }"
 ""
 "*Sample equivalent C code*"
@@ -2226,51 +2226,31 @@ ONNX Loop operation
 "      const int max_trip_count = 10; // Analogous to input M"
 "      int user_defined_vals[]; // Imagine this is resizable"
 "      /* End implicitly-defined code */"
-"      /* initialize loop-carried variables and scan-output variables */"
-"      bool keepgoing_out = keepgoing"
-"      int b_out = b"
-""
-"      for (int i=0; i < max_trip_count && keepgoing_out; ++i) {"
-"        /* Implicitly-defined code: bind actual parameter values"
-"           to formal parameter variables of loop-body */"
-"        bool keepgoing_in = keepgoing_out; "
-"        bool b_in = b_out;"
-""
+"      for (int i=0; i < max_trip_count && keepgoing; ++i) {"
 "        /* User-defined code (loop body) */"
-"        int my_local = a + b_in; // Reading value "a" from the enclosing scope is fine"
-"        b_out = a - b_in;"
-"        keepgoing_out = my_local > b_out; "
-"        user_defined_val = b_in + b_in; // b_in and b_out are different variables"
+"        int my_local = a + b; // Reading values in the enclosing scope is fine"
+"        b = a - b; // writes fine if we specify b as a loop-carried dependency"
+"        keepgoing = my_local > b; // keepgoing is a loop-carried dependency"
+"        user_defined_vals[i] = b + b;"
 "        /* End user-defined code */"
-""
-"        /* Implicitly defined-code */"
-"        user_defined_vals[i] = user_defined_val // accumulate scan-output values"
 "      }"
-"      // int t = my_local; // Can't do this. my_local is not accessible here."
+"      // my_local = 123; // Can't do this. my_local was defined in the the body"
 ""
-"      // The values below are bound to the output variables of the loop and therefore accessible"
-"      // b_out; user_defined_vals; keepgoing_out;"
+"      // These below values are live-out from the loop and therefore accessible"
+"      b_out; user_defined_vals; keepgoing_out;"
 "    }"
 ""
 "There are several things of note in this code snippet:"
 ""
-"1) Values from the enclosing scope (i.e. variable "a" here) are in scope and can"
+"1) Values from the enclosing scope (i.e. variable a here) are in scope and can"
 "   be referenced in the inputs of the loop."
-"2) Any values computed in the loop body that needs to be used in a subsequent"
-"   iteration or after the loop are modelled using a pair of variables in the loop-body,"
-"   consisting of an input variable (eg., b_in) and an output variable (eg., b_out)."
-"   These are referred to as loop-carried dependences. The loop operation node"
-"   supplies the input value of the input variable for the first iteration, and"
-"   returns the output value of the output variable produced by the final"
-"   iteration."
-"3) Scan_output variables are used to implicitly concatenate values computed across"
-"   all the iterations. In the above example, the value of user_defined_val computed"
-"   over all iterations are concatenated and returned as the value of user_defined_vals"
-"   after the loop."
-"4) Values created in the body cannot be accessed in the enclosing scope,"
-"   except using the mechanism described above."
+"2) Any variables which you wish to make available in the enclosing scope (i.e."
+"   the variables b and keepgoing) must be declared as either loop-carried"
+"   dependencies (both at the op inputs and output and at the body net input and"
+"   output) or scan_outputs."
+"3) Values created in the body cannot be accessed in the enclosing scope."
 ""
-"Note that the semantics of this op support "diagonal" or "wavefront" execution."
+"Note that the semantics of this op support \"diagonal\" or \"wavefront\" execution."
 "(See Step 3 here for an example:"
 "https://devblogs.nvidia.com/optimizing-recurrent-neural-networks-cudnn-5/)."
 "Frontends should emit multi-layer RNNs as a series of While operators (with"
@@ -2280,8 +2260,8 @@ ONNX Loop operation
 
 #### Operands:
 
-1. `M`: memref of any type values or tensor of any type values
-1. `cond`: memref of any type values or tensor of any type values
+1. `M`: memref of any type values or tensor of any type values or none type
+1. `cond`: memref of any type values or tensor of any type values or none type
 1. `v_initial`: memref of any type values or tensor of any type values
 
 #### Attributes:
@@ -2360,8 +2340,8 @@ ONNX MatMulInteger operation
 
 1. `A`: memref of any type values or tensor of any type values
 1. `B`: memref of any type values or tensor of any type values
-1. `a_zero_point`: memref of any type values or tensor of any type values
-1. `b_zero_point`: memref of any type values or tensor of any type values
+1. `a_zero_point`: memref of any type values or tensor of any type values or none type
+1. `b_zero_point`: memref of any type values or tensor of any type values or none type
 
 #### Attributes:
 
@@ -2444,7 +2424,7 @@ ONNX MaxPool operation
 " ```"
 " pad_shape[i] = (output_spatial_shape[i] - 1) * strides_spatial_shape[i] + ((kernel_spatial_shape[i] - 1) * dilations[i] + 1) - input_spatial_shape[i]"
 " ```"
-" The output of each pooling window is maximum number of elements exclude pad. "
+" The output of each pooling window is maximum number of elements exclude pad."
 " "
 
 #### Operands:
@@ -2466,7 +2446,7 @@ ONNX MaxPool operation
 #### Results:
 
 1. `Y`: memref of any type values or tensor of any type values
-1. `Indices`: memref of any type values or tensor of any type values
+1. `Indices`: memref of any type values or tensor of any type values or none type
 
 ### onnx.MaxPoolSingleOut (ONNXMaxPoolSingleOutOp)
 ONNX MaxPool operation with a single output.
@@ -2552,7 +2532,7 @@ ONNX MaxUnpool operation
 
 1. `X`: memref of any type values or tensor of any type values
 1. `I`: memref of any type values or tensor of any type values
-1. `output_shape`: memref of any type values or tensor of any type values
+1. `output_shape`: memref of any type values or tensor of any type values or none type
 
 #### Attributes:
 
@@ -2752,9 +2732,9 @@ ONNX NonMaxSuppression operation
 
 1. `boxes`: memref of any type values or tensor of any type values
 1. `scores`: memref of any type values or tensor of any type values
-1. `max_output_boxes_per_class`: memref of any type values or tensor of any type values
-1. `iou_threshold`: memref of any type values or tensor of any type values
-1. `score_threshold`: memref of any type values or tensor of any type values
+1. `max_output_boxes_per_class`: memref of any type values or tensor of any type values or none type
+1. `iou_threshold`: memref of any type values or tensor of any type values or none type
+1. `score_threshold`: memref of any type values or tensor of any type values or none type
 
 #### Attributes:
 
@@ -3041,7 +3021,7 @@ ONNX Pad operation
 
 1. `data`: memref of any type values or tensor of any type values
 1. `pads`: memref of any type values or tensor of any type values
-1. `constant_value`: memref of any type values or tensor of any type values
+1. `constant_value`: memref of any type values or tensor of any type values or none type
 
 #### Attributes:
 
@@ -3098,7 +3078,7 @@ ONNX QLinearConv operation
 1. `w_zero_point`: memref of any type values or tensor of any type values
 1. `y_scale`: memref of any type values or tensor of any type values
 1. `y_zero_point`: memref of any type values or tensor of any type values
-1. `B`: memref of any type values or tensor of any type values
+1. `B`: memref of any type values or tensor of any type values or none type
 
 #### Attributes:
 
@@ -3162,7 +3142,7 @@ ONNX QuantizeLinear operation
 
 1. `x`: memref of any type values or tensor of any type values
 1. `y_scale`: memref of any type values or tensor of any type values
-1. `y_zero_point`: memref of any type values or tensor of any type values
+1. `y_zero_point`: memref of any type values or tensor of any type values or none type
 
 #### Attributes:
 
@@ -3244,9 +3224,9 @@ ONNX RNN operation
 1. `X`: memref of any type values or tensor of any type values
 1. `W`: memref of any type values or tensor of any type values
 1. `R`: memref of any type values or tensor of any type values
-1. `B`: memref of any type values or tensor of any type values
-1. `sequence_lens`: memref of any type values or tensor of any type values
-1. `initial_h`: memref of any type values or tensor of any type values
+1. `B`: memref of any type values or tensor of any type values or none type
+1. `sequence_lens`: memref of any type values or tensor of any type values or none type
+1. `initial_h`: memref of any type values or tensor of any type values or none type
 
 #### Attributes:
 
@@ -3261,8 +3241,8 @@ ONNX RNN operation
 
 #### Results:
 
-1. `Y`: memref of any type values or tensor of any type values
-1. `Y_h`: memref of any type values or tensor of any type values
+1. `Y`: memref of any type values or tensor of any type values or none type
+1. `Y_h`: memref of any type values or tensor of any type values or none type
 
 ### onnx.RandomNormalLike (ONNXRandomNormalLikeOp)
 ONNX RandomNormalLike operation
@@ -3787,14 +3767,14 @@ ONNX Resize operation
 
 "Resize the input tensor. In general, it calculates every value in the output tensor as a weighted average of neighborhood (a.k.a. sampling locations) in the input tensor."
 "Each dimension value of the output tensor is:"
-"  output_dimension = floor(input_dimension * (roi_end - roi_start) * scale) if input \"sizes\" is not specified."
+"  output_dimension = floor(input_dimension * (roi_end - roi_start) * scale) if input \\"sizes\\" is not specified."
 
 #### Operands:
 
 1. `X`: memref of any type values or tensor of any type values
 1. `roi`: memref of any type values or tensor of any type values
 1. `scales`: memref of any type values or tensor of any type values
-1. `sizes`: memref of any type values or tensor of any type values
+1. `sizes`: memref of any type values or tensor of any type values or none type
 
 #### Attributes:
 
@@ -4412,7 +4392,7 @@ ONNX SequenceErase operation
 #### Operands:
 
 1. `input_sequence`: memref of any type values or tensor of any type values
-1. `position`: memref of any type values or tensor of any type values
+1. `position`: memref of any type values or tensor of any type values or none type
 
 #### Attributes:
 
@@ -4437,7 +4417,7 @@ ONNX SequenceInsert operation
 
 1. `input_sequence`: memref of any type values or tensor of any type values
 1. `tensor`: memref of any type values or tensor of any type values
-1. `position`: memref of any type values or tensor of any type values
+1. `position`: memref of any type values or tensor of any type values or none type
 
 #### Attributes:
 
@@ -4654,8 +4634,8 @@ ONNX Slice operation
 1. `data`: memref of any type values or tensor of any type values
 1. `starts`: memref of any type values or tensor of any type values
 1. `ends`: memref of any type values or tensor of any type values
-1. `axes`: memref of any type values or tensor of any type values
-1. `steps`: memref of any type values or tensor of any type values
+1. `axes`: memref of any type values or tensor of any type values or none type
+1. `steps`: memref of any type values or tensor of any type values or none type
 
 #### Attributes:
 
@@ -4808,7 +4788,7 @@ ONNX SplitToSequence operation
 #### Operands:
 
 1. `input`: memref of any type values or tensor of any type values
-1. `split`: memref of any type values or tensor of any type values
+1. `split`: memref of any type values or tensor of any type values or none type
 
 #### Attributes:
 
@@ -4876,9 +4856,9 @@ ONNX StringNormalizer operation
 "StringNormalization performs string operations for basic cleaning."
 "This operator has only one input (denoted by X) and only one output"
 "(denoted by Y). This operator first examines the elements in the X,"
-"and removes elements specified in "stopwords" attribute."
+"and removes elements specified in \"stopwords\" attribute."
 "After removing stop words, the intermediate result can be further lowercased,"
-"uppercased, or just returned depending the "case_change_action" attribute."
+"uppercased, or just returned depending the \"case_change_action\" attribute."
 "This operator only accepts [C]- and [1, C]-tensor."
 "If all elements in X are dropped, the output will be the empty value of string tensor with shape [1]"
 "if input shape is [C] and shape [1, 1] if input shape is [1, C]."
@@ -5008,8 +4988,8 @@ ONNX TfIdfVectorizer operation
 "respectively. An n-gram which cannot be found in pool_strings/pool_int64s should be ignored and has no effect on the output."
 "Note that we may consider all skips up to S when generating the n-grams."
 ""
-"The examples used above are true if mode is "TF". If mode is "IDF", all the counts larger than 1 would be truncated to 1 and"
-"the i-th element in weights would be used to scale (by multiplication) the count of the i-th n-gram in pool. If mode is "TFIDF","
+"The examples used above are true if mode is \"TF\". If mode is \"IDF\", all the counts larger than 1 would be truncated to 1 and"
+"the i-th element in weights would be used to scale (by multiplication) the count of the i-th n-gram in pool. If mode is \"TFIDF\","
 "this operator first computes the counts of all n-grams and then scale them by the associated values in the weights attribute."
 ""
 "Only one of pool_strings and pool_int64s can be set. If pool_int64s is set, the input should be an integer tensor."
@@ -5097,9 +5077,9 @@ ONNX TopK operation
 "   contains the indices of the top k elements (original indices from the input"
 "   tensor)."
 ""
-"If "largest" is 1 (the default value) then the k largest elements are returned."
-"If "sorted" is 1 (the default value) then the resulting k elements will be sorted."
-"If "sorted" is 0, order of returned 'Values' and 'Indices' are undefined."
+"If \"largest\" is 1 (the default value) then the k largest elements are returned."
+"If \"sorted\" is 1 (the default value) then the resulting k elements will be sorted."
+"If \"sorted\" is 0, order of returned 'Values' and 'Indices' are undefined."
 ""
 "Given two equivalent values, this operator uses the indices along the axis as"
 " a tiebreaker. That is, the element with the lower index will appear first."
@@ -5158,7 +5138,7 @@ ONNX Unique operation
 "This operator returns the unique values or sliced unique subtensors of the input tensor and three optional outputs. "
 "The first output tensor 'Y' contains all unique values or subtensors of the input. "
 "The second optional output tensor 'indices' contains indices of 'Y' elements' first occurance in 'X'.. "
-"The third optional output tensor 'inverse_indices' contains, for elements of 'X', its corresponding indices in 'Y'. ". "
+"The third optional output tensor 'inverse_indices' contains, for elements of 'X', its corresponding indices in 'Y'. \". "
 "The fourth optional output tensor 'counts' contains the count of each element of 'Y' in the input. "
 ""
 "Outputs are either sorted in ascending order or optionally in the order of the first occurrence of the values in the input. "
@@ -5242,9 +5222,9 @@ ONNX Unique operation
 #### Results:
 
 1. `Y`: memref of any type values or tensor of any type values
-1. `indices`: memref of any type values or tensor of any type values
-1. `inverse_indices`: memref of any type values or tensor of any type values
-1. `counts`: memref of any type values or tensor of any type values
+1. `indices`: memref of any type values or tensor of any type values or none type
+1. `inverse_indices`: memref of any type values or tensor of any type values or none type
+1. `counts`: memref of any type values or tensor of any type values or none type
 
 ### onnx.Unsqueeze (ONNXUnsqueezeOp)
 ONNX Unsqueeze operation
diff --git a/doc/gen_doc.py b/doc/gen_doc.py
index d42eb27..1c593a5 100644
--- a/doc/gen_doc.py
+++ b/doc/gen_doc.py
@@ -4,10 +4,11 @@ from __future__ import division
 from __future__ import print_function
 from __future__ import unicode_literals
 
-from collections import defaultdict
+from collections import defaultdict, OrderedDict
 import io
 import os
 import sys
+import datetime
 
 import numpy as np  # type: ignore
 
@@ -17,59 +18,53 @@ from onnx.backend.test.case import collect_snippets
 from onnx.backend.sample.ops import collect_sample_implementations
 from typing import Any, Text, Sequence, Dict, List, Type, Set, Tuple
 
-
-#controls on ONNF code gen
-#specify attr default value 
+# Manual specification of attribute defaults.
 special_attr_defaults = dict([
-#        ("AveragePool "+"kernel_shape", ('ints', '{}')),
-#        ("MaxPool "+"kernel_shape", ('ints', '{}')),
-#        ("Cast "+"to", ('int', '0')),
-#        ("Concat "+"axis", ('int', '0')),
-#        ("Conv "+"group", ('int', '1')),
-#        ("Unsqueeze "+"axes", ('ints', '{}')),
-#        ("RNN "+"activation_alpha", ('floats', '{}')),
-#        ("RNN "+"activation_beta", ('floats', '{}')),
-        ])
+    # ("AveragePool.kernel_shape", ('ints', '{}')),
+    # ("MaxPool.kernel_shape", ('ints', '{}')),
+    # ("Cast.to", ('int', '0')),
+    # ("Concat.axis", ('int', '0')),
+    # ("Conv.group", ('int', '1')),
+    # ("Unsqueeze.axes", ('ints', '{}')),
+    # ("RNN.activation_alpha", ('floats', '{}')),
+    # ("RNN.activation_beta", ('floats', '{}')),
+])
 
-#specify the function name in src/builder/frontend_dialect_transformer.cpp
-#the reason for Conv and MaPool is to handled optional arguments
+# Special operation importing handlers.
 special_op_handler = dict([
-        ("Conv", "ImportNodeConv"),
-        ("MaxPool", "ImportNodeMaxPool"),
-        ("BatchNormalization", "ImportNodeBatchNormalization"),
-        ("Gemm", "ImportNodeGemm"),
-        ("Pad", "ImportNodePad"),
-        #("Transpose", "ImportNodeTranspose")
-        ])
+    ("Conv", "ImportNodeConv"),
+    ("MaxPool", "ImportNodeMaxPool"),
+    ("BatchNormalization", "ImportNodeBatchNormalization"),
+    ("Pad", "ImportNodePad"),
+    #("Transpose", "ImportNodeTranspose")
+])
 
-#add an Op in this list if ShapeInterference is defined for this Op
-ShapeInferenceList=['Exp', 'Tanh', 'Sinh', 'Cosh', 'Sigmoid', 'Relu',
-                   'Add', 'Mul', 'Div', 'Sub', 'And', 'Or', 'Xor',
-                   'Sum', 'Max', 'Min', 'MatMul', 'Gemm', 'LeakyRelu',
-                   'Elu', 'Selu', 'HardSigmoid', 'Reshape', 'Reciprocal',
-                   'Identity', 'Cos', 'Log', 'Transpose', 'Softmax',
-                   'ReduceMax', 'ReduceMin', 'ReduceProd', 'ReduceSum',
-                   'Softplus', 'Softsign', 'Sqrt', 'Unsqueeze', 'Sign']
+# Operations supporting shape inference.
+OpsWithShapeInference = [
+    'Exp', 'Tanh', 'Sinh', 'Cosh', 'Sigmoid', 'Relu', 'Add', 'Mul', 'Div',
+    'Sub', 'And', 'Or', 'Xor', 'Sum', 'Max', 'Min', 'MatMul', 'Gemm',
+    'LeakyRelu', 'Elu', 'Selu', 'HardSigmoid', 'Reshape', 'Reciprocal',
+    'Identity', 'Cos', 'Log', 'Transpose', 'Softmax', 'ReduceMax', 'ReduceMin',
+    'ReduceProd', 'ReduceSum', 'Softplus', 'Softsign', 'Sqrt', 'Unsqueeze',
+    'Sign'
+]
 
-CanonicalList=['Add', 'Identity', 'ReduceL1', 'ReduceL2', 'ReduceLogSum',
-               'ReduceLogSumExp', 'ReduceSumSquare']
+# Operations supporting canonicalization.
+OpsWithCanonicalizer = [
+    'Add', 'Identity', 'ReduceL1', 'ReduceL2', 'ReduceLogSum',
+    'ReduceLogSumExp', 'ReduceSumSquare', 'Gemm'
+]
 
-#add an Op in this list if the Op needs result type deduction which is required
-#when writing declarative rewriting rules. Deduced type is always
-#an UnrankedTensorType whose element type is the same as the first operand's
-#element type.
-#currenlty, there are only two build methods generated:
-# - one with operands and attributes having a separate parameter, and
-# - one with operands and attributes having aggregated parameters.
+# Add an Op in this list if the Op needs result type deduction which is required
+# when writing declarative rewriting rules. Deduced type is always
+# an UnrankedTensorType whose element type is the same as the first operand's
+# element type.
+#
+# Currently, there are only two build methods generated:
+#  - one with operands and attributes having a separate parameter, and
+#  - one with operands and attributes having aggregated parameters.
 custom_builder_ops_list = ['Abs', 'Mul', 'Exp', 'ReduceSum', 'ReduceSumSquare']
 
-manual_code_in_op_def = dict([
-      ('DummyExample', '  let extraClassDeclaration = [{ \n'+
-                    '    static StringRef getPermAttrName() { return "perm"; }\n'+
-                    '    }];\n')
-      ])
-
-
 SNIPPETS = collect_snippets()
 SAMPLE_IMPLEMENTATIONS = collect_sample_implementations()
 ONNX_ML = not bool(os.getenv('ONNX_ML') == '0')
@@ -77,19 +72,12 @@ ONNX_ML = not bool(os.getenv('ONNX_ML') == '0')
 ONNX_ML = False
 print("ONNX_ML", ONNX_ML)
 
-
 if ONNX_ML:
     ext = '-ml.md'
 else:
     ext = '.md'
 
 
-def display_number(v):  # type: (int) -> Text
-    if defs.OpSchema.is_infinite(v):
-        return '&#8734;'
-    return Text(v)
-
-
 def should_render_domain(domain):  # type: (Text) -> bool
     if domain == ONNX_ML_DOMAIN and not ONNX_ML:
         return False
@@ -98,13 +86,6 @@ def should_render_domain(domain):  # type: (Text) -> bool
     return True
 
 
-def format_name_with_domain(domain, schema_name):  # type: (Text, Text) -> Text
-    if domain:
-        return '{}.{}'.format(domain, schema_name)
-    else:
-        return schema_name
-
-
 def display_attr_type(v):  # type: (OpSchema.AttrType) -> Text
     assert isinstance(v, OpSchema.AttrType)
     s = Text(v)
@@ -114,354 +95,315 @@ def display_attr_type(v):  # type: (OpSchema.AttrType) -> Text
     return s
 
 
-def display_domain(domain):  # type: (Text) -> Text
-    if domain:
-        return "the '{}' operator set".format(domain)
-    else:
-        return "the default ONNX operator set"
-
-
-def display_domain_short(domain):  # type: (Text) -> Text
-    if domain:
-        return domain
-    else:
-        return 'ai.onnx (default)'
-
-
-def display_version_link(name, version):  # type: (Text, int) -> Text
-    changelog_md = 'Changelog' + ext
-    name_with_ver = '{}-{}'.format(name, version)
-    return '<a href="{}#{}">{}</a>'.format(changelog_md, name_with_ver, name_with_ver)
-
 def get_unique_output_name(schema, name):
-    for input in schema.inputs :
-        if input.name == name :
-            return 'out_'+name
+    for input in schema.inputs:
+        if input.name == name:
+            return 'out_' + name
     return name
 
-def display_schema(schema, versions):  # type: (OpSchema, Sequence[OpSchema]) -> Text
-    s = ''
 
-    # doc
-    if schema.doc:
-        s += '\n'
-        s += '\n'.join('  ' + line
-                       for line in schema.doc.lstrip().splitlines())
-        s += '\n'
+def onnx_attr_type_to_mlir_attr_type(t):
+    onnx_attr_type = Text(t)
+    onnx_attr_type = onnx_attr_type[onnx_attr_type.rfind('.') + 1:].lower()
 
-    # since version
-    s += '\n#### Version\n'
-    if schema.support_level == OpSchema.SupportType.EXPERIMENTAL:
-        s += '\nNo versioning maintained for experimental ops.'
+    if onnx_attr_type == 'int':
+        mlir_attr_type = 'I64Attr'
+    elif onnx_attr_type == 'float':
+        mlir_attr_type = 'F32Attr'
+    elif onnx_attr_type == 'ints':
+        mlir_attr_type = 'I64ArrayAttr'
+    elif onnx_attr_type == 'floats':
+        mlir_attr_type = 'F32ArrayAttr'
+    elif onnx_attr_type == "string":
+        mlir_attr_type = 'StrAttr'
+    elif onnx_attr_type == "strings":
+        mlir_attr_type = 'StrArrayAttr'
     else:
-        s += '\nThis version of the operator has been ' + ('deprecated' if schema.deprecated else 'available') + ' since version {}'.format(schema.since_version)
-        s += ' of {}.\n'.format(display_domain(schema.domain))
-        if len(versions) > 1:
-            # TODO: link to the Changelog.md
-            s += '\nOther versions of this operator: {}\n'.format(
-                ', '.join(display_version_link(format_name_with_domain(v.domain, v.name),
-                                               v.since_version) for v in versions[:-1]))
-
-    # If this schema is deprecated, don't display any of the following sections
-    if schema.deprecated:
-        return s
-
-    # attributes
-    if schema.attributes:
-        s += '\n#### Attributes\n\n'
-        s += '<dl>\n'
-        for _, attr in sorted(schema.attributes.items()):
-            # option holds either required or default value
-            opt = ''
-            if attr.required:
-                opt = 'required'
-            elif attr.default_value.name:
-                default_value = helper.get_attribute_value(attr.default_value)
-
-                def format_value(value):  # type: (Any) -> Text
-                    if isinstance(value, float):
-                        formatted = str(np.round(value, 5))
-                        # use default formatting, unless too long.
-                        if (len(formatted) > 10):
-                            formatted = str("({:e})".format(value))
-                        return formatted
-                    elif isinstance(value, (bytes, bytearray)) and sys.version_info[0] == 3:
-                        return str(value.decode('utf-8'))
-                    return str(value)
-
-                if isinstance(default_value, list):
-                    default_value = [format_value(val) for val in default_value]
-                else:
-                    default_value = format_value(default_value)
-                opt = 'default is {}'.format(default_value)
-
-            s += '<dt><tt>{}</tt> : {}{}</dt>\n'.format(
-                attr.name,
-                display_attr_type(attr.type),
-                ' ({})'.format(opt) if opt else '')
-            s += '<dd>{}</dd>\n'.format(attr.description)
-        s += '</dl>\n'
-
-    # inputs
-    s += '\n#### Inputs'
-    if schema.min_input != schema.max_input:
-        s += ' ({} - {})'.format(display_number(schema.min_input),
-                                 display_number(schema.max_input))
-    s += '\n\n'
-    if schema.inputs:
-        s += '<dl>\n'
-        for input in schema.inputs:
-            option_str = ""
-            if OpSchema.FormalParameterOption.Optional == input.option:
-                option_str = " (optional)"
-            elif OpSchema.FormalParameterOption.Variadic == input.option:
-                if input.isHomogeneous:
-                    option_str = " (variadic)"
-                else:
-                    option_str = " (variadic, heterogeneous)"
-            s += '<dt><tt>{}</tt>{} : {}</dt>\n'.format(input.name, option_str, input.typeStr)
-            s += '<dd>{}</dd>\n'.format(input.description)
-        s += '</dl>\n'
-
-    # outputs
-    s += '\n#### Outputs'
-    if schema.min_output != schema.max_output:
-        s += ' ({} - {})'.format(display_number(schema.min_output),
-                                 display_number(schema.max_output))
-    s += '\n\n'
-
-    if schema.outputs:
-        s += '<dl>\n'
-        for output in schema.outputs:
-            option_str = ""
-            if OpSchema.FormalParameterOption.Optional == output.option:
-                option_str = " (optional)"
-            elif OpSchema.FormalParameterOption.Variadic == output.option:
-                if output.isHomogeneous:
-                    option_str = " (variadic)"
-                else:
-                    option_str = " (variadic, heterogeneous)"
-            s += '<dt><tt>{}</tt>{} : {}</dt>\n'.format(get_unique_output_name(schema, output.name), option_str, output.typeStr)
-            s += '<dd>{}</dd>\n'.format(output.description)
-        s += '</dl>\n'
-
-    # type constraints
-    s += '\n#### Type Constraints'
-    s += '\n\n'
-    if schema.type_constraints:
-        s += '<dl>\n'
-        for type_constraint in schema.type_constraints:
-            allowedTypes = type_constraint.allowed_type_strs
-            if (len(allowedTypes) > 0):
-                allowedTypeStr = allowedTypes[0]
-            for allowedType in allowedTypes[1:]:
-                allowedTypeStr += ', ' + allowedType
-            s += '<dt><tt>{}</tt> : {}</dt>\n'.format(
-                type_constraint.type_param_str, allowedTypeStr)
-            s += '<dd>{}</dd>\n'.format(type_constraint.description)
-        s += '</dl>\n'
-
-    # Function Body
-    if schema.has_function:  # type: ignore
-        s += '\n#### Function\n'
-        s += '\nThe Function can be represented as a function.\n'
-
-    return s
+        mlir_attr_type = 'AnyAttr'
+    #TODO: tensor and sparse tensor
+    return mlir_attr_type
 
 
-def support_level_str(level):  # type: (OpSchema.SupportType) -> Text
-    return \
-        "<sub>experimental</sub> " if level == OpSchema.SupportType.EXPERIMENTAL else ""
+#TODO: any better way to do this.
+def tblgen_attr_type_to_cpp_type(t):
+    if 'I64Attr' in t:
+        cpp_type = 'IntegerAttr'
+    elif 'F32Attr' in t:
+        cpp_type = 'FloatAttr'
+    elif 'I64ArrayAttr' in t or 'F32ArrayAttr' in t:
+        cpp_type = 'ArrayAttr'
+    elif 'StrAttr' in t:
+        cpp_type = 'StringAttr'
+    elif 'strings' in t:
+        cpp_type = 'ArrayAttr'
+    else:
+        cpp_type = 'Attribute'
+    return cpp_type
 
-def convert_type(tstr) :
-    tfrom = np.array(['bool', 'int8', 'int16', 'int32', 'int64',
-            'unkown', 'float16', 'float', 'double'])
-    tto =np.array(['I1', 'I8', 'I16', 'I32', 'I64',
-         'BF16', 'F16', 'F32', 'F64'])
+
+def tblgen_operand_type_to_cpp_type(op_type):
+    if op_type.startswith('Variadic'):
+        mytype = 'ValueRange'
+    else:
+        mytype = 'Value'
+    return mytype
+
+
+def np_type_to_tblgen_attr_type(tstr):
+    tfrom = np.array([
+        'bool', 'int8', 'int16', 'int32', 'int64', 'unkown', 'float16',
+        'float', 'double'
+    ])
+    tto = np.array(
+        ['I1', 'I8', 'I16', 'I32', 'I64', 'BF16', 'F16', 'F32', 'F64'])
     index = -1
-    for i in range(len(tfrom)) :
-        if tfrom[i] in tstr :
+    for i in range(len(tfrom)):
+        if tfrom[i] in tstr:
             index = i
             break
-    if index == -1 :
+    if index == -1:
         print("error", tstr)
         return ''
-    else :
+    else:
         return tto[i]
 
-def  collect_types(schema, input) :
-    allowedTypeStr=''
-    #first step just ignore the type constraints
-    return allowedTypeStr
-    if input.typeStr :
-        tstr = input.typeStr
-    else :
-        return allwedTypeStr
-    if schema.type_constraints:
-        for type_constraint in schema.type_constraints:
-            if type_constraint.type_param_str != tstr :
-                continue
-            allowedTypes = type_constraint.allowed_type_strs
-            allowedTypeStr=''
-            if (len(allowedTypes) > 0):
-                t = convert_type(allowedTypes[0])
-                if t == '' :
-                    return ''
-                allowedTypeStr += t
-            for allowedType in allowedTypes[1:]:
-                t = convert_type(allowedType)
-                if t == '' :
-                    return ''
-                if  not t in allowedTypeStr :
-                    allowedTypeStr += ', '+t
 
-            return allowedTypeStr
+def get_allowed_elem_types(schema, input):
+    allowed_types_str = None
+    return allowed_types_str
+    # TODO: enable type constraints.
+    # if input.typeStr :
+    #     tstr = input.typeStr
+    # else :
+    #     return allwedTypeStr
+    # if schema.type_constraints:
+    #     for type_constraint in schema.type_constraints:
+    #         if type_constraint.type_param_str != tstr :
+    #             continue
+    #         allowedTypes = type_constraint.allowed_type_strs
+    #         allowedTypeStr=''
+    #         if (len(allowedTypes) > 0):
+    #             t = convert_type(allowedTypes[0])
+    #             if t == '' :
+    #                 return ''
+    #             allowedTypeStr += t
+    #         for allowedType in allowedTypes[1:]:
+    #             t = convert_type(allowedType)
+    #             if t == '' :
+    #                 return ''
+    #             if  not t in allowedTypeStr :
+    #                 allowedTypeStr += ', '+t
+    #
+    #         return allowedTypeStr
+    #
+    # return allowedTypeStr
 
-    return allowedTypeStr
 
-def gen_schema(schema) :
-    line_indent = '  '
+def inc_indent(indent=None):
+    return "" if indent is None else indent + ' ' * 2
 
-    #s = 'def ONNX'+schema.name+str(schema.since_version)+'Op:ONNX_Op<"'+schema.name+'", \n'
-    s = 'def ONNX'+schema.name+'Op:ONNX_Op<"'+schema.name+'", \n'
-    s += line_indent+'  [NoSideEffect'
-    if schema.name in ShapeInferenceList :
-        s+= ', DeclareOpInterfaceMethods<ShapeInferenceOpInterface>'
-    s += ']> {'
 
-    if schema.name in CanonicalList:
-        s += '\n'+line_indent+'let hasCanonicalizer = 1;'
+def dec_indent(indent):
+    return indent[:-2]
 
-    #summary
-    s += '\n'+line_indent
-    s += 'let summary = "ONNX '+schema.name+' operation";'
 
-    #description
-    s += '\n'+line_indent
-    s += 'let description = [{'
-    if schema.doc:
-        """
-        s += '\n'.join(line_indent + line
-                   for line in schema.doc.lstrip().splitlines())
-        """
-        for line in schema.doc.lstrip().splitlines():
-            line = line.replace('}]', '\}\]')
-            s += '\n'+line_indent+'  '+'"'+line+'"'
-    else :
-        s += '\n'+line_indent*2 +'no doc for this op from onnx'
-    s += '\n'+line_indent+'}];'
+def join_args(args):
+    return ", ".join(args)
 
-    #input
-    s+= '\n'+line_indent+'let arguments = (ins '
-    isfirst = True
-    # add operands
-    operand_ins = get_operand_ins(schema)
-    for operand_type, operand_name in operand_ins:
-        if not isfirst:
-            s+= ',\n           '
+
+def get_operands_or_results(schema, is_input):
+    value_list = schema.inputs if is_input else schema.outputs
+    if not value_list:
+        return OrderedDict()
+
+    def any_type_of(types):
+        assert isinstance(types, list)
+        if len(types) == 1:
+            return types[0]
         else:
-            isfirst = False
-        s+=operand_type+':$'+operand_name
+            return "AnyTypeOf<[{}]>".format(", ".join(types))
 
-    # add attributes
-    attr_ins = get_attr_ins(schema)
-    for attr_type, attr_name in attr_ins:
-        if not isfirst:
-            s += ',\n           '
-        else :
-            isfirst = False
-        s += attr_type+':$'+attr_name
-    s+= ');'
+    name_to_types = OrderedDict()
+    for value in value_list:
+        elem_types = get_allowed_elem_types(schema, value)
 
-    #output
-    s+= '\n'+line_indent+'let results = (outs '
-    if schema.outputs:
-        for output in schema.outputs:
-            if output != schema.outputs[0] :
-                s+= ',\n           '
-            #need to interpret output.typeStr
-            etypes=collect_types(schema, output)
-            if etypes == '':
-                s+= 'AnyTypeOf<[AnyMemRef, AnyTensor]>'
+        if elem_types is None:
+            types = ["AnyMemRef", "AnyTensor"]
+        else:
+            types = ["TensorOf<[{}]>", "MemRefOf<[{}]>"]
+            types = list(map(lambda x: x.format(elem_types), types))
+
+        if OpSchema.FormalParameterOption.Optional == value.option:
+            types.append("NoneType")
+        elif OpSchema.FormalParameterOption.Variadic == value.option:
+            if value.isHomogeneous:
+                types = ["Variadic<{}>".format(any_type_of(types))]
             else:
-                s+= 'TensorOf<['+etypes+']>'
-            s += ':$'+get_unique_output_name(schema, output.name)
-    s+= ');\n'
+                #TODO: handle (variadic, heterogeneous) values.
+                print("warning: (variadic, heterogeneous) for" + schema.name +
+                      ' ' + value.name)
 
-    #s+= 'let hasCanonicalizer = 1;'
-
-    #TODO: any better way to do this.
-    def get_attr_type_for_builder(attr_type) :
-        if 'I64Attr' in attr_type :
-            mytype = 'IntegerAttr'
-        elif 'F32Attr' in attr_type :
-            mytype = 'FloatAttr'
-        elif 'I64ArrayAttr' in attr_type or 'F32ArrayAttr' in attr_type:
-            mytype = 'ArrayAttr'
-        elif 'StrAttr' in attr_type :
-            mytype = 'StringAttr'
-        elif 'strings' in attr_type :
-            mytype = 'ArrayAttr'
-        else :
-            mytype ='Attribute'
-        return mytype
-
-    def get_op_type_for_builder(op_type):
-        if op_type.startswith('Variadic'):
-            mytype = 'ValueRange'
+        # An output name can coincide with that of an input; in that case
+        # the prefix "out_" is prepended to the output name to disambiguate.
+        if is_input:
+            value_name = value.name
         else:
-            mytype = 'Value'
-        return mytype
+            value_name = get_unique_output_name(schema, value.name)
+
+        name_to_types[value_name] = any_type_of(types)
+    return name_to_types
+
+
+def get_attrs(schema):
+    def get_attr_type_optional(attr_type):
+        return 'OptionalAttr<{}>'.format(
+            onnx_attr_type_to_mlir_attr_type(attr_type))
+
+    def get_attr_type_with_default(attr_type, attr_default):
+        return 'DefaultValuedAttr<{}, "{}">'.format(
+            onnx_attr_type_to_mlir_attr_type(attr_type), attr_default)
+
+    if not schema.attributes:
+        return OrderedDict()
+
+    name_to_type = OrderedDict()
+    for _, attr in sorted(schema.attributes.items()):
+        qualified_attr_name = "{}.{}".format(schema.name, attr.name)
+        if qualified_attr_name in special_attr_defaults:
+            name_to_type[attr.name] = get_attr_type_with_default(
+                *special_attr_defaults[qualified_attr_name])
+
+        # option holds either required or default value
+        elif attr.required:
+            name_to_type[attr.name] = onnx_attr_type_to_mlir_attr_type(
+                attr.type)
+        elif attr.default_value.name:
+
+            def format_value(value):  # type: (Any) -> Text
+                if isinstance(value, float):
+                    formatted = str(np.round(value, 5))
+                    # use default formatting, unless too long.
+                    if (len(formatted) > 10):
+                        formatted = str("({:e})".format(value))
+                    return formatted
+                elif isinstance(
+                        value,
+                    (bytes, bytearray)) and sys.version_info[0] == 3:
+                    return str(value.decode('utf-8'))
+                return str(value)
+
+            default_value = helper.get_attribute_value(attr.default_value)
+            if isinstance(default_value, list):
+                default_value = [format_value(val) for val in default_value]
+                default_value_str = '{}'.format(default_value)
+                default_value_str = default_value_str.replace('[', '{', 1)
+                default_value_str = default_value_str.replace(']', '}', 1)
+                if Text(attr.type) == "AttrType.STRINGS":
+                    default_value_str = default_value_str.replace("'", '\\"')
+                else:
+                    default_value_str = default_value_str.replace("'", '')
+            else:
+                default_value = format_value(default_value)
+                default_value_str = default_value
+
+            name_to_type[attr.name] = get_attr_type_with_default(
+                attr.type, default_value_str)
+        else:
+            name_to_type[attr.name] = get_attr_type_optional(attr.type)
+    return name_to_type
+
+
+def gen_op_def(schema):
+    indent = inc_indent()
+    s = 'def ONNX{0}Op:ONNX_Op<"{0}",\n'.format(schema.name)
+
+    # Generate decl for op traits.
+    traits = ["NoSideEffect"]
+    if schema.name in OpsWithShapeInference:
+        traits.append("DeclareOpInterfaceMethods<ShapeInferenceOpInterface>")
+    s += inc_indent(indent) + '[{}]> {{\n'.format(join_args(traits))
+
+    # Generate decl for canonicalizer.
+    indent = inc_indent(indent)
+    if schema.name in OpsWithCanonicalizer:
+        s += indent + 'let hasCanonicalizer = 1;\n'
+
+    # Generate decl for summary.
+    s += indent + 'let summary = "ONNX {} operation";\n'.format(schema.name)
+
+    # Generate description.
+    s += indent + 'let description = [{\n'
+    if schema.doc:
+        lines = schema.doc.lstrip().splitlines()
+        for line in lines:
+            escaped_line = line.replace('"', '\\"')\
+                               .replace('}]', '\\}\\]')
+            s += indent + '"{}"\n'.format(escaped_line)
+    s += indent + '}];\n'
+
+    # Generate ins (consisting of operands and attributes).
+    ins = get_operands_or_results(schema, is_input=True)
+    ins.update(get_attrs(schema))
+    ins_strs = ["{1}:${0}".format(*i) for i in ins.items()]
+    s += indent + 'let arguments = (ins {});\n'.format(
+        (',\n' + inc_indent(indent)).join(ins_strs))
+
+    # Generate outs (operation results).
+    outs = get_operands_or_results(schema, is_input=False)
+    outs_strs = ["{1}:${0}".format(*i) for i in outs.items()]
+    s += indent + 'let results = (outs {});\n'.format(
+        (',\n' + inc_indent(indent)).join(outs_strs))
 
     # add custom builders
     # use element type of the first operand to construct an UnrankedTensorType for the output.
     if schema.name in custom_builder_ops_list:
-        if len(operand_ins) == 0:
-            print("warning: not generate custom build methods for " + schema.name + " since it does not have operands.")
+        if len(ins) == 0:
+            raise RuntimeWarning(
+                "warning: not generate custom build methods for " +
+                schema.name + " since it does not have operands.")
         else:
-            if get_op_type_for_builder(operand_ins[0][0]) == 'ValueRange':
-                first_operand = operand_ins[0][1]+'[0]'
-            else:
-                first_operand = operand_ins[0][1]
-
-            s += line_indent+'let builders = [\n'
-
-            # custom builders with operands and attributes having a seperate parameter.
+            s += indent + 'let builders = [\n'
+            # Custom builders with operands and attributes having a separate parameter.
             # E.g. OpBuilder<"Builder *builder, OperationState &state, Value X, Value, Y, Attribute A", [{}]>
-            s += line_indent*2+'OpBuilder<"Builder *builder, OperationState &state'
-            for arg_type, arg_name in operand_ins:
-                s += ', '+get_op_type_for_builder(arg_type)+' '+arg_name
-            for attr_type, attr_name in attr_ins:
-                s += ', '+get_attr_type_for_builder(attr_type)+' '+attr_name
+            indent = inc_indent(indent)
+            s += indent + 'OpBuilder<"Builder *builder, OperationState &state'
+            operands_dict = get_operands_or_results(schema, is_input=True)
+            for name, ty in operands_dict.items():
+                s += ', {} {}'.format(tblgen_operand_type_to_cpp_type(ty),
+                                      name)
+            for name, ty in get_attrs(schema).items():
+                s += ', {} {}'.format(tblgen_attr_type_to_cpp_type(ty), name)
             s += '", [{\n'
-            s += line_indent*3+'auto elementType = '+first_operand+'.getType().cast<TensorType>().getElementType();\n'
-            s += line_indent*3+'build(builder, state, UnrankedTensorType::get(elementType)'
-            for _, arg_name in operand_ins:
-                s += ', '+arg_name
-            for _, attr_name in attr_ins:
-                s += ', '+attr_name
+            indent = inc_indent(indent)
+
+            # Get output type from first operand's type.
+            first_operand_name = list(ins.items())[0][0]
+            s += indent + 'auto elementType = {}.getType().cast<TensorType>().getElementType();\n'.format(
+                first_operand_name)
+            s += indent + 'build(builder, state, UnrankedTensorType::get(elementType)'
+            for name, _ in ins.items():
+                s += ', ' + name
             s += ');\n'
-            s += line_indent*2+'}]>,\n'
+            indent = dec_indent(indent)
+            s += indent + '}]>,\n'
 
-            # custom builders with all operands and attributes having aggregate parameters.
+            # Custom builders with all operands and attributes having aggregate parameters.
             # E.g. OpBuilder<"Builder *builder, OperationState &state, ValueRange operands, ArrayRef<NamedAttribute> attributes", [{}]>'
-            s += line_indent*2+'OpBuilder<"Builder *builder, OperationState &state, ValueRange operands, ArrayRef<NamedAttribute> attributes", [{\n'
-            s += line_indent*3+'auto elementType = '+first_operand+'.getType().cast<TensorType>().getElementType();\n'
-            s += line_indent*3+'std::vector<mlir::Type> outputTypes;\n'
-            s += line_indent*3+'outputTypes.emplace_back(UnrankedTensorType::get(elementType));\n'
-            s += line_indent*3+'build(builder, state, outputTypes, operands, attributes);\n'
-            s += line_indent*2+'}]>'
+            s += indent + 'OpBuilder<"Builder *builder, OperationState &state, ValueRange operands, ArrayRef<NamedAttribute> attributes", [{\n'
+            indent = inc_indent(indent)
+            s += indent + 'auto elementType = operands[0].getType().cast<TensorType>().getElementType();\n'
+            s += indent + 'std::vector<mlir::Type> outputTypes;\n'
+            s += indent + 'outputTypes.emplace_back(UnrankedTensorType::get(elementType));\n'
+            s += indent + 'build(builder, state, outputTypes, operands, attributes);\n'
+            indent = dec_indent(indent)
+            s += indent + '}]>'
 
-            s += '\n'+line_indent+'];\n'
-
-    #add special code
-    if schema.name in manual_code_in_op_def :
-        s += manual_code_in_op_def[schema.name]
+            s += '\n' + indent + '];\n'
 
     s += '}\n\n'
-
     return s
 
+
 """
 special cases:
 * Split: attr split default value: sizeof(output1) namely 1
@@ -470,328 +412,101 @@ special cases:
 * Transpose: attr perm default value is {} empty int list
 """
 
-def gen_code(schema,fefile) :
 
-    handle_variadic = False
+def gen_op_importer(schema, file):
+    """Write the C++ `if (opName == ...)` dispatch entry for `schema`."""
+    base_indent = inc_indent()
+    code = '{}if (opName == "{}")\n'.format(base_indent, schema.name)
 
-    line_indent = '  '
-    fefile.write('    '+'}else if (OpName == "'+schema.name+'") {\n')
-    op_type_str='mlir::ONNX'+schema.name+'Op'
-    if schema.name in special_op_handler :
-        fefile.write('       '+special_op_handler[schema.name]+'(node, '
-          +str(len(schema.inputs))
-          +', ' +str(len(schema.outputs)))
-    elif len(schema.outputs) > 1 :
-        fefile.write('       '+'ImportNodeMultipleOuts<'+op_type_str+'>(node, '
-          +str(len(schema.inputs))
-          +', ' +str(len(schema.outputs)))
-    else :
-        fefile.write('       '+'ImportNodeOneOut<'+op_type_str+'>(node, '
-          +str(len(schema.inputs))
-          +', ' +str(len(schema.outputs)))
-
-    variadicIn = 'false'
-    variadicOut = 'false'
+    num_operands = len(schema.inputs)
+    num_results = len(schema.outputs)
     for input in schema.inputs:
         if OpSchema.FormalParameterOption.Variadic == input.option:
-            if input.isHomogeneous:
-                variadicIn = 'true'
-                handle_variadic = True
+            num_operands = -1
     for output in schema.outputs:
         if OpSchema.FormalParameterOption.Variadic == output.option:
-            if output.isHomogeneous:
-                variadicOut = 'true'
-    if not handle_variadic:
-        fefile.write(');\n')
-    else:
-        fefile.write(', '+variadicIn+', '+variadicOut+');\n')
+            num_results = -1
 
-def get_operand_ins(schema):
-    operand_type_and_name_list = []  # [(optype, opname)]
-    if schema.inputs:
-        for input in schema.inputs:
-            optype = ""
+    default_handler = "buildOperation<mlir::ONNX{}Op>".format(schema.name)
+    handler_func = special_op_handler.get(schema.name, default_handler)
 
-            etypes=collect_types(schema, input)
+    # Special handlers still need explicit operand/result counts (TODO: remove them).
+    call_args = ["node"]
+    uses_generic_builder = "buildOperation" in handler_func
+    if not (uses_generic_builder and num_operands == -1 and num_results == -1):
+        call_args += [
+            "/* expected_num_operands = */ {}".format(num_operands),
+            '/* expected_num_results = */ {}'.format(num_results)]
+    code += inc_indent(base_indent) + "return {}({});\n".format(
+        handler_func, ", ".join(call_args))
 
-            if OpSchema.FormalParameterOption.Optional == input.option:
-                #TODO : handle optional
-                print("warning: optional input for"+schema.name+' '+input.name)
-            elif OpSchema.FormalParameterOption.Variadic == input.option:
-                if input.isHomogeneous:
-                    optype += 'Variadic<'
-                else:
-                    #TODO handle(variadic, heterogeneous) "
-                    print("warning: (variadic, heterogeneous) for"+schema.name+' '+input.name)
-            if etypes == '':
-                optype += 'AnyTypeOf<[AnyMemRef, AnyTensor]>'
-            else:
-                optype += 'TensorOf<['+etypes+']>'
+    file.write(code)
 
-            if OpSchema.FormalParameterOption.Optional == input.option:
-                #TODO : handle optional
-                t=''
-            elif OpSchema.FormalParameterOption.Variadic == input.option:
-                if input.isHomogeneous:
-                    optype += '>'
-                else:
-                    #TODO handle(variadic, heterogeneous) "
-                    t=''
-            operand_type_and_name_list.append((optype, input.name))
-    return operand_type_and_name_list
 
-def get_attr_ins(schema) :
-    
-    def get_attr_type_basic(attr_type) :
-        if attr_type == 'int' :
-            mytype = 'I64Attr'
-        elif attr_type == 'float' :
-            mytype = 'F32Attr'
-        elif attr_type == 'ints' :
-            mytype = 'I64ArrayAttr'
-        elif attr_type == 'floats' :
-            mytype = 'F32ArrayAttr'
-        elif attr_type == "string" :
-            mytype = 'StrAttr'
-        elif attr_type == "strings" :
-            mytype = 'StrArrayAttr'
-        else :
-            mytype ='AnyAttr'
-        #TODO: tensor and sparse tensor
-        return mytype
+def build_operator_schemas():
+    """Group the newest schema of each op by domain and support level."""
+    # domain -> support level -> name -> [schema]
+    schemas_by_key = defaultdict(lambda: defaultdict(lambda: defaultdict(
+        list)))  # type: Dict[Text, Dict[int, Dict[Text, List[OpSchema]]]]
+    for schema in defs.get_all_schemas_with_history():
+        level = int(schema.support_level)
+        schemas_by_key[schema.domain][level][schema.name].append(schema)
 
-    def get_attr_type_optional(attr_type) :
-        mytype = 'OptionalAttr<'
-        mytype += get_attr_type_basic(attr_type)
-        mytype += '>'
-        return mytype
+    # Preprocess the Operator Schemas
+    # [(domain, [(support_level, [(schema name, current schema, all versions schemas)])])]
+    operator_schemas = []  # type: List[Tuple[Text, List[Tuple[int, List[Tuple[Text, OpSchema, List[OpSchema]]]]]]]
+    seen_op_names = set()  # type: Set[Text]
+    for domain, support_map in sorted(schemas_by_key.items()):
+        if not should_render_domain(domain):
+            continue
 
-    def get_attr_type_with_default(attr_type, attr_default) :
-        mytype = 'DefaultValuedAttr<'
-        mytype += get_attr_type_basic(attr_type)
-        mytype += ', "'+attr_default+'">'
-        return mytype
+        support_entries = []
+        for support_level, name_map in sorted(support_map.items()):
+            name_entries = []
+            for op_name, unsorted_versions in sorted(name_map.items()):
+                versions = sorted(unsorted_versions,
+                                  key=lambda s: s.since_version)
+                latest = versions[-1]
+                # Keep only the first occurrence of each op name.
+                if latest.name not in seen_op_names:
+                    seen_op_names.add(latest.name)
+                    name_entries.append((op_name, latest, versions))
+            support_entries.append((support_level, name_entries))
+        operator_schemas.append((domain, support_entries))
+    return operator_schemas
 
-    attr_type_and_name_list = []  # :: [(attrtype, attrname)]
-    attr_line = ''
-    if schema.attributes:
-        for _, attr in sorted(schema.attributes.items()):
-            #attr_line = line_indent+line_indent+line_indent+line_indent
-            found = False
-            attr_type = ""
-            if schema.name+' '+attr.name in special_attr_defaults:
-                (attr_type_str, attr_default_str) = special_attr_defaults[schema.name+' '+attr.name]
-                attr_type = get_attr_type_with_default(attr_type_str, attr_default_str)
-                found = True
-            elif attr.required:
-                s = Text(attr.type)
-                attr_type_str  = s[s.rfind('.') + 1:].lower()
-                attr_type = get_attr_type_basic(attr_type_str)
-                found = True
-
-            # option holds either required or default value
-            elif attr.default_value.name:
-                s = Text(attr.type)
-                attr_type_str  = s[s.rfind('.') + 1:].lower()
-
-                default_value = helper.get_attribute_value(attr.default_value)
-                def format_value(value):  # type: (Any) -> Text
-                    if isinstance(value, float):
-                        formatted = str(np.round(value, 5))
-                        # use default formatting, unless too long.
-                        if (len(formatted) > 10):
-                            formatted = str("({:e})".format(value))
-                        return formatted
-                    elif isinstance(value, (bytes, bytearray)) and sys.version_info[0] == 3:
-                        return str(value.decode('utf-8'))
-                    return str(value)
-
-                if isinstance(default_value, list):
-                    default_value = [format_value(val) for val in default_value]
-                    attr_option_str = '{}'.format(default_value)
-                    attr_option_str = attr_option_str.replace('[', '{', 1)
-                    attr_option_str = attr_option_str.replace(']', '}', 1)
-                    if attr_type_str == 'strings' :
-                        attr_option_str = attr_option_str.replace("'", '\\"')
-                    else :
-                        attr_option_str = attr_option_str.replace("'", '')
-                else:
-                    default_value = format_value(default_value)
-                    attr_option_str = default_value
-                attr_type = get_attr_type_with_default(attr_type_str, attr_option_str)
-                found = True
-            else:
-                s = Text(attr.type)
-                attr_type_str  = s[s.rfind('.') + 1:].lower()
-                attr_type = get_attr_type_optional(attr_type_str)
-            if found:
-                attr_type_and_name_list.append((attr_type, attr.name))
-    return attr_type_and_name_list
 
 def main(args):  # type: (Type[Args]) -> None
-    with io.open(args.changelog, 'w', newline='') as fout:
-        fout.write('## Operator Changelog\n')
-        fout.write(
-            "*This file is automatically generated from the\n"
-            "            [def files](/onnx/defs) via [this script](/onnx/defs/gen_doc.py).\n"
-            "            Do not modify directly and instead edit operator definitions.*\n")
+    """Write the op definition and op importer files for all op schemas."""
+    curr_utc_time = datetime.datetime.now(
+        datetime.timezone.utc).strftime("%m/%d/%Y, %H:%M:%S")
+    autogen_warning = (
+        '//********************************************************\n'
+        '//   This file is generated on UTC-{}.\n'
+        '//   Do not modify this file directly.\n'
+        '//   This file is automatically generated via script.\n'
+        '//   Details can be found in doc/readonnxdefs.md .\n'
+        '//********************************************************\n\n'
+    ).format(curr_utc_time)
 
-        # domain -> version -> [schema]
-        dv_index = defaultdict(lambda: defaultdict(list))  # type: Dict[Text, Dict[int, List[OpSchema]]]
-        for schema in defs.get_all_schemas_with_history():
-            dv_index[schema.domain][schema.since_version].append(schema)
+    with io.open(args.op_def_file, 'w', newline='') as op_def, \
+            io.open(args.op_importer_file, 'w', newline='') as op_importer:
 
-        fout.write('\n')
+        op_def.write(autogen_warning)
+        op_importer.write(autogen_warning)
 
-        for domain, versionmap in sorted(dv_index.items()):
-            if not should_render_domain(domain):
-                continue
-
-            s = '# {}\n'.format(display_domain_short(domain))
-
-            for version, unsorted_schemas in sorted(versionmap.items()):
-                s += '## Version {} of {}\n'.format(version, display_domain(domain))
-                for schema in sorted(unsorted_schemas, key=lambda s: s.name):
-                    name_with_ver = '{}-{}'.format(format_name_with_domain(domain, schema.name),
-                                                   schema.since_version)
-                    s += ('### <a name="{}"></a>**{}**' + (' (deprecated)' if schema.deprecated else '') + '</a>\n').format(name_with_ver, name_with_ver)
-                    s += display_schema(schema, [schema])
-                    s += '\n'
-
-            fout.write(s)
-
-    with io.open(args.output, 'w', newline='', encoding="utf-8") as fout:
-        fout.write('## Operator Schemas\n')
-        fout.write(
-            "*This file is automatically generated from the\n"
-            "            [def files](/onnx/defs) via [this script](/onnx/defs/gen_doc.py).\n"
-            "            Do not modify directly and instead edit operator definitions.*\n")
-
-        # domain -> support level -> name -> [schema]
-        index = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))  # type: Dict[Text, Dict[int, Dict[Text, List[OpSchema]]]]
-        for schema in defs.get_all_schemas_with_history():
-            #print("check point 0", schema.name, schema.domain, schema.support_level)
-            #gen_schema(schema)
-            index[schema.domain][int(schema.support_level)][schema.name].append(schema)
-
-        fout.write('\n')
-
-        # Preprocess the Operator Schemas
-        # [(domain, [(support_level, [(schema name, current schema, all versions schemas)])])]
-        operator_schemas = list()  # type: List[Tuple[Text, List[Tuple[int, List[Tuple[Text, OpSchema, List[OpSchema]]]]]]]
-        exsting_ops = set()  # type: Set[Text]
-        for domain, _supportmap in sorted(index.items()):
-            if not should_render_domain(domain):
-                continue
-
-            processed_supportmap = list()
-            for _support, _namemap in sorted(_supportmap.items()):
-                processed_namemap = list()
-                for n, unsorted_versions in sorted(_namemap.items()):
-                    versions = sorted(unsorted_versions, key=lambda s: s.since_version)
-                    schema = versions[-1]
-                    #print("check point 2", schema)
-                    if schema.name in exsting_ops:
-                        continue
-                    exsting_ops.add(schema.name)
-                    processed_namemap.append((n, schema, versions))
-                processed_supportmap.append((_support, processed_namemap))
-            operator_schemas.append((domain, processed_supportmap))
-
-        # Table of contents
-        for domain, supportmap in operator_schemas:
-            s = '* {}\n'.format(display_domain_short(domain))
-            fout.write(s)
-            function_ops = list()
-            for _, namemap in supportmap:
-                for n, schema, versions in namemap:
-                    if schema.has_function:  # type: ignore
-                        function_ops.append((n, schema, versions))
-                        continue
-                    s = '  * {}<a href="#{}">{}</a>\n'.format(
-                        support_level_str(schema.support_level),
-                        format_name_with_domain(domain, n),
-                        format_name_with_domain(domain, n))
-                    fout.write(s)
-            if len(function_ops):
-                fout.write('\n')
-                fout.write('  **Operators with function registered:**\n')
-                for n, schema, versions in function_ops:
-                    s = '  * {}<a href="#{}">{}</a>\n'.format(
-                        support_level_str(schema.support_level),
-                        format_name_with_domain(domain, n),
-                        format_name_with_domain(domain, n))
-                    fout.write(s)
-
-        fout.write('\n')
-        tdfile= io.open(args.tdfile, 'w', newline='') 
-        tdfile.write('//********************************************************\n'+
-                     '//   Warning: Do not modify this file directly\n'+
-                     '//   This file is automatically generated via script\n'+
-                     '//   Details can be found in doc/readonnxdefs.md\n'+
-                     '//********************************************************\n\n'
-               )
-        fefile=io.open('op_build_table.inc', 'w', newline='')
-        firstfunc = True
-
-        fefile.write('//********************************************************\n'+
-                     '//   Warning: Do not modify this file directly\n'+
-                     '//   This file is automatically generated via script\n'+
-                     '//   Details can be found in doc/readonnxdefs.md\n'+
-                     '//********************************************************\n\n'
-               )
-        fefile.write('    '+'if (OpName == "DUMMY") {\n')
-        for domain, supportmap in operator_schemas:
-            s = '## {}\n'.format(display_domain_short(domain))
-            fout.write(s)
-
-            for _, namemap in supportmap:
-                for op_type, schema, versions in namemap:
-                    # op_type
-                    #print("check point 1", schema.name, len(schema.inputs), len(schema.outputs))
-                    gen_code(schema, fefile)
-
-                    r = gen_schema(schema)
-                    tdfile.write(r)
-                    s = ('### {}<a name="{}"></a><a name="{}">**{}**' + (' (deprecated)' if schema.deprecated else '') + '</a>\n').format(
-                        support_level_str(schema.support_level),
-                        format_name_with_domain(domain, op_type),
-                        format_name_with_domain(domain, op_type.lower()),
-                        format_name_with_domain(domain, op_type))
-                    
-                    s += display_schema(schema, versions)
-
-                    s += '\n\n'
-
-                    if op_type in SNIPPETS:
-                        s += '#### Examples\n\n'
-                        for summary, code in sorted(SNIPPETS[op_type]):
-                            s += '<details>\n'
-                            s += '<summary>{}</summary>\n\n'.format(summary)
-                            s += '```python\n{}\n```\n\n'.format(code)
-                            s += '</details>\n'
-                            s += '\n\n'
-                    if op_type.lower() in SAMPLE_IMPLEMENTATIONS:
-                        s += '#### Sample Implementation\n\n'
-                        s += '<details>\n'
-                        s += '<summary>{}</summary>\n\n'.format(op_type)
-                        s += '```python\n{}\n```\n\n'.format(SAMPLE_IMPLEMENTATIONS[op_type.lower()])
-                        s += '</details>\n'
-                        s += '\n\n'
-
-                    fout.write(s)
-        fefile.write('    }')
-        fefile.close()
+        for _, supportmap in build_operator_schemas():
+            for _, namemap in supportmap:
+                for _, schema, _ in namemap:
+                    gen_op_importer(schema, op_importer)
+                    op_def.write(gen_op_def(schema))
 
 
 if __name__ == '__main__':
-    base_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
-    docs_dir = os.path.join(base_dir, 'docs')
-    print(docs_dir)
+    curr_dir = os.path.dirname(os.path.realpath(__file__))
 
     class Args(object):
-        output = os.path.join(docs_dir, 'Operators' + ext)
-        changelog = os.path.join(docs_dir, 'Changelog' + ext)
-        tdfile = os.path.join(base_dir, 'onnxop.inc')
-    print(Args)
+        op_def_file = os.path.join(curr_dir, 'onnxop.inc')
+        op_importer_file = os.path.join(curr_dir, 'op_build_table.inc')
+
     main(Args)
diff --git a/src/builder/frontend_dialect_transformer.cpp b/src/builder/frontend_dialect_transformer.cpp
index 9cadad8..cd23e8c 100644
--- a/src/builder/frontend_dialect_transformer.cpp
+++ b/src/builder/frontend_dialect_transformer.cpp
@@ -121,6 +121,7 @@ private:
   mlir::MLIRContext &context_;
   mlir::ModuleOp module_;
   mlir::OpBuilder builder_;
+  mlir::Value none_;
   // mapping between string name and symbol
   OnnxOnnfSymbolMapping frontend_symbols_;
 
@@ -287,8 +288,8 @@ private:
     }
   }
 
-  std::vector<mlir::NamedAttribute> ImportNodeAttributes(
-      const onnx::NodeProto &node) {
+  std::vector<mlir::NamedAttribute>
+  ImportNodeAttributes(const onnx::NodeProto &node) {
     std::vector<mlir::NamedAttribute> attributes;
     for (int i = 0; i < node.attribute_size(); ++i) {
       auto attr = node.attribute(i);
@@ -317,21 +318,11 @@ private:
     }
   }
 
-  // if c++17 is used, ImportNodeOneOut and ImportNodeMultipleOuts can be
-  // combined with 'if constexpr' the issue is the type of the output is
-  // different. alternative way to use variadic output for all the op
-
-  /*!
-   * Important onnx node which generates only one output
-   * @param node onnx node
-   * @param nIn number of expected inputs
-   * @param nOut number of expected outputs
-   * @param attrs  list of desription for attributes with format {name, type,
-   * default}
-   */
   template <typename T>
-  void ImportNodeOneOut(const onnx::NodeProto &node, int nIn, int nOut,
-                        bool variadicIn = false, bool variadicOut = false) {
+  void buildOperation(const onnx::NodeProto &node, int expectedNumOperands = -1,
+                      int expectedNumResults = -1) {
+    bool variadicIn = expectedNumOperands == -1;
+    bool variadicOut = expectedNumResults == -1;
     std::vector<mlir::Value> inputs;
     for (const auto &item : node.input()) {
       if (frontend_symbols_.ContainKey(legalize_name(item))) {
@@ -339,6 +330,10 @@ private:
       }
     }
 
+    if (!variadicIn)
+      for (auto i = inputs.size(); i < expectedNumOperands; i++)
+        inputs.emplace_back(none_);
+
     std::vector<mlir::Type> outputTypes;
     for (auto item : node.output()) {
       outputTypes.push_back(
@@ -347,49 +342,11 @@ private:
 
     auto attributes = ImportNodeAttributes(node);
 
-    llvm::StringRef OpName = node.op_type();
-    if ((variadicIn || nIn == inputs.size()) &&
-        (variadicOut || nOut == outputTypes.size())) {
-      auto op =
-          builder_.create<T>(UnknownLoc(), outputTypes, inputs, attributes);
-      frontend_symbols_.AddMapping(legalize_name(node.output()[0]),
-                                   op.getResult());
-    } else {
-      ImportNodeGeneric(node);
-    }
-  }
-
-  template <typename T>
-  void ImportNodeMultipleOuts(const onnx::NodeProto &node, int nIn, int nOut,
-                              bool variadicIn = false,
-                              bool variadicOut = false) {
-    std::vector<mlir::Value> inputs;
-    for (const auto &item : node.input()) {
-      if (frontend_symbols_.ContainKey(legalize_name(item))) {
-        inputs.push_back(frontend_symbols_.GetTensorByOnnxName(item));
-      }
-    }
-
-    std::vector<mlir::Type> outputTypes;
-    for (auto item : node.output()) {
-      outputTypes.push_back(
-          mlir::UnrankedTensorType::get(builder_.getF32Type()));
-    }
-
-    auto attributes = ImportNodeAttributes(node);
-
-    llvm::StringRef OpName = node.op_type();
-
-    if ((variadicIn || nIn == inputs.size()) &&
-        (variadicOut || nOut == outputTypes.size())) {
-      auto op =
-          builder_.create<T>(UnknownLoc(), outputTypes, inputs, attributes);
-      for (int i = 0; i < node.output().size(); i++) {
-        frontend_symbols_.AddMapping(legalize_name(node.output()[i]),
-                                     op.getResult(i));
-      }
-    } else {
-      ImportNodeGeneric(node);
+    // TODO: Handle optional inputs omitted mid-list; only trailing ones are padded with none_.
+    auto op = builder_.create<T>(UnknownLoc(), outputTypes, inputs, attributes);
+    for (int i = 0; i < node.output().size(); i++) {
+      frontend_symbols_.AddMapping(legalize_name(node.output()[i]),
+                                   *(op.getODSResults(i).begin()));
     }
   }
 
@@ -398,8 +355,7 @@ private:
    * c++ does not allow template specialization inside a class scope
    * a specialized function is used
    */
-  void
-  ImportNodeConv(onnx::NodeProto node, int nIn, int nOut) {
+  void ImportNodeConv(onnx::NodeProto node, int nIn, int nOut) {
     // Conv has attribute dilations, kernel_shape, pads, the default value of
     // which  is determined by the shape of first argument. However, since the
     // shape is unknown now, these attributes can be not generated auto
@@ -413,24 +369,20 @@ private:
     int nOps = node.input().size();
 
     if (nOps == 2)
-      ImportNodeOneOut<mlir::ONNXConvNoBiasOp>(
-          node, nOps, nOut);
+      buildOperation<mlir::ONNXConvNoBiasOp>(node, nOps, nOut);
     else
-      ImportNodeOneOut<mlir::ONNXConvOp>(node, nOps, nOut);
+      buildOperation<mlir::ONNXConvOp>(node, nOps, nOut);
   }
 
   /*!
    * Special handle for MaxPool operations.
    */
-  void ImportNodeMaxPool(
-      onnx::NodeProto node, int nIn, int nOut) {
+  void ImportNodeMaxPool(onnx::NodeProto node, int nIn, int nOut) {
     int nOuts = node.output().size();
     if (nOuts == 1) {
-      ImportNodeOneOut<mlir::ONNXMaxPoolSingleOutOp>(
-          node, nIn, nOuts);
+      buildOperation<mlir::ONNXMaxPoolSingleOutOp>(node, nIn, nOuts);
     } else {
-      ImportNodeMultipleOuts<mlir::ONNXMaxPoolOp>(
-          node, nIn, nOuts);
+      buildOperation<mlir::ONNXMaxPoolOp>(node, nIn, nOuts);
     }
   }
 
@@ -441,23 +393,10 @@ private:
     int nOuts = node.output().size();
     if (nOuts == 1) {
       // Test mode with one output.
-      ImportNodeOneOut<mlir::ONNXBatchNormalizationTestModeOp>(node, nIn,
-                                                               nOuts);
+      buildOperation<mlir::ONNXBatchNormalizationTestModeOp>(node, nIn, nOuts);
     } else {
       // Training mode with four trailing optional outputs. Not handled yet.
-      ImportNodeMultipleOuts<mlir::ONNXBatchNormalizationOp>(node, nIn, nOuts);
-    }
-  }
-
-  /*!
-   * Special handle for Gemm operations.
-   */
-  void ImportNodeGemm(onnx::NodeProto node, int nIn, int nOut) {
-    int nOps = node.input().size();
-    if (nOps == 2) {
-      ImportNodeOneOut<mlir::ONNXGemmNoBiasOp>(node, 2, nOut);
-    } else {
-      ImportNodeOneOut<mlir::ONNXGemmOp>(node, nIn, nOut);
+      buildOperation<mlir::ONNXBatchNormalizationOp>(node, nIn, nOuts);
     }
   }
 
@@ -467,28 +406,14 @@ private:
   void ImportNodePad(onnx::NodeProto node, int nIn, int nOut) {
     int nOps = node.input().size();
     if (nOps == 2) {
-      ImportNodeOneOut<mlir::ONNXPadConstantValueOp>(node, 2, nOut);
+      buildOperation<mlir::ONNXPadConstantValueOp>(node, 2, nOut);
     } else {
-      ImportNodeOneOut<mlir::ONNXPadOp>(node, nIn, nOut);
+      buildOperation<mlir::ONNXPadOp>(node, nIn, nOut);
     }
   }
 
   void ImportNode(const onnx::NodeProto &node) {
-    std::vector<mlir::Value> inputs;
-    for (const auto &item : node.input()) {
-      if (frontend_symbols_.ContainKey(legalize_name(item))) {
-        inputs.push_back(frontend_symbols_.GetTensorByOnnxName(item));
-      }
-    }
-
-    std::vector<mlir::Type> outputTypes;
-    for (auto item : node.output()) {
-      outputTypes.push_back(
-          mlir::UnrankedTensorType::get(builder_.getF32Type()));
-    }
-
-    std::vector<mlir::NamedAttribute> attributes;
-    llvm::StringRef OpName = node.op_type();
+    llvm::StringRef opName = node.op_type();
 
     // the following code is generated by gen_doc.py
     // refer to dialect/onnx/onnx.td for details
@@ -555,9 +480,11 @@ private:
       ImportInputTensorSymbol(std::get<0>(it), std::get<1>(it));
     }
 
-    // import nodes in the graph
-    auto node = graph.node();
-    for (const auto &item : node) {
+    // Create the none-typed constant that stands in for omitted optional operands.
+    none_ =
+        builder_.create<mlir::ConstantOp>(UnknownLoc(), builder_.getUnitAttr());
+    // Import nodes in the graph.
+    for (const auto &item : graph.node()) {
       ImportNode(item);
     }
 
diff --git a/src/builder/op_build_table.inc b/src/builder/op_build_table.inc
index c0b2ca6..41a910f 100644
--- a/src/builder/op_build_table.inc
+++ b/src/builder/op_build_table.inc
@@ -1,320 +1,319 @@
 //********************************************************
-//   Warning: Do not modify this file directly
-//   This file is automatically generated via script
-//   Details can be found in doc/readonnxdefs.md
+//   This file is generated on UTC-02/24/2020, 06:29:01.
+//   Do not modify this file directly.
+//   This file is automatically generated via script.
+//   Details can be found in doc/readonnxdefs.md .
 //********************************************************
 
-    if (OpName == "DUMMY") {
-    }else if (OpName == "Abs") {
-       ImportNodeOneOut<mlir::ONNXAbsOp>(node, 1, 1);
-    }else if (OpName == "Acos") {
-       ImportNodeOneOut<mlir::ONNXAcosOp>(node, 1, 1);
-    }else if (OpName == "Acosh") {
-       ImportNodeOneOut<mlir::ONNXAcoshOp>(node, 1, 1);
-    }else if (OpName == "Add") {
-       ImportNodeOneOut<mlir::ONNXAddOp>(node, 2, 1);
-    }else if (OpName == "And") {
-       ImportNodeOneOut<mlir::ONNXAndOp>(node, 2, 1);
-    }else if (OpName == "ArgMax") {
-       ImportNodeOneOut<mlir::ONNXArgMaxOp>(node, 1, 1);
-    }else if (OpName == "ArgMin") {
-       ImportNodeOneOut<mlir::ONNXArgMinOp>(node, 1, 1);
-    }else if (OpName == "Asin") {
-       ImportNodeOneOut<mlir::ONNXAsinOp>(node, 1, 1);
-    }else if (OpName == "Asinh") {
-       ImportNodeOneOut<mlir::ONNXAsinhOp>(node, 1, 1);
-    }else if (OpName == "Atan") {
-       ImportNodeOneOut<mlir::ONNXAtanOp>(node, 1, 1);
-    }else if (OpName == "Atanh") {
-       ImportNodeOneOut<mlir::ONNXAtanhOp>(node, 1, 1);
-    }else if (OpName == "AveragePool") {
-       ImportNodeOneOut<mlir::ONNXAveragePoolOp>(node, 1, 1);
-    }else if (OpName == "BatchNormalization") {
-       ImportNodeBatchNormalization(node, 5, 5);
-    }else if (OpName == "BitShift") {
-       ImportNodeOneOut<mlir::ONNXBitShiftOp>(node, 2, 1);
-    }else if (OpName == "Cast") {
-       ImportNodeOneOut<mlir::ONNXCastOp>(node, 1, 1);
-    }else if (OpName == "Ceil") {
-       ImportNodeOneOut<mlir::ONNXCeilOp>(node, 1, 1);
-    }else if (OpName == "Clip") {
-       ImportNodeOneOut<mlir::ONNXClipOp>(node, 3, 1);
-    }else if (OpName == "Compress") {
-       ImportNodeOneOut<mlir::ONNXCompressOp>(node, 2, 1);
-    }else if (OpName == "Concat") {
-       ImportNodeOneOut<mlir::ONNXConcatOp>(node, 1, 1, true, false);
-    }else if (OpName == "ConcatFromSequence") {
-       ImportNodeOneOut<mlir::ONNXConcatFromSequenceOp>(node, 1, 1);
-    }else if (OpName == "Constant") {
-       ImportNodeOneOut<mlir::ONNXConstantOp>(node, 0, 1);
-    }else if (OpName == "ConstantOfShape") {
-       ImportNodeOneOut<mlir::ONNXConstantOfShapeOp>(node, 1, 1);
-    }else if (OpName == "Conv") {
-       ImportNodeConv(node, 3, 1);
-    }else if (OpName == "ConvInteger") {
-       ImportNodeOneOut<mlir::ONNXConvIntegerOp>(node, 4, 1);
-    }else if (OpName == "ConvTranspose") {
-       ImportNodeOneOut<mlir::ONNXConvTransposeOp>(node, 3, 1);
-    }else if (OpName == "Cos") {
-       ImportNodeOneOut<mlir::ONNXCosOp>(node, 1, 1);
-    }else if (OpName == "Cosh") {
-       ImportNodeOneOut<mlir::ONNXCoshOp>(node, 1, 1);
-    }else if (OpName == "CumSum") {
-       ImportNodeOneOut<mlir::ONNXCumSumOp>(node, 2, 1);
-    }else if (OpName == "DepthToSpace") {
-       ImportNodeOneOut<mlir::ONNXDepthToSpaceOp>(node, 1, 1);
-    }else if (OpName == "DequantizeLinear") {
-       ImportNodeOneOut<mlir::ONNXDequantizeLinearOp>(node, 3, 1);
-    }else if (OpName == "Det") {
-       ImportNodeOneOut<mlir::ONNXDetOp>(node, 1, 1);
-    }else if (OpName == "Div") {
-       ImportNodeOneOut<mlir::ONNXDivOp>(node, 2, 1);
-    }else if (OpName == "Dropout") {
-       ImportNodeMultipleOuts<mlir::ONNXDropoutOp>(node, 1, 2);
-    }else if (OpName == "DynamicQuantizeLinear") {
-       ImportNodeMultipleOuts<mlir::ONNXDynamicQuantizeLinearOp>(node, 1, 3);
-    }else if (OpName == "Elu") {
-       ImportNodeOneOut<mlir::ONNXEluOp>(node, 1, 1);
-    }else if (OpName == "Equal") {
-       ImportNodeOneOut<mlir::ONNXEqualOp>(node, 2, 1);
-    }else if (OpName == "Erf") {
-       ImportNodeOneOut<mlir::ONNXErfOp>(node, 1, 1);
-    }else if (OpName == "Exp") {
-       ImportNodeOneOut<mlir::ONNXExpOp>(node, 1, 1);
-    }else if (OpName == "Expand") {
-       ImportNodeOneOut<mlir::ONNXExpandOp>(node, 2, 1);
-    }else if (OpName == "EyeLike") {
-       ImportNodeOneOut<mlir::ONNXEyeLikeOp>(node, 1, 1);
-    }else if (OpName == "Flatten") {
-       ImportNodeOneOut<mlir::ONNXFlattenOp>(node, 1, 1);
-    }else if (OpName == "Floor") {
-       ImportNodeOneOut<mlir::ONNXFloorOp>(node, 1, 1);
-    }else if (OpName == "GRU") {
-       ImportNodeMultipleOuts<mlir::ONNXGRUOp>(node, 6, 2);
-    }else if (OpName == "Gather") {
-       ImportNodeOneOut<mlir::ONNXGatherOp>(node, 2, 1);
-    }else if (OpName == "GatherElements") {
-       ImportNodeOneOut<mlir::ONNXGatherElementsOp>(node, 2, 1);
-    }else if (OpName == "GatherND") {
-       ImportNodeOneOut<mlir::ONNXGatherNDOp>(node, 2, 1);
-    }else if (OpName == "Gemm") {
-       ImportNodeGemm(node, 3, 1);
-    }else if (OpName == "GlobalAveragePool") {
-       ImportNodeOneOut<mlir::ONNXGlobalAveragePoolOp>(node, 1, 1);
-    }else if (OpName == "GlobalLpPool") {
-       ImportNodeOneOut<mlir::ONNXGlobalLpPoolOp>(node, 1, 1);
-    }else if (OpName == "GlobalMaxPool") {
-       ImportNodeOneOut<mlir::ONNXGlobalMaxPoolOp>(node, 1, 1);
-    }else if (OpName == "Greater") {
-       ImportNodeOneOut<mlir::ONNXGreaterOp>(node, 2, 1);
-    }else if (OpName == "HardSigmoid") {
-       ImportNodeOneOut<mlir::ONNXHardSigmoidOp>(node, 1, 1);
-    }else if (OpName == "Hardmax") {
-       ImportNodeOneOut<mlir::ONNXHardmaxOp>(node, 1, 1);
-    }else if (OpName == "Identity") {
-       ImportNodeOneOut<mlir::ONNXIdentityOp>(node, 1, 1);
-    }else if (OpName == "If") {
-       ImportNodeOneOut<mlir::ONNXIfOp>(node, 1, 1);
-    }else if (OpName == "InstanceNormalization") {
-       ImportNodeOneOut<mlir::ONNXInstanceNormalizationOp>(node, 3, 1);
-    }else if (OpName == "IsInf") {
-       ImportNodeOneOut<mlir::ONNXIsInfOp>(node, 1, 1);
-    }else if (OpName == "IsNaN") {
-       ImportNodeOneOut<mlir::ONNXIsNaNOp>(node, 1, 1);
-    }else if (OpName == "LRN") {
-       ImportNodeOneOut<mlir::ONNXLRNOp>(node, 1, 1);
-    }else if (OpName == "LSTM") {
-       ImportNodeMultipleOuts<mlir::ONNXLSTMOp>(node, 8, 3);
-    }else if (OpName == "LeakyRelu") {
-       ImportNodeOneOut<mlir::ONNXLeakyReluOp>(node, 1, 1);
-    }else if (OpName == "Less") {
-       ImportNodeOneOut<mlir::ONNXLessOp>(node, 2, 1);
-    }else if (OpName == "Log") {
-       ImportNodeOneOut<mlir::ONNXLogOp>(node, 1, 1);
-    }else if (OpName == "LogSoftmax") {
-       ImportNodeOneOut<mlir::ONNXLogSoftmaxOp>(node, 1, 1);
-    }else if (OpName == "Loop") {
-       ImportNodeOneOut<mlir::ONNXLoopOp>(node, 3, 1);
-    }else if (OpName == "LpNormalization") {
-       ImportNodeOneOut<mlir::ONNXLpNormalizationOp>(node, 1, 1);
-    }else if (OpName == "LpPool") {
-       ImportNodeOneOut<mlir::ONNXLpPoolOp>(node, 1, 1);
-    }else if (OpName == "MatMul") {
-       ImportNodeOneOut<mlir::ONNXMatMulOp>(node, 2, 1);
-    }else if (OpName == "MatMulInteger") {
-       ImportNodeOneOut<mlir::ONNXMatMulIntegerOp>(node, 4, 1);
-    }else if (OpName == "Max") {
-       ImportNodeOneOut<mlir::ONNXMaxOp>(node, 1, 1, true, false);
-    }else if (OpName == "MaxPool") {
-       ImportNodeMaxPool(node, 1, 2);
-    }else if (OpName == "MaxRoiPool") {
-       ImportNodeOneOut<mlir::ONNXMaxRoiPoolOp>(node, 2, 1);
-    }else if (OpName == "MaxUnpool") {
-       ImportNodeOneOut<mlir::ONNXMaxUnpoolOp>(node, 3, 1);
-    }else if (OpName == "Mean") {
-       ImportNodeOneOut<mlir::ONNXMeanOp>(node, 1, 1, true, false);
-    }else if (OpName == "MeanVarianceNormalization") {
-       ImportNodeOneOut<mlir::ONNXMeanVarianceNormalizationOp>(node, 1, 1);
-    }else if (OpName == "Min") {
-       ImportNodeOneOut<mlir::ONNXMinOp>(node, 1, 1, true, false);
-    }else if (OpName == "Mod") {
-       ImportNodeOneOut<mlir::ONNXModOp>(node, 2, 1);
-    }else if (OpName == "Mul") {
-       ImportNodeOneOut<mlir::ONNXMulOp>(node, 2, 1);
-    }else if (OpName == "Multinomial") {
-       ImportNodeOneOut<mlir::ONNXMultinomialOp>(node, 1, 1);
-    }else if (OpName == "Neg") {
-       ImportNodeOneOut<mlir::ONNXNegOp>(node, 1, 1);
-    }else if (OpName == "NonMaxSuppression") {
-       ImportNodeOneOut<mlir::ONNXNonMaxSuppressionOp>(node, 5, 1);
-    }else if (OpName == "NonZero") {
-       ImportNodeOneOut<mlir::ONNXNonZeroOp>(node, 1, 1);
-    }else if (OpName == "Not") {
-       ImportNodeOneOut<mlir::ONNXNotOp>(node, 1, 1);
-    }else if (OpName == "OneHot") {
-       ImportNodeOneOut<mlir::ONNXOneHotOp>(node, 3, 1);
-    }else if (OpName == "Or") {
-       ImportNodeOneOut<mlir::ONNXOrOp>(node, 2, 1);
-    }else if (OpName == "PRelu") {
-       ImportNodeOneOut<mlir::ONNXPReluOp>(node, 2, 1);
-    }else if (OpName == "Pad") {
-       ImportNodePad(node, 3, 1);
-    }else if (OpName == "Pow") {
-       ImportNodeOneOut<mlir::ONNXPowOp>(node, 2, 1);
-    }else if (OpName == "QLinearConv") {
-       ImportNodeOneOut<mlir::ONNXQLinearConvOp>(node, 9, 1);
-    }else if (OpName == "QLinearMatMul") {
-       ImportNodeOneOut<mlir::ONNXQLinearMatMulOp>(node, 8, 1);
-    }else if (OpName == "QuantizeLinear") {
-       ImportNodeOneOut<mlir::ONNXQuantizeLinearOp>(node, 3, 1);
-    }else if (OpName == "RNN") {
-       ImportNodeMultipleOuts<mlir::ONNXRNNOp>(node, 6, 2);
-    }else if (OpName == "RandomNormal") {
-       ImportNodeOneOut<mlir::ONNXRandomNormalOp>(node, 0, 1);
-    }else if (OpName == "RandomNormalLike") {
-       ImportNodeOneOut<mlir::ONNXRandomNormalLikeOp>(node, 1, 1);
-    }else if (OpName == "RandomUniform") {
-       ImportNodeOneOut<mlir::ONNXRandomUniformOp>(node, 0, 1);
-    }else if (OpName == "RandomUniformLike") {
-       ImportNodeOneOut<mlir::ONNXRandomUniformLikeOp>(node, 1, 1);
-    }else if (OpName == "Range") {
-       ImportNodeOneOut<mlir::ONNXRangeOp>(node, 3, 1);
-    }else if (OpName == "Reciprocal") {
-       ImportNodeOneOut<mlir::ONNXReciprocalOp>(node, 1, 1);
-    }else if (OpName == "ReduceL1") {
-       ImportNodeOneOut<mlir::ONNXReduceL1Op>(node, 1, 1);
-    }else if (OpName == "ReduceL2") {
-       ImportNodeOneOut<mlir::ONNXReduceL2Op>(node, 1, 1);
-    }else if (OpName == "ReduceLogSum") {
-       ImportNodeOneOut<mlir::ONNXReduceLogSumOp>(node, 1, 1);
-    }else if (OpName == "ReduceLogSumExp") {
-       ImportNodeOneOut<mlir::ONNXReduceLogSumExpOp>(node, 1, 1);
-    }else if (OpName == "ReduceMax") {
-       ImportNodeOneOut<mlir::ONNXReduceMaxOp>(node, 1, 1);
-    }else if (OpName == "ReduceMean") {
-       ImportNodeOneOut<mlir::ONNXReduceMeanOp>(node, 1, 1);
-    }else if (OpName == "ReduceMin") {
-       ImportNodeOneOut<mlir::ONNXReduceMinOp>(node, 1, 1);
-    }else if (OpName == "ReduceProd") {
-       ImportNodeOneOut<mlir::ONNXReduceProdOp>(node, 1, 1);
-    }else if (OpName == "ReduceSum") {
-       ImportNodeOneOut<mlir::ONNXReduceSumOp>(node, 1, 1);
-    }else if (OpName == "ReduceSumSquare") {
-       ImportNodeOneOut<mlir::ONNXReduceSumSquareOp>(node, 1, 1);
-    }else if (OpName == "Relu") {
-       ImportNodeOneOut<mlir::ONNXReluOp>(node, 1, 1);
-    }else if (OpName == "Reshape") {
-       ImportNodeOneOut<mlir::ONNXReshapeOp>(node, 2, 1);
-    }else if (OpName == "Resize") {
-       ImportNodeOneOut<mlir::ONNXResizeOp>(node, 4, 1);
-    }else if (OpName == "ReverseSequence") {
-       ImportNodeOneOut<mlir::ONNXReverseSequenceOp>(node, 2, 1);
-    }else if (OpName == "RoiAlign") {
-       ImportNodeOneOut<mlir::ONNXRoiAlignOp>(node, 3, 1);
-    }else if (OpName == "Round") {
-       ImportNodeOneOut<mlir::ONNXRoundOp>(node, 1, 1);
-    }else if (OpName == "Scan") {
-       ImportNodeOneOut<mlir::ONNXScanOp>(node, 1, 1);
-    }else if (OpName == "Scatter") {
-       ImportNodeOneOut<mlir::ONNXScatterOp>(node, 3, 1);
-    }else if (OpName == "ScatterElements") {
-       ImportNodeOneOut<mlir::ONNXScatterElementsOp>(node, 3, 1);
-    }else if (OpName == "ScatterND") {
-       ImportNodeOneOut<mlir::ONNXScatterNDOp>(node, 3, 1);
-    }else if (OpName == "Selu") {
-       ImportNodeOneOut<mlir::ONNXSeluOp>(node, 1, 1);
-    }else if (OpName == "SequenceAt") {
-       ImportNodeOneOut<mlir::ONNXSequenceAtOp>(node, 2, 1);
-    }else if (OpName == "SequenceConstruct") {
-       ImportNodeOneOut<mlir::ONNXSequenceConstructOp>(node, 1, 1, true, false);
-    }else if (OpName == "SequenceEmpty") {
-       ImportNodeOneOut<mlir::ONNXSequenceEmptyOp>(node, 0, 1);
-    }else if (OpName == "SequenceErase") {
-       ImportNodeOneOut<mlir::ONNXSequenceEraseOp>(node, 2, 1);
-    }else if (OpName == "SequenceInsert") {
-       ImportNodeOneOut<mlir::ONNXSequenceInsertOp>(node, 3, 1);
-    }else if (OpName == "SequenceLength") {
-       ImportNodeOneOut<mlir::ONNXSequenceLengthOp>(node, 1, 1);
-    }else if (OpName == "Shape") {
-       ImportNodeOneOut<mlir::ONNXShapeOp>(node, 1, 1);
-    }else if (OpName == "Shrink") {
-       ImportNodeOneOut<mlir::ONNXShrinkOp>(node, 1, 1);
-    }else if (OpName == "Sigmoid") {
-       ImportNodeOneOut<mlir::ONNXSigmoidOp>(node, 1, 1);
-    }else if (OpName == "Sign") {
-       ImportNodeOneOut<mlir::ONNXSignOp>(node, 1, 1);
-    }else if (OpName == "Sin") {
-       ImportNodeOneOut<mlir::ONNXSinOp>(node, 1, 1);
-    }else if (OpName == "Sinh") {
-       ImportNodeOneOut<mlir::ONNXSinhOp>(node, 1, 1);
-    }else if (OpName == "Size") {
-       ImportNodeOneOut<mlir::ONNXSizeOp>(node, 1, 1);
-    }else if (OpName == "Slice") {
-       ImportNodeOneOut<mlir::ONNXSliceOp>(node, 5, 1);
-    }else if (OpName == "Softmax") {
-       ImportNodeOneOut<mlir::ONNXSoftmaxOp>(node, 1, 1);
-    }else if (OpName == "Softplus") {
-       ImportNodeOneOut<mlir::ONNXSoftplusOp>(node, 1, 1);
-    }else if (OpName == "Softsign") {
-       ImportNodeOneOut<mlir::ONNXSoftsignOp>(node, 1, 1);
-    }else if (OpName == "SpaceToDepth") {
-       ImportNodeOneOut<mlir::ONNXSpaceToDepthOp>(node, 1, 1);
-    }else if (OpName == "Split") {
-       ImportNodeOneOut<mlir::ONNXSplitOp>(node, 1, 1);
-    }else if (OpName == "SplitToSequence") {
-       ImportNodeOneOut<mlir::ONNXSplitToSequenceOp>(node, 2, 1);
-    }else if (OpName == "Sqrt") {
-       ImportNodeOneOut<mlir::ONNXSqrtOp>(node, 1, 1);
-    }else if (OpName == "Squeeze") {
-       ImportNodeOneOut<mlir::ONNXSqueezeOp>(node, 1, 1);
-    }else if (OpName == "StringNormalizer") {
-       ImportNodeOneOut<mlir::ONNXStringNormalizerOp>(node, 1, 1);
-    }else if (OpName == "Sub") {
-       ImportNodeOneOut<mlir::ONNXSubOp>(node, 2, 1);
-    }else if (OpName == "Sum") {
-       ImportNodeOneOut<mlir::ONNXSumOp>(node, 1, 1, true, false);
-    }else if (OpName == "Tan") {
-       ImportNodeOneOut<mlir::ONNXTanOp>(node, 1, 1);
-    }else if (OpName == "Tanh") {
-       ImportNodeOneOut<mlir::ONNXTanhOp>(node, 1, 1);
-    }else if (OpName == "TfIdfVectorizer") {
-       ImportNodeOneOut<mlir::ONNXTfIdfVectorizerOp>(node, 1, 1);
-    }else if (OpName == "ThresholdedRelu") {
-       ImportNodeOneOut<mlir::ONNXThresholdedReluOp>(node, 1, 1);
-    }else if (OpName == "Tile") {
-       ImportNodeOneOut<mlir::ONNXTileOp>(node, 2, 1);
-    }else if (OpName == "TopK") {
-       ImportNodeMultipleOuts<mlir::ONNXTopKOp>(node, 2, 2);
-    }else if (OpName == "Transpose") {
-       ImportNodeOneOut<mlir::ONNXTransposeOp>(node, 1, 1);
-    }else if (OpName == "Unique") {
-       ImportNodeMultipleOuts<mlir::ONNXUniqueOp>(node, 1, 4);
-    }else if (OpName == "Unsqueeze") {
-       ImportNodeOneOut<mlir::ONNXUnsqueezeOp>(node, 1, 1);
-    }else if (OpName == "Upsample") {
-       ImportNodeOneOut<mlir::ONNXUpsampleOp>(node, 2, 1);
-    }else if (OpName == "Where") {
-       ImportNodeOneOut<mlir::ONNXWhereOp>(node, 3, 1);
-    }else if (OpName == "Xor") {
-       ImportNodeOneOut<mlir::ONNXXorOp>(node, 2, 1);
-    }
\ No newline at end of file
+if (opName == "Abs")
+  return buildOperation<mlir::ONNXAbsOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "Acos")
+  return buildOperation<mlir::ONNXAcosOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "Acosh")
+  return buildOperation<mlir::ONNXAcoshOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "Add")
+  return buildOperation<mlir::ONNXAddOp>(node, /* expected_num_operands = */ 2, /* expected_num_results = */ 1);
+if (opName == "And")
+  return buildOperation<mlir::ONNXAndOp>(node, /* expected_num_operands = */ 2, /* expected_num_results = */ 1);
+if (opName == "ArgMax")
+  return buildOperation<mlir::ONNXArgMaxOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "ArgMin")
+  return buildOperation<mlir::ONNXArgMinOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "Asin")
+  return buildOperation<mlir::ONNXAsinOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "Asinh")
+  return buildOperation<mlir::ONNXAsinhOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "Atan")
+  return buildOperation<mlir::ONNXAtanOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "Atanh")
+  return buildOperation<mlir::ONNXAtanhOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "AveragePool")
+  return buildOperation<mlir::ONNXAveragePoolOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "BatchNormalization")
+  return ImportNodeBatchNormalization(node, /* expected_num_operands = */ 5, /* expected_num_results = */ 5);
+if (opName == "BitShift")
+  return buildOperation<mlir::ONNXBitShiftOp>(node, /* expected_num_operands = */ 2, /* expected_num_results = */ 1);
+if (opName == "Cast")
+  return buildOperation<mlir::ONNXCastOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "Ceil")
+  return buildOperation<mlir::ONNXCeilOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "Clip")
+  return buildOperation<mlir::ONNXClipOp>(node, /* expected_num_operands = */ 3, /* expected_num_results = */ 1);
+if (opName == "Compress")
+  return buildOperation<mlir::ONNXCompressOp>(node, /* expected_num_operands = */ 2, /* expected_num_results = */ 1);
+if (opName == "Concat")
+  return buildOperation<mlir::ONNXConcatOp>(node, /* expected_num_operands = */ -1, /* expected_num_results = */ 1);
+if (opName == "ConcatFromSequence")
+  return buildOperation<mlir::ONNXConcatFromSequenceOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "Constant")
+  return buildOperation<mlir::ONNXConstantOp>(node, /* expected_num_operands = */ 0, /* expected_num_results = */ 1);
+if (opName == "ConstantOfShape")
+  return buildOperation<mlir::ONNXConstantOfShapeOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "Conv")
+  return ImportNodeConv(node, /* expected_num_operands = */ 3, /* expected_num_results = */ 1);
+if (opName == "ConvInteger")
+  return buildOperation<mlir::ONNXConvIntegerOp>(node, /* expected_num_operands = */ 4, /* expected_num_results = */ 1);
+if (opName == "ConvTranspose")
+  return buildOperation<mlir::ONNXConvTransposeOp>(node, /* expected_num_operands = */ 3, /* expected_num_results = */ 1);
+if (opName == "Cos")
+  return buildOperation<mlir::ONNXCosOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "Cosh")
+  return buildOperation<mlir::ONNXCoshOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "CumSum")
+  return buildOperation<mlir::ONNXCumSumOp>(node, /* expected_num_operands = */ 2, /* expected_num_results = */ 1);
+if (opName == "DepthToSpace")
+  return buildOperation<mlir::ONNXDepthToSpaceOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "DequantizeLinear")
+  return buildOperation<mlir::ONNXDequantizeLinearOp>(node, /* expected_num_operands = */ 3, /* expected_num_results = */ 1);
+if (opName == "Det")
+  return buildOperation<mlir::ONNXDetOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "Div")
+  return buildOperation<mlir::ONNXDivOp>(node, /* expected_num_operands = */ 2, /* expected_num_results = */ 1);
+if (opName == "Dropout")
+  return buildOperation<mlir::ONNXDropoutOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 2);
+if (opName == "DynamicQuantizeLinear")
+  return buildOperation<mlir::ONNXDynamicQuantizeLinearOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 3);
+if (opName == "Elu")
+  return buildOperation<mlir::ONNXEluOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "Equal")
+  return buildOperation<mlir::ONNXEqualOp>(node, /* expected_num_operands = */ 2, /* expected_num_results = */ 1);
+if (opName == "Erf")
+  return buildOperation<mlir::ONNXErfOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "Exp")
+  return buildOperation<mlir::ONNXExpOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "Expand")
+  return buildOperation<mlir::ONNXExpandOp>(node, /* expected_num_operands = */ 2, /* expected_num_results = */ 1);
+if (opName == "EyeLike")
+  return buildOperation<mlir::ONNXEyeLikeOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "Flatten")
+  return buildOperation<mlir::ONNXFlattenOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "Floor")
+  return buildOperation<mlir::ONNXFloorOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "GRU")
+  return buildOperation<mlir::ONNXGRUOp>(node, /* expected_num_operands = */ 6, /* expected_num_results = */ 2);
+if (opName == "Gather")
+  return buildOperation<mlir::ONNXGatherOp>(node, /* expected_num_operands = */ 2, /* expected_num_results = */ 1);
+if (opName == "GatherElements")
+  return buildOperation<mlir::ONNXGatherElementsOp>(node, /* expected_num_operands = */ 2, /* expected_num_results = */ 1);
+if (opName == "GatherND")
+  return buildOperation<mlir::ONNXGatherNDOp>(node, /* expected_num_operands = */ 2, /* expected_num_results = */ 1);
+if (opName == "Gemm")
+  return buildOperation<mlir::ONNXGemmOp>(node, /* expected_num_operands = */ 3, /* expected_num_results = */ 1);
+if (opName == "GlobalAveragePool")
+  return buildOperation<mlir::ONNXGlobalAveragePoolOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "GlobalLpPool")
+  return buildOperation<mlir::ONNXGlobalLpPoolOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "GlobalMaxPool")
+  return buildOperation<mlir::ONNXGlobalMaxPoolOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "Greater")
+  return buildOperation<mlir::ONNXGreaterOp>(node, /* expected_num_operands = */ 2, /* expected_num_results = */ 1);
+if (opName == "HardSigmoid")
+  return buildOperation<mlir::ONNXHardSigmoidOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "Hardmax")
+  return buildOperation<mlir::ONNXHardmaxOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "Identity")
+  return buildOperation<mlir::ONNXIdentityOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "If")
+  return buildOperation<mlir::ONNXIfOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ -1);
+if (opName == "InstanceNormalization")
+  return buildOperation<mlir::ONNXInstanceNormalizationOp>(node, /* expected_num_operands = */ 3, /* expected_num_results = */ 1);
+if (opName == "IsInf")
+  return buildOperation<mlir::ONNXIsInfOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "IsNaN")
+  return buildOperation<mlir::ONNXIsNaNOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "LRN")
+  return buildOperation<mlir::ONNXLRNOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "LSTM")
+  return buildOperation<mlir::ONNXLSTMOp>(node, /* expected_num_operands = */ 8, /* expected_num_results = */ 3);
+if (opName == "LeakyRelu")
+  return buildOperation<mlir::ONNXLeakyReluOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "Less")
+  return buildOperation<mlir::ONNXLessOp>(node, /* expected_num_operands = */ 2, /* expected_num_results = */ 1);
+if (opName == "Log")
+  return buildOperation<mlir::ONNXLogOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "LogSoftmax")
+  return buildOperation<mlir::ONNXLogSoftmaxOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "Loop")
+  return buildOperation<mlir::ONNXLoopOp>(node);
+if (opName == "LpNormalization")
+  return buildOperation<mlir::ONNXLpNormalizationOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "LpPool")
+  return buildOperation<mlir::ONNXLpPoolOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "MatMul")
+  return buildOperation<mlir::ONNXMatMulOp>(node, /* expected_num_operands = */ 2, /* expected_num_results = */ 1);
+if (opName == "MatMulInteger")
+  return buildOperation<mlir::ONNXMatMulIntegerOp>(node, /* expected_num_operands = */ 4, /* expected_num_results = */ 1);
+if (opName == "Max")
+  return buildOperation<mlir::ONNXMaxOp>(node, /* expected_num_operands = */ -1, /* expected_num_results = */ 1);
+if (opName == "MaxPool")
+  return ImportNodeMaxPool(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 2);
+if (opName == "MaxRoiPool")
+  return buildOperation<mlir::ONNXMaxRoiPoolOp>(node, /* expected_num_operands = */ 2, /* expected_num_results = */ 1);
+if (opName == "MaxUnpool")
+  return buildOperation<mlir::ONNXMaxUnpoolOp>(node, /* expected_num_operands = */ 3, /* expected_num_results = */ 1);
+if (opName == "Mean")
+  return buildOperation<mlir::ONNXMeanOp>(node, /* expected_num_operands = */ -1, /* expected_num_results = */ 1);
+if (opName == "MeanVarianceNormalization")
+  return buildOperation<mlir::ONNXMeanVarianceNormalizationOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "Min")
+  return buildOperation<mlir::ONNXMinOp>(node, /* expected_num_operands = */ -1, /* expected_num_results = */ 1);
+if (opName == "Mod")
+  return buildOperation<mlir::ONNXModOp>(node, /* expected_num_operands = */ 2, /* expected_num_results = */ 1);
+if (opName == "Mul")
+  return buildOperation<mlir::ONNXMulOp>(node, /* expected_num_operands = */ 2, /* expected_num_results = */ 1);
+if (opName == "Multinomial")
+  return buildOperation<mlir::ONNXMultinomialOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "Neg")
+  return buildOperation<mlir::ONNXNegOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "NonMaxSuppression")
+  return buildOperation<mlir::ONNXNonMaxSuppressionOp>(node, /* expected_num_operands = */ 5, /* expected_num_results = */ 1);
+if (opName == "NonZero")
+  return buildOperation<mlir::ONNXNonZeroOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "Not")
+  return buildOperation<mlir::ONNXNotOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "OneHot")
+  return buildOperation<mlir::ONNXOneHotOp>(node, /* expected_num_operands = */ 3, /* expected_num_results = */ 1);
+if (opName == "Or")
+  return buildOperation<mlir::ONNXOrOp>(node, /* expected_num_operands = */ 2, /* expected_num_results = */ 1);
+if (opName == "PRelu")
+  return buildOperation<mlir::ONNXPReluOp>(node, /* expected_num_operands = */ 2, /* expected_num_results = */ 1);
+if (opName == "Pad")
+  return ImportNodePad(node, /* expected_num_operands = */ 3, /* expected_num_results = */ 1);
+if (opName == "Pow")
+  return buildOperation<mlir::ONNXPowOp>(node, /* expected_num_operands = */ 2, /* expected_num_results = */ 1);
+if (opName == "QLinearConv")
+  return buildOperation<mlir::ONNXQLinearConvOp>(node, /* expected_num_operands = */ 9, /* expected_num_results = */ 1);
+if (opName == "QLinearMatMul")
+  return buildOperation<mlir::ONNXQLinearMatMulOp>(node, /* expected_num_operands = */ 8, /* expected_num_results = */ 1);
+if (opName == "QuantizeLinear")
+  return buildOperation<mlir::ONNXQuantizeLinearOp>(node, /* expected_num_operands = */ 3, /* expected_num_results = */ 1);
+if (opName == "RNN")
+  return buildOperation<mlir::ONNXRNNOp>(node, /* expected_num_operands = */ 6, /* expected_num_results = */ 2);
+if (opName == "RandomNormal")
+  return buildOperation<mlir::ONNXRandomNormalOp>(node, /* expected_num_operands = */ 0, /* expected_num_results = */ 1);
+if (opName == "RandomNormalLike")
+  return buildOperation<mlir::ONNXRandomNormalLikeOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "RandomUniform")
+  return buildOperation<mlir::ONNXRandomUniformOp>(node, /* expected_num_operands = */ 0, /* expected_num_results = */ 1);
+if (opName == "RandomUniformLike")
+  return buildOperation<mlir::ONNXRandomUniformLikeOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "Range")
+  return buildOperation<mlir::ONNXRangeOp>(node, /* expected_num_operands = */ 3, /* expected_num_results = */ 1);
+if (opName == "Reciprocal")
+  return buildOperation<mlir::ONNXReciprocalOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "ReduceL1")
+  return buildOperation<mlir::ONNXReduceL1Op>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "ReduceL2")
+  return buildOperation<mlir::ONNXReduceL2Op>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "ReduceLogSum")
+  return buildOperation<mlir::ONNXReduceLogSumOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "ReduceLogSumExp")
+  return buildOperation<mlir::ONNXReduceLogSumExpOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "ReduceMax")
+  return buildOperation<mlir::ONNXReduceMaxOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "ReduceMean")
+  return buildOperation<mlir::ONNXReduceMeanOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "ReduceMin")
+  return buildOperation<mlir::ONNXReduceMinOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "ReduceProd")
+  return buildOperation<mlir::ONNXReduceProdOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "ReduceSum")
+  return buildOperation<mlir::ONNXReduceSumOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "ReduceSumSquare")
+  return buildOperation<mlir::ONNXReduceSumSquareOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "Relu")
+  return buildOperation<mlir::ONNXReluOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "Reshape")
+  return buildOperation<mlir::ONNXReshapeOp>(node, /* expected_num_operands = */ 2, /* expected_num_results = */ 1);
+if (opName == "Resize")
+  return buildOperation<mlir::ONNXResizeOp>(node, /* expected_num_operands = */ 4, /* expected_num_results = */ 1);
+if (opName == "ReverseSequence")
+  return buildOperation<mlir::ONNXReverseSequenceOp>(node, /* expected_num_operands = */ 2, /* expected_num_results = */ 1);
+if (opName == "RoiAlign")
+  return buildOperation<mlir::ONNXRoiAlignOp>(node, /* expected_num_operands = */ 3, /* expected_num_results = */ 1);
+if (opName == "Round")
+  return buildOperation<mlir::ONNXRoundOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "Scan")
+  return buildOperation<mlir::ONNXScanOp>(node);
+if (opName == "Scatter")
+  return buildOperation<mlir::ONNXScatterOp>(node, /* expected_num_operands = */ 3, /* expected_num_results = */ 1);
+if (opName == "ScatterElements")
+  return buildOperation<mlir::ONNXScatterElementsOp>(node, /* expected_num_operands = */ 3, /* expected_num_results = */ 1);
+if (opName == "ScatterND")
+  return buildOperation<mlir::ONNXScatterNDOp>(node, /* expected_num_operands = */ 3, /* expected_num_results = */ 1);
+if (opName == "Selu")
+  return buildOperation<mlir::ONNXSeluOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "SequenceAt")
+  return buildOperation<mlir::ONNXSequenceAtOp>(node, /* expected_num_operands = */ 2, /* expected_num_results = */ 1);
+if (opName == "SequenceConstruct")
+  return buildOperation<mlir::ONNXSequenceConstructOp>(node, /* expected_num_operands = */ -1, /* expected_num_results = */ 1);
+if (opName == "SequenceEmpty")
+  return buildOperation<mlir::ONNXSequenceEmptyOp>(node, /* expected_num_operands = */ 0, /* expected_num_results = */ 1);
+if (opName == "SequenceErase")
+  return buildOperation<mlir::ONNXSequenceEraseOp>(node, /* expected_num_operands = */ 2, /* expected_num_results = */ 1);
+if (opName == "SequenceInsert")
+  return buildOperation<mlir::ONNXSequenceInsertOp>(node, /* expected_num_operands = */ 3, /* expected_num_results = */ 1);
+if (opName == "SequenceLength")
+  return buildOperation<mlir::ONNXSequenceLengthOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "Shape")
+  return buildOperation<mlir::ONNXShapeOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "Shrink")
+  return buildOperation<mlir::ONNXShrinkOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "Sigmoid")
+  return buildOperation<mlir::ONNXSigmoidOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "Sign")
+  return buildOperation<mlir::ONNXSignOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "Sin")
+  return buildOperation<mlir::ONNXSinOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "Sinh")
+  return buildOperation<mlir::ONNXSinhOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "Size")
+  return buildOperation<mlir::ONNXSizeOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "Slice")
+  return buildOperation<mlir::ONNXSliceOp>(node, /* expected_num_operands = */ 5, /* expected_num_results = */ 1);
+if (opName == "Softmax")
+  return buildOperation<mlir::ONNXSoftmaxOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "Softplus")
+  return buildOperation<mlir::ONNXSoftplusOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "Softsign")
+  return buildOperation<mlir::ONNXSoftsignOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "SpaceToDepth")
+  return buildOperation<mlir::ONNXSpaceToDepthOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "Split")
+  return buildOperation<mlir::ONNXSplitOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ -1);
+if (opName == "SplitToSequence")
+  return buildOperation<mlir::ONNXSplitToSequenceOp>(node, /* expected_num_operands = */ 2, /* expected_num_results = */ 1);
+if (opName == "Sqrt")
+  return buildOperation<mlir::ONNXSqrtOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "Squeeze")
+  return buildOperation<mlir::ONNXSqueezeOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "StringNormalizer")
+  return buildOperation<mlir::ONNXStringNormalizerOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "Sub")
+  return buildOperation<mlir::ONNXSubOp>(node, /* expected_num_operands = */ 2, /* expected_num_results = */ 1);
+if (opName == "Sum")
+  return buildOperation<mlir::ONNXSumOp>(node, /* expected_num_operands = */ -1, /* expected_num_results = */ 1);
+if (opName == "Tan")
+  return buildOperation<mlir::ONNXTanOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "Tanh")
+  return buildOperation<mlir::ONNXTanhOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "TfIdfVectorizer")
+  return buildOperation<mlir::ONNXTfIdfVectorizerOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "ThresholdedRelu")
+  return buildOperation<mlir::ONNXThresholdedReluOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "Tile")
+  return buildOperation<mlir::ONNXTileOp>(node, /* expected_num_operands = */ 2, /* expected_num_results = */ 1);
+if (opName == "TopK")
+  return buildOperation<mlir::ONNXTopKOp>(node, /* expected_num_operands = */ 2, /* expected_num_results = */ 2);
+if (opName == "Transpose")
+  return buildOperation<mlir::ONNXTransposeOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "Unique")
+  return buildOperation<mlir::ONNXUniqueOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 4);
+if (opName == "Unsqueeze")
+  return buildOperation<mlir::ONNXUnsqueezeOp>(node, /* expected_num_operands = */ 1, /* expected_num_results = */ 1);
+if (opName == "Upsample")
+  return buildOperation<mlir::ONNXUpsampleOp>(node, /* expected_num_operands = */ 2, /* expected_num_results = */ 1);
+if (opName == "Where")
+  return buildOperation<mlir::ONNXWhereOp>(node, /* expected_num_operands = */ 3, /* expected_num_results = */ 1);
+if (opName == "Xor")
+  return buildOperation<mlir::ONNXXorOp>(node, /* expected_num_operands = */ 2, /* expected_num_results = */ 1);
diff --git a/src/conversion/onnx_to_krnl/rewrite_patterns/math/gemm.inc b/src/conversion/onnx_to_krnl/rewrite_patterns/math/gemm.inc
index af1da9e..8a9bf8e 100644
--- a/src/conversion/onnx_to_krnl/rewrite_patterns/math/gemm.inc
+++ b/src/conversion/onnx_to_krnl/rewrite_patterns/math/gemm.inc
@@ -17,20 +17,24 @@ struct ONNXGemmOpLowering : public ConversionPattern {
   matchAndRewrite(Operation *op, ArrayRef<Value> operands,
                   ConversionPatternRewriter &rewriter) const final {
     auto loc = op->getLoc();
-    auto has_bias = (operands.size() == 3);
+    // The first predicate is unnecessary when we remove ONNXGemmNoBiasOp.
+    bool hasBias = (operands.size() == 3) &&
+                   (!op->getOperand(2).getType().isa<NoneType>());
 
     Value A, B, C;
     A = operands[0];
     B = operands[1];
-    if (has_bias)
+    if (hasBias)
       C = operands[2];
 
     auto memRefType = convertToMemRefType(*op->result_type_begin());
 
-    auto alphaAttr = FloatAttr::get(memRefType.getElementType(),
-        llvm::dyn_cast<GemmOp>(op).alpha().convertToFloat());
-    auto betaAttr = FloatAttr::get(memRefType.getElementType(),
-        llvm::dyn_cast<GemmOp>(op).beta().convertToFloat());
+    auto alphaAttr =
+        FloatAttr::get(memRefType.getElementType(),
+                       llvm::dyn_cast<GemmOp>(op).alpha().convertToFloat());
+    auto betaAttr =
+        FloatAttr::get(memRefType.getElementType(),
+                       llvm::dyn_cast<GemmOp>(op).beta().convertToFloat());
     auto alpha = rewriter.create<ConstantOp>(loc, alphaAttr);
     auto beta = rewriter.create<ConstantOp>(loc, betaAttr);
 
@@ -68,8 +72,8 @@ struct ONNXGemmOpLowering : public ConversionPattern {
     // Define loops.
     std::vector<Value> originalLoops;
     std::vector<Value> optimizedLoops;
-    Block *optimizationBlock = defineLoops(rewriter, loc, originalLoops,
-            optimizedLoops, numLoops);
+    Block *optimizationBlock =
+        defineLoops(rewriter, loc, originalLoops, optimizedLoops, numLoops);
 
     // We have two Krnl loops:
     // - Outer loop iterates over the output matrix dimensions, and
@@ -83,8 +87,7 @@ struct ONNXGemmOpLowering : public ConversionPattern {
       outerLoops.push_back(originalLoops[i]);
       optimizedOuterLoops.push_back(optimizedLoops[i]);
     }
-    KrnlIterateOperandPack outerPack(rewriter, outerLoops,
-                                      optimizedOuterLoops);
+    KrnlIterateOperandPack outerPack(rewriter, outerLoops, optimizedOuterLoops);
     // Induction variables for the outer loops
     for (int i = 0; i < 2; ++i)
       addDimensionToPack(rewriter, loc, outerPack, alloc, i);
@@ -106,20 +109,19 @@ struct ONNXGemmOpLowering : public ConversionPattern {
     int64_t K_B_Idx = (isTransB) ? 1 : 0;
     reductionPack.pushConstantBound(0);
     if (ATy.getShape()[K_A_Idx] != -1)
-        reductionPack.pushConstantBound(ATy.getShape()[K_A_Idx]);
+      reductionPack.pushConstantBound(ATy.getShape()[K_A_Idx]);
+    else if (BTy.getShape()[K_B_Idx] != -1)
+      reductionPack.pushConstantBound(BTy.getShape()[K_B_Idx]);
     else
-      if (BTy.getShape()[K_B_Idx] != -1)
-        reductionPack.pushConstantBound(BTy.getShape()[K_B_Idx]);
-      else
-        reductionPack.pushOperandBound(
-            rewriter.create<DimOp>(loc, B, K_B_Idx).getResult());
+      reductionPack.pushOperandBound(
+          rewriter.create<DimOp>(loc, B, K_B_Idx).getResult());
 
     // Get run-time dimension information for unknown dimensions used for
     // broadcasting.
     // GemmOp supports unidirectional broadcasting from C to A*B.
     // Hence, it must be enough to get broadcasting information for C only.
     std::map<int, Value> broadcastedDimInfo;
-    if (has_bias) {
+    if (hasBias) {
       auto shape = C.getType().cast<MemRefType>().getShape();
       for (int i = 0; i < shape.size(); ++i) {
         if (shape[i] < 0) {
@@ -162,7 +164,7 @@ struct ONNXGemmOpLowering : public ConversionPattern {
     // Compute beta*C, and add up to alpha*A*B (unidirectional broadcasting)
     auto loadedAB = rewriter.create<LoadOp>(loc, alloc, loopMNIVs);
     auto alphaAB = rewriter.create<MulFOp>(loc, alpha, loadedAB);
-    if (has_bias) {
+    if (hasBias) {
       auto loopCIVs = getLoopIVsForBroadcasting(loc, rewriter, loopMNIVs, C,
                                                 broadcastedDimInfo);
       auto loadedC = rewriter.create<LoadOp>(loc, C, loopCIVs);
@@ -210,8 +212,8 @@ struct ONNXGemmOpLowering : public ConversionPattern {
   }
 };
 
-void populateLoweringONNXGemmOpPattern(
-    OwningRewritePatternList &patterns, MLIRContext *ctx) {
+void populateLoweringONNXGemmOpPattern(OwningRewritePatternList &patterns,
+                                       MLIRContext *ctx) {
   patterns.insert<ONNXGemmOpLowering<ONNXGemmOp>>(ctx);
   patterns.insert<ONNXGemmOpLowering<ONNXGemmNoBiasOp>>(ctx);
 }
diff --git a/src/dialect/onnx/onnx_ops.cpp b/src/dialect/onnx/onnx_ops.cpp
index f3cfeef..4de481a 100644
--- a/src/dialect/onnx/onnx_ops.cpp
+++ b/src/dialect/onnx/onnx_ops.cpp
@@ -120,25 +120,19 @@ void ONNXExpOp::inferShapes() { getResult().setType(getOperand().getType()); }
 // Tanh
 /// Infer the output shape of the ONNXTanhOp. This method is required by the
 /// shape inference interface.
-void ONNXTanhOp::inferShapes() {
-  getResult().setType(getOperand().getType());
-}
+void ONNXTanhOp::inferShapes() { getResult().setType(getOperand().getType()); }
 
 //===----------------------------------------------------------------------===//
 // Sinh
 /// Infer the output shape of the ONNXSinhOp. This method is required by the
 /// shape inference interface.
-void ONNXSinhOp::inferShapes() {
-  getResult().setType(getOperand().getType());
-}
+void ONNXSinhOp::inferShapes() { getResult().setType(getOperand().getType()); }
 
 //===----------------------------------------------------------------------===//
 // Cosh
 /// Infer the output shape of the ONNXCoshOp. This method is required by the
 /// shape inference interface.
-void ONNXCoshOp::inferShapes() {
-  getResult().setType(getOperand().getType());
-}
+void ONNXCoshOp::inferShapes() { getResult().setType(getOperand().getType()); }
 
 //===----------------------------------------------------------------------===//
 // Cos
@@ -178,9 +172,7 @@ void ONNXEluOp::inferShapes() { getResult().setType(getOperand().getType()); }
 // Relu
 /// Infer the output shape of the ONNXReluOp. This method is required by the
 /// shape inference interface.
-void ONNXReluOp::inferShapes() {
-  getResult().setType(getOperand().getType());
-}
+void ONNXReluOp::inferShapes() { getResult().setType(getOperand().getType()); }
 
 //===----------------------------------------------------------------------===//
 // LeakyRelu
@@ -194,9 +186,7 @@ void ONNXLeakyReluOp::inferShapes() {
 // Selu
 /// Infer the output shape of the ONNXSeluOp. This method is required by
 /// the shape inference interface.
-void ONNXSeluOp::inferShapes() {
-  getResult().setType(getOperand().getType());
-}
+void ONNXSeluOp::inferShapes() { getResult().setType(getOperand().getType()); }
 
 //===----------------------------------------------------------------------===//
 // Reciprocal
@@ -234,17 +224,13 @@ void ONNXSoftsignOp::inferShapes() {
 // Sqrt
 /// Infer the output shape of the ONNXSqrtOp. This method is required by
 /// the shape inference interface.
-void ONNXSqrtOp::inferShapes() {
-  getResult().setType(getOperand().getType());
-}
+void ONNXSqrtOp::inferShapes() { getResult().setType(getOperand().getType()); }
 
 //===----------------------------------------------------------------------===//
 // Sign
 /// Infer the output shape of the ONNXSignOp. This method is required by
 /// the shape inference interface.
-void ONNXSignOp::inferShapes() {
-  getResult().setType(getOperand().getType());
-}
+void ONNXSignOp::inferShapes() { getResult().setType(getOperand().getType()); }
 
 //===----------------------------------------------------------------------===//
 // Add
@@ -423,8 +409,7 @@ void ONNXMatMulOp::inferShapes() {
     // numpy rules the types need to be extended to 1xN and Nx1. Helper sizes
     // need to be removed after the multiplication but cannot be removed if all
     // sizes are 1.
-    if (lhsShape[0] != -1 && rhsShape[0] != -1 &&
-        lhsShape[0] != rhsShape[0])
+    if (lhsShape[0] != -1 && rhsShape[0] != -1 && lhsShape[0] != rhsShape[0])
       emitError("Attempt to multiply incompatible matrices.");
     dims.emplace_back(1);
   } else if (lhsShape.size() == 1 && rhsShape.size() >= 2) {
@@ -541,14 +526,14 @@ void ONNXMatMulOp::inferShapes() {
 // Gemm
 
 void ONNXGemmOp::inferShapes() {
+  bool hasBias = !getOperand(2).getType().isa<NoneType>();
   // Cannot infer shape if no shape exists.
   if (!getOperand(0).getType().isa<RankedTensorType>() ||
       !getOperand(1).getType().isa<RankedTensorType>() ||
-      !getOperand(2).getType().isa<RankedTensorType>())
+      (hasBias && !getOperand(2).getType().isa<RankedTensorType>()))
     return;
   auto lhsTy = getOperand(0).getType().cast<RankedTensorType>();
   auto rhsTy = getOperand(1).getType().cast<RankedTensorType>();
-  auto biasTy = getOperand(2).getType().cast<RankedTensorType>();
 
   int64_t M, N, K_A, K_B;
   M = (transA() == 0) ? lhsTy.getShape()[0] : lhsTy.getShape()[1];
@@ -560,15 +545,18 @@ void ONNXGemmOp::inferShapes() {
     emitError("Tensor shapes mismatched.");
   }
 
-  // Check whether bias is unidirectional broadcasting or not.
-  auto shape = biasTy.getShape();
-  int rank = shape.size();
-  if ((rank > 2) ||
-      (rank >= 1 && shape[rank - 1] != -1 && N != -1 && N != shape[rank - 1] &&
-       shape[rank - 1] != 1) ||
-      (rank == 2 && shape[rank - 2] != -1 && M != -1 && M != shape[rank - 2] &&
-       shape[rank - 2] != 1)) {
-    emitError("Bias shape mismatched.");
+  if (hasBias) {
+    // Check whether bias is unidirectional broadcasting or not.
+    auto biasTy = getOperand(2).getType().cast<RankedTensorType>();
+    auto shape = biasTy.getShape();
+    int rank = shape.size();
+    if ((rank > 2) ||
+        (rank >= 1 && shape[rank - 1] != -1 && N != -1 &&
+         N != shape[rank - 1] && shape[rank - 1] != 1) ||
+        (rank == 2 && shape[rank - 2] != -1 && M != -1 &&
+         M != shape[rank - 2] && shape[rank - 2] != 1)) {
+      emitError("Bias shape mismatched.");
+    }
   }
 
   SmallVector<int64_t, 2> dims;
@@ -713,7 +701,6 @@ void ONNXTransposeOp::inferShapes() {
   getResult().setType(RankedTensorType::get(dims, arrayTy.getElementType()));
 }
 
-
 //===----------------------------------------------------------------------===//
 
 // ReduceMax
@@ -801,7 +788,8 @@ void ONNXConvNoBiasOp::inferShapes() {
   // Required attribute auto_pad defaults to NOTSET.
   auto autoPad = auto_pad();
   // Group is a required attribute and should have default value of 1.
-  int64_t group = ONNXConvNoBiasOp::group().getSExtValue(); //.getLimitedValue();
+  int64_t group =
+      ONNXConvNoBiasOp::group().getSExtValue(); //.getLimitedValue();
   // Check that the X.shape[1] == (W.shape[1] * group) == C condition holds.
   if (dataShape[1] != (weightShape[1] * group))
     emitError("Channel dimension mismatch.");
@@ -859,8 +847,10 @@ void ONNXConvNoBiasOp::inferShapes() {
     if (dilations.getValue().size() != nDims)
       emitError("dilations length incompatible with spatial dimensions.");
     for (int i = 0; i < nDims; ++i)
-      kernelDims[i] = (kernelDims[i] + 1) *
-          (dilations.getValue()[i]).cast<IntegerAttr>().getInt() - 1;
+      kernelDims[i] =
+          (kernelDims[i] + 1) *
+              (dilations.getValue()[i]).cast<IntegerAttr>().getInt() -
+          1;
   }
 
   // Subtract kernel dimensions from input data dimensions.
@@ -906,8 +896,7 @@ void ONNXConvNoBiasOp::inferShapes() {
     if (strides.getValue().size() != nDims)
       emitError("strides length incompatible with spatial dimensions.");
     for (int i = 0; i < nDims; ++i) {
-      int64_t stride =
-          strides.getValue()[i].cast<IntegerAttr>().getInt();
+      int64_t stride = strides.getValue()[i].cast<IntegerAttr>().getInt();
       outSpatialDims[i] = floor(outSpatialDims[i] / stride);
     }
   }
@@ -934,12 +923,13 @@ void ONNXMaxPoolSingleOutOp::inferShapes() {
   auto xRank = xShape.size();
 
   // 2) analyse parameters
-  // get kernel sizes from kernel_shape attribute 
+  // get kernel sizes from kernel_shape attribute
   auto kernelShape = kernel_shape();
   if (!kernelShape)
-    emitError("kernel_shape is a mandatory attribute for which there is no default.");
+    emitError(
+        "kernel_shape is a mandatory attribute for which there is no default.");
   auto kernelShapeArray = kernelShape.getValue();
-  auto kernelRank = kernelShape.size(); 
+  auto kernelRank = kernelShape.size();
   if (kernelRank > xRank)
     emitError("kernel_shape spatial dimension is too large.");
   auto kernelOffset = xRank - kernelRank;
@@ -951,41 +941,42 @@ void ONNXMaxPoolSingleOutOp::inferShapes() {
   SmallVector<int64_t, 4> actualDilations;
   auto dilationsOpt = dilations();
   if (dilationsOpt.hasValue()) {
-    auto dilationsArray = dilationsOpt.getValue().getValue(); // opt -> attr -> array
+    auto dilationsArray =
+        dilationsOpt.getValue().getValue(); // opt -> attr -> array
     if (dilationsArray.size() != kernelRank)
-        emitError("dialation rank is not the same as the spatial rank.");
+      emitError("dialation rank is not the same as the spatial rank.");
     // fill in the actual values
     for (int i = 0; i < kernelRank; ++i) {
       int64_t d = (dilationsArray[i]).cast<IntegerAttr>().getInt();
-      if (d < 1) 
+      if (d < 1)
         emitError("dialation value must be nonzero positive.");
       actualDilations.emplace_back(d);
     }
   } else {
-    for(int i=0; i < kernelRank; ++i) {
-      actualDilations.emplace_back(1);      
+    for (int i = 0; i < kernelRank; ++i) {
+      actualDilations.emplace_back(1);
     }
   }
 
   // storage order
-  
+
   // strides
   SmallVector<int64_t, 4> actualStrides;
   auto stridesOpt = strides();
   if (stridesOpt.hasValue()) {
     auto stridesArray = stridesOpt.getValue().getValue();
     if (stridesArray.size() != kernelRank)
-        emitError("strides rank is not the same as the spatial rank.");
+      emitError("strides rank is not the same as the spatial rank.");
     // fill in the actual values
     for (int i = 0; i < kernelRank; ++i) {
       int64_t s = (stridesArray[i]).cast<IntegerAttr>().getInt();
-      if (s < 1) 
+      if (s < 1)
         emitError("strides value must be nonzero positive.");
       actualStrides.emplace_back(s);
     }
   } else {
-    for(int i=0; i < kernelRank; ++i) {
-      actualStrides.emplace_back(1);      
+    for (int i = 0; i < kernelRank; ++i) {
+      actualStrides.emplace_back(1);
     }
   }
 
@@ -1002,9 +993,9 @@ void ONNXMaxPoolSingleOutOp::inferShapes() {
       if (padsArray.size() != 2 * kernelRank)
         emitError("pads rank is not twice the spatial rank.");
       // fill in the actual values
-      for (int i = 0; i < 2*kernelRank; ++i) {
+      for (int i = 0; i < 2 * kernelRank; ++i) {
         int64_t p = (padsArray[i]).cast<IntegerAttr>().getInt();
-        if (p < 0) 
+        if (p < 0)
           emitError("pads value must be nonnegative.");
         actualPads.emplace_back(p);
       }
@@ -1016,24 +1007,26 @@ void ONNXMaxPoolSingleOutOp::inferShapes() {
     defaultPads = true;
   } else if (autoPad == "SAME_UPPER" || autoPad == "SAME_LOWER") {
     // init pad with zero
-    for(int i=0; i<2*kernelRank; ++i) {
+    for (int i = 0; i < 2 * kernelRank; ++i) {
       actualPads.emplace_back(0);
     }
-    for(int i=0; i<kernelRank; ++i) {
-      auto inputSpatialShape = xShape[kernelOffset  + i];
-      auto kernelSpatialShape = (kernelShapeArray[i]).cast<IntegerAttr>().getInt();
+    for (int i = 0; i < kernelRank; ++i) {
+      auto inputSpatialShape = xShape[kernelOffset + i];
+      auto kernelSpatialShape =
+          (kernelShapeArray[i]).cast<IntegerAttr>().getInt();
       auto dilations = actualDilations[i];
       auto strideSpatialShape = actualStrides[i];
-      int64_t outputSpatialShape = ceil((1.0 * inputSpatialShape) /
-        (1.0 * strideSpatialShape));
-      auto sumOfPad = (outputSpatialShape - 1) * strideSpatialShape + 
-        ((kernelSpatialShape - 1) * dilations + 1) - inputSpatialShape;
+      int64_t outputSpatialShape =
+          ceil((1.0 * inputSpatialShape) / (1.0 * strideSpatialShape));
+      auto sumOfPad = (outputSpatialShape - 1) * strideSpatialShape +
+                      ((kernelSpatialShape - 1) * dilations + 1) -
+                      inputSpatialShape;
       actualPads[i] = actualPads[kernelRank + i] = sumOfPad / 2;
       if (sumOfPad % 2 != 0) {
         if (autoPad == "SAME_UPPER") {
           actualPads[kernelRank + i] += 1;
         } else {
-          actualPads[i] += 1;          
+          actualPads[i] += 1;
         }
       }
     }
@@ -1042,24 +1035,26 @@ void ONNXMaxPoolSingleOutOp::inferShapes() {
   }
   // handle case where default pad values must be used
   if (defaultPads) {
-    for(int i=0; i<2*kernelRank; ++i) {
+    for (int i = 0; i < 2 * kernelRank; ++i) {
       actualPads.emplace_back(0);
     }
   }
 
-  // initialize output shape 
+  // initialize output shape
   SmallVector<int64_t, 4> yShape(xShape.begin(), xShape.end());
   // for all kernel dimensions
-  for(int i=0; i<kernelRank; ++i) {
-    auto inputSpatialShape = xShape[kernelOffset  + i];
-    auto padShape = actualPads[i] + actualPads[kernelRank+i];
-    auto kernelSpatialShape = (kernelShapeArray[i]).cast<IntegerAttr>().getInt();
+  for (int i = 0; i < kernelRank; ++i) {
+    auto inputSpatialShape = xShape[kernelOffset + i];
+    auto padShape = actualPads[i] + actualPads[kernelRank + i];
+    auto kernelSpatialShape =
+        (kernelShapeArray[i]).cast<IntegerAttr>().getInt();
     auto dilations = actualDilations[i];
     auto strideSpatialShape = actualStrides[i];
-    ///output_spatial_shape[i] = ceil( (input_spatial_shape[i] + pad_shape[i] - 
-    //  ((kernel_spatial_shape[i] - 1) * dilations[i] + 1)) / strides_spatial_shape[i] + 1)
-    double numerator = inputSpatialShape + padShape - 
-      ((kernelSpatialShape - 1) * dilations + 1);
+    // output_spatial_shape[i] = ceil( (input_spatial_shape[i] + pad_shape[i] -
+    //  ((kernel_spatial_shape[i] - 1) * dilations[i] + 1)) /
+    //  strides_spatial_shape[i] + 1)
+    double numerator = inputSpatialShape + padShape -
+                       ((kernelSpatialShape - 1) * dilations + 1);
     double denominator = strideSpatialShape;
     int64_t res;
     if (ceilMode) {
diff --git a/src/dialect/onnx/onnxop.inc b/src/dialect/onnx/onnxop.inc
index 38d7075..abbda6d 100644
--- a/src/dialect/onnx/onnxop.inc
+++ b/src/dialect/onnx/onnxop.inc
@@ -1,16 +1,17 @@
 //********************************************************
-//   Warning: Do not modify this file directly
-//   This file is automatically generated via script
-//   Details can be found in doc/readonnxdefs.md
+//   This file is generated on UTC-02/24/2020, 06:44:13.
+//   Do not modify this file directly.
+//   This file is automatically generated via script.
+//   Details can be found in doc/readonnxdefs.md .
 //********************************************************
 
-def ONNXAbsOp:ONNX_Op<"Abs", 
-    [NoSideEffect]> {
+def ONNXAbsOp:ONNX_Op<"Abs",
+  [NoSideEffect]> {
   let summary = "ONNX Abs operation";
   let description = [{
-    "Absolute takes one input data (Tensor<T>) and produces one output data"
-    "(Tensor<T>) where the absolute is, y = abs(x), is applied to"
-    "the tensor elementwise."
+  "Absolute takes one input data (Tensor<T>) and produces one output data"
+  "(Tensor<T>) where the absolute is, y = abs(x), is applied to"
+  "the tensor elementwise."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$X);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y);
@@ -25,639 +26,639 @@ def ONNXAbsOp:ONNX_Op<"Abs",
       outputTypes.emplace_back(UnrankedTensorType::get(elementType));
       build(builder, state, outputTypes, operands, attributes);
     }]>
-  ];
+    ];
 }
 
-def ONNXAcosOp:ONNX_Op<"Acos", 
-    [NoSideEffect]> {
+def ONNXAcosOp:ONNX_Op<"Acos",
+  [NoSideEffect]> {
   let summary = "ONNX Acos operation";
   let description = [{
-    "Calculates the arccosine (inverse of cosine) of the given input tensor, element-wise."
+  "Calculates the arccosine (inverse of cosine) of the given input tensor, element-wise."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$input);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXAcoshOp:ONNX_Op<"Acosh", 
-    [NoSideEffect]> {
+def ONNXAcoshOp:ONNX_Op<"Acosh",
+  [NoSideEffect]> {
   let summary = "ONNX Acosh operation";
   let description = [{
-    "Calculates the hyperbolic arccosine of the given input tensor element-wise."
+  "Calculates the hyperbolic arccosine of the given input tensor element-wise."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$input);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXAddOp:ONNX_Op<"Add", 
-    [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+def ONNXAddOp:ONNX_Op<"Add",
+  [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
   let hasCanonicalizer = 1;
   let summary = "ONNX Add operation";
   let description = [{
-    "Performs element-wise binary addition (with Numpy-style broadcasting support)."
-    ""
-    "This operator supports **multidirectional (i.e., Numpy-style) broadcasting**; for more details please check [the doc](Broadcasting.md)."
+  "Performs element-wise binary addition (with Numpy-style broadcasting support)."
+  ""
+  "This operator supports **multidirectional (i.e., Numpy-style) broadcasting**; for more details please check [the doc](Broadcasting.md)."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$A,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$B);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$B);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$C);
 }
 
-def ONNXAndOp:ONNX_Op<"And", 
-    [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+def ONNXAndOp:ONNX_Op<"And",
+  [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
   let summary = "ONNX And operation";
   let description = [{
-    "Returns the tensor resulted from performing the `and` logical operation"
-    "elementwise on the input tensors `A` and `B` (with Numpy-style broadcasting support)."
-    ""
-    "This operator supports **multidirectional (i.e., Numpy-style) broadcasting**; for more details please check [the doc](Broadcasting.md)."
+  "Returns the tensor resulted from performing the `and` logical operation"
+  "elementwise on the input tensors `A` and `B` (with Numpy-style broadcasting support)."
+  ""
+  "This operator supports **multidirectional (i.e., Numpy-style) broadcasting**; for more details please check [the doc](Broadcasting.md)."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$A,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$B);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$B);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$C);
 }
 
-def ONNXArgMaxOp:ONNX_Op<"ArgMax", 
-    [NoSideEffect]> {
+def ONNXArgMaxOp:ONNX_Op<"ArgMax",
+  [NoSideEffect]> {
   let summary = "ONNX ArgMax operation";
   let description = [{
-    "Computes the indices of the max elements of the input tensor's element along the "
-    "provided axis. The resulted tensor has the same rank as the input if keepdims equal 1."
-    "If keepdims equal 0, then the resulted tensor have the reduced dimension pruned. "
-    "The type of the output tensor is integer."
+  "Computes the indices of the max elements of the input tensor's element along the "
+  "provided axis. The resulted tensor has the same rank as the input if keepdims equal 1."
+  "If keepdims equal 0, then the resulted tensor have the reduced dimension pruned. "
+  "The type of the output tensor is integer."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$data,
-           DefaultValuedAttr<I64Attr, "0">:$axis,
-           DefaultValuedAttr<I64Attr, "1">:$keepdims);
+    DefaultValuedAttr<I64Attr, "0">:$axis,
+    DefaultValuedAttr<I64Attr, "1">:$keepdims);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$reduced);
 }
 
-def ONNXArgMinOp:ONNX_Op<"ArgMin", 
-    [NoSideEffect]> {
+def ONNXArgMinOp:ONNX_Op<"ArgMin",
+  [NoSideEffect]> {
   let summary = "ONNX ArgMin operation";
   let description = [{
-    "Computes the indices of the min elements of the input tensor's element along the "
-    "provided axis. The resulted tensor has the same rank as the input if keepdims equal 1."
-    "If keepdims equal 0, then the resulted tensor have the reduced dimension pruned. "
-    "The type of the output tensor is integer."
+  "Computes the indices of the min elements of the input tensor's element along the "
+  "provided axis. The resulted tensor has the same rank as the input if keepdims equal 1."
+  "If keepdims equal 0, then the resulted tensor have the reduced dimension pruned. "
+  "The type of the output tensor is integer."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$data,
-           DefaultValuedAttr<I64Attr, "0">:$axis,
-           DefaultValuedAttr<I64Attr, "1">:$keepdims);
+    DefaultValuedAttr<I64Attr, "0">:$axis,
+    DefaultValuedAttr<I64Attr, "1">:$keepdims);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$reduced);
 }
 
-def ONNXAsinOp:ONNX_Op<"Asin", 
-    [NoSideEffect]> {
+def ONNXAsinOp:ONNX_Op<"Asin",
+  [NoSideEffect]> {
   let summary = "ONNX Asin operation";
   let description = [{
-    "Calculates the arcsine (inverse of sine) of the given input tensor, element-wise."
+  "Calculates the arcsine (inverse of sine) of the given input tensor, element-wise."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$input);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXAsinhOp:ONNX_Op<"Asinh", 
-    [NoSideEffect]> {
+def ONNXAsinhOp:ONNX_Op<"Asinh",
+  [NoSideEffect]> {
   let summary = "ONNX Asinh operation";
   let description = [{
-    "Calculates the hyperbolic arcsine of the given input tensor element-wise."
+  "Calculates the hyperbolic arcsine of the given input tensor element-wise."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$input);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXAtanOp:ONNX_Op<"Atan", 
-    [NoSideEffect]> {
+def ONNXAtanOp:ONNX_Op<"Atan",
+  [NoSideEffect]> {
   let summary = "ONNX Atan operation";
   let description = [{
-    "Calculates the arctangent (inverse of tangent) of the given input tensor, element-wise."
+  "Calculates the arctangent (inverse of tangent) of the given input tensor, element-wise."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$input);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXAtanhOp:ONNX_Op<"Atanh", 
-    [NoSideEffect]> {
+def ONNXAtanhOp:ONNX_Op<"Atanh",
+  [NoSideEffect]> {
   let summary = "ONNX Atanh operation";
   let description = [{
-    "Calculates the hyperbolic arctangent of the given input tensor element-wise."
+  "Calculates the hyperbolic arctangent of the given input tensor element-wise."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$input);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXAveragePoolOp:ONNX_Op<"AveragePool", 
-    [NoSideEffect]> {
+def ONNXAveragePoolOp:ONNX_Op<"AveragePool",
+  [NoSideEffect]> {
   let summary = "ONNX AveragePool operation";
   let description = [{
-    "AveragePool consumes an input tensor X and applies average pooling across"
-    " the tensor according to kernel sizes, stride sizes, and pad lengths."
-    " average pooling consisting of computing the average on all values of a"
-    " subset of the input tensor according to the kernel size and downsampling the"
-    " data into the output tensor Y for further processing. The output spatial shape will be following:"
-    " ```"
-    " output_spatial_shape[i] = floor((input_spatial_shape[i] + pad_shape[i] - kernel_spatial_shape[i]) / strides_spatial_shape[i] + 1)"
-    " ```"
-    " or"
-    " ```"
-    " output_spatial_shape[i] = ceil((input_spatial_shape[i] + pad_shape[i] - kernel_spatial_shape[i]) / strides_spatial_shape[i] + 1)"
-    " ```"
-    " if ceil_mode is enabled"
-    ""
-    " ```"
-    " * pad_shape[i] is sum of pads along axis i"
-    " ```"
-    ""
-    " `auto_pad` is a DEPRECATED attribute. If you are using them currently, the output spatial shape will be following:"
-    " ```"
-    " VALID: output_spatial_shape[i] = ceil((input_spatial_shape[i] - kernel_spatial_shape[i] + 1) / strides_spatial_shape[i])"
-    " SAME_UPPER or SAME_LOWER: output_spatial_shape[i] = ceil(input_spatial_shape[i] / strides_spatial_shape[i])"
-    " ```"
-    " And pad shape will be following if `SAME_UPPER` or `SAME_LOWER`:"
-    " ```"
-    " pad_shape[i] = (output_spatial_shape[i] - 1) * strides_spatial_shape[i] + kernel_spatial_shape[i] - input_spatial_shape[i]"
-    " ```"
-    " The output of each pooling window is divided by the number of elements (exclude pad when attribute count_include_pad is zero)."
-    " "
+  "AveragePool consumes an input tensor X and applies average pooling across"
+  " the tensor according to kernel sizes, stride sizes, and pad lengths."
+  " average pooling consisting of computing the average on all values of a"
+  " subset of the input tensor according to the kernel size and downsampling the"
+  " data into the output tensor Y for further processing. The output spatial shape will be following:"
+  " ```"
+  " output_spatial_shape[i] = floor((input_spatial_shape[i] + pad_shape[i] - kernel_spatial_shape[i]) / strides_spatial_shape[i] + 1)"
+  " ```"
+  " or"
+  " ```"
+  " output_spatial_shape[i] = ceil((input_spatial_shape[i] + pad_shape[i] - kernel_spatial_shape[i]) / strides_spatial_shape[i] + 1)"
+  " ```"
+  " if ceil_mode is enabled"
+  ""
+  " ```"
+  " * pad_shape[i] is sum of pads along axis i"
+  " ```"
+  ""
+  " `auto_pad` is a DEPRECATED attribute. If you are using them currently, the output spatial shape will be following:"
+  " ```"
+  " VALID: output_spatial_shape[i] = ceil((input_spatial_shape[i] - kernel_spatial_shape[i] + 1) / strides_spatial_shape[i])"
+  " SAME_UPPER or SAME_LOWER: output_spatial_shape[i] = ceil(input_spatial_shape[i] / strides_spatial_shape[i])"
+  " ```"
+  " And pad shape will be following if `SAME_UPPER` or `SAME_LOWER`:"
+  " ```"
+  " pad_shape[i] = (output_spatial_shape[i] - 1) * strides_spatial_shape[i] + kernel_spatial_shape[i] - input_spatial_shape[i]"
+  " ```"
+  " The output of each pooling window is divided by the number of elements (exclude pad when attribute count_include_pad is zero)."
+  " "
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$X,
-           DefaultValuedAttr<StrAttr, "NOTSET">:$auto_pad,
-           DefaultValuedAttr<I64Attr, "0">:$ceil_mode,
-           DefaultValuedAttr<I64Attr, "0">:$count_include_pad,
-           I64ArrayAttr:$kernel_shape,
-           OptionalAttr<I64ArrayAttr>:$pads,
-           OptionalAttr<I64ArrayAttr>:$strides);
+    DefaultValuedAttr<StrAttr, "NOTSET">:$auto_pad,
+    DefaultValuedAttr<I64Attr, "0">:$ceil_mode,
+    DefaultValuedAttr<I64Attr, "0">:$count_include_pad,
+    I64ArrayAttr:$kernel_shape,
+    OptionalAttr<I64ArrayAttr>:$pads,
+    OptionalAttr<I64ArrayAttr>:$strides);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y);
 }
 
-def ONNXBatchNormalizationOp:ONNX_Op<"BatchNormalization", 
-    [NoSideEffect]> {
+def ONNXBatchNormalizationOp:ONNX_Op<"BatchNormalization",
+  [NoSideEffect]> {
   let summary = "ONNX BatchNormalization operation";
   let description = [{
-    "Carries out batch normalization as described in the paper"
-    "https://arxiv.org/abs/1502.03167. Depending on the mode it is being run,"
-    "there are multiple cases for the number of outputs, which we list below:"
-    ""
-    "Output case #1: Y, mean, var, saved_mean, saved_var (training mode)"
-    "Output case #2: Y (test mode)"
-    ""
-    "For previous (depreciated) non-spatial cases, implementors are suggested"
-    "to flatten the input shape to (N x C*D1*D2 ..*Dn) before a BatchNormalization Op."
-    "This operator has **optional** inputs/outputs. See [the doc](IR.md) for more details about the representation of optional arguments. An empty string may be used in the place of an actual argument's name to indicate a missing argument. Trailing optional arguments (those not followed by an argument that is present) may also be simply omitted."
+  "Carries out batch normalization as described in the paper"
+  "https://arxiv.org/abs/1502.03167. Depending on the mode it is being run,"
+  "there are multiple cases for the number of outputs, which we list below:"
+  ""
+  "Output case #1: Y, mean, var, saved_mean, saved_var (training mode)"
+  "Output case #2: Y (test mode)"
+  ""
+  "For previous (deprecated) non-spatial cases, implementors are suggested"
+  "to flatten the input shape to (N x C*D1*D2 ..*Dn) before a BatchNormalization Op."
+  "This operator has **optional** inputs/outputs. See [the doc](IR.md) for more details about the representation of optional arguments. An empty string may be used in the place of an actual argument's name to indicate a missing argument. Trailing optional arguments (those not followed by an argument that is present) may also be simply omitted."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$X,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$scale,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$B,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$mean,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$var,
-           DefaultValuedAttr<F32Attr, "1e-05">:$epsilon,
-           DefaultValuedAttr<F32Attr, "0.9">:$momentum);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$scale,
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$B,
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$mean,
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$var,
+    DefaultValuedAttr<F32Attr, "1e-05">:$epsilon,
+    DefaultValuedAttr<F32Attr, "0.9">:$momentum);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$out_mean,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$out_var,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$saved_mean,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$saved_var);
+    AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$out_mean,
+    AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$out_var,
+    AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$saved_mean,
+    AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$saved_var);
 }
 
-def ONNXBitShiftOp:ONNX_Op<"BitShift", 
-    [NoSideEffect]> {
+def ONNXBitShiftOp:ONNX_Op<"BitShift",
+  [NoSideEffect]> {
   let summary = "ONNX BitShift operation";
   let description = [{
-    "Bitwise shift operator performs element-wise operation. For each input element, if the"
-    " attribute "direction" is "RIGHT", this operator moves its binary representation toward"
-    " the right side so that the input value is effectively decreased. If the attribute "direction""
-    " is "LEFT", bits of binary representation moves toward the left side, which results the"
-    " increase of its actual value. The input X is the tensor to be shifted and another input"
-    " Y specifies the amounts of shifting. For example, if "direction" is "Right", X is [1, 4],"
-    " and S is [1, 1], the corresponding output Z would be [0, 2]. If "direction" is "LEFT" with"
-    " X=[1, 2] and S=[1, 2], the corresponding output Y would be [2, 8]."
-    " "
-    " Because this operator supports Numpy-style broadcasting, X's and Y's shapes are"
-    " not necessarily identical."
-    "This operator supports **multidirectional (i.e., Numpy-style) broadcasting**; for more details please check [the doc](Broadcasting.md)."
+  "Bitwise shift operator performs element-wise operation. For each input element, if the"
+  " attribute \"direction\" is \"RIGHT\", this operator moves its binary representation toward"
+  " the right side so that the input value is effectively decreased. If the attribute \"direction\""
+  " is \"LEFT\", bits of binary representation move toward the left side, which results in the"
+  " increase of its actual value. The input X is the tensor to be shifted and another input"
+  " Y specifies the amounts of shifting. For example, if \"direction\" is \"Right\", X is [1, 4],"
+  " and S is [1, 1], the corresponding output Z would be [0, 2]. If \"direction\" is \"LEFT\" with"
+  " X=[1, 2] and S=[1, 2], the corresponding output Y would be [2, 8]."
+  " "
+  " Because this operator supports Numpy-style broadcasting, X's and Y's shapes are"
+  " not necessarily identical."
+  "This operator supports **multidirectional (i.e., Numpy-style) broadcasting**; for more details please check [the doc](Broadcasting.md)."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$X,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y,
-           StrAttr:$direction);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y,
+    StrAttr:$direction);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$Z);
 }
 
-def ONNXCastOp:ONNX_Op<"Cast", 
-    [NoSideEffect]> {
+def ONNXCastOp:ONNX_Op<"Cast",
+  [NoSideEffect]> {
   let summary = "ONNX Cast operation";
   let description = [{
-    "The operator casts the elements of a given input tensor to a data type"
-    "specified by the 'to' argument and returns an output tensor of the same size in"
-    "the converted type. The 'to' argument must be one of the data types specified"
-    "in the 'DataType' enum field in the TensorProto message."
-    ""
-    "Casting from string tensor in plain (e.g., "3.14" and "1000") and scientific numeric representations"
-    "(e.g., "1e-5" and "1E8") to float types is supported. For example, converting string "100.5" to an integer may"
-    "result 100. There are some string literals reserved for special floating-point values;"
-    ""+INF" (and "INF"), "-INF", and "NaN" are positive infinity, negative infinity, and not-a-number, respectively."
-    "Any string which can exactly match "+INF" in a case-insensitive way would be mapped to positive infinite. Similarly,"
-    "this case-insensitive rule is applied to "INF" and "NaN". When casting from numeric tensors"
-    "to string tensors, plain floating-point representation (such as "314.15926") would be used. "
-    "Converting non-numerical-literal string such as "Hello World!" is an undefined behavior. Cases "
-    "of converting string representing floating-point arithmetic value, such as "2.718", to INT is an undefined behavior."
-    ""
-    "Conversion from a numerical type to any numerical type is always allowed."
-    "User must be aware of precision loss and value change caused by range difference between two types."
-    "For example, a 64-bit float 3.1415926459 may be round to a 32-bit float 3.141592. Similarly, converting"
-    "an integer 36 to Boolean may produce 1 because we truncate bits which can't be stored in the targeted type."
+  "The operator casts the elements of a given input tensor to a data type"
+  "specified by the 'to' argument and returns an output tensor of the same size in"
+  "the converted type. The 'to' argument must be one of the data types specified"
+  "in the 'DataType' enum field in the TensorProto message."
+  ""
+  "Casting from string tensor in plain (e.g., \"3.14\" and \"1000\") and scientific numeric representations"
+  "(e.g., \"1e-5\" and \"1E8\") to float types is supported. For example, converting string \"100.5\" to an integer may"
+  "result 100. There are some string literals reserved for special floating-point values;"
+  "\"+INF\" (and \"INF\"), \"-INF\", and \"NaN\" are positive infinity, negative infinity, and not-a-number, respectively."
+  "Any string which can exactly match \"+INF\" in a case-insensitive way would be mapped to positive infinite. Similarly,"
+  "this case-insensitive rule is applied to \"INF\" and \"NaN\". When casting from numeric tensors"
+  "to string tensors, plain floating-point representation (such as \"314.15926\") would be used. "
+  "Converting non-numerical-literal string such as \"Hello World!\" is an undefined behavior. Cases "
+  "of converting string representing floating-point arithmetic value, such as \"2.718\", to INT is an undefined behavior."
+  ""
+  "Conversion from a numerical type to any numerical type is always allowed."
+  "User must be aware of precision loss and value change caused by range difference between two types."
+  "For example, a 64-bit float 3.1415926459 may be round to a 32-bit float 3.141592. Similarly, converting"
+  "an integer 36 to Boolean may produce 1 because we truncate bits which can't be stored in the targeted type."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$input,
-           I64Attr:$to);
+    I64Attr:$to);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXCeilOp:ONNX_Op<"Ceil", 
-    [NoSideEffect]> {
+def ONNXCeilOp:ONNX_Op<"Ceil",
+  [NoSideEffect]> {
   let summary = "ONNX Ceil operation";
   let description = [{
-    "Ceil takes one input data (Tensor<T>) and produces one output data"
-    "(Tensor<T>) where the ceil is, y = ceil(x), is applied to"
-    "the tensor elementwise."
+  "Ceil takes one input data (Tensor<T>) and produces one output data"
+  "(Tensor<T>) where the ceil is, y = ceil(x), is applied to"
+  "the tensor elementwise."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$X);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y);
 }
 
-def ONNXClipOp:ONNX_Op<"Clip", 
-    [NoSideEffect]> {
+def ONNXClipOp:ONNX_Op<"Clip",
+  [NoSideEffect]> {
   let summary = "ONNX Clip operation";
   let description = [{
-    "Clip operator limits the given input within an interval. The interval is"
-    "specified by the inputs 'min' and 'max'. They default to"
-    "numeric_limits::lowest() and numeric_limits::max(), respectively."
+  "Clip operator limits the given input within an interval. The interval is"
+  "specified by the inputs 'min' and 'max'. They default to"
+  "numeric_limits::lowest() and numeric_limits::max(), respectively."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$input,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$min,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$max);
+    AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$min,
+    AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$max);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXCompressOp:ONNX_Op<"Compress", 
-    [NoSideEffect]> {
+def ONNXCompressOp:ONNX_Op<"Compress",
+  [NoSideEffect]> {
   let summary = "ONNX Compress operation";
   let description = [{
-    "Selects slices from an input tensor along a given axis where condition evaluates to True for each axis index."
-    "    In case axis is not provided, input is flattened before elements are selected."
-    "    Compress behaves like numpy.compress: https://docs.scipy.org/doc/numpy/reference/generated/numpy.compress.html"
-    "    "
+  "Selects slices from an input tensor along a given axis where condition evaluates to True for each axis index."
+  "    In case axis is not provided, input is flattened before elements are selected."
+  "    Compress behaves like numpy.compress: https://docs.scipy.org/doc/numpy/reference/generated/numpy.compress.html"
+  "    "
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$input,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$condition,
-           OptionalAttr<I64Attr>:$axis);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$condition,
+    OptionalAttr<I64Attr>:$axis);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXConcatOp:ONNX_Op<"Concat", 
-    [NoSideEffect]> {
+def ONNXConcatOp:ONNX_Op<"Concat",
+  [NoSideEffect]> {
   let summary = "ONNX Concat operation";
   let description = [{
-    "Concatenate a list of tensors into a single tensor. All input tensors must have the same shape, except for the dimension size of the axis to concatenate on."
+  "Concatenate a list of tensors into a single tensor. All input tensors must have the same shape, except for the dimension size of the axis to concatenate on."
   }];
   let arguments = (ins Variadic<AnyTypeOf<[AnyMemRef, AnyTensor]>>:$inputs,
-           I64Attr:$axis);
+    I64Attr:$axis);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$concat_result);
 }
 
-def ONNXConcatFromSequenceOp:ONNX_Op<"ConcatFromSequence", 
-    [NoSideEffect]> {
+def ONNXConcatFromSequenceOp:ONNX_Op<"ConcatFromSequence",
+  [NoSideEffect]> {
   let summary = "ONNX ConcatFromSequence operation";
   let description = [{
-    "Concatenate a sequence of tensors into a single tensor."
-    "All input tensors must have the same shape, except for the dimension size of the axis to concatenate on."
-    "By default 'new_axis' is 0, the behavior is similar to numpy.concatenate."
-    "When 'new_axis' is 1, the behavior is similar to numpy.stack."
+  "Concatenate a sequence of tensors into a single tensor."
+  "All input tensors must have the same shape, except for the dimension size of the axis to concatenate on."
+  "By default 'new_axis' is 0, the behavior is similar to numpy.concatenate."
+  "When 'new_axis' is 1, the behavior is similar to numpy.stack."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$input_sequence,
-           I64Attr:$axis,
-           DefaultValuedAttr<I64Attr, "0">:$new_axis);
+    I64Attr:$axis,
+    DefaultValuedAttr<I64Attr, "0">:$new_axis);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$concat_result);
 }
 
-def ONNXConstantOp:ONNX_Op<"Constant", 
-    [NoSideEffect]> {
+def ONNXConstantOp:ONNX_Op<"Constant",
+  [NoSideEffect]> {
   let summary = "ONNX Constant operation";
   let description = [{
-    "A constant tensor. Exactly one of the two attributes, either value or sparse_value,"
-    "must be specified."
+  "A constant tensor. Exactly one of the two attributes, either value or sparse_value,"
+  "must be specified."
   }];
   let arguments = (ins OptionalAttr<AnyAttr>:$sparse_value,
-           OptionalAttr<AnyAttr>:$value);
+    OptionalAttr<AnyAttr>:$value);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXConstantOfShapeOp:ONNX_Op<"ConstantOfShape", 
-    [NoSideEffect]> {
+def ONNXConstantOfShapeOp:ONNX_Op<"ConstantOfShape",
+  [NoSideEffect]> {
   let summary = "ONNX ConstantOfShape operation";
   let description = [{
-    "Generate a tensor with given value and shape."
+  "Generate a tensor with given value and shape."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$input,
-           OptionalAttr<AnyAttr>:$value);
+    OptionalAttr<AnyAttr>:$value);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXConvOp:ONNX_Op<"Conv", 
-    [NoSideEffect]> {
+def ONNXConvOp:ONNX_Op<"Conv",
+  [NoSideEffect]> {
   let summary = "ONNX Conv operation";
   let description = [{
-    "The convolution operator consumes an input tensor and a filter, and"
-    "computes the output."
+  "The convolution operator consumes an input tensor and a filter, and"
+  "computes the output."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$X,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$W,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$B,
-           DefaultValuedAttr<StrAttr, "NOTSET">:$auto_pad,
-           OptionalAttr<I64ArrayAttr>:$dilations,
-           DefaultValuedAttr<I64Attr, "1">:$group,
-           OptionalAttr<I64ArrayAttr>:$kernel_shape,
-           OptionalAttr<I64ArrayAttr>:$pads,
-           OptionalAttr<I64ArrayAttr>:$strides);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$W,
+    AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$B,
+    DefaultValuedAttr<StrAttr, "NOTSET">:$auto_pad,
+    OptionalAttr<I64ArrayAttr>:$dilations,
+    DefaultValuedAttr<I64Attr, "1">:$group,
+    OptionalAttr<I64ArrayAttr>:$kernel_shape,
+    OptionalAttr<I64ArrayAttr>:$pads,
+    OptionalAttr<I64ArrayAttr>:$strides);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y);
 }
 
-def ONNXConvIntegerOp:ONNX_Op<"ConvInteger", 
-    [NoSideEffect]> {
+def ONNXConvIntegerOp:ONNX_Op<"ConvInteger",
+  [NoSideEffect]> {
   let summary = "ONNX ConvInteger operation";
   let description = [{
-    "The integer convolution operator consumes an input tensor, its zero-point, a filter, and its zero-point,"
-    "and computes the output. The production MUST never overflow. The accumulation may overflow if and only if in 32 bits."
+  "The integer convolution operator consumes an input tensor, its zero-point, a filter, and its zero-point,"
+  "and computes the output. The product MUST never overflow. The accumulation may overflow if and only if in 32 bits."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$x,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$w,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$x_zero_point,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$w_zero_point,
-           DefaultValuedAttr<StrAttr, "NOTSET">:$auto_pad,
-           OptionalAttr<I64ArrayAttr>:$dilations,
-           DefaultValuedAttr<I64Attr, "1">:$group,
-           OptionalAttr<I64ArrayAttr>:$kernel_shape,
-           OptionalAttr<I64ArrayAttr>:$pads,
-           OptionalAttr<I64ArrayAttr>:$strides);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$w,
+    AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$x_zero_point,
+    AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$w_zero_point,
+    DefaultValuedAttr<StrAttr, "NOTSET">:$auto_pad,
+    OptionalAttr<I64ArrayAttr>:$dilations,
+    DefaultValuedAttr<I64Attr, "1">:$group,
+    OptionalAttr<I64ArrayAttr>:$kernel_shape,
+    OptionalAttr<I64ArrayAttr>:$pads,
+    OptionalAttr<I64ArrayAttr>:$strides);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$y);
 }
 
-def ONNXConvTransposeOp:ONNX_Op<"ConvTranspose", 
-    [NoSideEffect]> {
+def ONNXConvTransposeOp:ONNX_Op<"ConvTranspose",
+  [NoSideEffect]> {
   let summary = "ONNX ConvTranspose operation";
   let description = [{
-    "The convolution transpose operator consumes an input tensor and a filter,"
-    "and computes the output."
-    ""
-    "If the pads parameter is provided the shape of the output is calculated via the following equation:"
-    ""
-    "  output_shape[i] = stride[i] * (input_size[i] - 1) + output_padding[i] + ((kernel_shape[i] - 1) * dilations[i] + 1) - pads[start_i] - pads[end_i]"
-    ""
-    "output_shape can also be explicitly specified in which case pads values are auto generated using these equations:"
-    ""
-    "  total_padding[i] = stride[i] * (input_size[i] - 1) + output_padding[i] + ((kernel_shape[i] - 1) * dilations[i] + 1) - output_shape[i]"
-    "  If (auto_pads != SAME_UPPER): pads[start_i] = total_padding[i]/2; pads[end_i] = total_padding[i] - (total_padding[i]/2)"
-    "  Else: pads[start_i] = total_padding[i] - (total_padding[i]/2); pads[end_i] = (total_padding[i]/2)."
-    ""
-    "    "
+  "The convolution transpose operator consumes an input tensor and a filter,"
+  "and computes the output."
+  ""
+  "If the pads parameter is provided the shape of the output is calculated via the following equation:"
+  ""
+  "  output_shape[i] = stride[i] * (input_size[i] - 1) + output_padding[i] + ((kernel_shape[i] - 1) * dilations[i] + 1) - pads[start_i] - pads[end_i]"
+  ""
+  "output_shape can also be explicitly specified in which case pads values are auto generated using these equations:"
+  ""
+  "  total_padding[i] = stride[i] * (input_size[i] - 1) + output_padding[i] + ((kernel_shape[i] - 1) * dilations[i] + 1) - output_shape[i]"
+  "  If (auto_pads != SAME_UPPER): pads[start_i] = total_padding[i]/2; pads[end_i] = total_padding[i] - (total_padding[i]/2)"
+  "  Else: pads[start_i] = total_padding[i] - (total_padding[i]/2); pads[end_i] = (total_padding[i]/2)."
+  ""
+  "    "
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$X,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$W,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$B,
-           DefaultValuedAttr<StrAttr, "NOTSET">:$auto_pad,
-           OptionalAttr<I64ArrayAttr>:$dilations,
-           DefaultValuedAttr<I64Attr, "1">:$group,
-           OptionalAttr<I64ArrayAttr>:$kernel_shape,
-           OptionalAttr<I64ArrayAttr>:$output_padding,
-           OptionalAttr<I64ArrayAttr>:$output_shape,
-           OptionalAttr<I64ArrayAttr>:$pads,
-           OptionalAttr<I64ArrayAttr>:$strides);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$W,
+    AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$B,
+    DefaultValuedAttr<StrAttr, "NOTSET">:$auto_pad,
+    OptionalAttr<I64ArrayAttr>:$dilations,
+    DefaultValuedAttr<I64Attr, "1">:$group,
+    OptionalAttr<I64ArrayAttr>:$kernel_shape,
+    OptionalAttr<I64ArrayAttr>:$output_padding,
+    OptionalAttr<I64ArrayAttr>:$output_shape,
+    OptionalAttr<I64ArrayAttr>:$pads,
+    OptionalAttr<I64ArrayAttr>:$strides);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y);
 }
 
-def ONNXCosOp:ONNX_Op<"Cos", 
-    [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+def ONNXCosOp:ONNX_Op<"Cos",
+  [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
   let summary = "ONNX Cos operation";
   let description = [{
-    "Calculates the cosine of the given input tensor, element-wise."
+  "Calculates the cosine of the given input tensor, element-wise."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$input);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXCoshOp:ONNX_Op<"Cosh", 
-    [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+def ONNXCoshOp:ONNX_Op<"Cosh",
+  [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
   let summary = "ONNX Cosh operation";
   let description = [{
-    "Calculates the hyperbolic cosine of the given input tensor element-wise."
+  "Calculates the hyperbolic cosine of the given input tensor element-wise."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$input);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXCumSumOp:ONNX_Op<"CumSum", 
-    [NoSideEffect]> {
+def ONNXCumSumOp:ONNX_Op<"CumSum",
+  [NoSideEffect]> {
   let summary = "ONNX CumSum operation";
   let description = [{
-    "Performs cumulative sum of the input elements along the given axis."
-    "By default, it will do the sum inclusively meaning the first element is copied as is."
-    "Through an `exclusive` attribute, this behavior can change to exclude the first element."
-    "It can also perform summation in the opposite direction of the axis. For that, set `reverse` attribute to 1."
-    ""
-    "Example:"
-    "```"
-    "input_x = [1, 2, 3]"
-    "axis=0"
-    "output = [1, 3, 6]"
-    "exclusive=1"
-    "output = [0, 1, 3]"
-    "exclusive=0"
-    "reverse=1"
-    "output = [6, 5, 3]"
-    "exclusive=1"
-    "reverse=1"
-    "output = [5, 3, 0]"
-    "```"
-    " "
+  "Performs cumulative sum of the input elements along the given axis."
+  "By default, it will do the sum inclusively meaning the first element is copied as is."
+  "Through an `exclusive` attribute, this behavior can change to exclude the first element."
+  "It can also perform summation in the opposite direction of the axis. For that, set `reverse` attribute to 1."
+  ""
+  "Example:"
+  "```"
+  "input_x = [1, 2, 3]"
+  "axis=0"
+  "output = [1, 3, 6]"
+  "exclusive=1"
+  "output = [0, 1, 3]"
+  "exclusive=0"
+  "reverse=1"
+  "output = [6, 5, 3]"
+  "exclusive=1"
+  "reverse=1"
+  "output = [5, 3, 0]"
+  "```"
+  " "
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$x,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$axis,
-           DefaultValuedAttr<I64Attr, "0">:$exclusive,
-           DefaultValuedAttr<I64Attr, "0">:$reverse);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$axis,
+    DefaultValuedAttr<I64Attr, "0">:$exclusive,
+    DefaultValuedAttr<I64Attr, "0">:$reverse);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$y);
 }
 
-def ONNXDepthToSpaceOp:ONNX_Op<"DepthToSpace", 
-    [NoSideEffect]> {
+def ONNXDepthToSpaceOp:ONNX_Op<"DepthToSpace",
+  [NoSideEffect]> {
   let summary = "ONNX DepthToSpace operation";
   let description = [{
-    "DepthToSpace rearranges (permutes) data from depth into blocks of spatial data."
-    "This is the reverse transformation of SpaceToDepth. More specifically, this op outputs a copy of"
-    "the input tensor where values from the depth dimension are moved in spatial blocks to the height"
-    "and width dimensions. By default, `mode` = `DCR`."
-    "In the DCR mode, elements along the depth dimension from the input tensor are rearranged in the"
-    "following order: depth, column, and then row. The output y is computed from the input x as below:"
-    ""
-    "b, c, h, w = x.shape"
-    ""
-    "tmp = np.reshape(x, [b, blocksize, blocksize, c // (blocksize**2), h, w])"
-    ""
-    "tmp = np.transpose(tmp, [0, 3, 4, 1, 5, 2])"
-    ""
-    "y = np.reshape(tmp, [b, c // (blocksize**2), h * blocksize, w * blocksize])"
-    ""
-    ""
-    "In the CRD mode, elements along the depth dimension from the input tensor are rearranged in the"
-    "following order: column, row, and the depth. The output y is computed from the input x as below:"
-    ""
-    "b, c, h, w = x.shape"
-    ""
-    "tmp = np.reshape(x, [b, c // (blocksize ** 2), blocksize, blocksize, h, w])"
-    ""
-    "tmp = np.transpose(tmp, [0, 1, 4, 2, 5, 3])"
-    ""
-    "y = np.reshape(tmp, [b, c // (blocksize ** 2), h * blocksize, w * blocksize])"
-    ""
+  "DepthToSpace rearranges (permutes) data from depth into blocks of spatial data."
+  "This is the reverse transformation of SpaceToDepth. More specifically, this op outputs a copy of"
+  "the input tensor where values from the depth dimension are moved in spatial blocks to the height"
+  "and width dimensions. By default, `mode` = `DCR`."
+  "In the DCR mode, elements along the depth dimension from the input tensor are rearranged in the"
+  "following order: depth, column, and then row. The output y is computed from the input x as below:"
+  ""
+  "b, c, h, w = x.shape"
+  ""
+  "tmp = np.reshape(x, [b, blocksize, blocksize, c // (blocksize**2), h, w])"
+  ""
+  "tmp = np.transpose(tmp, [0, 3, 4, 1, 5, 2])"
+  ""
+  "y = np.reshape(tmp, [b, c // (blocksize**2), h * blocksize, w * blocksize])"
+  ""
+  ""
+  "In the CRD mode, elements along the depth dimension from the input tensor are rearranged in the"
+  "following order: column, row, and the depth. The output y is computed from the input x as below:"
+  ""
+  "b, c, h, w = x.shape"
+  ""
+  "tmp = np.reshape(x, [b, c // (blocksize ** 2), blocksize, blocksize, h, w])"
+  ""
+  "tmp = np.transpose(tmp, [0, 1, 4, 2, 5, 3])"
+  ""
+  "y = np.reshape(tmp, [b, c // (blocksize ** 2), h * blocksize, w * blocksize])"
+  ""
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$input,
-           I64Attr:$blocksize,
-           DefaultValuedAttr<StrAttr, "DCR">:$mode);
+    I64Attr:$blocksize,
+    DefaultValuedAttr<StrAttr, "DCR">:$mode);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXDequantizeLinearOp:ONNX_Op<"DequantizeLinear", 
-    [NoSideEffect]> {
+def ONNXDequantizeLinearOp:ONNX_Op<"DequantizeLinear",
+  [NoSideEffect]> {
   let summary = "ONNX DequantizeLinear operation";
   let description = [{
-    "The linear dequantization operator. It consumes a quantized tensor, a scale, a zero point to compute the full precision tensor."
-    "The dequantization formula is y = (x - x_zero_point) * x_scale. 'x_scale' and 'x_zero_point' must have same shape."
-    "'x_zero_point' and 'x' must have same type. 'x' and 'y' must have same shape. In the case of dequantizing int32,"
-    "there's no zero point (zero point is supposed to be 0)."
+  "The linear dequantization operator. It consumes a quantized tensor, a scale, a zero point to compute the full precision tensor."
+  "The dequantization formula is y = (x - x_zero_point) * x_scale. 'x_scale' and 'x_zero_point' must have same shape."
+  "'x_zero_point' and 'x' must have same type. 'x' and 'y' must have same shape. In the case of dequantizing int32,"
+  "there's no zero point (zero point is supposed to be 0)."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$x,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$x_scale,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$x_zero_point);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$x_scale,
+    AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$x_zero_point);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$y);
 }
 
-def ONNXDetOp:ONNX_Op<"Det", 
-    [NoSideEffect]> {
+def ONNXDetOp:ONNX_Op<"Det",
+  [NoSideEffect]> {
   let summary = "ONNX Det operation";
   let description = [{
-    "Det calculates determinant of a square matrix or batches of square matrices."
-    "Det takes one input tensor of shape `[*, M, M]`, where `*` is zero or more batch dimensions,"
-    "and the inner-most 2 dimensions form square matrices."
-    "The output is a tensor of shape `[*]`, containing the determinants of all input submatrices."
-    "e.g., When the input is 2-D, the output is a scalar(shape is empty: `[]`)."
+  "Det calculates determinant of a square matrix or batches of square matrices."
+  "Det takes one input tensor of shape `[*, M, M]`, where `*` is zero or more batch dimensions,"
+  "and the inner-most 2 dimensions form square matrices."
+  "The output is a tensor of shape `[*]`, containing the determinants of all input submatrices."
+  "e.g., When the input is 2-D, the output is a scalar(shape is empty: `[]`)."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$X);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y);
 }
 
-def ONNXDivOp:ONNX_Op<"Div", 
-    [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+def ONNXDivOp:ONNX_Op<"Div",
+  [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
   let summary = "ONNX Div operation";
   let description = [{
-    "Performs element-wise binary division (with Numpy-style broadcasting support)."
-    ""
-    "This operator supports **multidirectional (i.e., Numpy-style) broadcasting**; for more details please check [the doc](Broadcasting.md)."
+  "Performs element-wise binary division (with Numpy-style broadcasting support)."
+  ""
+  "This operator supports **multidirectional (i.e., Numpy-style) broadcasting**; for more details please check [the doc](Broadcasting.md)."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$A,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$B);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$B);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$C);
 }
 
-def ONNXDropoutOp:ONNX_Op<"Dropout", 
-    [NoSideEffect]> {
+def ONNXDropoutOp:ONNX_Op<"Dropout",
+  [NoSideEffect]> {
   let summary = "ONNX Dropout operation";
   let description = [{
-    "Dropout takes one input floating tensor and produces two tensor outputs,"
-    "output (floating tensor) and mask (`Tensor<bool>`). Depending on whether it is"
-    "in test mode or not, the output Y will either be a random dropout, or a simple"
-    "copy of the input. Note that our implementation of Dropout does scaling in"
-    "the training phase, so during testing nothing needs to be done."
-    "This operator has **optional** inputs/outputs. See [the doc](IR.md) for more details about the representation of optional arguments. An empty string may be used in the place of an actual argument's name to indicate a missing argument. Trailing optional arguments (those not followed by an argument that is present) may also be simply omitted."
+  "Dropout takes one input floating tensor and produces two tensor outputs,"
+  "output (floating tensor) and mask (`Tensor<bool>`). Depending on whether it is"
+  "in test mode or not, the output Y will either be a random dropout, or a simple"
+  "copy of the input. Note that our implementation of Dropout does scaling in"
+  "the training phase, so during testing nothing needs to be done."
+  "This operator has **optional** inputs/outputs. See [the doc](IR.md) for more details about the representation of optional arguments. An empty string may be used in the place of an actual argument's name to indicate a missing argument. Trailing optional arguments (those not followed by an argument that is present) may also be simply omitted."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$data,
-           DefaultValuedAttr<F32Attr, "0.5">:$ratio);
+    DefaultValuedAttr<F32Attr, "0.5">:$ratio);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$mask);
+    AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$mask);
 }
 
-def ONNXDynamicQuantizeLinearOp:ONNX_Op<"DynamicQuantizeLinear", 
-    [NoSideEffect]> {
+def ONNXDynamicQuantizeLinearOp:ONNX_Op<"DynamicQuantizeLinear",
+  [NoSideEffect]> {
   let summary = "ONNX DynamicQuantizeLinear operation";
   let description = [{
-    "A Function to fuse calculation for Scale, Zero Point and FP32->8Bit convertion of FP32 Input data."
-    "Outputs Scale, ZeroPoint and Quantized Input for a given FP32 Input."
-    "Scale is calculated as:"
-    "```"
-    " y_scale = (max(x) - min(x))/(qmax - qmin)"
-    " * where qmax and qmin are max and min values for quantization range .i.e [0, 255] in case of uint8"
-    " * data range is adjusted to include 0."
-    "```"
-    "Zero point is calculated as:"
-    "```"
-    "intermediate_zero_point = (qmin - min(x))/(qmax - qmin)"
-    "y_zero_point = cast(round(saturate(itermediate_zero_point)))"
-    "* where qmax and qmin are max and min values for quantization range .i.e [0, 255] in case of uint8"
-    "* for saturation, it saturates to [0, 255] if it's uint8, or [-127, 127] if it's int8. Right now only uint8 is supported."
-    "* rounding to nearest ties to even."
-    "```"
-    "Data quantization formula is:"
-    "```"
-    "y = saturate (round (x / y_scale) + y_zero_point)"
-    "* for saturation, it saturates to [0, 255] if it's uint8, or [-127, 127] if it's int8. Right now only uint8 is supported."
-    "* rounding to nearest ties to even."
-    "```"
+  "A Function to fuse calculation for Scale, Zero Point and FP32->8Bit convertion of FP32 Input data."
+  "Outputs Scale, ZeroPoint and Quantized Input for a given FP32 Input."
+  "Scale is calculated as:"
+  "```"
+  " y_scale = (max(x) - min(x))/(qmax - qmin)"
+  " * where qmax and qmin are max and min values for quantization range .i.e [0, 255] in case of uint8"
+  " * data range is adjusted to include 0."
+  "```"
+  "Zero point is calculated as:"
+  "```"
+  "intermediate_zero_point = (qmin - min(x))/(qmax - qmin)"
+  "y_zero_point = cast(round(saturate(itermediate_zero_point)))"
+  "* where qmax and qmin are max and min values for quantization range .i.e [0, 255] in case of uint8"
+  "* for saturation, it saturates to [0, 255] if it's uint8, or [-127, 127] if it's int8. Right now only uint8 is supported."
+  "* rounding to nearest ties to even."
+  "```"
+  "Data quantization formula is:"
+  "```"
+  "y = saturate (round (x / y_scale) + y_zero_point)"
+  "* for saturation, it saturates to [0, 255] if it's uint8, or [-127, 127] if it's int8. Right now only uint8 is supported."
+  "* rounding to nearest ties to even."
+  "```"
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$x);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$y,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$y_scale,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$y_zero_point);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$y_scale,
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$y_zero_point);
 }
 
-def ONNXEluOp:ONNX_Op<"Elu", 
-    [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+def ONNXEluOp:ONNX_Op<"Elu",
+  [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
   let summary = "ONNX Elu operation";
   let description = [{
-    "Elu takes one input data (Tensor<T>) and produces one output data"
-    "(Tensor<T>) where the function `f(x) = alpha * (exp(x) - 1.) for x <"
-    "0`, `f(x) = x for x >= 0`., is applied to the tensor elementwise."
-    ""
+  "Elu takes one input data (Tensor<T>) and produces one output data"
+  "(Tensor<T>) where the function `f(x) = alpha * (exp(x) - 1.) for x <"
+  "0`, `f(x) = x for x >= 0`., is applied to the tensor elementwise."
+  ""
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$X,
-           DefaultValuedAttr<F32Attr, "1.0">:$alpha);
+    DefaultValuedAttr<F32Attr, "1.0">:$alpha);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y);
 }
 
-def ONNXEqualOp:ONNX_Op<"Equal", 
-    [NoSideEffect]> {
+def ONNXEqualOp:ONNX_Op<"Equal",
+  [NoSideEffect]> {
   let summary = "ONNX Equal operation";
   let description = [{
-    "Returns the tensor resulted from performing the `equal` logical operation"
-    "elementwise on the input tensors `A` and `B` (with Numpy-style broadcasting support)."
-    ""
-    "This operator supports **multidirectional (i.e., Numpy-style) broadcasting**; for more details please check [the doc](Broadcasting.md)."
+  "Returns the tensor resulted from performing the `equal` logical operation"
+  "elementwise on the input tensors `A` and `B` (with Numpy-style broadcasting support)."
+  ""
+  "This operator supports **multidirectional (i.e., Numpy-style) broadcasting**; for more details please check [the doc](Broadcasting.md)."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$A,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$B);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$B);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$C);
 }
 
-def ONNXErfOp:ONNX_Op<"Erf", 
-    [NoSideEffect]> {
+def ONNXErfOp:ONNX_Op<"Erf",
+  [NoSideEffect]> {
   let summary = "ONNX Erf operation";
   let description = [{
-    "Computes the error function of the given input tensor element-wise."
+  "Computes the error function of the given input tensor element-wise."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$input);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXExpOp:ONNX_Op<"Exp", 
-    [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+def ONNXExpOp:ONNX_Op<"Exp",
+  [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
   let summary = "ONNX Exp operation";
   let description = [{
-    "Calculates the exponential of the given input tensor, element-wise."
+  "Calculates the exponential of the given input tensor, element-wise."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$input);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
@@ -672,1120 +673,1101 @@ def ONNXExpOp:ONNX_Op<"Exp",
       outputTypes.emplace_back(UnrankedTensorType::get(elementType));
       build(builder, state, outputTypes, operands, attributes);
     }]>
-  ];
+    ];
 }
 
-def ONNXExpandOp:ONNX_Op<"Expand", 
-    [NoSideEffect]> {
+def ONNXExpandOp:ONNX_Op<"Expand",
+  [NoSideEffect]> {
   let summary = "ONNX Expand operation";
   let description = [{
-    "Broadcast the input tensor following the given shape and the broadcast rule."
-    "The broadcast rule is similar to numpy.array(input) * numpy.ones(shape):"
-    "Dimensions are right alignment;"
-    "Two corresponding dimension must have the same value, or one of them is equal to 1."
-    "Also, this operator is similar to numpy.broadcast_to(input, shape),"
-    "but the major difference is numpy.broadcast_to() does not allow shape to be smaller than input.size()."
-    "It is possible that the output.shape is not equal to shape, when some dimensions in shape is equal to 1,"
-    "or the shape.ndim < input.shape.ndim."
+  "Broadcast the input tensor following the given shape and the broadcast rule."
+  "The broadcast rule is similar to numpy.array(input) * numpy.ones(shape):"
+  "Dimensions are right alignment;"
+  "Two corresponding dimension must have the same value, or one of them is equal to 1."
+  "Also, this operator is similar to numpy.broadcast_to(input, shape),"
+  "but the major difference is numpy.broadcast_to() does not allow shape to be smaller than input.size()."
+  "It is possible that the output.shape is not equal to shape, when some dimensions in shape is equal to 1,"
+  "or the shape.ndim < input.shape.ndim."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$input,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$shape);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$shape);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXEyeLikeOp:ONNX_Op<"EyeLike", 
-    [NoSideEffect]> {
+def ONNXEyeLikeOp:ONNX_Op<"EyeLike",
+  [NoSideEffect]> {
   let summary = "ONNX EyeLike operation";
   let description = [{
-    "Generate a 2D tensor (matrix) with ones on the diagonal and zeros everywhere else. Only 2D"
-    "tensors are supported, i.e. input T1 must be of rank 2. The shape of the output tensor is the"
-    "same as the input tensor. The data type can be specified by the 'dtype' argument. If"
-    "'dtype' is not specified, then the type of input tensor is used. By default, the main diagonal"
-    "is populated with ones, but attribute 'k' can be used to populate upper or lower diagonals."
-    "The 'dtype' argument must be one of the data types specified in the 'DataType' enum field in the"
-    "TensorProto message and be valid as an output type."
+  "Generate a 2D tensor (matrix) with ones on the diagonal and zeros everywhere else. Only 2D"
+  "tensors are supported, i.e. input T1 must be of rank 2. The shape of the output tensor is the"
+  "same as the input tensor. The data type can be specified by the 'dtype' argument. If"
+  "'dtype' is not specified, then the type of input tensor is used. By default, the main diagonal"
+  "is populated with ones, but attribute 'k' can be used to populate upper or lower diagonals."
+  "The 'dtype' argument must be one of the data types specified in the 'DataType' enum field in the"
+  "TensorProto message and be valid as an output type."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$input,
-           OptionalAttr<I64Attr>:$dtype,
-           DefaultValuedAttr<I64Attr, "0">:$k);
+    OptionalAttr<I64Attr>:$dtype,
+    DefaultValuedAttr<I64Attr, "0">:$k);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXFlattenOp:ONNX_Op<"Flatten", 
-    [NoSideEffect]> {
+def ONNXFlattenOp:ONNX_Op<"Flatten",
+  [NoSideEffect]> {
   let summary = "ONNX Flatten operation";
   let description = [{
-    "Flattens the input tensor into a 2D matrix. If input tensor has shape"
-    "(d_0, d_1, ... d_n) then the output will have shape"
-    "(d_0 X d_1 ... d_(axis-1), d_axis X d_(axis+1) ... X dn)."
+  "Flattens the input tensor into a 2D matrix. If input tensor has shape"
+  "(d_0, d_1, ... d_n) then the output will have shape"
+  "(d_0 X d_1 ... d_(axis-1), d_axis X d_(axis+1) ... X dn)."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$input,
-           DefaultValuedAttr<I64Attr, "1">:$axis);
+    DefaultValuedAttr<I64Attr, "1">:$axis);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXFloorOp:ONNX_Op<"Floor", 
-    [NoSideEffect]> {
+def ONNXFloorOp:ONNX_Op<"Floor",
+  [NoSideEffect]> {
   let summary = "ONNX Floor operation";
   let description = [{
-    "Floor takes one input data (Tensor<T>) and produces one output data"
-    "(Tensor<T>) where the floor is, y = floor(x), is applied to"
-    "the tensor elementwise."
+  "Floor takes one input data (Tensor<T>) and produces one output data"
+  "(Tensor<T>) where the floor is, y = floor(x), is applied to"
+  "the tensor elementwise."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$X);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y);
 }
 
-def ONNXGRUOp:ONNX_Op<"GRU", 
-    [NoSideEffect]> {
+def ONNXGRUOp:ONNX_Op<"GRU",
+  [NoSideEffect]> {
   let summary = "ONNX GRU operation";
   let description = [{
-    "Computes an one-layer GRU. This operator is usually supported via some custom"
-    "implementation such as CuDNN."
-    ""
-    "Notations:"
-    ""
-    "`X` - input tensor"
-    ""
-    "`z` - update gate"
-    ""
-    "`r` - reset gate"
-    ""
-    "`h` - hidden gate"
-    ""
-    "`t` - time step (t-1 means previous time step)"
-    ""
-    "`W[zrh]` - W parameter weight matrix for update, reset, and hidden gates"
-    ""
-    "`R[zrh]` - R recurrence weight matrix for update, reset, and hidden gates"
-    ""
-    "`Wb[zrh]` - W bias vectors for update, reset, and hidden gates"
-    ""
-    "`Rb[zrh]` - R bias vectors for update, reset, and hidden gates"
-    ""
-    "`WB[zrh]` - W parameter weight matrix for backward update, reset, and hidden gates"
-    ""
-    "`RB[zrh]` - R recurrence weight matrix for backward update, reset, and hidden gates"
-    ""
-    "`WBb[zrh]` - W bias vectors for backward update, reset, and hidden gates"
-    ""
-    "`RBb[zrh]` - R bias vectors for backward update, reset, and hidden gates"
-    ""
-    "`H` - Hidden state"
-    ""
-    "`num_directions` - 2 if direction == bidirectional else 1"
-    ""
-    "Activation functions:"
-    ""
-    "  Relu(x)                - max(0, x)"
-    ""
-    "  Tanh(x)                - (1 - e^{-2x})/(1 + e^{-2x})"
-    ""
-    "  Sigmoid(x)             - 1/(1 + e^{-x})"
-    ""
-    "  (NOTE: Below are optional)"
-    ""
-    "  Affine(x)              - alpha*x + beta"
-    ""
-    "  LeakyRelu(x)           - x if x >= 0 else alpha * x"
-    ""
-    "  ThresholdedRelu(x)     - x if x >= alpha else 0"
-    ""
-    "  ScaledTanh(x)          - alpha*Tanh(beta*x)"
-    ""
-    "  HardSigmoid(x)         - min(max(alpha*x + beta, 0), 1)"
-    ""
-    "  Elu(x)                 - x if x >= 0 else alpha*(e^x - 1)"
-    ""
-    "  Softsign(x)            - x/(1 + |x|)"
-    ""
-    "  Softplus(x)            - log(1 + e^x)"
-    ""
-    "Equations (Default: f=Sigmoid, g=Tanh):"
-    ""
-    "  - zt = f(Xt*(Wz^T) + Ht-1*(Rz^T) + Wbz + Rbz)"
-    ""
-    "  - rt = f(Xt*(Wr^T) + Ht-1*(Rr^T) + Wbr + Rbr)"
-    ""
-    "  - ht = g(Xt*(Wh^T) + (rt (.) Ht-1)*(Rh^T) + Rbh + Wbh) # default, when linear_before_reset = 0"
-    ""
-    "  - ht = g(Xt*(Wh^T) + (rt (.) (Ht-1*(Rh^T) + Rbh)) + Wbh) # when linear_before_reset != 0"
-    ""
-    "  - Ht = (1 - zt) (.) ht + zt (.) Ht-1"
-    "This operator has **optional** inputs/outputs. See [the doc](IR.md) for more details about the representation of optional arguments. An empty string may be used in the place of an actual argument's name to indicate a missing argument. Trailing optional arguments (those not followed by an argument that is present) may also be simply omitted."
+  "Computes an one-layer GRU. This operator is usually supported via some custom"
+  "implementation such as CuDNN."
+  ""
+  "Notations:"
+  ""
+  "`X` - input tensor"
+  ""
+  "`z` - update gate"
+  ""
+  "`r` - reset gate"
+  ""
+  "`h` - hidden gate"
+  ""
+  "`t` - time step (t-1 means previous time step)"
+  ""
+  "`W[zrh]` - W parameter weight matrix for update, reset, and hidden gates"
+  ""
+  "`R[zrh]` - R recurrence weight matrix for update, reset, and hidden gates"
+  ""
+  "`Wb[zrh]` - W bias vectors for update, reset, and hidden gates"
+  ""
+  "`Rb[zrh]` - R bias vectors for update, reset, and hidden gates"
+  ""
+  "`WB[zrh]` - W parameter weight matrix for backward update, reset, and hidden gates"
+  ""
+  "`RB[zrh]` - R recurrence weight matrix for backward update, reset, and hidden gates"
+  ""
+  "`WBb[zrh]` - W bias vectors for backward update, reset, and hidden gates"
+  ""
+  "`RBb[zrh]` - R bias vectors for backward update, reset, and hidden gates"
+  ""
+  "`H` - Hidden state"
+  ""
+  "`num_directions` - 2 if direction == bidirectional else 1"
+  ""
+  "Activation functions:"
+  ""
+  "  Relu(x)                - max(0, x)"
+  ""
+  "  Tanh(x)                - (1 - e^{-2x})/(1 + e^{-2x})"
+  ""
+  "  Sigmoid(x)             - 1/(1 + e^{-x})"
+  ""
+  "  (NOTE: Below are optional)"
+  ""
+  "  Affine(x)              - alpha*x + beta"
+  ""
+  "  LeakyRelu(x)           - x if x >= 0 else alpha * x"
+  ""
+  "  ThresholdedRelu(x)     - x if x >= alpha else 0"
+  ""
+  "  ScaledTanh(x)          - alpha*Tanh(beta*x)"
+  ""
+  "  HardSigmoid(x)         - min(max(alpha*x + beta, 0), 1)"
+  ""
+  "  Elu(x)                 - x if x >= 0 else alpha*(e^x - 1)"
+  ""
+  "  Softsign(x)            - x/(1 + |x|)"
+  ""
+  "  Softplus(x)            - log(1 + e^x)"
+  ""
+  "Equations (Default: f=Sigmoid, g=Tanh):"
+  ""
+  "  - zt = f(Xt*(Wz^T) + Ht-1*(Rz^T) + Wbz + Rbz)"
+  ""
+  "  - rt = f(Xt*(Wr^T) + Ht-1*(Rr^T) + Wbr + Rbr)"
+  ""
+  "  - ht = g(Xt*(Wh^T) + (rt (.) Ht-1)*(Rh^T) + Rbh + Wbh) # default, when linear_before_reset = 0"
+  ""
+  "  - ht = g(Xt*(Wh^T) + (rt (.) (Ht-1*(Rh^T) + Rbh)) + Wbh) # when linear_before_reset != 0"
+  ""
+  "  - Ht = (1 - zt) (.) ht + zt (.) Ht-1"
+  "This operator has **optional** inputs/outputs. See [the doc](IR.md) for more details about the representation of optional arguments. An empty string may be used in the place of an actual argument's name to indicate a missing argument. Trailing optional arguments (those not followed by an argument that is present) may also be simply omitted."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$X,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$W,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$R,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$B,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$sequence_lens,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$initial_h,
-           OptionalAttr<F32ArrayAttr>:$activation_alpha,
-           OptionalAttr<F32ArrayAttr>:$activation_beta,
-           OptionalAttr<StrArrayAttr>:$activations,
-           OptionalAttr<F32Attr>:$clip,
-           DefaultValuedAttr<StrAttr, "forward">:$direction,
-           OptionalAttr<I64Attr>:$hidden_size,
-           DefaultValuedAttr<I64Attr, "0">:$linear_before_reset);
-  let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y_h);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$W,
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$R,
+    AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$B,
+    AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$sequence_lens,
+    AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$initial_h,
+    OptionalAttr<F32ArrayAttr>:$activation_alpha,
+    OptionalAttr<F32ArrayAttr>:$activation_beta,
+    OptionalAttr<StrArrayAttr>:$activations,
+    OptionalAttr<F32Attr>:$clip,
+    DefaultValuedAttr<StrAttr, "forward">:$direction,
+    OptionalAttr<I64Attr>:$hidden_size,
+    DefaultValuedAttr<I64Attr, "0">:$linear_before_reset);
+  let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$Y,
+    AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$Y_h);
 }
 
-def ONNXGatherOp:ONNX_Op<"Gather", 
-    [NoSideEffect]> {
+def ONNXGatherOp:ONNX_Op<"Gather",
+  [NoSideEffect]> {
   let summary = "ONNX Gather operation";
   let description = [{
-    "Given `data` tensor of rank r >= 1, and `indices` tensor of rank q, gather"
-    "entries of the axis dimension of `data` (by default outer-most one as axis=0) indexed by `indices`, and concatenates"
-    "them in an output tensor of rank q + (r - 1)."
-    ""
-    "axis = 0 :"
-    ""
-    "Let"
-    "k = indices[i_{0}, ..., i_{q-1\}\]"
-    "Then"
-    "output[i_{0}, ..., i_{q-1}, j_{0}, ..., j_{r-2\}\] = input[k , j_{0}, ..., j_{r-2\}\]"
-    ""
-    "```"
-    "  data = ["
-    "      [1.0, 1.2],"
-    "      [2.3, 3.4],"
-    "      [4.5, 5.7],"
-    "  ]"
-    "  indices = ["
-    "      [0, 1],"
-    "      [1, 2],"
-    "  ]"
-    "  output = ["
-    "      ["
-    "          [1.0, 1.2],"
-    "          [2.3, 3.4],"
-    "      ],"
-    "      ["
-    "          [2.3, 3.4],"
-    "          [4.5, 5.7],"
-    "      ],"
-    "  ]"
-    "```"
-    "axis = 1 :"
-    ""
-    "Let"
-    "k = indices[i_{0}, ..., i_{q-1\}\]"
-    "Then"
-    "output[i_{0}, ..., i_{q-1}, j_{0}, ..., j_{r-2\}\] = input[j_{0}, k, j_{1}, ..., j_{r-2\}\]"
-    ""
-    "```"
-    "  data = ["
-    "      [1.0, 1.2, 1.9],"
-    "      [2.3, 3.4, 3.9],"
-    "      [4.5, 5.7, 5.9],"
-    "  ]"
-    "  indices = ["
-    "      [0, 2],"
-    "  ]"
-    "  axis = 1,"
-    "  output = ["
-    "      ["
-    "          [1.0, 1.9],"
-    "          [2.3, 3.9],"
-    "          [4.5, 5.9],"
-    "      ],"
-    "  ]"
-    "```"
+  "Given `data` tensor of rank r >= 1, and `indices` tensor of rank q, gather"
+  "entries of the axis dimension of `data` (by default outer-most one as axis=0) indexed by `indices`, and concatenate"
+  "them in an output tensor of rank q + (r - 1)."
+  ""
+  "axis = 0 :"
+  ""
+  "Let"
+  "k = indices[i_{0}, ..., i_{q-1\}\]"
+  "Then"
+  "output[i_{0}, ..., i_{q-1}, j_{0}, ..., j_{r-2\}\] = input[k , j_{0}, ..., j_{r-2\}\]"
+  ""
+  "```"
+  "  data = ["
+  "      [1.0, 1.2],"
+  "      [2.3, 3.4],"
+  "      [4.5, 5.7],"
+  "  ]"
+  "  indices = ["
+  "      [0, 1],"
+  "      [1, 2],"
+  "  ]"
+  "  output = ["
+  "      ["
+  "          [1.0, 1.2],"
+  "          [2.3, 3.4],"
+  "      ],"
+  "      ["
+  "          [2.3, 3.4],"
+  "          [4.5, 5.7],"
+  "      ],"
+  "  ]"
+  "```"
+  "axis = 1 :"
+  ""
+  "Let"
+  "k = indices[i_{0}, ..., i_{q-1\}\]"
+  "Then"
+  "output[i_{0}, ..., i_{q-1}, j_{0}, ..., j_{r-2\}\] = input[j_{0}, k, j_{1}, ..., j_{r-2\}\]"
+  ""
+  "```"
+  "  data = ["
+  "      [1.0, 1.2, 1.9],"
+  "      [2.3, 3.4, 3.9],"
+  "      [4.5, 5.7, 5.9],"
+  "  ]"
+  "  indices = ["
+  "      [0, 2],"
+  "  ]"
+  "  axis = 1,"
+  "  output = ["
+  "      ["
+  "          [1.0, 1.9],"
+  "          [2.3, 3.9],"
+  "          [4.5, 5.9],"
+  "      ],"
+  "  ]"
+  "```"
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$data,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$indices,
-           DefaultValuedAttr<I64Attr, "0">:$axis);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$indices,
+    DefaultValuedAttr<I64Attr, "0">:$axis);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXGatherElementsOp:ONNX_Op<"GatherElements", 
-    [NoSideEffect]> {
+def ONNXGatherElementsOp:ONNX_Op<"GatherElements",
+  [NoSideEffect]> {
   let summary = "ONNX GatherElements operation";
   let description = [{
-    "GatherElements takes two inputs `data` and `indices` of the same rank r >= 1"
-    "and an optional attribute `axis` that identifies an axis of `data`"
-    "(by default, the outer-most axis, that is axis 0). It is an indexing operation"
-    "that produces its output by indexing into the input data tensor at index"
-    "positions determined by elements of the `indices` tensor."
-    "Its output shape is the same as the shape of `indices` and consists of one value"
-    "(gathered from the `data`) for each element in `indices`."
-    ""
-    "For instance, in the 3-D case (r = 3), the output produced is determined"
-    "by the following equations: "
-    "```"
-    "  out[i][j][k] = input[index[i][j][k]][j][k] if axis = 0,"
-    "  out[i][j][k] = input[i][index[i][j][k]][k] if axis = 1,"
-    "  out[i][j][k] = input[i][j][index[i][j][k]] if axis = 2,"
-    "```"
-    ""
-    "This operator is also the inverse of ScatterElements. It is similar to Torch's gather operation."
-    ""
-    "Example 1:"
-    "```"
-    "  data = ["
-    "      [1, 2],"
-    "      [3, 4],"
-    "  ]"
-    "  indices = ["
-    "      [0, 0],"
-    "      [1, 0],"
-    "  ]"
-    "  axis = 1"
-    "  output = ["
-    "      ["
-    "        [1, 1],"
-    "        [4, 3],"
-    "      ],"
-    "  ]"
-    "```"
-    "Example 2:"
-    "```"
-    "  data = ["
-    "      [1, 2, 3],"
-    "      [4, 5, 6],"
-    "      [7, 8, 9],"
-    "  ]"
-    "  indices = ["
-    "      [1, 2, 0],"
-    "      [2, 0, 0],"
-    "  ]"
-    "  axis = 0"
-    "  output = ["
-    "      ["
-    "        [4, 8, 3],"
-    "        [7, 2, 3],"
-    "      ],"
-    "  ]"
-    "```"
+  "GatherElements takes two inputs `data` and `indices` of the same rank r >= 1"
+  "and an optional attribute `axis` that identifies an axis of `data`"
+  "(by default, the outer-most axis, that is axis 0). It is an indexing operation"
+  "that produces its output by indexing into the input data tensor at index"
+  "positions determined by elements of the `indices` tensor."
+  "Its output shape is the same as the shape of `indices` and consists of one value"
+  "(gathered from the `data`) for each element in `indices`."
+  ""
+  "For instance, in the 3-D case (r = 3), the output produced is determined"
+  "by the following equations: "
+  "```"
+  "  out[i][j][k] = input[index[i][j][k]][j][k] if axis = 0,"
+  "  out[i][j][k] = input[i][index[i][j][k]][k] if axis = 1,"
+  "  out[i][j][k] = input[i][j][index[i][j][k]] if axis = 2,"
+  "```"
+  ""
+  "This operator is also the inverse of ScatterElements. It is similar to Torch's gather operation."
+  ""
+  "Example 1:"
+  "```"
+  "  data = ["
+  "      [1, 2],"
+  "      [3, 4],"
+  "  ]"
+  "  indices = ["
+  "      [0, 0],"
+  "      [1, 0],"
+  "  ]"
+  "  axis = 1"
+  "  output = ["
+  "      ["
+  "        [1, 1],"
+  "        [4, 3],"
+  "      ],"
+  "  ]"
+  "```"
+  "Example 2:"
+  "```"
+  "  data = ["
+  "      [1, 2, 3],"
+  "      [4, 5, 6],"
+  "      [7, 8, 9],"
+  "  ]"
+  "  indices = ["
+  "      [1, 2, 0],"
+  "      [2, 0, 0],"
+  "  ]"
+  "  axis = 0"
+  "  output = ["
+  "      ["
+  "        [4, 8, 3],"
+  "        [7, 2, 3],"
+  "      ],"
+  "  ]"
+  "```"
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$data,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$indices,
-           DefaultValuedAttr<I64Attr, "0">:$axis);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$indices,
+    DefaultValuedAttr<I64Attr, "0">:$axis);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXGatherNDOp:ONNX_Op<"GatherND", 
-    [NoSideEffect]> {
+def ONNXGatherNDOp:ONNX_Op<"GatherND",
+  [NoSideEffect]> {
   let summary = "ONNX GatherND operation";
   let description = [{
-    "Given `data` tensor of rank `r` >= 1, and `indices` tensor of rank `q` >= 1, this operator gathers "
-    "slices of `data` into an output tensor of rank `q + r - indices_shape[-1] - 1`."
-    ""
-    "`indices` is an q-dimensional integer tensor, best thought of as a `(q-1)`-dimensional tensor of index-tuples into `data`, "
-    "where each element defines a slice of `data`"
-    ""
-    "Some salient points about the inputs' rank and shape:"
-    " "
-    "1) r >= 1 and q >= 1 are to be honored. There is no dependency condition to be met between ranks `r` and `q`"
-    ""
-    "2) The `indices_shape[-1]` should have a value between 1 (inclusive) and rank `r` (inclusive) "
-    ""
-    "3) All values in `indices` are expected to be within bounds [-s, s-1] along axis of size `s` (i.e.) `-data_shape[i] <= indices[...,i] <= data_shape[i] - 1`."
-    "   It is an error if any of the index values are out of bounds."
-    ""
-    "The output is computed as follows:"
-    ""
-    "The output tensor is obtained by mapping each index-tuple in the `indices` tensor to the corresponding slice of the input `data`."
-    " "
-    "1) If `indices_shape[-1] > r` => error condition"
-    ""
-    "2) If `indices_shape[-1] == r`, since the rank of `indices` is `q`, `indices` can be thought of as a `(q-1)`-dimensional tensor"
-    "   containing 1-D tensors of dimension `r`. Let us think of each such `r` ranked tensor as `indices_slice`. "
-    "   Each *scalar value* corresponding to `data[indices_slice]` is filled into the corresponding location of the `(q-1)`-dimensional tensor "
-    "   to form the `output` tensor (Example 1 below)"
-    ""
-    "3) If `indices_shape[-1] < r`, since the rank of `indices` is `q`, `indices` can be thought of as a `(q-1)`-dimensional tensor"
-    "   containing 1-D tensors of dimension `< r`. Let us think of each such tensors as `indices_slice`. "
-    "   Each *tensor slice* corresponding to `data[indices_slice , :]` is filled into the corresponding location of the `(q-1)`-dimensional tensor "
-    "   to form the `output` tensor (Examples 2, 3, and 4 below)"
-    ""
-    "This operator is the inverse of `ScatterND`."
-    ""
-    "`Example 1`"
-    ""
-    "  data    = [[0,1],[2,3]]   # data_shape = [2, 2]"
-    ""
-    "  indices = [[0,0],[1,1]]   # indices_shape = [2, 2]"
-    ""
-    "  output  = [0,3]           # output_shape = [2]"
-    ""
-    "`Example 2`"
-    ""
-    "  data    = [[0,1],[2,3]]  # data_shape = [2, 2]"
-    ""
-    "  indices = [[1],[0]]      # indices_shape = [2, 1]"
-    ""
-    "  output  = [[2,3],[0,1]]  # output_shape = [2, 2]"
-    ""
-    "`Example 3`"
-    ""
-    "  data    = [[[0,1],[2,3]],[[4,5],[6,7]]] # data_shape = [2, 2, 2]"
-    ""
-    "  indices = [[0,1],[1,0]]                 # indices_shape = [2, 2]"
-    ""
-    "  output  = [[2,3],[4,5]]                 # output_shape = [2, 2]   "
-    ""
-    "`Example 4`"
-    ""
-    "  data    = [[[0,1],[2,3]],[[4,5],[6,7]]] # data_shape = [2, 2, 2]"
-    ""
-    "  indices = [[[0,1]],[[1,0]]]             # indices_shape = [2, 1, 2]"
-    ""
-    "  output  = [[[2,3]],[[4,5]]]             # output_shape = [2, 1, 2] "
-    ""
+  "Given `data` tensor of rank `r` >= 1, and `indices` tensor of rank `q` >= 1, this operator gathers "
+  "slices of `data` into an output tensor of rank `q + r - indices_shape[-1] - 1`."
+  ""
+  "`indices` is a q-dimensional integer tensor, best thought of as a `(q-1)`-dimensional tensor of index-tuples into `data`, "
+  "where each element defines a slice of `data`"
+  ""
+  "Some salient points about the inputs' rank and shape:"
+  " "
+  "1) r >= 1 and q >= 1 are to be honored. There is no dependency condition to be met between ranks `r` and `q`"
+  ""
+  "2) The `indices_shape[-1]` should have a value between 1 (inclusive) and rank `r` (inclusive) "
+  ""
+  "3) All values in `indices` are expected to be within bounds [-s, s-1] along axis of size `s` (i.e.) `-data_shape[i] <= indices[...,i] <= data_shape[i] - 1`."
+  "   It is an error if any of the index values are out of bounds."
+  ""
+  "The output is computed as follows:"
+  ""
+  "The output tensor is obtained by mapping each index-tuple in the `indices` tensor to the corresponding slice of the input `data`."
+  " "
+  "1) If `indices_shape[-1] > r` => error condition"
+  ""
+  "2) If `indices_shape[-1] == r`, since the rank of `indices` is `q`, `indices` can be thought of as a `(q-1)`-dimensional tensor"
+  "   containing 1-D tensors of dimension `r`. Let us think of each such `r` ranked tensor as `indices_slice`. "
+  "   Each *scalar value* corresponding to `data[indices_slice]` is filled into the corresponding location of the `(q-1)`-dimensional tensor "
+  "   to form the `output` tensor (Example 1 below)"
+  ""
+  "3) If `indices_shape[-1] < r`, since the rank of `indices` is `q`, `indices` can be thought of as a `(q-1)`-dimensional tensor"
+  "   containing 1-D tensors of dimension `< r`. Let us think of each such tensor as `indices_slice`. "
+  "   Each *tensor slice* corresponding to `data[indices_slice , :]` is filled into the corresponding location of the `(q-1)`-dimensional tensor "
+  "   to form the `output` tensor (Examples 2, 3, and 4 below)"
+  ""
+  "This operator is the inverse of `ScatterND`."
+  ""
+  "`Example 1`"
+  ""
+  "  data    = [[0,1],[2,3]]   # data_shape = [2, 2]"
+  ""
+  "  indices = [[0,0],[1,1]]   # indices_shape = [2, 2]"
+  ""
+  "  output  = [0,3]           # output_shape = [2]"
+  ""
+  "`Example 2`"
+  ""
+  "  data    = [[0,1],[2,3]]  # data_shape = [2, 2]"
+  ""
+  "  indices = [[1],[0]]      # indices_shape = [2, 1]"
+  ""
+  "  output  = [[2,3],[0,1]]  # output_shape = [2, 2]"
+  ""
+  "`Example 3`"
+  ""
+  "  data    = [[[0,1],[2,3]],[[4,5],[6,7]]] # data_shape = [2, 2, 2]"
+  ""
+  "  indices = [[0,1],[1,0]]                 # indices_shape = [2, 2]"
+  ""
+  "  output  = [[2,3],[4,5]]                 # output_shape = [2, 2]   "
+  ""
+  "`Example 4`"
+  ""
+  "  data    = [[[0,1],[2,3]],[[4,5],[6,7]]] # data_shape = [2, 2, 2]"
+  ""
+  "  indices = [[[0,1]],[[1,0]]]             # indices_shape = [2, 1, 2]"
+  ""
+  "  output  = [[[2,3]],[[4,5]]]             # output_shape = [2, 1, 2] "
+  ""
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$data,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$indices);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$indices);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXGemmOp:ONNX_Op<"Gemm", 
-    [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+def ONNXGemmOp:ONNX_Op<"Gemm",
+  [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+  let hasCanonicalizer = 1;
   let summary = "ONNX Gemm operation";
   let description = [{
-    "General Matrix multiplication:"
-    "https://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms#Level_3"
-    ""
-    "A' = transpose(A) if transA else A"
-    ""
-    "B' = transpose(B) if transB else B"
-    ""
-    "Compute Y = alpha * A' * B' + beta * C, where input tensor A has shape (M, K) or (K, M),"
-    "input tensor B has shape (K, N) or (N, K), input tensor C is broadcastable to shape (M, N),"
-    "and output tensor Y has shape (M, N). A will be transposed before doing the"
-    "computation if attribute transA is non-zero, same for B and transB."
-    "This operator supports **unidirectional broadcasting** (tensor C should be unidirectional broadcastable to tensor A * B); for more details please check [the doc](Broadcasting.md)."
-    "This operator has **optional** inputs/outputs. See [the doc](IR.md) for more details about the representation of optional arguments. An empty string may be used in the place of an actual argument's name to indicate a missing argument. Trailing optional arguments (those not followed by an argument that is present) may also be simply omitted."
+  "General Matrix multiplication:"
+  "https://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms#Level_3"
+  ""
+  "A' = transpose(A) if transA else A"
+  ""
+  "B' = transpose(B) if transB else B"
+  ""
+  "Compute Y = alpha * A' * B' + beta * C, where input tensor A has shape (M, K) or (K, M),"
+  "input tensor B has shape (K, N) or (N, K), input tensor C is broadcastable to shape (M, N),"
+  "and output tensor Y has shape (M, N). A will be transposed before doing the"
+  "computation if attribute transA is non-zero, same for B and transB."
+  "This operator supports **unidirectional broadcasting** (tensor C should be unidirectional broadcastable to tensor A * B); for more details please check [the doc](Broadcasting.md)."
+  "This operator has **optional** inputs/outputs. See [the doc](IR.md) for more details about the representation of optional arguments. An empty string may be used in the place of an actual argument's name to indicate a missing argument. Trailing optional arguments (those not followed by an argument that is present) may also be simply omitted."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$A,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$B,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$C,
-           DefaultValuedAttr<F32Attr, "1.0">:$alpha,
-           DefaultValuedAttr<F32Attr, "1.0">:$beta,
-           DefaultValuedAttr<I64Attr, "0">:$transA,
-           DefaultValuedAttr<I64Attr, "0">:$transB);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$B,
+    AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$C,
+    DefaultValuedAttr<F32Attr, "1.0">:$alpha,
+    DefaultValuedAttr<F32Attr, "1.0">:$beta,
+    DefaultValuedAttr<I64Attr, "0">:$transA,
+    DefaultValuedAttr<I64Attr, "0">:$transB);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y);
 }
 
-def ONNXGlobalAveragePoolOp:ONNX_Op<"GlobalAveragePool", 
-    [NoSideEffect]> {
+def ONNXGlobalAveragePoolOp:ONNX_Op<"GlobalAveragePool",
+  [NoSideEffect]> {
   let summary = "ONNX GlobalAveragePool operation";
   let description = [{
-    "GlobalAveragePool consumes an input tensor X and applies average pooling across"
-    " the values in the same channel. This is equivalent to AveragePool with kernel size"
-    " equal to the spatial dimension of input tensor."
+  "GlobalAveragePool consumes an input tensor X and applies average pooling across"
+  " the values in the same channel. This is equivalent to AveragePool with kernel size"
+  " equal to the spatial dimension of input tensor."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$X);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y);
 }
 
-def ONNXGlobalLpPoolOp:ONNX_Op<"GlobalLpPool", 
-    [NoSideEffect]> {
+def ONNXGlobalLpPoolOp:ONNX_Op<"GlobalLpPool",
+  [NoSideEffect]> {
   let summary = "ONNX GlobalLpPool operation";
   let description = [{
-    "GlobalLpPool consumes an input tensor X and applies lp pool pooling across"
-    " the values in the same channel. This is equivalent to LpPool with kernel size"
-    " equal to the spatial dimension of input tensor."
+  "GlobalLpPool consumes an input tensor X and applies Lp pooling across"
+  " the values in the same channel. This is equivalent to LpPool with kernel size"
+  " equal to the spatial dimension of input tensor."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$X,
-           DefaultValuedAttr<I64Attr, "2">:$p);
+    DefaultValuedAttr<I64Attr, "2">:$p);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y);
 }
 
-def ONNXGlobalMaxPoolOp:ONNX_Op<"GlobalMaxPool", 
-    [NoSideEffect]> {
+def ONNXGlobalMaxPoolOp:ONNX_Op<"GlobalMaxPool",
+  [NoSideEffect]> {
   let summary = "ONNX GlobalMaxPool operation";
   let description = [{
-    "GlobalMaxPool consumes an input tensor X and applies max pooling across"
-    " the values in the same channel. This is equivalent to MaxPool with kernel size"
-    " equal to the spatial dimension of input tensor."
+  "GlobalMaxPool consumes an input tensor X and applies max pooling across"
+  " the values in the same channel. This is equivalent to MaxPool with kernel size"
+  " equal to the spatial dimension of input tensor."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$X);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y);
 }
 
-def ONNXGreaterOp:ONNX_Op<"Greater", 
-    [NoSideEffect]> {
+def ONNXGreaterOp:ONNX_Op<"Greater",
+  [NoSideEffect]> {
   let summary = "ONNX Greater operation";
   let description = [{
-    "Returns the tensor resulted from performing the `greater` logical operation"
-    "elementwise on the input tensors `A` and `B` (with Numpy-style broadcasting support)."
-    ""
-    "This operator supports **multidirectional (i.e., Numpy-style) broadcasting**; for more details please check [the doc](Broadcasting.md)."
+  "Returns the tensor resulting from performing the `greater` logical operation"
+  "elementwise on the input tensors `A` and `B` (with Numpy-style broadcasting support)."
+  ""
+  "This operator supports **multidirectional (i.e., Numpy-style) broadcasting**; for more details please check [the doc](Broadcasting.md)."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$A,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$B);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$B);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$C);
 }
 
-def ONNXHardSigmoidOp:ONNX_Op<"HardSigmoid", 
-    [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+def ONNXHardSigmoidOp:ONNX_Op<"HardSigmoid",
+  [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
   let summary = "ONNX HardSigmoid operation";
   let description = [{
-    "HardSigmoid takes one input data (Tensor<T>) and produces one output data"
-    "(Tensor<T>) where the HardSigmoid function, y = max(0, min(1, alpha * x + beta)),"
-    "is applied to the tensor elementwise."
+  "HardSigmoid takes one input data (Tensor<T>) and produces one output data"
+  "(Tensor<T>) where the HardSigmoid function, y = max(0, min(1, alpha * x + beta)),"
+  "is applied to the tensor elementwise."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$X,
-           DefaultValuedAttr<F32Attr, "0.2">:$alpha,
-           DefaultValuedAttr<F32Attr, "0.5">:$beta);
+    DefaultValuedAttr<F32Attr, "0.2">:$alpha,
+    DefaultValuedAttr<F32Attr, "0.5">:$beta);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y);
 }
 
-def ONNXHardmaxOp:ONNX_Op<"Hardmax", 
-    [NoSideEffect]> {
+def ONNXHardmaxOp:ONNX_Op<"Hardmax",
+  [NoSideEffect]> {
   let summary = "ONNX Hardmax operation";
   let description = [{
-    "The operator computes the hardmax (1 for the first maximum value, and 0 for all others) values for each layer in the batch"
-    " of the given input."
-    ""
-    "The input does not need to explicitly be a 2D vector; rather, it will be"
-    "coerced into one. For an arbitrary n-dimensional tensor"
-    "input \in [a_0, a_1, ..., a_{k-1}, a_k, ..., a_{n-1\}\] and k is"
-    "the axis provided, then input will be coerced into a 2-dimensional tensor with"
-    "dimensions [a_0 * ... * a_{k-1}, a_k * ... * a_{n-1\}\]. For the default"
-    "case where axis=1, this means the input tensor will be coerced into a 2D tensor"
-    "of dimensions [a_0, a_1 * ... * a_{n-1\}\], where a_0 is often the batch size."
-    "In this situation, we must have a_0 = N and a_1 * ... * a_{n-1} = D."
-    "Each of these dimensions must be matched correctly, or else the operator"
-    "will throw errors. The output tensor has the same shape"
-    "and contains the hardmax values of the corresponding input."
+  "The operator computes the hardmax (1 for the first maximum value, and 0 for all others) values for each layer in the batch"
+  " of the given input."
+  ""
+  "The input does not need to explicitly be a 2D vector; rather, it will be"
+  "coerced into one. For an arbitrary n-dimensional tensor"
+  "input \in [a_0, a_1, ..., a_{k-1}, a_k, ..., a_{n-1\}\] and k is"
+  "the axis provided, then input will be coerced into a 2-dimensional tensor with"
+  "dimensions [a_0 * ... * a_{k-1}, a_k * ... * a_{n-1\}\]. For the default"
+  "case where axis=1, this means the input tensor will be coerced into a 2D tensor"
+  "of dimensions [a_0, a_1 * ... * a_{n-1\}\], where a_0 is often the batch size."
+  "In this situation, we must have a_0 = N and a_1 * ... * a_{n-1} = D."
+  "Each of these dimensions must be matched correctly, or else the operator"
+  "will throw errors. The output tensor has the same shape"
+  "and contains the hardmax values of the corresponding input."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$input,
-           DefaultValuedAttr<I64Attr, "1">:$axis);
+    DefaultValuedAttr<I64Attr, "1">:$axis);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXIdentityOp:ONNX_Op<"Identity", 
-    [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+def ONNXIdentityOp:ONNX_Op<"Identity",
+  [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
   let hasCanonicalizer = 1;
   let summary = "ONNX Identity operation";
   let description = [{
-    "Identity operator"
+  "Identity operator"
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$input);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXIfOp:ONNX_Op<"If", 
-    [NoSideEffect]> {
+def ONNXIfOp:ONNX_Op<"If",
+  [NoSideEffect]> {
   let summary = "ONNX If operation";
   let description = [{
-    "If conditional"
+  "If conditional"
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$cond,
-           AnyAttr:$else_branch,
-           AnyAttr:$then_branch);
+    AnyAttr:$else_branch,
+    AnyAttr:$then_branch);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$outputs);
 }
 
-def ONNXInstanceNormalizationOp:ONNX_Op<"InstanceNormalization", 
-    [NoSideEffect]> {
+def ONNXInstanceNormalizationOp:ONNX_Op<"InstanceNormalization",
+  [NoSideEffect]> {
   let summary = "ONNX InstanceNormalization operation";
   let description = [{
-    "Carries out instance normalization as described in the paper"
-    "https://arxiv.org/abs/1607.08022."
-    ""
-    "y = scale * (x - mean) / sqrt(variance + epsilon) + B,"
-    "where mean and variance are computed per instance per channel."
-    ""
+  "Carries out instance normalization as described in the paper"
+  "https://arxiv.org/abs/1607.08022."
+  ""
+  "y = scale * (x - mean) / sqrt(variance + epsilon) + B,"
+  "where mean and variance are computed per instance per channel."
+  ""
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$input,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$scale,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$B,
-           DefaultValuedAttr<F32Attr, "1e-05">:$epsilon);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$scale,
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$B,
+    DefaultValuedAttr<F32Attr, "1e-05">:$epsilon);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXIsInfOp:ONNX_Op<"IsInf", 
-    [NoSideEffect]> {
+def ONNXIsInfOp:ONNX_Op<"IsInf",
+  [NoSideEffect]> {
   let summary = "ONNX IsInf operation";
   let description = [{
-    "Map infinity to true and other values to false."
+  "Map infinity to true and other values to false."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$X,
-           DefaultValuedAttr<I64Attr, "1">:$detect_negative,
-           DefaultValuedAttr<I64Attr, "1">:$detect_positive);
+    DefaultValuedAttr<I64Attr, "1">:$detect_negative,
+    DefaultValuedAttr<I64Attr, "1">:$detect_positive);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y);
 }
 
-def ONNXIsNaNOp:ONNX_Op<"IsNaN", 
-    [NoSideEffect]> {
+def ONNXIsNaNOp:ONNX_Op<"IsNaN",
+  [NoSideEffect]> {
   let summary = "ONNX IsNaN operation";
   let description = [{
-    "Returns which elements of the input are NaN."
+  "Returns which elements of the input are NaN."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$X);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y);
 }
 
-def ONNXLRNOp:ONNX_Op<"LRN", 
-    [NoSideEffect]> {
+def ONNXLRNOp:ONNX_Op<"LRN",
+  [NoSideEffect]> {
   let summary = "ONNX LRN operation";
   let description = [{
-    "Local Response Normalization proposed in the [AlexNet paper](https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf)."
-    "It normalizes over local input regions."
-    "The local region is defined across the channels. For an element X[n, c, d1, ..., dk] in a tensor"
-    "of shape (N x C x D1 x D2, ..., Dk), its region is"
-    "{X[n, i, d1, ..., dk] | max(0, c - floor((size - 1) / 2)) <= i <= min(C - 1, c + ceil((size - 1) / 2))}."
-    ""
-    "square_sum[n, c, d1, ..., dk] = sum(X[n, i, d1, ..., dk] ^ 2),"
-    "where max(0, c - floor((size - 1) / 2)) <= i <= min(C - 1, c + ceil((size - 1) / 2))."
-    ""
-    "Y[n, c, d1, ..., dk] = X[n, c, d1, ..., dk] / (bias + alpha / size * square_sum[n, c, d1, ..., dk] ) ^ beta"
+  "Local Response Normalization proposed in the [AlexNet paper](https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf)."
+  "It normalizes over local input regions."
+  "The local region is defined across the channels. For an element X[n, c, d1, ..., dk] in a tensor"
+  "of shape (N x C x D1 x D2, ..., Dk), its region is"
+  "{X[n, i, d1, ..., dk] | max(0, c - floor((size - 1) / 2)) <= i <= min(C - 1, c + ceil((size - 1) / 2))}."
+  ""
+  "square_sum[n, c, d1, ..., dk] = sum(X[n, i, d1, ..., dk] ^ 2),"
+  "where max(0, c - floor((size - 1) / 2)) <= i <= min(C - 1, c + ceil((size - 1) / 2))."
+  ""
+  "Y[n, c, d1, ..., dk] = X[n, c, d1, ..., dk] / (bias + alpha / size * square_sum[n, c, d1, ..., dk] ) ^ beta"
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$X,
-           DefaultValuedAttr<F32Attr, "0.0001">:$alpha,
-           DefaultValuedAttr<F32Attr, "0.75">:$beta,
-           DefaultValuedAttr<F32Attr, "1.0">:$bias,
-           I64Attr:$size);
+    DefaultValuedAttr<F32Attr, "0.0001">:$alpha,
+    DefaultValuedAttr<F32Attr, "0.75">:$beta,
+    DefaultValuedAttr<F32Attr, "1.0">:$bias,
+    I64Attr:$size);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y);
 }
 
-def ONNXLSTMOp:ONNX_Op<"LSTM", 
-    [NoSideEffect]> {
+def ONNXLSTMOp:ONNX_Op<"LSTM",
+  [NoSideEffect]> {
   let summary = "ONNX LSTM operation";
   let description = [{
-    "Computes an one-layer LSTM. This operator is usually supported via some"
-    "custom implementation such as CuDNN."
-    ""
-    "Notations:"
-    ""
-    "`X` - input tensor"
-    ""
-    "`i` - input gate"
-    ""
-    "`o` - output gate"
-    ""
-    "`f` - forget gate"
-    ""
-    "`c` - cell gate"
-    ""
-    "`t` - time step (t-1 means previous time step)"
-    ""
-    "`W[iofc]` - W parameter weight matrix for input, output, forget, and cell gates"
-    ""
-    "`R[iofc]` - R recurrence weight matrix for input, output, forget, and cell gates"
-    ""
-    "`Wb[iofc]` - W bias vectors for input, output, forget, and cell gates"
-    ""
-    "`Rb[iofc]` - R bias vectors for input, output, forget, and cell gates"
-    ""
-    "`P[iof]`  - P peephole weight vector for input, output, and forget gates"
-    ""
-    "`WB[iofc]` - W parameter weight matrix for backward input, output, forget, and cell gates"
-    ""
-    "`RB[iofc]` - R recurrence weight matrix for backward input, output, forget, and cell gates"
-    ""
-    "`WBb[iofc]` - W bias vectors for backward input, output, forget, and cell gates"
-    ""
-    "`RBb[iofc]` - R bias vectors for backward input, output, forget, and cell gates"
-    ""
-    "`PB[iof]`  - P peephole weight vector for backward input, output, and forget gates"
-    ""
-    "`H` - Hidden state"
-    ""
-    "`num_directions` - 2 if direction == bidirectional else 1"
-    ""
-    "Activation functions:"
-    ""
-    "  Relu(x)                - max(0, x)"
-    ""
-    "  Tanh(x)                - (1 - e^{-2x})/(1 + e^{-2x})"
-    ""
-    "  Sigmoid(x)             - 1/(1 + e^{-x})"
-    ""
-    "  (NOTE: Below are optional)"
-    ""
-    "  Affine(x)              - alpha*x + beta"
-    ""
-    "  LeakyRelu(x)           - x if x >= 0 else alpha * x"
-    ""
-    "  ThresholdedRelu(x)     - x if x >= alpha else 0"
-    ""
-    "  ScaledTanh(x)          - alpha*Tanh(beta*x)"
-    ""
-    "  HardSigmoid(x)         - min(max(alpha*x + beta, 0), 1)"
-    ""
-    "  Elu(x)                 - x if x >= 0 else alpha*(e^x - 1)"
-    ""
-    "  Softsign(x)            - x/(1 + |x|)"
-    ""
-    "  Softplus(x)            - log(1 + e^x)"
-    ""
-    "Equations (Default: f=Sigmoid, g=Tanh, h=Tanh):"
-    ""
-    "  - it = f(Xt*(Wi^T) + Ht-1*(Ri^T) + Pi (.) Ct-1 + Wbi + Rbi)"
-    ""
-    "  - ft = f(Xt*(Wf^T) + Ht-1*(Rf^T) + Pf (.) Ct-1 + Wbf + Rbf)"
-    ""
-    "  - ct = g(Xt*(Wc^T) + Ht-1*(Rc^T) + Wbc + Rbc)"
-    ""
-    "  - Ct = ft (.) Ct-1 + it (.) ct"
-    ""
-    "  - ot = f(Xt*(Wo^T) + Ht-1*(Ro^T) + Po (.) Ct + Wbo + Rbo)"
-    ""
-    "  - Ht = ot (.) h(Ct)"
-    "This operator has **optional** inputs/outputs. See [the doc](IR.md) for more details about the representation of optional arguments. An empty string may be used in the place of an actual argument's name to indicate a missing argument. Trailing optional arguments (those not followed by an argument that is present) may also be simply omitted."
+  "Computes a one-layer LSTM. This operator is usually supported via some"
+  "custom implementation such as CuDNN."
+  ""
+  "Notations:"
+  ""
+  "`X` - input tensor"
+  ""
+  "`i` - input gate"
+  ""
+  "`o` - output gate"
+  ""
+  "`f` - forget gate"
+  ""
+  "`c` - cell gate"
+  ""
+  "`t` - time step (t-1 means previous time step)"
+  ""
+  "`W[iofc]` - W parameter weight matrix for input, output, forget, and cell gates"
+  ""
+  "`R[iofc]` - R recurrence weight matrix for input, output, forget, and cell gates"
+  ""
+  "`Wb[iofc]` - W bias vectors for input, output, forget, and cell gates"
+  ""
+  "`Rb[iofc]` - R bias vectors for input, output, forget, and cell gates"
+  ""
+  "`P[iof]`  - P peephole weight vector for input, output, and forget gates"
+  ""
+  "`WB[iofc]` - W parameter weight matrix for backward input, output, forget, and cell gates"
+  ""
+  "`RB[iofc]` - R recurrence weight matrix for backward input, output, forget, and cell gates"
+  ""
+  "`WBb[iofc]` - W bias vectors for backward input, output, forget, and cell gates"
+  ""
+  "`RBb[iofc]` - R bias vectors for backward input, output, forget, and cell gates"
+  ""
+  "`PB[iof]`  - P peephole weight vector for backward input, output, and forget gates"
+  ""
+  "`H` - Hidden state"
+  ""
+  "`num_directions` - 2 if direction == bidirectional else 1"
+  ""
+  "Activation functions:"
+  ""
+  "  Relu(x)                - max(0, x)"
+  ""
+  "  Tanh(x)                - (1 - e^{-2x})/(1 + e^{-2x})"
+  ""
+  "  Sigmoid(x)             - 1/(1 + e^{-x})"
+  ""
+  "  (NOTE: Below are optional)"
+  ""
+  "  Affine(x)              - alpha*x + beta"
+  ""
+  "  LeakyRelu(x)           - x if x >= 0 else alpha * x"
+  ""
+  "  ThresholdedRelu(x)     - x if x >= alpha else 0"
+  ""
+  "  ScaledTanh(x)          - alpha*Tanh(beta*x)"
+  ""
+  "  HardSigmoid(x)         - min(max(alpha*x + beta, 0), 1)"
+  ""
+  "  Elu(x)                 - x if x >= 0 else alpha*(e^x - 1)"
+  ""
+  "  Softsign(x)            - x/(1 + |x|)"
+  ""
+  "  Softplus(x)            - log(1 + e^x)"
+  ""
+  "Equations (Default: f=Sigmoid, g=Tanh, h=Tanh):"
+  ""
+  "  - it = f(Xt*(Wi^T) + Ht-1*(Ri^T) + Pi (.) Ct-1 + Wbi + Rbi)"
+  ""
+  "  - ft = f(Xt*(Wf^T) + Ht-1*(Rf^T) + Pf (.) Ct-1 + Wbf + Rbf)"
+  ""
+  "  - ct = g(Xt*(Wc^T) + Ht-1*(Rc^T) + Wbc + Rbc)"
+  ""
+  "  - Ct = ft (.) Ct-1 + it (.) ct"
+  ""
+  "  - ot = f(Xt*(Wo^T) + Ht-1*(Ro^T) + Po (.) Ct + Wbo + Rbo)"
+  ""
+  "  - Ht = ot (.) h(Ct)"
+  "This operator has **optional** inputs/outputs. See [the doc](IR.md) for more details about the representation of optional arguments. An empty string may be used in the place of an actual argument's name to indicate a missing argument. Trailing optional arguments (those not followed by an argument that is present) may also be simply omitted."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$X,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$W,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$R,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$B,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$sequence_lens,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$initial_h,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$initial_c,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$P,
-           OptionalAttr<F32ArrayAttr>:$activation_alpha,
-           OptionalAttr<F32ArrayAttr>:$activation_beta,
-           OptionalAttr<StrArrayAttr>:$activations,
-           OptionalAttr<F32Attr>:$clip,
-           DefaultValuedAttr<StrAttr, "forward">:$direction,
-           OptionalAttr<I64Attr>:$hidden_size,
-           DefaultValuedAttr<I64Attr, "0">:$input_forget);
-  let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y_h,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y_c);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$W,
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$R,
+    AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$B,
+    AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$sequence_lens,
+    AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$initial_h,
+    AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$initial_c,
+    AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$P,
+    OptionalAttr<F32ArrayAttr>:$activation_alpha,
+    OptionalAttr<F32ArrayAttr>:$activation_beta,
+    OptionalAttr<StrArrayAttr>:$activations,
+    OptionalAttr<F32Attr>:$clip,
+    DefaultValuedAttr<StrAttr, "forward">:$direction,
+    OptionalAttr<I64Attr>:$hidden_size,
+    DefaultValuedAttr<I64Attr, "0">:$input_forget);
+  let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$Y,
+    AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$Y_h,
+    AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$Y_c);
 }
 
-def ONNXLeakyReluOp:ONNX_Op<"LeakyRelu", 
-    [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+def ONNXLeakyReluOp:ONNX_Op<"LeakyRelu",
+  [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
   let summary = "ONNX LeakyRelu operation";
   let description = [{
-    "LeakyRelu takes input data (Tensor<T>) and an argument alpha, and produces one"
-    "output data (Tensor<T>) where the function `f(x) = alpha * x for x < 0`,"
-    "`f(x) = x for x >= 0`, is applied to the data tensor elementwise."
+  "LeakyRelu takes input data (Tensor<T>) and an argument alpha, and produces one"
+  "output data (Tensor<T>) where the function `f(x) = alpha * x for x < 0`,"
+  "`f(x) = x for x >= 0`, is applied to the data tensor elementwise."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$X,
-           DefaultValuedAttr<F32Attr, "0.01">:$alpha);
+    DefaultValuedAttr<F32Attr, "0.01">:$alpha);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y);
 }
 
-def ONNXLessOp:ONNX_Op<"Less", 
-    [NoSideEffect]> {
+def ONNXLessOp:ONNX_Op<"Less",
+  [NoSideEffect]> {
   let summary = "ONNX Less operation";
   let description = [{
-    "Returns the tensor resulted from performing the `less` logical operation"
-    "elementwise on the input tensors `A` and `B` (with Numpy-style broadcasting support)."
-    ""
-    "This operator supports **multidirectional (i.e., Numpy-style) broadcasting**; for more details please check [the doc](Broadcasting.md)."
+  "Returns the tensor resulting from performing the `less` logical operation"
+  "elementwise on the input tensors `A` and `B` (with Numpy-style broadcasting support)."
+  ""
+  "This operator supports **multidirectional (i.e., Numpy-style) broadcasting**; for more details please check [the doc](Broadcasting.md)."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$A,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$B);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$B);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$C);
 }
 
-def ONNXLogOp:ONNX_Op<"Log", 
-    [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+def ONNXLogOp:ONNX_Op<"Log",
+  [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
   let summary = "ONNX Log operation";
   let description = [{
-    "Calculates the natural log of the given input tensor, element-wise."
+  "Calculates the natural log of the given input tensor, element-wise."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$input);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXLogSoftmaxOp:ONNX_Op<"LogSoftmax", 
-    [NoSideEffect]> {
+def ONNXLogSoftmaxOp:ONNX_Op<"LogSoftmax",
+  [NoSideEffect]> {
   let summary = "ONNX LogSoftmax operation";
   let description = [{
-    "The operator computes the logsoftmax (log of softmax) values for each layer in the batch"
-    " of the given input."
-    ""
-    "The input does not need to explicitly be a 2D vector; rather, it will be"
-    "coerced into one. For an arbitrary n-dimensional tensor"
-    "input \in [a_0, a_1, ..., a_{k-1}, a_k, ..., a_{n-1\}\] and k is"
-    "the axis provided, then input will be coerced into a 2-dimensional tensor with"
-    "dimensions [a_0 * ... * a_{k-1}, a_k * ... * a_{n-1\}\]. For the default"
-    "case where axis=1, this means the input tensor will be coerced into a 2D tensor"
-    "of dimensions [a_0, a_1 * ... * a_{n-1\}\], where a_0 is often the batch size."
-    "In this situation, we must have a_0 = N and a_1 * ... * a_{n-1} = D."
-    "Each of these dimensions must be matched correctly, or else the operator"
-    "will throw errors. The output tensor has the same shape"
-    "and contains the logsoftmax values of the corresponding input."
+  "The operator computes the logsoftmax (log of softmax) values for each layer in the batch"
+  " of the given input."
+  ""
+  "The input does not need to explicitly be a 2D vector; rather, it will be"
+  "coerced into one. For an arbitrary n-dimensional tensor"
+  "input \in [a_0, a_1, ..., a_{k-1}, a_k, ..., a_{n-1\}\] and k is"
+  "the axis provided, then input will be coerced into a 2-dimensional tensor with"
+  "dimensions [a_0 * ... * a_{k-1}, a_k * ... * a_{n-1\}\]. For the default"
+  "case where axis=1, this means the input tensor will be coerced into a 2D tensor"
+  "of dimensions [a_0, a_1 * ... * a_{n-1\}\], where a_0 is often the batch size."
+  "In this situation, we must have a_0 = N and a_1 * ... * a_{n-1} = D."
+  "Each of these dimensions must be matched correctly, or else the operator"
+  "will throw errors. The output tensor has the same shape"
+  "and contains the logsoftmax values of the corresponding input."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$input,
-           DefaultValuedAttr<I64Attr, "1">:$axis);
+    DefaultValuedAttr<I64Attr, "1">:$axis);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXLoopOp:ONNX_Op<"Loop", 
-    [NoSideEffect]> {
+def ONNXLoopOp:ONNX_Op<"Loop",
+  [NoSideEffect]> {
   let summary = "ONNX Loop operation";
   let description = [{
-    "Generic Looping construct. This loop has multiple termination conditions:"
-    ""
-    "1) Trip count. Iteration count specified at runtime. Set by"
-    "   specifying the input M. Optional. Set to empty string to omit."
-    "   Note that a static trip count (specified at graph construction time) can be"
-    "   specified by passing in a constant node for input M."
-    "2) Loop termination condition. This is an input to the op that determines"
-    "   whether to run the first iteration and also a loop-carried dependency for"
-    "   the body graph. The body graph must yield a value for the condition variable,"
-    "   whether this input is provided or not."
-    ""
-    "This table summarizes the operating modes of this operator with equivalent"
-    "C-style code:"
-    ""
-    "    Operator inputs defined as (max_trip_count, condition_var)."
-    ""
-    "    input ("", ""):"
-    "        for (int i=0; ; ++i) {"
-    "          cond = ... // Note this value is ignored, but is required in the body"
-    "        }"
-    ""
-    "    input ("", cond) // Note this is analogous to a while loop"
-    "        bool cond = ...;"
-    "        for (int i=0; cond; ++i) {"
-    "          cond = ...;"
-    "        }"
-    ""
-    "    input ("", 1) // Note this is analogous to a do-while loop"
-    "        bool cond = true"
-    "        for (int i=0; cond; ++i) {"
-    "          cond = ...;"
-    "        }"
-    ""
-    "    input (trip_count, "") // Note this is analogous to a for loop"
-    "        int trip_count = ..."
-    "        for (int i=0; i < trip_count; ++i) {"
-    "          cond = ...; // ignored"
-    "        }"
-    ""
-    "    input (trip_count, cond)"
-    "        int trip_count = ...;"
-    "        bool cond = ...;"
-    "        for (int i=0; i < trip_count && cond; ++i) {"
-    "          cond = ...;"
-    "        }"
-    ""
-    ""
-    "*Sample usage - cond as well as trip count*"
-    ""
-    "    graph predict-net {"
-    "      %a = Constant[value = <Scalar Tensor [3]>]()"
-    "      %b = Constant[value = <Scalar Tensor [6]>]()"
-    "      %keepgoing = Constant[value = <Scalar Tensor [1]>]()"
-    "      %max_trip_count = Constant[value = <Scalar Tensor [10]>]()"
-    "      %keepgoing_out, %b_out, %user_defined_vals = Loop[body = <graph body-net>](%max_trip_count, %keepgoing, %b)"
-    "      return"
-    "    }"
-    ""
-    "    graph body-net ("
-    "      %i[INT32, scalar]           // iteration number"
-    "      %keepgoing_in[BOOL, scalar] // incoming loop-termination-condition; not used"
-    "      %b_in[INT32, scalar]        // incoming value of loop-carried-dependency b"
-    "    ) {"
-    "      %my_local = Add(%a, %b_in)"
-    "      %b_out = Sub(%a, %b_in) // outgoing value of loop-carried-dependency b"
-    "      %keepgoing_out = Greater(%my_local, %b_out) // outgoing loop-termination-condition"
-    "      %user_defined_val = Add(%b_in, %b_in) // scan-output value to be accumulated"
-    "      return %keepgoing_out, %b_out, %user_defined_val"
-    "    }"
-    ""
-    "*Sample equivalent C code*"
-    ""
-    "    {"
-    "      /* User-defined code (enclosing scope) */"
-    "      int a = 3, b = 6;"
-    "      bool keepgoing = true; // Analogous to input cond"
-    "      /* End user-defined code */"
-    ""
-    "      /* Implicitly-defined code */"
-    "      const int max_trip_count = 10; // Analogous to input M"
-    "      int user_defined_vals[]; // Imagine this is resizable"
-    "      /* End implicitly-defined code */"
-    "      /* initialize loop-carried variables and scan-output variables */"
-    "      bool keepgoing_out = keepgoing"
-    "      int b_out = b"
-    ""
-    "      for (int i=0; i < max_trip_count && keepgoing_out; ++i) {"
-    "        /* Implicitly-defined code: bind actual parameter values"
-    "           to formal parameter variables of loop-body */"
-    "        bool keepgoing_in = keepgoing_out; "
-    "        bool b_in = b_out;"
-    ""
-    "        /* User-defined code (loop body) */"
-    "        int my_local = a + b_in; // Reading value "a" from the enclosing scope is fine"
-    "        b_out = a - b_in;"
-    "        keepgoing_out = my_local > b_out; "
-    "        user_defined_val = b_in + b_in; // b_in and b_out are different variables"
-    "        /* End user-defined code */"
-    ""
-    "        /* Implicitly defined-code */"
-    "        user_defined_vals[i] = user_defined_val // accumulate scan-output values"
-    "      }"
-    "      // int t = my_local; // Can't do this. my_local is not accessible here."
-    ""
-    "      // The values below are bound to the output variables of the loop and therefore accessible"
-    "      // b_out; user_defined_vals; keepgoing_out;"
-    "    }"
-    ""
-    "There are several things of note in this code snippet:"
-    ""
-    "1) Values from the enclosing scope (i.e. variable "a" here) are in scope and can"
-    "   be referenced in the inputs of the loop."
-    "2) Any values computed in the loop body that needs to be used in a subsequent"
-    "   iteration or after the loop are modelled using a pair of variables in the loop-body,"
-    "   consisting of an input variable (eg., b_in) and an output variable (eg., b_out)."
-    "   These are referred to as loop-carried dependences. The loop operation node"
-    "   supplies the input value of the input variable for the first iteration, and"
-    "   returns the output value of the output variable produced by the final"
-    "   iteration."
-    "3) Scan_output variables are used to implicitly concatenate values computed across"
-    "   all the iterations. In the above example, the value of user_defined_val computed"
-    "   over all iterations are concatenated and returned as the value of user_defined_vals"
-    "   after the loop."
-    "4) Values created in the body cannot be accessed in the enclosing scope,"
-    "   except using the mechanism described above."
-    ""
-    "Note that the semantics of this op support "diagonal" or "wavefront" execution."
-    "(See Step 3 here for an example:"
-    "https://devblogs.nvidia.com/optimizing-recurrent-neural-networks-cudnn-5/)."
-    "Frontends should emit multi-layer RNNs as a series of While operators (with"
-    "time being the inner looping dimension), with each successive layer consuming"
-    "the scan_outputs from the previous layer, possibly going through several"
-    "point-wise operators (e.g. dropout, residual connections, linear layer)."
+  "Generic Looping construct. This loop has multiple termination conditions:"
+  ""
+  "1) Trip count. Iteration count specified at runtime. Set by"
+  "   specifying the input M. Optional. Set to empty string to omit."
+  "   Note that a static trip count (specified at graph construction time) can be"
+  "   specified by passing in a constant node for input M."
+  "2) Loop termination condition. This is an input to the op that determines"
+  "   whether to run the first iteration and also a loop-carried dependency for"
+  "   the body graph. The body graph must yield a value for the condition variable,"
+  "   whether this input is provided or not."
+  ""
+  "This table summarizes the operating modes of this operator with equivalent"
+  "C-style code:"
+  ""
+  "    Operator inputs defined as (max_trip_count, condition_var)."
+  ""
+  "    input (\"\", \"\"):"
+  "        for (int i=0; ; ++i) {"
+  "          cond = ... // Note this value is ignored, but is required in the body"
+  "        }"
+  ""
+  "    input (\"\", cond) // Note this is analogous to a while loop"
+  "        bool cond = ...;"
+  "        for (int i=0; cond; ++i) {"
+  "          cond = ...;"
+  "        }"
+  ""
+  "    input (\"\", 1) // Note this is analogous to a do-while loop"
+  "        bool cond = true"
+  "        for (int i=0; cond; ++i) {"
+  "          cond = ...;"
+  "        }"
+  ""
+  "    input (trip_count, \"\") // Note this is analogous to a for loop"
+  "        int trip_count = ..."
+  "        for (int i=0; i < trip_count; ++i) {"
+  "          cond = ...; // ignored"
+  "        }"
+  ""
+  "    input (trip_count, cond)"
+  "        int trip_count = ...;"
+  "        bool cond = ...;"
+  "        for (int i=0; i < trip_count && cond; ++i) {"
+  "          cond = ...;"
+  "        }"
+  ""
+  ""
+  "*Sample usage - cond as well as trip count*"
+  ""
+  "    graph predict-net {"
+  "      %a = Constant[value = <Scalar Tensor [3]>]()"
+  "      %b = Constant[value = <Scalar Tensor [6]>]()"
+  "      %keepgoing = Constant[value = <Scalar Tensor [1]>]()"
+  "      %max_trip_count = Constant[value = <Scalar Tensor [10]>]()"
+  "      %keepgoing_out, %b_out, %user_defined_vals = Loop[body = <graph body-net>](%max_trip_count, %keepgoing, %b)"
+  "      return"
+  "    }"
+  ""
+  "    graph body-net ("
+  "      %i[INT32, scalar]"
+  "      %keepgoing[BOOL, scalar]"
+  "      %b[INT32, scalar]"
+  "    ) {"
+  "      %my_local = Add(%a, %b)"
+  "      %b_out = Sub(%a, %b)"
+  "      %keepgoing_out = Greater(%my_local, %b_out)"
+  "      %user_defined_vals = Add(%b, %b)"
+  "      return %keepgoing_out, %b_out, %user_defined_vals"
+  "    }"
+  ""
+  "*Sample equivalent C code*"
+  ""
+  "    {"
+  "      /* User-defined code (enclosing scope) */"
+  "      int a = 3, b = 6;"
+  "      bool keepgoing = true; // Analogous to input cond"
+  "      /* End user-defined code */"
+  ""
+  "      /* Implicitly-defined code */"
+  "      const int max_trip_count = 10; // Analogous to input M"
+  "      int user_defined_vals[]; // Imagine this is resizable"
+  "      /* End implicitly-defined code */"
+  "      for (int i=0; i < max_trip_count && keepgoing; ++i) {"
+  "        /* User-defined code (loop body) */"
+  "        int my_local = a + b; // Reading values in the enclosing scope is fine"
+  "        b = a - b; // writes fine if we specify b as a loop-carried dependency"
+  "        keepgoing = my_local > b; // keepgoing is a loop-carried dependency"
+  "        user_defined_vals[i] = b + b;"
+  "        /* End user-defined code */"
+  "      }"
+  "      // my_local = 123; // Can't do this. my_local was defined in the body"
+  ""
+  "      // These below values are live-out from the loop and therefore accessible"
+  "      b_out; user_defined_vals; keepgoing_out;"
+  "    }"
+  ""
+  "There are several things of note in this code snippet:"
+  ""
+  "1) Values from the enclosing scope (i.e. variable a here) are in scope and can"
+  "   be referenced in the inputs of the loop."
+  "2) Any variables which you wish to make available in the enclosing scope (i.e."
+  "   the variables b and keepgoing) must be declared as either loop-carried"
+  "   dependencies (both at the op inputs and output and at the body net input and"
+  "   output) or scan_outputs."
+  "3) Values created in the body cannot be accessed in the enclosing scope."
+  ""
+  "Note that the semantics of this op support \"diagonal\" or \"wavefront\" execution."
+  "(See Step 3 here for an example:"
+  "https://devblogs.nvidia.com/optimizing-recurrent-neural-networks-cudnn-5/)."
+  "Frontends should emit multi-layer RNNs as a series of While operators (with"
+  "time being the inner looping dimension), with each successive layer consuming"
+  "the scan_outputs from the previous layer, possibly going through several"
+  "point-wise operators (e.g. dropout, residual connections, linear layer)."
   }];
-  let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$M,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$cond,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$v_initial,
-           AnyAttr:$body);
+  let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$M,
+    AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$cond,
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$v_initial,
+    AnyAttr:$body);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$v_final_and_scan_outputs);
 }
 
-def ONNXLpNormalizationOp:ONNX_Op<"LpNormalization", 
-    [NoSideEffect]> {
+def ONNXLpNormalizationOp:ONNX_Op<"LpNormalization",
+  [NoSideEffect]> {
   let summary = "ONNX LpNormalization operation";
   let description = [{
-    "Given a matrix, apply Lp-normalization along the provided axis."
+  "Given a matrix, apply Lp-normalization along the provided axis."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$input,
-           DefaultValuedAttr<I64Attr, "-1">:$axis,
-           DefaultValuedAttr<I64Attr, "2">:$p);
+    DefaultValuedAttr<I64Attr, "-1">:$axis,
+    DefaultValuedAttr<I64Attr, "2">:$p);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXLpPoolOp:ONNX_Op<"LpPool", 
-    [NoSideEffect]> {
+def ONNXLpPoolOp:ONNX_Op<"LpPool",
+  [NoSideEffect]> {
   let summary = "ONNX LpPool operation";
   let description = [{
-    "LpPool consumes an input tensor X and applies Lp pooling across"
-    " the tensor according to kernel sizes, stride sizes, and pad lengths."
-    " Lp pooling consisting of computing the Lp norm on all values of a subset"
-    " of the input tensor according to the kernel size and downsampling the"
-    " data into the output tensor Y for further processing."
+  "LpPool consumes an input tensor X and applies Lp pooling across"
+  " the tensor according to kernel sizes, stride sizes, and pad lengths."
+  " Lp pooling consisting of computing the Lp norm on all values of a subset"
+  " of the input tensor according to the kernel size and downsampling the"
+  " data into the output tensor Y for further processing."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$X,
-           DefaultValuedAttr<StrAttr, "NOTSET">:$auto_pad,
-           I64ArrayAttr:$kernel_shape,
-           DefaultValuedAttr<I64Attr, "2">:$p,
-           OptionalAttr<I64ArrayAttr>:$pads,
-           OptionalAttr<I64ArrayAttr>:$strides);
+    DefaultValuedAttr<StrAttr, "NOTSET">:$auto_pad,
+    I64ArrayAttr:$kernel_shape,
+    DefaultValuedAttr<I64Attr, "2">:$p,
+    OptionalAttr<I64ArrayAttr>:$pads,
+    OptionalAttr<I64ArrayAttr>:$strides);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y);
 }
 
-def ONNXMatMulOp:ONNX_Op<"MatMul", 
-    [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+def ONNXMatMulOp:ONNX_Op<"MatMul",
+  [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
   let summary = "ONNX MatMul operation";
   let description = [{
-    "Matrix product that behaves like numpy.matmul: https://docs.scipy.org/doc/numpy-1.13.0/reference/generated/numpy.matmul.html"
+  "Matrix product that behaves like numpy.matmul: https://docs.scipy.org/doc/numpy-1.13.0/reference/generated/numpy.matmul.html"
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$A,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$B);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$B);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y);
 }
 
-def ONNXMatMulIntegerOp:ONNX_Op<"MatMulInteger", 
-    [NoSideEffect]> {
+def ONNXMatMulIntegerOp:ONNX_Op<"MatMulInteger",
+  [NoSideEffect]> {
   let summary = "ONNX MatMulInteger operation";
   let description = [{
-    "Matrix product that behaves like numpy.matmul: https://docs.scipy.org/doc/numpy-1.13.0/reference/generated/numpy.matmul.html."
-    "The production MUST never overflow. The accumulation may overflow if and only if in 32 bits."
+  "Matrix product that behaves like numpy.matmul: https://docs.scipy.org/doc/numpy-1.13.0/reference/generated/numpy.matmul.html."
+  "The production MUST never overflow. The accumulation may overflow if and only if in 32 bits."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$A,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$B,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$a_zero_point,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$b_zero_point);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$B,
+    AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$a_zero_point,
+    AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$b_zero_point);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y);
 }
 
-def ONNXMaxOp:ONNX_Op<"Max", 
-    [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+def ONNXMaxOp:ONNX_Op<"Max",
+  [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
   let summary = "ONNX Max operation";
   let description = [{
-    "Element-wise max of each of the input tensors (with Numpy-style broadcasting support)."
-    "All inputs and outputs must have the same data type."
-    "This operator supports **multidirectional (i.e., Numpy-style) broadcasting**; for more details please check [the doc](Broadcasting.md)."
+  "Element-wise max of each of the input tensors (with Numpy-style broadcasting support)."
+  "All inputs and outputs must have the same data type."
+  "This operator supports **multidirectional (i.e., Numpy-style) broadcasting**; for more details please check [the doc](Broadcasting.md)."
   }];
   let arguments = (ins Variadic<AnyTypeOf<[AnyMemRef, AnyTensor]>>:$data_0);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$max);
 }
 
-def ONNXMaxPoolOp:ONNX_Op<"MaxPool", 
-    [NoSideEffect]> {
+def ONNXMaxPoolOp:ONNX_Op<"MaxPool",
+  [NoSideEffect]> {
   let summary = "ONNX MaxPool operation";
   let description = [{
-    "MaxPool consumes an input tensor X and applies max pooling across"
-    " the tensor according to kernel sizes, stride sizes, and pad lengths."
-    " max pooling consisting of computing the max on all values of a"
-    " subset of the input tensor according to the kernel size and downsampling the"
-    " data into the output tensor Y for further processing. The output spatial shape will be following:"
-    " ```"
-    " output_spatial_shape[i] = floor((input_spatial_shape[i] + pad_shape[i] - ((kernel_spatial_shape[i] - 1) * dilations[i] + 1)) / strides_spatial_shape[i] + 1)"
-    " ```"
-    " or"
-    " ```"
-    " output_spatial_shape[i] = ceil((input_spatial_shape[i] + pad_shape[i] - ((kernel_spatial_shape[i] - 1) * dilations[i] + 1)) / strides_spatial_shape[i] + 1)"
-    " ```"
-    " if ceil_mode is enabled"
-    ""
-    " ```"
-    " * pad_shape[i] is sum of pads along axis i"
-    " ```"
-    ""
-    " `auto_pad` is a DEPRECATED attribute. If you are using them currently, the output spatial shape will be following:"
-    " ```"
-    " VALID: output_spatial_shape[i] = ceil((input_spatial_shape[i] - ((kernel_spatial_shape[i] - 1) * dilations[i] + 1) + 1) / strides_spatial_shape[i])"
-    " SAME_UPPER or SAME_LOWER: output_spatial_shape[i] = ceil(input_spatial_shape[i] / strides_spatial_shape[i])"
-    " ```"
-    " And pad shape will be following if `SAME_UPPER` or `SAME_LOWER`:"
-    " ```"
-    " pad_shape[i] = (output_spatial_shape[i] - 1) * strides_spatial_shape[i] + ((kernel_spatial_shape[i] - 1) * dilations[i] + 1) - input_spatial_shape[i]"
-    " ```"
-    " The output of each pooling window is maximum number of elements exclude pad. "
-    " "
+  "MaxPool consumes an input tensor X and applies max pooling across"
+  " the tensor according to kernel sizes, stride sizes, and pad lengths."
+  " max pooling consisting of computing the max on all values of a"
+  " subset of the input tensor according to the kernel size and downsampling the"
+  " data into the output tensor Y for further processing. The output spatial shape will be following:"
+  " ```"
+  " output_spatial_shape[i] = floor((input_spatial_shape[i] + pad_shape[i] - ((kernel_spatial_shape[i] - 1) * dilations[i] + 1)) / strides_spatial_shape[i] + 1)"
+  " ```"
+  " or"
+  " ```"
+  " output_spatial_shape[i] = ceil((input_spatial_shape[i] + pad_shape[i] - ((kernel_spatial_shape[i] - 1) * dilations[i] + 1)) / strides_spatial_shape[i] + 1)"
+  " ```"
+  " if ceil_mode is enabled"
+  ""
+  " ```"
+  " * pad_shape[i] is sum of pads along axis i"
+  " ```"
+  ""
+  " `auto_pad` is a DEPRECATED attribute. If you are using them currently, the output spatial shape will be following:"
+  " ```"
+  " VALID: output_spatial_shape[i] = ceil((input_spatial_shape[i] - ((kernel_spatial_shape[i] - 1) * dilations[i] + 1) + 1) / strides_spatial_shape[i])"
+  " SAME_UPPER or SAME_LOWER: output_spatial_shape[i] = ceil(input_spatial_shape[i] / strides_spatial_shape[i])"
+  " ```"
+  " And pad shape will be following if `SAME_UPPER` or `SAME_LOWER`:"
+  " ```"
+  " pad_shape[i] = (output_spatial_shape[i] - 1) * strides_spatial_shape[i] + ((kernel_spatial_shape[i] - 1) * dilations[i] + 1) - input_spatial_shape[i]"
+  " ```"
+  " The output of each pooling window is maximum number of elements exclude pad."
+  " "
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$X,
-           DefaultValuedAttr<StrAttr, "NOTSET">:$auto_pad,
-           DefaultValuedAttr<I64Attr, "0">:$ceil_mode,
-           OptionalAttr<I64ArrayAttr>:$dilations,
-           I64ArrayAttr:$kernel_shape,
-           OptionalAttr<I64ArrayAttr>:$pads,
-           DefaultValuedAttr<I64Attr, "0">:$storage_order,
-           OptionalAttr<I64ArrayAttr>:$strides);
+    DefaultValuedAttr<StrAttr, "NOTSET">:$auto_pad,
+    DefaultValuedAttr<I64Attr, "0">:$ceil_mode,
+    OptionalAttr<I64ArrayAttr>:$dilations,
+    I64ArrayAttr:$kernel_shape,
+    OptionalAttr<I64ArrayAttr>:$pads,
+    DefaultValuedAttr<I64Attr, "0">:$storage_order,
+    OptionalAttr<I64ArrayAttr>:$strides);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$Indices);
+    AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$Indices);
 }
 
-def ONNXMaxRoiPoolOp:ONNX_Op<"MaxRoiPool", 
-    [NoSideEffect]> {
+def ONNXMaxRoiPoolOp:ONNX_Op<"MaxRoiPool",
+  [NoSideEffect]> {
   let summary = "ONNX MaxRoiPool operation";
   let description = [{
-    "ROI max pool consumes an input tensor X and region of interests (RoIs) to"
-    " apply max pooling across each RoI, to produce output 4-D tensor of shape"
-    " (num_rois, channels, pooled_shape[0], pooled_shape[1])."
+  "ROI max pool consumes an input tensor X and region of interests (RoIs) to"
+  " apply max pooling across each RoI, to produce output 4-D tensor of shape"
+  " (num_rois, channels, pooled_shape[0], pooled_shape[1])."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$X,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$rois,
-           I64ArrayAttr:$pooled_shape,
-           DefaultValuedAttr<F32Attr, "1.0">:$spatial_scale);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$rois,
+    I64ArrayAttr:$pooled_shape,
+    DefaultValuedAttr<F32Attr, "1.0">:$spatial_scale);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y);
 }
 
-def ONNXMaxUnpoolOp:ONNX_Op<"MaxUnpool", 
-    [NoSideEffect]> {
+def ONNXMaxUnpoolOp:ONNX_Op<"MaxUnpool",
+  [NoSideEffect]> {
   let summary = "ONNX MaxUnpool operation";
   let description = [{
-    "MaxUnpool essentially computes the partial inverse of the MaxPool op."
-    " The input information to this op is typically the the output information from a MaxPool op. The first"
-    " input tensor X is the tensor that needs to be unpooled, which is typically the pooled tensor (first output)"
-    " from MaxPool. The second input tensor, I, contains the indices to the (locally maximal) elements corrsponding"
-    " to the elements in the first input tensor X. Input tensor I is typically the second output of the MaxPool op."
-    " The third (optional) input is a tensor that specifies the output size of the unpooling operation."
-    ""
-    "MaxUnpool is intended to do 'partial' inverse of the MaxPool op. 'Partial' because all the non-maximal"
-    " values from the original input to MaxPool are set to zero in the output of the MaxUnpool op. Pooling"
-    " the result of an unpooling operation should give back the original input to the unpooling op."
-    ""
-    "MaxUnpool can produce the same output size for several input sizes, which makes unpooling op ambiguous."
-    " The third input argument, output_size, is meant to disambiguate the op and produce output tensor of"
-    " known/predictable size."
-    ""
-    "In addition to the inputs, MaxUnpool takes three attributes, namely kernel_shape, strides, and pads,"
-    " which define the exact unpooling op. The attributes typically have the same values as the corrsponding"
-    " pooling op that the unpooling op is trying to invert."
+  "MaxUnpool essentially computes the partial inverse of the MaxPool op."
+  " The input information to this op is typically the the output information from a MaxPool op. The first"
+  " input tensor X is the tensor that needs to be unpooled, which is typically the pooled tensor (first output)"
+  " from MaxPool. The second input tensor, I, contains the indices to the (locally maximal) elements corrsponding"
+  " to the elements in the first input tensor X. Input tensor I is typically the second output of the MaxPool op."
+  " The third (optional) input is a tensor that specifies the output size of the unpooling operation."
+  ""
+  "MaxUnpool is intended to do 'partial' inverse of the MaxPool op. 'Partial' because all the non-maximal"
+  " values from the original input to MaxPool are set to zero in the output of the MaxUnpool op. Pooling"
+  " the result of an unpooling operation should give back the original input to the unpooling op."
+  ""
+  "MaxUnpool can produce the same output size for several input sizes, which makes unpooling op ambiguous."
+  " The third input argument, output_size, is meant to disambiguate the op and produce output tensor of"
+  " known/predictable size."
+  ""
+  "In addition to the inputs, MaxUnpool takes three attributes, namely kernel_shape, strides, and pads,"
+  " which define the exact unpooling op. The attributes typically have the same values as the corrsponding"
+  " pooling op that the unpooling op is trying to invert."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$X,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$I,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$output_shape,
-           I64ArrayAttr:$kernel_shape,
-           OptionalAttr<I64ArrayAttr>:$pads,
-           OptionalAttr<I64ArrayAttr>:$strides);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$I,
+    AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$output_shape,
+    I64ArrayAttr:$kernel_shape,
+    OptionalAttr<I64ArrayAttr>:$pads,
+    OptionalAttr<I64ArrayAttr>:$strides);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXMeanOp:ONNX_Op<"Mean", 
-    [NoSideEffect]> {
+def ONNXMeanOp:ONNX_Op<"Mean",
+  [NoSideEffect]> {
   let summary = "ONNX Mean operation";
   let description = [{
-    "Element-wise mean of each of the input tensors (with Numpy-style broadcasting support)."
-    "All inputs and outputs must have the same data type."
-    "This operator supports **multidirectional (i.e., Numpy-style) broadcasting**; for more details please check [the doc](Broadcasting.md)."
+  "Element-wise mean of each of the input tensors (with Numpy-style broadcasting support)."
+  "All inputs and outputs must have the same data type."
+  "This operator supports **multidirectional (i.e., Numpy-style) broadcasting**; for more details please check [the doc](Broadcasting.md)."
   }];
   let arguments = (ins Variadic<AnyTypeOf<[AnyMemRef, AnyTensor]>>:$data_0);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$mean);
 }
 
-def ONNXMeanVarianceNormalizationOp:ONNX_Op<"MeanVarianceNormalization", 
-    [NoSideEffect]> {
+def ONNXMeanVarianceNormalizationOp:ONNX_Op<"MeanVarianceNormalization",
+  [NoSideEffect]> {
   let summary = "ONNX MeanVarianceNormalization operation";
   let description = [{
-    "A MeanVarianceNormalization Function: Perform mean variance normalization"
-    "      on the input tensor X using formula: <br/> ``` (X-EX)/sqrt(E(X-EX)^2) ```"
+  "A MeanVarianceNormalization Function: Perform mean variance normalization"
+  "      on the input tensor X using formula: <br/> ``` (X-EX)/sqrt(E(X-EX)^2) ```"
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$X,
-           DefaultValuedAttr<I64ArrayAttr, "{0, 2, 3}">:$axes);
+    DefaultValuedAttr<I64ArrayAttr, "{0, 2, 3}">:$axes);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y);
 }
 
-def ONNXMinOp:ONNX_Op<"Min", 
-    [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+def ONNXMinOp:ONNX_Op<"Min",
+  [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
   let summary = "ONNX Min operation";
   let description = [{
-    "Element-wise min of each of the input tensors (with Numpy-style broadcasting support)."
-    "All inputs and outputs must have the same data type."
-    "This operator supports **multidirectional (i.e., Numpy-style) broadcasting**; for more details please check [the doc](Broadcasting.md)."
+  "Element-wise min of each of the input tensors (with Numpy-style broadcasting support)."
+  "All inputs and outputs must have the same data type."
+  "This operator supports **multidirectional (i.e., Numpy-style) broadcasting**; for more details please check [the doc](Broadcasting.md)."
   }];
   let arguments = (ins Variadic<AnyTypeOf<[AnyMemRef, AnyTensor]>>:$data_0);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$min);
 }
 
-def ONNXModOp:ONNX_Op<"Mod", 
-    [NoSideEffect]> {
+def ONNXModOp:ONNX_Op<"Mod",
+  [NoSideEffect]> {
   let summary = "ONNX Mod operation";
   let description = [{
-    "Performs element-wise binary modulus (with Numpy-style broadcasting support). "
-    "    The sign of the remainder is the same as that of the Divisor."
-    "  "
-    "    Mod operator can also behave like C fmod() or numpy.fmod. In this case, the sign of the remainder however, will be the same as the Dividend "
-    "    (in contrast to integer mod). To force a behavior like numpy.fmod() an 'fmod' Attribute is provided."
-    "    This attribute is set to 0 by default causing the behavior to be like integer mod. "
-    "    Setting this attribute to 1 causes the remainder to be calculated similar to that of numpy.fmod()."
-    ""
-    "    If the input type is floating point, then `fmod` attribute must be set to 1."
-    "  "
-    "    In case of dividend being zero, the results will be platform dependent."
-    ""
-    "  This operator supports **multidirectional (i.e., Numpy-style) broadcasting**; for more details please check [the doc](Broadcasting.md)."
+  "Performs element-wise binary modulus (with Numpy-style broadcasting support). "
+  "    The sign of the remainder is the same as that of the Divisor."
+  "  "
+  "    Mod operator can also behave like C fmod() or numpy.fmod. In this case, the sign of the remainder however, will be the same as the Dividend "
+  "    (in contrast to integer mod). To force a behavior like numpy.fmod() an 'fmod' Attribute is provided."
+  "    This attribute is set to 0 by default causing the behavior to be like integer mod. "
+  "    Setting this attribute to 1 causes the remainder to be calculated similar to that of numpy.fmod()."
+  ""
+  "    If the input type is floating point, then `fmod` attribute must be set to 1."
+  "  "
+  "    In case of dividend being zero, the results will be platform dependent."
+  ""
+  "  This operator supports **multidirectional (i.e., Numpy-style) broadcasting**; for more details please check [the doc](Broadcasting.md)."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$A,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$B,
-           DefaultValuedAttr<I64Attr, "0">:$fmod);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$B,
+    DefaultValuedAttr<I64Attr, "0">:$fmod);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$C);
 }
 
-def ONNXMulOp:ONNX_Op<"Mul", 
-    [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+def ONNXMulOp:ONNX_Op<"Mul",
+  [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
   let summary = "ONNX Mul operation";
   let description = [{
-    "Performs element-wise binary multiplication (with Numpy-style broadcasting support)."
-    ""
-    "This operator supports **multidirectional (i.e., Numpy-style) broadcasting**; for more details please check [the doc](Broadcasting.md)."
+  "Performs element-wise binary multiplication (with Numpy-style broadcasting support)."
+  ""
+  "This operator supports **multidirectional (i.e., Numpy-style) broadcasting**; for more details please check [the doc](Broadcasting.md)."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$A,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$B);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$B);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$C);
   let builders = [
     OpBuilder<"Builder *builder, OperationState &state, Value A, Value B", [{
@@ -1798,674 +1780,674 @@ def ONNXMulOp:ONNX_Op<"Mul",
       outputTypes.emplace_back(UnrankedTensorType::get(elementType));
       build(builder, state, outputTypes, operands, attributes);
     }]>
-  ];
+    ];
 }
 
-def ONNXMultinomialOp:ONNX_Op<"Multinomial", 
-    [NoSideEffect]> {
+def ONNXMultinomialOp:ONNX_Op<"Multinomial",
+  [NoSideEffect]> {
   let summary = "ONNX Multinomial operation";
   let description = [{
-    "Generate a tensor of samples from a multinomial distribution according to the probabilities"
-    "of each of the possible outcomes."
+  "Generate a tensor of samples from a multinomial distribution according to the probabilities"
+  "of each of the possible outcomes."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$input,
-           DefaultValuedAttr<I64Attr, "6">:$dtype,
-           DefaultValuedAttr<I64Attr, "1">:$sample_size,
-           OptionalAttr<F32Attr>:$seed);
+    DefaultValuedAttr<I64Attr, "6">:$dtype,
+    DefaultValuedAttr<I64Attr, "1">:$sample_size,
+    OptionalAttr<F32Attr>:$seed);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXNegOp:ONNX_Op<"Neg", 
-    [NoSideEffect]> {
+def ONNXNegOp:ONNX_Op<"Neg",
+  [NoSideEffect]> {
   let summary = "ONNX Neg operation";
   let description = [{
-    "Neg takes one input data (Tensor<T>) and produces one output data"
-    "(Tensor<T>) where each element flipped sign, y = -x, is applied to"
-    "the tensor elementwise."
+  "Neg takes one input data (Tensor<T>) and produces one output data"
+  "(Tensor<T>) where each element flipped sign, y = -x, is applied to"
+  "the tensor elementwise."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$X);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y);
 }
 
-def ONNXNonMaxSuppressionOp:ONNX_Op<"NonMaxSuppression", 
-    [NoSideEffect]> {
+def ONNXNonMaxSuppressionOp:ONNX_Op<"NonMaxSuppression",
+  [NoSideEffect]> {
   let summary = "ONNX NonMaxSuppression operation";
   let description = [{
-    "Filter out boxes that have high intersection-over-union (IOU) overlap with previously selected boxes."
-    "Bounding boxes with score less than score_threshold are removed. Bounding box format is indicated by attribute center_point_box."
-    "Note that this algorithm is agnostic to where the origin is in the coordinate system and more generally is invariant to"
-    "orthogonal transformations and translations of the coordinate system; thus translating or reflections of the coordinate system"
-    "result in the same boxes being selected by the algorithm."
-    "The selected_indices output is a set of integers indexing into the input collection of bounding boxes representing the selected boxes."
-    "The bounding box coordinates corresponding to the selected indices can then be obtained using the Gather or GatherND operation."
+  "Filter out boxes that have high intersection-over-union (IOU) overlap with previously selected boxes."
+  "Bounding boxes with score less than score_threshold are removed. Bounding box format is indicated by attribute center_point_box."
+  "Note that this algorithm is agnostic to where the origin is in the coordinate system and more generally is invariant to"
+  "orthogonal transformations and translations of the coordinate system; thus translating or reflections of the coordinate system"
+  "result in the same boxes being selected by the algorithm."
+  "The selected_indices output is a set of integers indexing into the input collection of bounding boxes representing the selected boxes."
+  "The bounding box coordinates corresponding to the selected indices can then be obtained using the Gather or GatherND operation."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$boxes,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$scores,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$max_output_boxes_per_class,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$iou_threshold,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$score_threshold,
-           DefaultValuedAttr<I64Attr, "0">:$center_point_box);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$scores,
+    AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$max_output_boxes_per_class,
+    AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$iou_threshold,
+    AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$score_threshold,
+    DefaultValuedAttr<I64Attr, "0">:$center_point_box);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$selected_indices);
 }
 
-def ONNXNonZeroOp:ONNX_Op<"NonZero", 
-    [NoSideEffect]> {
+def ONNXNonZeroOp:ONNX_Op<"NonZero",
+  [NoSideEffect]> {
   let summary = "ONNX NonZero operation";
   let description = [{
-    "Returns the indices of the elements that are non-zero"
-    "    (in row-major order - by dimension)."
-    "    NonZero behaves similar to numpy.nonzero:"
-    "    https://docs.scipy.org/doc/numpy/reference/generated/numpy.nonzero.html"
+  "Returns the indices of the elements that are non-zero"
+  "    (in row-major order - by dimension)."
+  "    NonZero behaves similar to numpy.nonzero:"
+  "    https://docs.scipy.org/doc/numpy/reference/generated/numpy.nonzero.html"
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$X);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y);
 }
 
-def ONNXNotOp:ONNX_Op<"Not", 
-    [NoSideEffect]> {
+def ONNXNotOp:ONNX_Op<"Not",
+  [NoSideEffect]> {
   let summary = "ONNX Not operation";
   let description = [{
-    "Returns the negation of the input tensor element-wise."
+  "Returns the negation of the input tensor element-wise."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$X);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y);
 }
 
-def ONNXOneHotOp:ONNX_Op<"OneHot", 
-    [NoSideEffect]> {
+def ONNXOneHotOp:ONNX_Op<"OneHot",
+  [NoSideEffect]> {
   let summary = "ONNX OneHot operation";
   let description = [{
-    "Produces a one-hot tensor based on inputs."
-    "    The locations represented by the index values in the 'indices' input tensor will have 'on_value'"
-    "    and the other locations will have 'off_value' in the output tensor, where 'on_value' and 'off_value'"
-    "    are specified as part of required input argument 'values', which is a two-element tensor of format"
-    "    [off_value, on_value]. The rank of the output tensor will be one greater than the rank of the"
-    "    input tensor. The additional dimension is for one-hot representation. The additional dimension will"
-    "    be inserted at the position specified by 'axis'. If 'axis' is not specified then then additional"
-    "    dimension will be inserted as the innermost dimension, i.e. axis=-1. The size of the additional"
-    "    dimension is specified by required scalar input 'depth'. The type of the output tensor is the same"
-    "    as the type of the 'values' input. Any entries in the 'indices' input tensor with values outside"
-    "    the range [-depth, depth-1] will result in one-hot representation with all 'off_value' values in the"
-    "    output tensor."
-    ""
-    "    when axis = 0:"
-    "    output[input[i, j, k], i, j, k] = 1 for all i, j, k and 0 otherwise."
-    ""
-    "    when axis = -1:"
-    "    output[i, j, k, input[i, j, k]] = 1 for all i, j, k and 0 otherwise."
-    ""
+  "Produces a one-hot tensor based on inputs."
+  "    The locations represented by the index values in the 'indices' input tensor will have 'on_value'"
+  "    and the other locations will have 'off_value' in the output tensor, where 'on_value' and 'off_value'"
+  "    are specified as part of required input argument 'values', which is a two-element tensor of format"
+  "    [off_value, on_value]. The rank of the output tensor will be one greater than the rank of the"
+  "    input tensor. The additional dimension is for one-hot representation. The additional dimension will"
+  "    be inserted at the position specified by 'axis'. If 'axis' is not specified then then additional"
+  "    dimension will be inserted as the innermost dimension, i.e. axis=-1. The size of the additional"
+  "    dimension is specified by required scalar input 'depth'. The type of the output tensor is the same"
+  "    as the type of the 'values' input. Any entries in the 'indices' input tensor with values outside"
+  "    the range [-depth, depth-1] will result in one-hot representation with all 'off_value' values in the"
+  "    output tensor."
+  ""
+  "    when axis = 0:"
+  "    output[input[i, j, k], i, j, k] = 1 for all i, j, k and 0 otherwise."
+  ""
+  "    when axis = -1:"
+  "    output[i, j, k, input[i, j, k]] = 1 for all i, j, k and 0 otherwise."
+  ""
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$indices,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$depth,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$values,
-           DefaultValuedAttr<I64Attr, "-1">:$axis);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$depth,
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$values,
+    DefaultValuedAttr<I64Attr, "-1">:$axis);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXOrOp:ONNX_Op<"Or", 
-    [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+def ONNXOrOp:ONNX_Op<"Or",
+  [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
   let summary = "ONNX Or operation";
   let description = [{
-    "Returns the tensor resulted from performing the `or` logical operation"
-    "elementwise on the input tensors `A` and `B` (with Numpy-style broadcasting support)."
-    ""
-    "This operator supports **multidirectional (i.e., Numpy-style) broadcasting**; for more details please check [the doc](Broadcasting.md)."
+  "Returns the tensor resulted from performing the `or` logical operation"
+  "elementwise on the input tensors `A` and `B` (with Numpy-style broadcasting support)."
+  ""
+  "This operator supports **multidirectional (i.e., Numpy-style) broadcasting**; for more details please check [the doc](Broadcasting.md)."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$A,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$B);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$B);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$C);
 }
 
-def ONNXPReluOp:ONNX_Op<"PRelu", 
-    [NoSideEffect]> {
+def ONNXPReluOp:ONNX_Op<"PRelu",
+  [NoSideEffect]> {
   let summary = "ONNX PRelu operation";
   let description = [{
-    "PRelu takes input data (Tensor<T>) and slope tensor as input, and produces one"
-    "output data (Tensor<T>) where the function `f(x) = slope * x for x < 0`,"
-    "`f(x) = x for x >= 0`., is applied to the data tensor elementwise."
-    "This operator supports **unidirectional broadcasting** (tensor slope should be unidirectional broadcastable to input tensor X); for more details please check [the doc](Broadcasting.md)."
+  "PRelu takes input data (Tensor<T>) and slope tensor as input, and produces one"
+  "output data (Tensor<T>) where the function `f(x) = slope * x for x < 0`,"
+  "`f(x) = x for x >= 0`., is applied to the data tensor elementwise."
+  "This operator supports **unidirectional broadcasting** (tensor slope should be unidirectional broadcastable to input tensor X); for more details please check [the doc](Broadcasting.md)."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$X,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$slope);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$slope);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y);
 }
 
-def ONNXPadOp:ONNX_Op<"Pad", 
-    [NoSideEffect]> {
+def ONNXPadOp:ONNX_Op<"Pad",
+  [NoSideEffect]> {
   let summary = "ONNX Pad operation";
   let description = [{
-    "Given a tensor containing the data to be padded (`data`), a tensor containing the number of start and end pad values for axis (`pads`), (optionally) a `mode`, and (optionally) `constant_value`, "
-    "a padded tensor (`output`) is generated."
-    ""
-    "The three supported `modes` are (similar to corresponding modes supported by `numpy.pad`):"
-    ""
-    "1) `constant`(default) - pads with a given constant value as specified by `constant_value` (which defaults to 0)"
-    ""
-    "2) `reflect` - pads with the reflection of the vector mirrored on the first and last values of the vector along each axis"
-    ""
-    "3) `edge` - pads with the edge values of array"
-    ""
-    ""
-    "Example 1 (`constant` mode):"
-    "  Insert 0 pads to the beginning of the second dimension."
-    ""
-    "  data = "
-    "  ["
-    "      [1.0, 1.2],"
-    "      [2.3, 3.4],"
-    "      [4.5, 5.7],"
-    "  ] "
-    ""
-    "  pads = [0, 2, 0, 0]"
-    ""
-    "  mode = 'constant'"
-    ""
-    "  constant_value = 0.0"
-    ""
-    "  output = "
-    "  ["
-    "      ["
-    "          [0.0, 0.0, 1.0, 1.2],"
-    "          [0.0, 0.0, 2.3, 3.4],"
-    "          [0.0, 0.0, 4.5, 5.7],"
-    "      ],"
-    "  ]"
-    ""
-    ""
-    "Example 2 (`reflect` mode):"
-    "  data = "
-    "  ["
-    "      [1.0, 1.2],"
-    "      [2.3, 3.4],"
-    "      [4.5, 5.7],"
-    "  ] "
-    ""
-    "  pads = [0, 2, 0, 0]"
-    ""
-    "  mode = 'reflect'"
-    ""
-    "  output = "
-    "  ["
-    "      ["
-    "          [1.0, 1.2, 1.0, 1.2],"
-    "          [2.3, 3.4, 2.3, 3.4],"
-    "          [4.5, 5.7, 4.5, 5.7],"
-    "      ],"
-    "  ]"
-    ""
-    ""
-    "Example 3 (`edge` mode):"
-    "  data = "
-    "  ["
-    "      [1.0, 1.2],"
-    "      [2.3, 3.4],"
-    "      [4.5, 5.7],"
-    "  ] "
-    ""
-    "  pads = [0, 2, 0, 0]"
-    ""
-    "  mode = 'edge'"
-    ""
-    "  output = "
-    "  ["
-    "      ["
-    "          [1.0, 1.0, 1.0, 1.2],"
-    "          [2.3, 2.3, 2.3, 3.4],"
-    "          [4.5, 4.5, 4.5, 5.7],"
-    "      ],"
-    "  ]"
-    ""
+  "Given a tensor containing the data to be padded (`data`), a tensor containing the number of start and end pad values for axis (`pads`), (optionally) a `mode`, and (optionally) `constant_value`, "
+  "a padded tensor (`output`) is generated."
+  ""
+  "The three supported `modes` are (similar to corresponding modes supported by `numpy.pad`):"
+  ""
+  "1) `constant`(default) - pads with a given constant value as specified by `constant_value` (which defaults to 0)"
+  ""
+  "2) `reflect` - pads with the reflection of the vector mirrored on the first and last values of the vector along each axis"
+  ""
+  "3) `edge` - pads with the edge values of array"
+  ""
+  ""
+  "Example 1 (`constant` mode):"
+  "  Insert 0 pads to the beginning of the second dimension."
+  ""
+  "  data = "
+  "  ["
+  "      [1.0, 1.2],"
+  "      [2.3, 3.4],"
+  "      [4.5, 5.7],"
+  "  ] "
+  ""
+  "  pads = [0, 2, 0, 0]"
+  ""
+  "  mode = 'constant'"
+  ""
+  "  constant_value = 0.0"
+  ""
+  "  output = "
+  "  ["
+  "      ["
+  "          [0.0, 0.0, 1.0, 1.2],"
+  "          [0.0, 0.0, 2.3, 3.4],"
+  "          [0.0, 0.0, 4.5, 5.7],"
+  "      ],"
+  "  ]"
+  ""
+  ""
+  "Example 2 (`reflect` mode):"
+  "  data = "
+  "  ["
+  "      [1.0, 1.2],"
+  "      [2.3, 3.4],"
+  "      [4.5, 5.7],"
+  "  ] "
+  ""
+  "  pads = [0, 2, 0, 0]"
+  ""
+  "  mode = 'reflect'"
+  ""
+  "  output = "
+  "  ["
+  "      ["
+  "          [1.0, 1.2, 1.0, 1.2],"
+  "          [2.3, 3.4, 2.3, 3.4],"
+  "          [4.5, 5.7, 4.5, 5.7],"
+  "      ],"
+  "  ]"
+  ""
+  ""
+  "Example 3 (`edge` mode):"
+  "  data = "
+  "  ["
+  "      [1.0, 1.2],"
+  "      [2.3, 3.4],"
+  "      [4.5, 5.7],"
+  "  ] "
+  ""
+  "  pads = [0, 2, 0, 0]"
+  ""
+  "  mode = 'edge'"
+  ""
+  "  output = "
+  "  ["
+  "      ["
+  "          [1.0, 1.0, 1.0, 1.2],"
+  "          [2.3, 2.3, 2.3, 3.4],"
+  "          [4.5, 4.5, 4.5, 5.7],"
+  "      ],"
+  "  ]"
+  ""
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$data,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$pads,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$constant_value,
-           DefaultValuedAttr<StrAttr, "constant">:$mode);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$pads,
+    AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$constant_value,
+    DefaultValuedAttr<StrAttr, "constant">:$mode);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXPowOp:ONNX_Op<"Pow", 
-    [NoSideEffect]> {
+def ONNXPowOp:ONNX_Op<"Pow",
+  [NoSideEffect]> {
   let summary = "ONNX Pow operation";
   let description = [{
-    "Pow takes input data (Tensor<T>) and exponent Tensor, and"
-    "produces one output data (Tensor<T>) where the function `f(x) = x^exponent`,"
-    "is applied to the data tensor elementwise."
-    "This operator supports **multidirectional (i.e., Numpy-style) broadcasting**; for more details please check [the doc](Broadcasting.md)."
+  "Pow takes input data (Tensor<T>) and exponent Tensor, and"
+  "produces one output data (Tensor<T>) where the function `f(x) = x^exponent`,"
+  "is applied to the data tensor elementwise."
+  "This operator supports **multidirectional (i.e., Numpy-style) broadcasting**; for more details please check [the doc](Broadcasting.md)."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$X,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$Z);
 }
 
-def ONNXQLinearConvOp:ONNX_Op<"QLinearConv", 
-    [NoSideEffect]> {
+def ONNXQLinearConvOp:ONNX_Op<"QLinearConv",
+  [NoSideEffect]> {
   let summary = "ONNX QLinearConv operation";
   let description = [{
-    "The convolution operator consumes a quantized input tensor, its scale and zero point,"
-    "a quantized filter, its scale and zero point, and output's scale and zero point,"
-    "and computes the quantized output. Each scale and zero-point pair must have same shape."
-    "It means they must be either scalars (per tensor) or 1-D tensors (per output channel)."
-    "Each input or output and its related zero point must have same type."
+  "The convolution operator consumes a quantized input tensor, its scale and zero point,"
+  "a quantized filter, its scale and zero point, and output's scale and zero point,"
+  "and computes the quantized output. Each scale and zero-point pair must have same shape."
+  "It means they must be either scalars (per tensor) or 1-D tensors (per output channel)."
+  "Each input or output and its related zero point must have same type."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$x,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$x_scale,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$x_zero_point,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$w,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$w_scale,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$w_zero_point,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$y_scale,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$y_zero_point,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$B,
-           DefaultValuedAttr<StrAttr, "NOTSET">:$auto_pad,
-           OptionalAttr<I64ArrayAttr>:$dilations,
-           DefaultValuedAttr<I64Attr, "1">:$group,
-           OptionalAttr<I64ArrayAttr>:$kernel_shape,
-           OptionalAttr<I64ArrayAttr>:$pads,
-           OptionalAttr<I64ArrayAttr>:$strides);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$x_scale,
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$x_zero_point,
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$w,
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$w_scale,
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$w_zero_point,
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$y_scale,
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$y_zero_point,
+    AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$B,
+    DefaultValuedAttr<StrAttr, "NOTSET">:$auto_pad,
+    OptionalAttr<I64ArrayAttr>:$dilations,
+    DefaultValuedAttr<I64Attr, "1">:$group,
+    OptionalAttr<I64ArrayAttr>:$kernel_shape,
+    OptionalAttr<I64ArrayAttr>:$pads,
+    OptionalAttr<I64ArrayAttr>:$strides);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$y);
 }
 
-def ONNXQLinearMatMulOp:ONNX_Op<"QLinearMatMul", 
-    [NoSideEffect]> {
+def ONNXQLinearMatMulOp:ONNX_Op<"QLinearMatMul",
+  [NoSideEffect]> {
   let summary = "ONNX QLinearMatMul operation";
   let description = [{
-    "Matrix product that behaves like numpy.matmul: https://docs.scipy.org/doc/numpy-1.13.0/reference/generated/numpy.matmul.html."
-    "It consumes two quantized input tensors, their scales and zero points, scale and zero point of output, and computes the quantized output."
-    "The quantization formula is y = saturate((x / y_scale) + y_zero_point). For (x / y_scale), it is rounding to nearest ties to even."
-    "Refer to https://en.wikipedia.org/wiki/Rounding for details. Scale and zero point must have same shape."
-    "They must be either scalar (per tensor) or 1-D tensor (per row for 'a' and per column for 'b'). If scale and zero point are 1-D tensor,"
-    "the number of elements of scale and zero point tensor of input 'a' and output 'y' should be equal to the number of rows of input 'a',"
-    "and the number of elements of scale and zero point tensor of input 'b' should be equal to the number of columns of input 'b'."
-    "Production must never overflow, and accumulation may overflow if and only if in 32 bits."
+  "Matrix product that behaves like numpy.matmul: https://docs.scipy.org/doc/numpy-1.13.0/reference/generated/numpy.matmul.html."
+  "It consumes two quantized input tensors, their scales and zero points, scale and zero point of output, and computes the quantized output."
+  "The quantization formula is y = saturate((x / y_scale) + y_zero_point). For (x / y_scale), it is rounding to nearest ties to even."
+  "Refer to https://en.wikipedia.org/wiki/Rounding for details. Scale and zero point must have same shape."
+  "They must be either scalar (per tensor) or 1-D tensor (per row for 'a' and per column for 'b'). If scale and zero point are 1-D tensor,"
+  "the number of elements of scale and zero point tensor of input 'a' and output 'y' should be equal to the number of rows of input 'a',"
+  "and the number of elements of scale and zero point tensor of input 'b' should be equal to the number of columns of input 'b'."
+  "Production must never overflow, and accumulation may overflow if and only if in 32 bits."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$a,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$a_scale,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$a_zero_point,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$b,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$b_scale,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$b_zero_point,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$y_scale,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$y_zero_point);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$a_scale,
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$a_zero_point,
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$b,
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$b_scale,
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$b_zero_point,
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$y_scale,
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$y_zero_point);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$y);
 }
 
-def ONNXQuantizeLinearOp:ONNX_Op<"QuantizeLinear", 
-    [NoSideEffect]> {
+def ONNXQuantizeLinearOp:ONNX_Op<"QuantizeLinear",
+  [NoSideEffect]> {
   let summary = "ONNX QuantizeLinear operation";
   let description = [{
-    "The linear per-tensor/layer quantization operator. It consumes a high precision tensor, a scale, a zero point to compute the low precision / quantized tensor."
-    "The quantization formula is y = saturate ((x / y_scale) + y_zero_point). For saturation, it saturates to [0, 255] if it's uint8, or [-128, 127] if it's int8."
-    "For (x / y_scale), it's rounding to nearest ties to even. Refer to https://en.wikipedia.org/wiki/Rounding for details. 'y_zero_point' and 'y' must have same type."
+  "The linear per-tensor/layer quantization operator. It consumes a high precision tensor, a scale, a zero point to compute the low precision / quantized tensor."
+  "The quantization formula is y = saturate ((x / y_scale) + y_zero_point). For saturation, it saturates to [0, 255] if it's uint8, or [-128, 127] if it's int8."
+  "For (x / y_scale), it's rounding to nearest ties to even. Refer to https://en.wikipedia.org/wiki/Rounding for details. 'y_zero_point' and 'y' must have same type."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$x,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$y_scale,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$y_zero_point);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$y_scale,
+    AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$y_zero_point);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$y);
 }
 
-def ONNXRNNOp:ONNX_Op<"RNN", 
-    [NoSideEffect]> {
+def ONNXRNNOp:ONNX_Op<"RNN",
+  [NoSideEffect]> {
   let summary = "ONNX RNN operation";
   let description = [{
-    "Computes an one-layer simple RNN. This operator is usually supported"
-    "via some custom implementation such as CuDNN."
-    ""
-    "Notations:"
-    ""
-    "`X` - input tensor"
-    ""
-    "`i` - input gate"
-    ""
-    "`t` - time step (t-1 means previous time step)"
-    ""
-    "`Wi` - W parameter weight matrix for input gate"
-    ""
-    "`Ri` - R recurrence weight matrix for input gate"
-    ""
-    "`Wbi` - W parameter bias vector for input gate"
-    ""
-    "`Rbi` - R parameter bias vector for input gate"
-    ""
-    "`WBi` - W parameter weight matrix for backward input gate"
-    ""
-    "`RBi` - R recurrence weight matrix for backward input gate"
-    ""
-    "`WBbi` - WR bias vectors for backward input gate"
-    ""
-    "`RBbi` - RR bias vectors for backward input gate"
-    ""
-    "`H` - Hidden state"
-    ""
-    "`num_directions` - 2 if direction == bidirectional else 1"
-    ""
-    "Activation functions:"
-    ""
-    "  Relu(x)                - max(0, x)"
-    ""
-    "  Tanh(x)                - (1 - e^{-2x})/(1 + e^{-2x})"
-    ""
-    "  Sigmoid(x)             - 1/(1 + e^{-x})"
-    ""
-    "  (NOTE: Below are optional)"
-    ""
-    "  Affine(x)              - alpha*x + beta"
-    ""
-    "  LeakyRelu(x)           - x if x >= 0 else alpha * x"
-    ""
-    "  ThresholdedRelu(x)     - x if x >= alpha else 0"
-    ""
-    "  ScaledTanh(x)          - alpha*Tanh(beta*x)"
-    ""
-    "  HardSigmoid(x)         - min(max(alpha*x + beta, 0), 1)"
-    ""
-    "  Elu(x)                 - x if x >= 0 else alpha*(e^x - 1)"
-    ""
-    "  Softsign(x)            - x/(1 + |x|)"
-    ""
-    "  Softplus(x)            - log(1 + e^x)"
-    ""
-    "Equations (Default: f=Tanh):"
-    ""
-    "  - Ht = f(Xt*(Wi^T) + Ht-1*(Ri^T) + Wbi + Rbi)"
-    "This operator has **optional** inputs/outputs. See [the doc](IR.md) for more details about the representation of optional arguments. An empty string may be used in the place of an actual argument's name to indicate a missing argument. Trailing optional arguments (those not followed by an argument that is present) may also be simply omitted."
+  "Computes a one-layer simple RNN. This operator is usually supported"
+  "via some custom implementation such as CuDNN."
+  ""
+  "Notations:"
+  ""
+  "`X` - input tensor"
+  ""
+  "`i` - input gate"
+  ""
+  "`t` - time step (t-1 means previous time step)"
+  ""
+  "`Wi` - W parameter weight matrix for input gate"
+  ""
+  "`Ri` - R recurrence weight matrix for input gate"
+  ""
+  "`Wbi` - W parameter bias vector for input gate"
+  ""
+  "`Rbi` - R parameter bias vector for input gate"
+  ""
+  "`WBi` - W parameter weight matrix for backward input gate"
+  ""
+  "`RBi` - R recurrence weight matrix for backward input gate"
+  ""
+  "`WBbi` - WR bias vectors for backward input gate"
+  ""
+  "`RBbi` - RR bias vectors for backward input gate"
+  ""
+  "`H` - Hidden state"
+  ""
+  "`num_directions` - 2 if direction == bidirectional else 1"
+  ""
+  "Activation functions:"
+  ""
+  "  Relu(x)                - max(0, x)"
+  ""
+  "  Tanh(x)                - (1 - e^{-2x})/(1 + e^{-2x})"
+  ""
+  "  Sigmoid(x)             - 1/(1 + e^{-x})"
+  ""
+  "  (NOTE: Below are optional)"
+  ""
+  "  Affine(x)              - alpha*x + beta"
+  ""
+  "  LeakyRelu(x)           - x if x >= 0 else alpha * x"
+  ""
+  "  ThresholdedRelu(x)     - x if x >= alpha else 0"
+  ""
+  "  ScaledTanh(x)          - alpha*Tanh(beta*x)"
+  ""
+  "  HardSigmoid(x)         - min(max(alpha*x + beta, 0), 1)"
+  ""
+  "  Elu(x)                 - x if x >= 0 else alpha*(e^x - 1)"
+  ""
+  "  Softsign(x)            - x/(1 + |x|)"
+  ""
+  "  Softplus(x)            - log(1 + e^x)"
+  ""
+  "Equations (Default: f=Tanh):"
+  ""
+  "  - Ht = f(Xt*(Wi^T) + Ht-1*(Ri^T) + Wbi + Rbi)"
+  "This operator has **optional** inputs/outputs. See [the doc](IR.md) for more details about the representation of optional arguments. An empty string may be used in the place of an actual argument's name to indicate a missing argument. Trailing optional arguments (those not followed by an argument that is present) may also be simply omitted."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$X,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$W,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$R,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$B,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$sequence_lens,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$initial_h,
-           OptionalAttr<F32ArrayAttr>:$activation_alpha,
-           OptionalAttr<F32ArrayAttr>:$activation_beta,
-           DefaultValuedAttr<StrArrayAttr, "{\"Tanh\", \"Tanh\"}">:$activations,
-           OptionalAttr<F32Attr>:$clip,
-           DefaultValuedAttr<StrAttr, "forward">:$direction,
-           OptionalAttr<I64Attr>:$hidden_size);
-  let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y_h);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$W,
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$R,
+    AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$B,
+    AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$sequence_lens,
+    AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$initial_h,
+    OptionalAttr<F32ArrayAttr>:$activation_alpha,
+    OptionalAttr<F32ArrayAttr>:$activation_beta,
+    DefaultValuedAttr<StrArrayAttr, "{\"Tanh\", \"Tanh\"}">:$activations,
+    OptionalAttr<F32Attr>:$clip,
+    DefaultValuedAttr<StrAttr, "forward">:$direction,
+    OptionalAttr<I64Attr>:$hidden_size);
+  let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$Y,
+    AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$Y_h);
 }
 
-def ONNXRandomNormalOp:ONNX_Op<"RandomNormal", 
-    [NoSideEffect]> {
+def ONNXRandomNormalOp:ONNX_Op<"RandomNormal",
+  [NoSideEffect]> {
   let summary = "ONNX RandomNormal operation";
   let description = [{
-    "Generate a tensor with random values drawn from a normal distribution. The shape"
-    "of the tensor is specified by the `shape` argument and the parameter of the normal distribution"
-    "specified by `mean` and `scale`."
-    ""
-    "The data type is specified by the 'dtype' argument. The 'dtype' argument must"
-    "be one of the data types specified in the 'DataType' enum field in the"
-    "TensorProto message."
+  "Generate a tensor with random values drawn from a normal distribution. The shape"
+  "of the tensor is specified by the `shape` argument and the parameter of the normal distribution"
+  "specified by `mean` and `scale`."
+  ""
+  "The data type is specified by the 'dtype' argument. The 'dtype' argument must"
+  "be one of the data types specified in the 'DataType' enum field in the"
+  "TensorProto message."
   }];
   let arguments = (ins DefaultValuedAttr<I64Attr, "1">:$dtype,
-           DefaultValuedAttr<F32Attr, "0.0">:$mean,
-           DefaultValuedAttr<F32Attr, "1.0">:$scale,
-           OptionalAttr<F32Attr>:$seed,
-           I64ArrayAttr:$shape);
+    DefaultValuedAttr<F32Attr, "0.0">:$mean,
+    DefaultValuedAttr<F32Attr, "1.0">:$scale,
+    OptionalAttr<F32Attr>:$seed,
+    I64ArrayAttr:$shape);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXRandomNormalLikeOp:ONNX_Op<"RandomNormalLike", 
-    [NoSideEffect]> {
+def ONNXRandomNormalLikeOp:ONNX_Op<"RandomNormalLike",
+  [NoSideEffect]> {
   let summary = "ONNX RandomNormalLike operation";
   let description = [{
-    "Generate a tensor with random values drawn from a normal distribution."
-    "The shape of the output tensor is copied from the shape of the input tensor,"
-    "and the parameters of the normal distribution are specified by `mean` and `scale`."
-    ""
-    "The data type is specified by the 'dtype' argument, or copied from the input tensor if not provided."
-    "The 'dtype' argument must be one of the data types specified in the 'DataType' enum field in the"
-    "TensorProto message, and be valid as an output type."
+  "Generate a tensor with random values drawn from a normal distribution."
+  "The shape of the output tensor is copied from the shape of the input tensor,"
+  "and the parameters of the normal distribution are specified by `mean` and `scale`."
+  ""
+  "The data type is specified by the 'dtype' argument, or copied from the input tensor if not provided."
+  "The 'dtype' argument must be one of the data types specified in the 'DataType' enum field in the"
+  "TensorProto message, and be valid as an output type."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$input,
-           OptionalAttr<I64Attr>:$dtype,
-           DefaultValuedAttr<F32Attr, "0.0">:$mean,
-           DefaultValuedAttr<F32Attr, "1.0">:$scale,
-           OptionalAttr<F32Attr>:$seed);
+    OptionalAttr<I64Attr>:$dtype,
+    DefaultValuedAttr<F32Attr, "0.0">:$mean,
+    DefaultValuedAttr<F32Attr, "1.0">:$scale,
+    OptionalAttr<F32Attr>:$seed);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXRandomUniformOp:ONNX_Op<"RandomUniform", 
-    [NoSideEffect]> {
+def ONNXRandomUniformOp:ONNX_Op<"RandomUniform",
+  [NoSideEffect]> {
   let summary = "ONNX RandomUniform operation";
   let description = [{
-    "Generate a tensor with random values drawn from a uniform distribution. The shape"
-    "of the tensor is specified by the `shape` argument and the range by `low` and `high`."
-    ""
-    "The data type is specified by the 'dtype' argument. The 'dtype' argument must"
-    "be one of the data types specified in the 'DataType' enum field in the"
-    "TensorProto message."
+  "Generate a tensor with random values drawn from a uniform distribution. The shape"
+  "of the tensor is specified by the `shape` argument and the range by `low` and `high`."
+  ""
+  "The data type is specified by the 'dtype' argument. The 'dtype' argument must"
+  "be one of the data types specified in the 'DataType' enum field in the"
+  "TensorProto message."
   }];
   let arguments = (ins DefaultValuedAttr<I64Attr, "1">:$dtype,
-           DefaultValuedAttr<F32Attr, "1.0">:$high,
-           DefaultValuedAttr<F32Attr, "0.0">:$low,
-           OptionalAttr<F32Attr>:$seed,
-           I64ArrayAttr:$shape);
+    DefaultValuedAttr<F32Attr, "1.0">:$high,
+    DefaultValuedAttr<F32Attr, "0.0">:$low,
+    OptionalAttr<F32Attr>:$seed,
+    I64ArrayAttr:$shape);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXRandomUniformLikeOp:ONNX_Op<"RandomUniformLike", 
-    [NoSideEffect]> {
+def ONNXRandomUniformLikeOp:ONNX_Op<"RandomUniformLike",
+  [NoSideEffect]> {
   let summary = "ONNX RandomUniformLike operation";
   let description = [{
-    "Generate a tensor with random values drawn from a uniform distribution."
-    "The shape of the output tensor is copied from the shape of the input tensor,"
-    "and the parameters of the uniform distribution are specified by `low` and `high`."
-    ""
-    "The data type is specified by the 'dtype' argument, or copied from the input tensor if not provided."
-    "The 'dtype' argument must be one of the data types specified in the 'DataType' enum field in the"
-    "TensorProto message and be valid as an output type."
+  "Generate a tensor with random values drawn from a uniform distribution."
+  "The shape of the output tensor is copied from the shape of the input tensor,"
+  "and the parameters of the uniform distribution are specified by `low` and `high`."
+  ""
+  "The data type is specified by the 'dtype' argument, or copied from the input tensor if not provided."
+  "The 'dtype' argument must be one of the data types specified in the 'DataType' enum field in the"
+  "TensorProto message and be valid as an output type."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$input,
-           OptionalAttr<I64Attr>:$dtype,
-           DefaultValuedAttr<F32Attr, "1.0">:$high,
-           DefaultValuedAttr<F32Attr, "0.0">:$low,
-           OptionalAttr<F32Attr>:$seed);
+    OptionalAttr<I64Attr>:$dtype,
+    DefaultValuedAttr<F32Attr, "1.0">:$high,
+    DefaultValuedAttr<F32Attr, "0.0">:$low,
+    OptionalAttr<F32Attr>:$seed);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXRangeOp:ONNX_Op<"Range", 
-    [NoSideEffect]> {
+def ONNXRangeOp:ONNX_Op<"Range",
+  [NoSideEffect]> {
   let summary = "ONNX Range operation";
   let description = [{
-    "Generate a tensor containing a sequence of numbers that begin at `start` and extends by increments of `delta` "
-    "up to `limit` (exclusive)."
-    ""
-    "The number of elements in the output of range is computed as below-"
-    ""
-    "`number_of_elements = max( ceil( (limit - start) / delta ) , 0 )`"
-    ""
-    "The pseudocode determining the contents of the output is shown below-"
-    ""
-    "`for(int i=0; i<number_of_elements; ++i)`"
-    ""
-    "`{`"
-    "   "
-    "`    output[i] =  start + (i * delta);  ` "
-    ""
-    "`}`	"
-    ""
-    "`Example 1`"
-    "Inputs: start = 3, limit = 9, delta = 3"
-    "Output: [3, 6]"
-    ""
-    "`Example 2`"
-    "Inputs: start = 10, limit = 4, delta = -2"
-    "Output: [10, 8, 6]"
-    ""
+  "Generate a tensor containing a sequence of numbers that begins at `start` and extends by increments of `delta` "
+  "up to `limit` (exclusive)."
+  ""
+  "The number of elements in the output of range is computed as below-"
+  ""
+  "`number_of_elements = max( ceil( (limit - start) / delta ) , 0 )`"
+  ""
+  "The pseudocode determining the contents of the output is shown below-"
+  ""
+  "`for(int i=0; i<number_of_elements; ++i)`"
+  ""
+  "`{`"
+  "   "
+  "`    output[i] =  start + (i * delta);  ` "
+  ""
+  "`}`	"
+  ""
+  "`Example 1`"
+  "Inputs: start = 3, limit = 9, delta = 3"
+  "Output: [3, 6]"
+  ""
+  "`Example 2`"
+  "Inputs: start = 10, limit = 4, delta = -2"
+  "Output: [10, 8, 6]"
+  ""
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$start,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$limit,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$delta);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$limit,
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$delta);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXReciprocalOp:ONNX_Op<"Reciprocal", 
-    [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+def ONNXReciprocalOp:ONNX_Op<"Reciprocal",
+  [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
   let summary = "ONNX Reciprocal operation";
   let description = [{
-    "Reciprocal takes one input data (Tensor<T>) and produces one output data"
-    "(Tensor<T>) where the reciprocal is, y = 1/x, is applied to"
-    "the tensor elementwise."
+  "Reciprocal takes one input data (Tensor<T>) and produces one output data"
+  "(Tensor<T>) where the reciprocal, y = 1/x, is applied to"
+  "the tensor elementwise."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$X);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y);
 }
 
-def ONNXReduceL1Op:ONNX_Op<"ReduceL1", 
-    [NoSideEffect]> {
+def ONNXReduceL1Op:ONNX_Op<"ReduceL1",
+  [NoSideEffect]> {
   let hasCanonicalizer = 1;
   let summary = "ONNX ReduceL1 operation";
   let description = [{
-    "Computes the L1 norm of the input tensor's element along the provided axes. The resulted"
-    "tensor has the same rank as the input if keepdims equal 1. If keepdims equal 0, then"
-    "the resulted tensor have the reduced dimension pruned."
-    ""
-    "The above behavior is similar to numpy, with the exception that numpy default keepdims to"
-    "False instead of True."
+  "Computes the L1 norm of the input tensor's elements along the provided axes. The resulting"
+  "tensor has the same rank as the input if keepdims equals 1. If keepdims equals 0, then"
+  "the resulting tensor has the reduced dimension pruned."
+  ""
+  "The above behavior is similar to numpy, with the exception that numpy defaults keepdims to"
+  "False instead of True."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$data,
-           OptionalAttr<I64ArrayAttr>:$axes,
-           DefaultValuedAttr<I64Attr, "1">:$keepdims);
+    OptionalAttr<I64ArrayAttr>:$axes,
+    DefaultValuedAttr<I64Attr, "1">:$keepdims);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$reduced);
 }
 
-def ONNXReduceL2Op:ONNX_Op<"ReduceL2", 
-    [NoSideEffect]> {
+def ONNXReduceL2Op:ONNX_Op<"ReduceL2",
+  [NoSideEffect]> {
   let hasCanonicalizer = 1;
   let summary = "ONNX ReduceL2 operation";
   let description = [{
-    "Computes the L2 norm of the input tensor's element along the provided axes. The resulted"
-    "tensor has the same rank as the input if keepdims equal 1. If keepdims equal 0, then"
-    "the resulted tensor have the reduced dimension pruned."
-    ""
-    "The above behavior is similar to numpy, with the exception that numpy default keepdims to"
-    "False instead of True."
+  "Computes the L2 norm of the input tensor's elements along the provided axes. The resulting"
+  "tensor has the same rank as the input if keepdims equals 1. If keepdims equals 0, then"
+  "the resulting tensor has the reduced dimension pruned."
+  ""
+  "The above behavior is similar to numpy, with the exception that numpy defaults keepdims to"
+  "False instead of True."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$data,
-           OptionalAttr<I64ArrayAttr>:$axes,
-           DefaultValuedAttr<I64Attr, "1">:$keepdims);
+    OptionalAttr<I64ArrayAttr>:$axes,
+    DefaultValuedAttr<I64Attr, "1">:$keepdims);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$reduced);
 }
 
-def ONNXReduceLogSumOp:ONNX_Op<"ReduceLogSum", 
-    [NoSideEffect]> {
+def ONNXReduceLogSumOp:ONNX_Op<"ReduceLogSum",
+  [NoSideEffect]> {
   let hasCanonicalizer = 1;
   let summary = "ONNX ReduceLogSum operation";
   let description = [{
-    "Computes the log sum of the input tensor's element along the provided axes. The resulted"
-    "tensor has the same rank as the input if keepdims equal 1. If keepdims equal 0, then"
-    "the resulted tensor have the reduced dimension pruned."
-    ""
-    "The above behavior is similar to numpy, with the exception that numpy default keepdims to"
-    "False instead of True."
+  "Computes the log sum of the input tensor's elements along the provided axes. The resulting"
+  "tensor has the same rank as the input if keepdims equals 1. If keepdims equals 0, then"
+  "the resulting tensor has the reduced dimension pruned."
+  ""
+  "The above behavior is similar to numpy, with the exception that numpy defaults keepdims to"
+  "False instead of True."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$data,
-           OptionalAttr<I64ArrayAttr>:$axes,
-           DefaultValuedAttr<I64Attr, "1">:$keepdims);
+    OptionalAttr<I64ArrayAttr>:$axes,
+    DefaultValuedAttr<I64Attr, "1">:$keepdims);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$reduced);
 }
 
-def ONNXReduceLogSumExpOp:ONNX_Op<"ReduceLogSumExp", 
-    [NoSideEffect]> {
+def ONNXReduceLogSumExpOp:ONNX_Op<"ReduceLogSumExp",
+  [NoSideEffect]> {
   let hasCanonicalizer = 1;
   let summary = "ONNX ReduceLogSumExp operation";
   let description = [{
-    "Computes the log sum exponent of the input tensor's element along the provided axes. The resulted"
-    "tensor has the same rank as the input if keepdims equal 1. If keepdims equal 0, then"
-    "the resulted tensor have the reduced dimension pruned."
-    ""
-    "The above behavior is similar to numpy, with the exception that numpy default keepdims to"
-    "False instead of True."
+  "Computes the log sum exponent of the input tensor's elements along the provided axes. The resulting"
+  "tensor has the same rank as the input if keepdims equals 1. If keepdims equals 0, then"
+  "the resulting tensor has the reduced dimension pruned."
+  ""
+  "The above behavior is similar to numpy, with the exception that numpy defaults keepdims to"
+  "False instead of True."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$data,
-           OptionalAttr<I64ArrayAttr>:$axes,
-           DefaultValuedAttr<I64Attr, "1">:$keepdims);
+    OptionalAttr<I64ArrayAttr>:$axes,
+    DefaultValuedAttr<I64Attr, "1">:$keepdims);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$reduced);
 }
 
-def ONNXReduceMaxOp:ONNX_Op<"ReduceMax", 
-    [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+def ONNXReduceMaxOp:ONNX_Op<"ReduceMax",
+  [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
   let summary = "ONNX ReduceMax operation";
   let description = [{
-    "Computes the max of the input tensor's element along the provided axes. The resulted"
-    "tensor has the same rank as the input if keepdims equal 1. If keepdims equal 0, then"
-    "the resulted tensor have the reduced dimension pruned."
-    ""
-    "The above behavior is similar to numpy, with the exception that numpy default keepdims to"
-    "False instead of True."
+  "Computes the max of the input tensor's elements along the provided axes. The resulting"
+  "tensor has the same rank as the input if keepdims equals 1. If keepdims equals 0, then"
+  "the resulting tensor has the reduced dimension pruned."
+  ""
+  "The above behavior is similar to numpy, with the exception that numpy defaults keepdims to"
+  "False instead of True."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$data,
-           OptionalAttr<I64ArrayAttr>:$axes,
-           DefaultValuedAttr<I64Attr, "1">:$keepdims);
+    OptionalAttr<I64ArrayAttr>:$axes,
+    DefaultValuedAttr<I64Attr, "1">:$keepdims);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$reduced);
 }
 
-def ONNXReduceMeanOp:ONNX_Op<"ReduceMean", 
-    [NoSideEffect]> {
+def ONNXReduceMeanOp:ONNX_Op<"ReduceMean",
+  [NoSideEffect]> {
   let summary = "ONNX ReduceMean operation";
   let description = [{
-    "Computes the mean of the input tensor's element along the provided axes. The resulted"
-    "tensor has the same rank as the input if keepdims equal 1. If keepdims equal 0, then"
-    "the resulted tensor have the reduced dimension pruned."
-    ""
-    "The above behavior is similar to numpy, with the exception that numpy default keepdims to"
-    "False instead of True."
+  "Computes the mean of the input tensor's elements along the provided axes. The resulting"
+  "tensor has the same rank as the input if keepdims equals 1. If keepdims equals 0, then"
+  "the resulting tensor has the reduced dimension pruned."
+  ""
+  "The above behavior is similar to numpy, with the exception that numpy defaults keepdims to"
+  "False instead of True."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$data,
-           OptionalAttr<I64ArrayAttr>:$axes,
-           DefaultValuedAttr<I64Attr, "1">:$keepdims);
+    OptionalAttr<I64ArrayAttr>:$axes,
+    DefaultValuedAttr<I64Attr, "1">:$keepdims);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$reduced);
 }
 
-def ONNXReduceMinOp:ONNX_Op<"ReduceMin", 
-    [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+def ONNXReduceMinOp:ONNX_Op<"ReduceMin",
+  [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
   let summary = "ONNX ReduceMin operation";
   let description = [{
-    "Computes the min of the input tensor's element along the provided axes. The resulted"
-    "tensor has the same rank as the input if keepdims equal 1. If keepdims equal 0, then"
-    "the resulted tensor have the reduced dimension pruned."
-    ""
-    "The above behavior is similar to numpy, with the exception that numpy default keepdims to"
-    "False instead of True."
+  "Computes the min of the input tensor's elements along the provided axes. The resulting"
+  "tensor has the same rank as the input if keepdims equals 1. If keepdims equals 0, then"
+  "the resulting tensor has the reduced dimension pruned."
+  ""
+  "The above behavior is similar to numpy, with the exception that numpy defaults keepdims to"
+  "False instead of True."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$data,
-           OptionalAttr<I64ArrayAttr>:$axes,
-           DefaultValuedAttr<I64Attr, "1">:$keepdims);
+    OptionalAttr<I64ArrayAttr>:$axes,
+    DefaultValuedAttr<I64Attr, "1">:$keepdims);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$reduced);
 }
 
-def ONNXReduceProdOp:ONNX_Op<"ReduceProd", 
-    [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+def ONNXReduceProdOp:ONNX_Op<"ReduceProd",
+  [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
   let summary = "ONNX ReduceProd operation";
   let description = [{
-    "Computes the product of the input tensor's element along the provided axes. The resulted"
-    "tensor has the same rank as the input if keepdims equal 1. If keepdims equal 0, then"
-    "the resulted tensor have the reduced dimension pruned."
-    ""
-    "The above behavior is similar to numpy, with the exception that numpy default keepdims to"
-    "False instead of True."
+  "Computes the product of the input tensor's elements along the provided axes. The resulting"
+  "tensor has the same rank as the input if keepdims equals 1. If keepdims equals 0, then"
+  "the resulting tensor has the reduced dimension pruned."
+  ""
+  "The above behavior is similar to numpy, with the exception that numpy defaults keepdims to"
+  "False instead of True."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$data,
-           OptionalAttr<I64ArrayAttr>:$axes,
-           DefaultValuedAttr<I64Attr, "1">:$keepdims);
+    OptionalAttr<I64ArrayAttr>:$axes,
+    DefaultValuedAttr<I64Attr, "1">:$keepdims);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$reduced);
 }
 
-def ONNXReduceSumOp:ONNX_Op<"ReduceSum", 
-    [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+def ONNXReduceSumOp:ONNX_Op<"ReduceSum",
+  [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
   let summary = "ONNX ReduceSum operation";
   let description = [{
-    "Computes the sum of the input tensor's element along the provided axes. The resulted"
-    "tensor has the same rank as the input if keepdims equal 1. If keepdims equal 0, then"
-    "the resulted tensor have the reduced dimension pruned."
-    ""
-    "The above behavior is similar to numpy, with the exception that numpy default keepdims to"
-    "False instead of True."
+  "Computes the sum of the input tensor's elements along the provided axes. The resulting"
+  "tensor has the same rank as the input if keepdims equals 1. If keepdims equals 0, then"
+  "the resulting tensor has the reduced dimension pruned."
+  ""
+  "The above behavior is similar to numpy, with the exception that numpy defaults keepdims to"
+  "False instead of True."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$data,
-           OptionalAttr<I64ArrayAttr>:$axes,
-           DefaultValuedAttr<I64Attr, "1">:$keepdims);
+    OptionalAttr<I64ArrayAttr>:$axes,
+    DefaultValuedAttr<I64Attr, "1">:$keepdims);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$reduced);
   let builders = [
     OpBuilder<"Builder *builder, OperationState &state, Value data, ArrayAttr axes, IntegerAttr keepdims", [{
@@ -2478,24 +2460,24 @@ def ONNXReduceSumOp:ONNX_Op<"ReduceSum",
       outputTypes.emplace_back(UnrankedTensorType::get(elementType));
       build(builder, state, outputTypes, operands, attributes);
     }]>
-  ];
+    ];
 }
 
-def ONNXReduceSumSquareOp:ONNX_Op<"ReduceSumSquare", 
-    [NoSideEffect]> {
+def ONNXReduceSumSquareOp:ONNX_Op<"ReduceSumSquare",
+  [NoSideEffect]> {
   let hasCanonicalizer = 1;
   let summary = "ONNX ReduceSumSquare operation";
   let description = [{
-    "Computes the sum square of the input tensor's element along the provided axes. The resulted"
-    "tensor has the same rank as the input if keepdims equal 1. If keepdims equal 0, then"
-    "the resulted tensor have the reduced dimension pruned."
-    ""
-    "The above behavior is similar to numpy, with the exception that numpy default keepdims to"
-    "False instead of True."
+  "Computes the sum square of the input tensor's elements along the provided axes. The resulting"
+  "tensor has the same rank as the input if keepdims equals 1. If keepdims equals 0, then"
+  "the resulting tensor has the reduced dimension pruned."
+  ""
+  "The above behavior is similar to numpy, with the exception that numpy defaults keepdims to"
+  "False instead of True."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$data,
-           OptionalAttr<I64ArrayAttr>:$axes,
-           DefaultValuedAttr<I64Attr, "1">:$keepdims);
+    OptionalAttr<I64ArrayAttr>:$axes,
+    DefaultValuedAttr<I64Attr, "1">:$keepdims);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$reduced);
   let builders = [
     OpBuilder<"Builder *builder, OperationState &state, Value data, ArrayAttr axes, IntegerAttr keepdims", [{
@@ -2508,1147 +2490,1148 @@ def ONNXReduceSumSquareOp:ONNX_Op<"ReduceSumSquare",
       outputTypes.emplace_back(UnrankedTensorType::get(elementType));
       build(builder, state, outputTypes, operands, attributes);
     }]>
-  ];
+    ];
 }
 
-def ONNXReluOp:ONNX_Op<"Relu", 
-    [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+def ONNXReluOp:ONNX_Op<"Relu",
+  [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
   let summary = "ONNX Relu operation";
   let description = [{
-    "Relu takes one input data (Tensor<T>) and produces one output data"
-    "(Tensor<T>) where the rectified linear function, y = max(0, x), is applied to"
-    "the tensor elementwise."
+  "Relu takes one input data (Tensor<T>) and produces one output data"
+  "(Tensor<T>) where the rectified linear function, y = max(0, x), is applied to"
+  "the tensor elementwise."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$X);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y);
 }
 
-def ONNXReshapeOp:ONNX_Op<"Reshape", 
-    [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+def ONNXReshapeOp:ONNX_Op<"Reshape",
+  [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
   let summary = "ONNX Reshape operation";
   let description = [{
-    "Reshape the input tensor similar to numpy.reshape."
-    "First input is the data tensor, second input is a shape tensor which specifies the output shape. It outputs the reshaped tensor."
-    "At most one dimension of the new shape can be -1. In this case, the value is"
-    "inferred from the size of the tensor and the remaining dimensions. A dimension"
-    "could also be 0, in which case the actual dimension value is unchanged (i.e. taken"
-    "from the input tensor)."
+  "Reshape the input tensor similar to numpy.reshape."
+  "First input is the data tensor, second input is a shape tensor which specifies the output shape. It outputs the reshaped tensor."
+  "At most one dimension of the new shape can be -1. In this case, the value is"
+  "inferred from the size of the tensor and the remaining dimensions. A dimension"
+  "could also be 0, in which case the actual dimension value is unchanged (i.e. taken"
+  "from the input tensor)."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$data,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$shape);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$shape);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$reshaped);
 }
 
-def ONNXResizeOp:ONNX_Op<"Resize", 
-    [NoSideEffect]> {
+def ONNXResizeOp:ONNX_Op<"Resize",
+  [NoSideEffect]> {
   let summary = "ONNX Resize operation";
   let description = [{
-    "Resize the input tensor. In general, it calculates every value in the output tensor as a weighted average of neighborhood (a.k.a. sampling locations) in the input tensor."
-    "Each dimension value of the output tensor is:"
-    "  output_dimension = floor(input_dimension * (roi_end - roi_start) * scale) if input \"sizes\" is not specified."
+  "Resize the input tensor. In general, it calculates every value in the output tensor as a weighted average of neighborhood (a.k.a. sampling locations) in the input tensor."
+  "Each dimension value of the output tensor is:"
+  "  output_dimension = floor(input_dimension * (roi_end - roi_start) * scale) if input \"sizes\" is not specified."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$X,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$roi,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$scales,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$sizes,
-           DefaultValuedAttr<StrAttr, "half_pixel">:$coordinate_transformation_mode,
-           DefaultValuedAttr<F32Attr, "-0.75">:$cubic_coeff_a,
-           DefaultValuedAttr<I64Attr, "0">:$exclude_outside,
-           DefaultValuedAttr<F32Attr, "0.0">:$extrapolation_value,
-           DefaultValuedAttr<StrAttr, "nearest">:$mode,
-           DefaultValuedAttr<StrAttr, "round_prefer_floor">:$nearest_mode);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$roi,
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$scales,
+    AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$sizes,
+    DefaultValuedAttr<StrAttr, "half_pixel">:$coordinate_transformation_mode,
+    DefaultValuedAttr<F32Attr, "-0.75">:$cubic_coeff_a,
+    DefaultValuedAttr<I64Attr, "0">:$exclude_outside,
+    DefaultValuedAttr<F32Attr, "0.0">:$extrapolation_value,
+    DefaultValuedAttr<StrAttr, "nearest">:$mode,
+    DefaultValuedAttr<StrAttr, "round_prefer_floor">:$nearest_mode);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y);
 }
 
-def ONNXReverseSequenceOp:ONNX_Op<"ReverseSequence", 
-    [NoSideEffect]> {
+def ONNXReverseSequenceOp:ONNX_Op<"ReverseSequence",
+  [NoSideEffect]> {
   let summary = "ONNX ReverseSequence operation";
   let description = [{
-    "Reverse batch of sequences having different lengths specified by `sequence_lens`."
-    ""
-    "For each slice i iterating on batch axis, the operator reverses the first sequence_lens[i] elements on time axis,"
-    "and copies elements whose index's beyond sequence_lens[i] to the output. So the output slice i contains reversed"
-    "sequences on the first sequence_lens[i] elements, then have original values copied for the other elements."
-    ""
-    "Example 1:"
-    "  input = [[0.0, 4.0, 8.0,  12.0],"
-    "           [1.0, 5.0, 9.0,  13.0],"
-    "           [2.0, 6.0, 10.0, 14.0],"
-    "           [3.0, 7.0, 11.0, 15.0]]"
-    "  sequence_lens = [4, 3, 2, 1]"
-    "  time_axis = 0"
-    "  batch_axis = 1"
-    ""
-    "  output = [[3.0, 6.0, 9.0,  12.0],"
-    "            [2.0, 5.0, 8.0,  13.0],"
-    "            [1.0, 4.0, 10.0, 14.0],"
-    "            [0.0, 7.0, 11.0, 15.0]]"
-    ""
-    "Example 2:"
-    "  input = [[0.0,  1.0,  2.0,  3.0 ],"
-    "           [4.0,  5.0,  6.0,  7.0 ],"
-    "           [8.0,  9.0,  10.0, 11.0],"
-    "           [12.0, 13.0, 14.0, 15.0]]"
-    "  sequence_lens = [1, 2, 3, 4]"
-    "  time_axis = 1"
-    "  batch_axis = 0"
-    ""
-    "  output = [[0.0,  1.0,  2.0,  3.0 ],"
-    "            [5.0,  4.0,  6.0,  7.0 ],"
-    "            [10.0, 9.0,  8.0,  11.0],"
-    "            [15.0, 14.0, 13.0, 12.0]]"
+  "Reverse batch of sequences having different lengths specified by `sequence_lens`."
+  ""
+  "For each slice i iterating on batch axis, the operator reverses the first sequence_lens[i] elements on time axis,"
+  "and copies elements whose indices are beyond sequence_lens[i] to the output. So the output slice i contains reversed"
+  "sequences on the first sequence_lens[i] elements, then have original values copied for the other elements."
+  ""
+  "Example 1:"
+  "  input = [[0.0, 4.0, 8.0,  12.0],"
+  "           [1.0, 5.0, 9.0,  13.0],"
+  "           [2.0, 6.0, 10.0, 14.0],"
+  "           [3.0, 7.0, 11.0, 15.0]]"
+  "  sequence_lens = [4, 3, 2, 1]"
+  "  time_axis = 0"
+  "  batch_axis = 1"
+  ""
+  "  output = [[3.0, 6.0, 9.0,  12.0],"
+  "            [2.0, 5.0, 8.0,  13.0],"
+  "            [1.0, 4.0, 10.0, 14.0],"
+  "            [0.0, 7.0, 11.0, 15.0]]"
+  ""
+  "Example 2:"
+  "  input = [[0.0,  1.0,  2.0,  3.0 ],"
+  "           [4.0,  5.0,  6.0,  7.0 ],"
+  "           [8.0,  9.0,  10.0, 11.0],"
+  "           [12.0, 13.0, 14.0, 15.0]]"
+  "  sequence_lens = [1, 2, 3, 4]"
+  "  time_axis = 1"
+  "  batch_axis = 0"
+  ""
+  "  output = [[0.0,  1.0,  2.0,  3.0 ],"
+  "            [5.0,  4.0,  6.0,  7.0 ],"
+  "            [10.0, 9.0,  8.0,  11.0],"
+  "            [15.0, 14.0, 13.0, 12.0]]"
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$input,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$sequence_lens,
-           DefaultValuedAttr<I64Attr, "1">:$batch_axis,
-           DefaultValuedAttr<I64Attr, "0">:$time_axis);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$sequence_lens,
+    DefaultValuedAttr<I64Attr, "1">:$batch_axis,
+    DefaultValuedAttr<I64Attr, "0">:$time_axis);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y);
 }
 
-def ONNXRoiAlignOp:ONNX_Op<"RoiAlign", 
-    [NoSideEffect]> {
+def ONNXRoiAlignOp:ONNX_Op<"RoiAlign",
+  [NoSideEffect]> {
   let summary = "ONNX RoiAlign operation";
   let description = [{
-    "Region of Interest (RoI) align operation described in the"
-    "[Mask R-CNN paper](https://arxiv.org/abs/1703.06870)."
-    "RoiAlign consumes an input tensor X and region of interests (rois)"
-    "to apply pooling across each RoI; it produces a 4-D tensor of shape"
-    "(num_rois, C, output_height, output_width)."
-    ""
-    "RoiAlign is proposed to avoid the misalignment by removing"
-    "quantizations while converting from original image into feature"
-    "map and from feature map into RoI feature; in each ROI bin,"
-    "the value of the sampled locations are computed directly"
-    "through bilinear interpolation."
+  "Region of Interest (RoI) align operation described in the"
+  "[Mask R-CNN paper](https://arxiv.org/abs/1703.06870)."
+  "RoiAlign consumes an input tensor X and region of interests (rois)"
+  "to apply pooling across each RoI; it produces a 4-D tensor of shape"
+  "(num_rois, C, output_height, output_width)."
+  ""
+  "RoiAlign is proposed to avoid the misalignment by removing"
+  "quantizations while converting from original image into feature"
+  "map and from feature map into RoI feature; in each ROI bin,"
+  "the value of the sampled locations are computed directly"
+  "through bilinear interpolation."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$X,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$rois,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$batch_indices,
-           DefaultValuedAttr<StrAttr, "avg">:$mode,
-           DefaultValuedAttr<I64Attr, "1">:$output_height,
-           DefaultValuedAttr<I64Attr, "1">:$output_width,
-           DefaultValuedAttr<I64Attr, "0">:$sampling_ratio,
-           DefaultValuedAttr<F32Attr, "1.0">:$spatial_scale);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$rois,
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$batch_indices,
+    DefaultValuedAttr<StrAttr, "avg">:$mode,
+    DefaultValuedAttr<I64Attr, "1">:$output_height,
+    DefaultValuedAttr<I64Attr, "1">:$output_width,
+    DefaultValuedAttr<I64Attr, "0">:$sampling_ratio,
+    DefaultValuedAttr<F32Attr, "1.0">:$spatial_scale);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y);
 }
 
-def ONNXRoundOp:ONNX_Op<"Round", 
-    [NoSideEffect]> {
+def ONNXRoundOp:ONNX_Op<"Round",
+  [NoSideEffect]> {
   let summary = "ONNX Round operation";
   let description = [{
-    "Round takes one input Tensor and rounds the values, element-wise, meaning"
-    "it finds the nearest integer for each value."
-    "In case of halfs, the rule is to round them to the nearest even integer."
-    "The output tensor has the same shape and type as the input."
-    ""
-    "Examples:"
-    "```"
-    "round([0.9]) = [1.0]"
-    "round([2.5]) = [2.0]"
-    "round([2.3]) = [2.0]"
-    "round([1.5]) = [2.0]"
-    "round([-4.5]) = [-4.0]"
-    "```"
+  "Round takes one input Tensor and rounds the values, element-wise, meaning"
+  "it finds the nearest integer for each value."
+  "In case of halves, the rule is to round them to the nearest even integer."
+  "The output tensor has the same shape and type as the input."
+  ""
+  "Examples:"
+  "```"
+  "round([0.9]) = [1.0]"
+  "round([2.5]) = [2.0]"
+  "round([2.3]) = [2.0]"
+  "round([1.5]) = [2.0]"
+  "round([-4.5]) = [-4.0]"
+  "```"
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$X);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y);
 }
 
-def ONNXScanOp:ONNX_Op<"Scan", 
-    [NoSideEffect]> {
+def ONNXScanOp:ONNX_Op<"Scan",
+  [NoSideEffect]> {
   let summary = "ONNX Scan operation";
   let description = [{
-    "Scan can be used to iterate over one or more scan_input tensors,"
-    "constructing zero or more scan_output tensors. It combines ideas from general recurrences,"
-    "functional programming constructs such as scan, fold, map, and zip and is intended to enable"
-    "generalizations of RNN-like constructs for sequence-to-sequence processing."
-    "Other tensors (referred to as state_variables here) can be used to carry a state"
-    "when iterating from one element to another (similar to hidden-state in RNNs, also referred"
-    "to as loop-carried dependences in the context of loops)."
-    "Many common usages involve a single scan_input tensor (where functionality"
-    "similar to scan, fold and map can be obtained). When more than one scan_input is used,"
-    "a behavior similar to zip is obtained."
-    ""
-    "The attribute body must be a graph, specifying the computation to be performed in"
-    "every iteration. It takes as input the current values of the state_variables and"
-    "the current iterated element of the scan_inputs. It must return the (updated) values"
-    "of the state_variables and zero or more scan_output_element tensors. The values of the"
-    "scan_output_element tensors are concatenated over all the iterations to produce the"
-    "scan_output values of the scan construct (similar to the concatenated intermediate"
-    "hidden-state values of RNN-like constructs). All the output tensors (state_variables as"
-    "well as scan_output_element tensors) are required to have the same shape in each iteration"
-    "of the loop (a restriction imposed to enable efficient memory allocation)."
-    ""
-    "Note that the iterated element passed to the body subgraph does not have a sequence"
-    "axis. It will have a rank one less than the rank of the corresponding scan_input."
-    ""
-    "The scan operation returns the final values of the state_variables as well as the"
-    "scan_outputs."
-    ""
-    "The optional attribute scan_input_directions specifies the direction (forward or backward)"
-    "for each scan input. If this attribute is omitted, all sequences are scanned in the forward"
-    "direction. A bidirectional scan may be performed by specifying the same tensor input twice"
-    "in the scan_inputs, once with a forward direction, and once with a backward direction."
-    ""
-    "The scan_output of the operation is produced by concatenating the scan_output_element"
-    "values produced by the body in each iteration.  The optional attribute scan_output_directions"
-    "specifies the direction in which scan_output is constructed (by appending or prepending the"
-    "scan_output_element to scan_output in each iteration) for each scan_output. If this attribute"
-    "is omitted, the scan_output_element is appended to the scan_output in each iteration."
-    ""
-    "The optional attribute scan_input_axes specifies the axis to be scanned for each scan_input."
-    "If omitted, every scan_input will be scanned in axis 0. For example, if axis 0 is the"
-    "batch axis and axis 1 is the time axis (to be scanned), specify an axis value of 1."
-    "Note that scanning a non-zero axis may be less efficient than scanning axis zero."
-    ""
-    "The optional attribute scan_output_axes specifies the axis along which the scan_outputs"
-    "are accumulated for each scan_output. For example, if axis 1 is the time axis (to be"
-    "scanned) for both inputs and outputs, specify a scan_input axis and scan_output axis"
-    "value of 1."
-    ""
-    "Note that because of the ONNX restriction that only the last parameter of an operator can"
-    "be variadic, the initial-states and scan-inputs are listed together as one input parameter."
-    "Similarly, the final-states and scan-outputs are listed together as one output parameter."
-    "The attribute num_scan_inputs indicates the number M of scan-inputs."
-    ""
-    "The behavior of"
-    ""
-    "    Scan <"
-    "        num_scan_inputs = m,"
-    "        body = loop-body,"
-    "        scan_input_axes = [axis_1, ..., axis_m]"
-    "    > (init_1, ..., init_n, scan_1, ..., scan_m)"
-    ""
-    "is equivalent to the following pseudo-code:"
-    ""
-    "    // scan_i.shape[axis_i] denotes the (max) sequence-length of scan_i"
-    "    // scan_i.shape[axis_i] is required to be equal to scan_j.shape[axis_j] for all i,j."
-    "    sequence_length = scan_1.shape[axis_1];"
-    ""
-    "    // initialize state-variables"
-    "    st_1 = init_1; ... st_n = init_n;"
-    "    // initialize scan-output variables: [] denotes an empty tensor"
-    "    scan_out_1 = []; ...; scan_out_k = [];"
-    "    // identify number of iterations:"
-    ""
-    "    // execute loop"
-    "    for (int t = 0; t < sequence_length; ++t) {"
-    "        // generate the scan-input elements: the notation T<axis=k>[t] indicates the sub-tensor"
-    "        // of rank one less than T obtained by indexing T at position t along axis k."
-    "        si_1 = scan_1<axis=axis_1>[t];"
-    "        ... ;"
-    "        si_m = scan_m<axis=axis_m>[t];"
-    "        // execute loop-body"
-    "        st_1, ..., st_n, so_1, ..., so_k = loop-body(st_1, ..., st_n, si_1, ..., si_m)"
-    "        // accumulate the scan-output elements"
-    "        scan_out_1 = Concat<axis=0>(scan_out_1, so_1); ... ; scan_out_k = Concat<axis=0>(scan_out_k, so_k);"
-    "    }"
-    ""
-    "    return st_1, ..., st_n, scan_out_1, ..., scan_out_k;"
-    ""
-    "*Sample usage: Encoding RNN using a Scan*"
-    ""
-    "The following example shows how a simple RNN over an input tensor %X, with weight tensor %Wi,"
-    "recurrence weight tensor %Ri, bias tensors %Wbi and %Rbi, and initial hidden-state %H_0 can"
-    "be encoded as a ScanLoop. Note that the loop-body is a nested graph, and it directly computes"
-    "%Wi, %Ri, %Wbi, and %Rbi (typically constants or initializers in the body graph). If these"
-    "values are computed in the outer graph, they need to be passed in as extra state_variables."
-    ""
-    "    graph rnn-encoding {"
-    "      %H_0 = ... "
-    "      %X = ..."
-    "      %Y_h, %Y = Scan[body = <graph rnn-cell-1>, num_scan_inputs=1](%H_0, %X)"
-    "      return %Y, %Y_h"
-    "    }"
-    ""
-    "    graph rnn-cell-1 ("
-    "      %H_tminus1[FLOAT, tensor]"
-    "      %X_t[FLOAT, tensor]"
-    "    ) {"
-    "      %Wi = ..."
-    "      %Ri = ..."
-    "      %Wbi = ..."
-    "      %Rbi = ..."
-    "      %t1 = X_t * (Wi^T)"
-    "      %t2 = H_tminus1*(Ri^T)"
-    "      %t3 = Add(%t1, %t2)"
-    "      %t4 = Add(%t3, %Wbi)"
-    "      %t5 = Add(%t4, %Rbi)"
-    "      %Ht = Tanh(%t5)"
-    "      %Accumulate = Identity(%Ht)"
-    "      return %Ht, %Accumulate"
-    "    }"
-    ""
+  "Scan can be used to iterate over one or more scan_input tensors,"
+  "constructing zero or more scan_output tensors. It combines ideas from general recurrences,"
+  "functional programming constructs such as scan, fold, map, and zip and is intended to enable"
+  "generalizations of RNN-like constructs for sequence-to-sequence processing."
+  "Other tensors (referred to as state_variables here) can be used to carry a state"
+  "when iterating from one element to another (similar to hidden-state in RNNs, also referred"
+  "to as loop-carried dependences in the context of loops)."
+  "Many common usages involve a single scan_input tensor (where functionality"
+  "similar to scan, fold and map can be obtained). When more than one scan_input is used,"
+  "a behavior similar to zip is obtained."
+  ""
+  "The attribute body must be a graph, specifying the computation to be performed in"
+  "every iteration. It takes as input the current values of the state_variables and"
+  "the current iterated element of the scan_inputs. It must return the (updated) values"
+  "of the state_variables and zero or more scan_output_element tensors. The values of the"
+  "scan_output_element tensors are concatenated over all the iterations to produce the"
+  "scan_output values of the scan construct (similar to the concatenated intermediate"
+  "hidden-state values of RNN-like constructs). All the output tensors (state_variables as"
+  "well as scan_output_element tensors) are required to have the same shape in each iteration"
+  "of the loop (a restriction imposed to enable efficient memory allocation)."
+  ""
+  "Note that the iterated element passed to the body subgraph does not have a sequence"
+  "axis. It will have a rank one less than the rank of the corresponding scan_input."
+  ""
+  "The scan operation returns the final values of the state_variables as well as the"
+  "scan_outputs."
+  ""
+  "The optional attribute scan_input_directions specifies the direction (forward or backward)"
+  "for each scan input. If this attribute is omitted, all sequences are scanned in the forward"
+  "direction. A bidirectional scan may be performed by specifying the same tensor input twice"
+  "in the scan_inputs, once with a forward direction, and once with a backward direction."
+  ""
+  "The scan_output of the operation is produced by concatenating the scan_output_element"
+  "values produced by the body in each iteration.  The optional attribute scan_output_directions"
+  "specifies the direction in which scan_output is constructed (by appending or prepending the"
+  "scan_output_element to scan_output in each iteration) for each scan_output. If this attribute"
+  "is omitted, the scan_output_element is appended to the scan_output in each iteration."
+  ""
+  "The optional attribute scan_input_axes specifies the axis to be scanned for each scan_input."
+  "If omitted, every scan_input will be scanned in axis 0. For example, if axis 0 is the"
+  "batch axis and axis 1 is the time axis (to be scanned), specify an axis value of 1."
+  "Note that scanning a non-zero axis may be less efficient than scanning axis zero."
+  ""
+  "The optional attribute scan_output_axes specifies the axis along which the scan_outputs"
+  "are accumulated for each scan_output. For example, if axis 1 is the time axis (to be"
+  "scanned) for both inputs and outputs, specify a scan_input axis and scan_output axis"
+  "value of 1."
+  ""
+  "Note that because of the ONNX restriction that only the last parameter of an operator can"
+  "be variadic, the initial-states and scan-inputs are listed together as one input parameter."
+  "Similarly, the final-states and scan-outputs are listed together as one output parameter."
+  "The attribute num_scan_inputs indicates the number M of scan-inputs."
+  ""
+  "The behavior of"
+  ""
+  "    Scan <"
+  "        num_scan_inputs = m,"
+  "        body = loop-body,"
+  "        scan_input_axes = [axis_1, ..., axis_m]"
+  "    > (init_1, ..., init_n, scan_1, ..., scan_m)"
+  ""
+  "is equivalent to the following pseudo-code:"
+  ""
+  "    // scan_i.shape[axis_i] denotes the (max) sequence-length of scan_i"
+  "    // scan_i.shape[axis_i] is required to be equal to scan_j.shape[axis_j] for all i,j."
+  "    sequence_length = scan_1.shape[axis_1];"
+  ""
+  "    // initialize state-variables"
+  "    st_1 = init_1; ... st_n = init_n;"
+  "    // initialize scan-output variables: [] denotes an empty tensor"
+  "    scan_out_1 = []; ...; scan_out_k = [];"
+  "    // identify number of iterations:"
+  ""
+  "    // execute loop"
+  "    for (int t = 0; t < sequence_length; ++t) {"
+  "        // generate the scan-input elements: the notation T<axis=k>[t] indicates the sub-tensor"
+  "        // of rank one less than T obtained by indexing T at position t along axis k."
+  "        si_1 = scan_1<axis=axis_1>[t];"
+  "        ... ;"
+  "        si_m = scan_m<axis=axis_m>[t];"
+  "        // execute loop-body"
+  "        st_1, ..., st_n, so_1, ..., so_k = loop-body(st_1, ..., st_n, si_1, ..., si_m)"
+  "        // accumulate the scan-output elements"
+  "        scan_out_1 = Concat<axis=0>(scan_out_1, so_1); ... ; scan_out_k = Concat<axis=0>(scan_out_k, so_k);"
+  "    }"
+  ""
+  "    return st_1, ..., st_n, scan_out_1, ..., scan_out_k;"
+  ""
+  "*Sample usage: Encoding RNN using a Scan*"
+  ""
+  "The following example shows how a simple RNN over an input tensor %X, with weight tensor %Wi,"
+  "recurrence weight tensor %Ri, bias tensors %Wbi and %Rbi, and initial hidden-state %H_0 can"
+  "be encoded as a ScanLoop. Note that the loop-body is a nested graph, and it directly computes"
+  "%Wi, %Ri, %Wbi, and %Rbi (typically constants or initializers in the body graph). If these"
+  "values are computed in the outer graph, they need to be passed in as extra state_variables."
+  ""
+  "    graph rnn-encoding {"
+  "      %H_0 = ... "
+  "      %X = ..."
+  "      %Y_h, %Y = Scan[body = <graph rnn-cell-1>, num_scan_inputs=1](%H_0, %X)"
+  "      return %Y, %Y_h"
+  "    }"
+  ""
+  "    graph rnn-cell-1 ("
+  "      %H_tminus1[FLOAT, tensor]"
+  "      %X_t[FLOAT, tensor]"
+  "    ) {"
+  "      %Wi = ..."
+  "      %Ri = ..."
+  "      %Wbi = ..."
+  "      %Rbi = ..."
+  "      %t1 = X_t * (Wi^T)"
+  "      %t2 = H_tminus1*(Ri^T)"
+  "      %t3 = Add(%t1, %t2)"
+  "      %t4 = Add(%t3, %Wbi)"
+  "      %t5 = Add(%t4, %Rbi)"
+  "      %Ht = Tanh(%t5)"
+  "      %Accumulate = Identity(%Ht)"
+  "      return %Ht, %Accumulate"
+  "    }"
+  ""
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$initial_state_and_scan_inputs,
-           AnyAttr:$body,
-           I64Attr:$num_scan_inputs,
-           OptionalAttr<I64ArrayAttr>:$scan_input_axes,
-           OptionalAttr<I64ArrayAttr>:$scan_input_directions,
-           OptionalAttr<I64ArrayAttr>:$scan_output_axes,
-           OptionalAttr<I64ArrayAttr>:$scan_output_directions);
+    AnyAttr:$body,
+    I64Attr:$num_scan_inputs,
+    OptionalAttr<I64ArrayAttr>:$scan_input_axes,
+    OptionalAttr<I64ArrayAttr>:$scan_input_directions,
+    OptionalAttr<I64ArrayAttr>:$scan_output_axes,
+    OptionalAttr<I64ArrayAttr>:$scan_output_directions);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$final_state_and_scan_outputs);
 }
 
-def ONNXScatterOp:ONNX_Op<"Scatter", 
-    [NoSideEffect]> {
+def ONNXScatterOp:ONNX_Op<"Scatter",
+  [NoSideEffect]> {
   let summary = "ONNX Scatter operation";
   let description = [{
-    "This operator is deprecated. Please use ScatterElements, which provides the same functionality."
-    ""
-    "Scatter takes three inputs `data`, `updates`, and `indices` of the same"
-    "rank r >= 1 and an optional attribute axis that identifies an axis of `data`"
-    "(by default, the outer-most axis, that is axis 0). The output of the operation"
-    "is produced by creating a copy of the input `data`, and then updating its value"
-    "to values specified by `updates` at specific index positions specified by"
-    "`indices`. Its output shape is the same as the shape of `data`."
-    ""
-    "For each entry in `updates`, the target index in `data` is obtained by combining"
-    "the corresponding entry in `indices` with the index of the entry itself: the"
-    "index-value for dimension = axis is obtained from the value of the corresponding"
-    "entry in `indices` and the index-value for dimension != axis is obtained from the"
-    "index of the entry itself."
-    ""
-    "For instance, in a 2-D tensor case, the update corresponding to the [i][j] entry"
-    "is performed as below:"
-    "```"
-    "  output[indices[i][j]][j] = updates[i][j] if axis = 0, "
-    "  output[i][indices[i][j]] = updates[i][j] if axis = 1,"
-    "```"
-    ""
-    "This operator is the inverse of GatherElements. It is similar to Torch's Scatter operation."
-    ""
-    "Example 1:"
-    "```"
-    "  data = ["
-    "      [0.0, 0.0, 0.0],"
-    "      [0.0, 0.0, 0.0],"
-    "      [0.0, 0.0, 0.0],"
-    "  ]"
-    "  indices = ["
-    "      [1, 0, 2],"
-    "      [0, 2, 1],"
-    "  ]"
-    "  updates = ["
-    "      [1.0, 1.1, 1.2],"
-    "      [2.0, 2.1, 2.2],"
-    "  ]"
-    "  output = ["
-    "      [2.0, 1.1, 0.0]"
-    "      [1.0, 0.0, 2.2]"
-    "      [0.0, 2.1, 1.2]"
-    "  ]"
-    "```"
-    "Example 2:"
-    "```"
-    "  data = [[1.0, 2.0, 3.0, 4.0, 5.0]]"
-    "  indices = [[1, 3]]"
-    "  updates = [[1.1, 2.1]]"
-    "  axis = 1"
-    "  output = [[1.0, 1.1, 3.0, 2.1, 5.0]]"
-    "```"
+  "This operator is deprecated. Please use ScatterElements, which provides the same functionality."
+  ""
+  "Scatter takes three inputs `data`, `updates`, and `indices` of the same"
+  "rank r >= 1 and an optional attribute axis that identifies an axis of `data`"
+  "(by default, the outer-most axis, that is axis 0). The output of the operation"
+  "is produced by creating a copy of the input `data`, and then updating its value"
+  "to values specified by `updates` at specific index positions specified by"
+  "`indices`. Its output shape is the same as the shape of `data`."
+  ""
+  "For each entry in `updates`, the target index in `data` is obtained by combining"
+  "the corresponding entry in `indices` with the index of the entry itself: the"
+  "index-value for dimension = axis is obtained from the value of the corresponding"
+  "entry in `indices` and the index-value for dimension != axis is obtained from the"
+  "index of the entry itself."
+  ""
+  "For instance, in a 2-D tensor case, the update corresponding to the [i][j] entry"
+  "is performed as below:"
+  "```"
+  "  output[indices[i][j]][j] = updates[i][j] if axis = 0, "
+  "  output[i][indices[i][j]] = updates[i][j] if axis = 1,"
+  "```"
+  ""
+  "This operator is the inverse of GatherElements. It is similar to Torch's Scatter operation."
+  ""
+  "Example 1:"
+  "```"
+  "  data = ["
+  "      [0.0, 0.0, 0.0],"
+  "      [0.0, 0.0, 0.0],"
+  "      [0.0, 0.0, 0.0],"
+  "  ]"
+  "  indices = ["
+  "      [1, 0, 2],"
+  "      [0, 2, 1],"
+  "  ]"
+  "  updates = ["
+  "      [1.0, 1.1, 1.2],"
+  "      [2.0, 2.1, 2.2],"
+  "  ]"
+  "  output = ["
+  "      [2.0, 1.1, 0.0]"
+  "      [1.0, 0.0, 2.2]"
+  "      [0.0, 2.1, 1.2]"
+  "  ]"
+  "```"
+  "Example 2:"
+  "```"
+  "  data = [[1.0, 2.0, 3.0, 4.0, 5.0]]"
+  "  indices = [[1, 3]]"
+  "  updates = [[1.1, 2.1]]"
+  "  axis = 1"
+  "  output = [[1.0, 1.1, 3.0, 2.1, 5.0]]"
+  "```"
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$data,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$indices,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$updates,
-           DefaultValuedAttr<I64Attr, "0">:$axis);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$indices,
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$updates,
+    DefaultValuedAttr<I64Attr, "0">:$axis);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXScatterElementsOp:ONNX_Op<"ScatterElements", 
-    [NoSideEffect]> {
+def ONNXScatterElementsOp:ONNX_Op<"ScatterElements",
+  [NoSideEffect]> {
   let summary = "ONNX ScatterElements operation";
   let description = [{
-    "ScatterElements takes three inputs `data`, `updates`, and `indices` of the same"
-    "rank r >= 1 and an optional attribute axis that identifies an axis of `data`"
-    "(by default, the outer-most axis, that is axis 0). The output of the operation"
-    "is produced by creating a copy of the input `data`, and then updating its value"
-    "to values specified by `updates` at specific index positions specified by"
-    "`indices`. Its output shape is the same as the shape of `data`."
-    ""
-    "For each entry in `updates`, the target index in `data` is obtained by combining"
-    "the corresponding entry in `indices` with the index of the entry itself: the"
-    "index-value for dimension = axis is obtained from the value of the corresponding"
-    "entry in `indices` and the index-value for dimension != axis is obtained from the"
-    "index of the entry itself."
-    ""
-    "For instance, in a 2-D tensor case, the update corresponding to the [i][j] entry"
-    "is performed as below:"
-    "```"
-    "  output[indices[i][j]][j] = updates[i][j] if axis = 0, "
-    "  output[i][indices[i][j]] = updates[i][j] if axis = 1,"
-    "```"
-    ""
-    "This operator is the inverse of GatherElements. It is similar to Torch's Scatter operation."
-    ""
-    "Example 1:"
-    "```"
-    "  data = ["
-    "      [0.0, 0.0, 0.0],"
-    "      [0.0, 0.0, 0.0],"
-    "      [0.0, 0.0, 0.0],"
-    "  ]"
-    "  indices = ["
-    "      [1, 0, 2],"
-    "      [0, 2, 1],"
-    "  ]"
-    "  updates = ["
-    "      [1.0, 1.1, 1.2],"
-    "      [2.0, 2.1, 2.2],"
-    "  ]"
-    "  output = ["
-    "      [2.0, 1.1, 0.0]"
-    "      [1.0, 0.0, 2.2]"
-    "      [0.0, 2.1, 1.2]"
-    "  ]"
-    "```"
-    "Example 2:"
-    "```"
-    "  data = [[1.0, 2.0, 3.0, 4.0, 5.0]]"
-    "  indices = [[1, 3]]"
-    "  updates = [[1.1, 2.1]]"
-    "  axis = 1"
-    "  output = [[1.0, 1.1, 3.0, 2.1, 5.0]]"
-    "```"
+  "ScatterElements takes three inputs `data`, `updates`, and `indices` of the same"
+  "rank r >= 1 and an optional attribute axis that identifies an axis of `data`"
+  "(by default, the outer-most axis, that is axis 0). The output of the operation"
+  "is produced by creating a copy of the input `data`, and then updating its value"
+  "to values specified by `updates` at specific index positions specified by"
+  "`indices`. Its output shape is the same as the shape of `data`."
+  ""
+  "For each entry in `updates`, the target index in `data` is obtained by combining"
+  "the corresponding entry in `indices` with the index of the entry itself: the"
+  "index-value for dimension = axis is obtained from the value of the corresponding"
+  "entry in `indices` and the index-value for dimension != axis is obtained from the"
+  "index of the entry itself."
+  ""
+  "For instance, in a 2-D tensor case, the update corresponding to the [i][j] entry"
+  "is performed as below:"
+  "```"
+  "  output[indices[i][j]][j] = updates[i][j] if axis = 0, "
+  "  output[i][indices[i][j]] = updates[i][j] if axis = 1,"
+  "```"
+  ""
+  "This operator is the inverse of GatherElements. It is similar to Torch's Scatter operation."
+  ""
+  "Example 1:"
+  "```"
+  "  data = ["
+  "      [0.0, 0.0, 0.0],"
+  "      [0.0, 0.0, 0.0],"
+  "      [0.0, 0.0, 0.0],"
+  "  ]"
+  "  indices = ["
+  "      [1, 0, 2],"
+  "      [0, 2, 1],"
+  "  ]"
+  "  updates = ["
+  "      [1.0, 1.1, 1.2],"
+  "      [2.0, 2.1, 2.2],"
+  "  ]"
+  "  output = ["
+  "      [2.0, 1.1, 0.0]"
+  "      [1.0, 0.0, 2.2]"
+  "      [0.0, 2.1, 1.2]"
+  "  ]"
+  "```"
+  "Example 2:"
+  "```"
+  "  data = [[1.0, 2.0, 3.0, 4.0, 5.0]]"
+  "  indices = [[1, 3]]"
+  "  updates = [[1.1, 2.1]]"
+  "  axis = 1"
+  "  output = [[1.0, 1.1, 3.0, 2.1, 5.0]]"
+  "```"
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$data,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$indices,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$updates,
-           DefaultValuedAttr<I64Attr, "0">:$axis);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$indices,
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$updates,
+    DefaultValuedAttr<I64Attr, "0">:$axis);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXScatterNDOp:ONNX_Op<"ScatterND", 
-    [NoSideEffect]> {
+def ONNXScatterNDOp:ONNX_Op<"ScatterND",
+  [NoSideEffect]> {
   let summary = "ONNX ScatterND operation";
   let description = [{
-    "ScatterND takes three inputs `data` tensor of rank r >= 1, `indices` tensor of rank q >= 1,"
-    "and `updates` tensor of rank q + r - indices.shape[-1] - 1. The output of the operation"
-    "is produced by creating a copy of the input `data`, and then updating its value to values"
-    "specified by `updates` at specific index positions specified by `indices`. Its output shape"
-    "is the same as the shape of `data`. Note that `indices` should not have duplicate entries."
-    "That is, two or more `updates` for the same index-location is not supported."
-    ""
-    "`indices` is an integer tensor. Let k denote indices.shape[-1], the last dimension in the shape of `indices`."
-    " `indices` is treated as a (q-1)-dimensional tensor of k-tuples, where each k-tuple is a partial-index into `data`."
-    "Hence, k can be a value at most the rank of `data`. When k equals rank(data), each update entry specifies an"
-    "update to a single element of the tensor. When k is less than rank(data) each update entry specifies an"
-    "update to a slice of the tensor."
-    ""
-    "`updates` is treated as a (q-1)-dimensional tensor of replacement-slice-values. Thus, the"
-    "first (q-1) dimensions of updates.shape must match the first (q-1) dimensions of indices.shape."
-    "The remaining dimensions of `updates` correspond to the dimensions of the"
-    "replacement-slice-values. Each replacement-slice-value is a (r-k) dimensional tensor,"
-    "corresponding to the trailing (r-k) dimensions of `data`.  Thus, the shape of `updates`"
-    "must equal indices.shape[0:q-1] ++ data.shape[k:r-1], where ++ denotes the concatenation"
-    "of shapes."
-    ""
-    "The `output` is calculated via the following equation:"
-    ""
-    "    output = np.copy(data)"
-    "    update_indices = indices.shape[:-1]"
-    "    for idx in np.ndindex(update_indices):"
-    "        output[indices[idx]] = updates[idx]"
-    ""
-    "The order of iteration in the above loop is not specified."
-    "In particular, indices should not have duplicate entries: that is, if idx1 != idx2, then indices[idx1] != indices[idx2]."
-    "This ensures that the output value does not depend on the iteration order."
-    ""
-    "This operator is the inverse of GatherND."
-    ""
-    "Example 1:"
-    "```"
-    "  data    = [1, 2, 3, 4, 5, 6, 7, 8]"
-    "  indices = [[4], [3], [1], [7]]"
-    "  updates = [9, 10, 11, 12]"
-    "  output  = [1, 11, 3, 10, 9, 6, 7, 12]"
-    "```"
-    ""
-    "Example 2:"
-    "```"
-    "  data    = [[[1, 2, 3, 4], [5, 6, 7, 8], [8, 7, 6, 5], [4, 3, 2, 1]],"
-    "             [[1, 2, 3, 4], [5, 6, 7, 8], [8, 7, 6, 5], [4, 3, 2, 1]],"
-    "             [[8, 7, 6, 5], [4, 3, 2, 1], [1, 2, 3, 4], [5, 6, 7, 8]],"
-    "             [[8, 7, 6, 5], [4, 3, 2, 1], [1, 2, 3, 4], [5, 6, 7, 8]]]"
-    "  indices = [[0], [2]]"
-    "  updates = [[[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]],"
-    "             [[1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 3, 3], [4, 4, 4, 4]]]"
-    "  output  = [[[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]],"
-    "             [[1, 2, 3, 4], [5, 6, 7, 8], [8, 7, 6, 5], [4, 3, 2, 1]],"
-    "             [[1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 3, 3], [4, 4, 4, 4]],"
-    "             [[8, 7, 6, 5], [4, 3, 2, 1], [1, 2, 3, 4], [5, 6, 7, 8]]]"
-    "```"
+  "ScatterND takes three inputs `data` tensor of rank r >= 1, `indices` tensor of rank q >= 1,"
+  "and `updates` tensor of rank q + r - indices.shape[-1] - 1. The output of the operation"
+  "is produced by creating a copy of the input `data`, and then updating its value to values"
+  "specified by `updates` at specific index positions specified by `indices`. Its output shape"
+  "is the same as the shape of `data`. Note that `indices` should not have duplicate entries."
+  "That is, two or more `updates` for the same index-location is not supported."
+  ""
+  "`indices` is an integer tensor. Let k denote indices.shape[-1], the last dimension in the shape of `indices`."
+  " `indices` is treated as a (q-1)-dimensional tensor of k-tuples, where each k-tuple is a partial-index into `data`."
+  "Hence, k can be a value at most the rank of `data`. When k equals rank(data), each update entry specifies an"
+  "update to a single element of the tensor. When k is less than rank(data) each update entry specifies an"
+  "update to a slice of the tensor."
+  ""
+  "`updates` is treated as a (q-1)-dimensional tensor of replacement-slice-values. Thus, the"
+  "first (q-1) dimensions of updates.shape must match the first (q-1) dimensions of indices.shape."
+  "The remaining dimensions of `updates` correspond to the dimensions of the"
+  "replacement-slice-values. Each replacement-slice-value is a (r-k) dimensional tensor,"
+  "corresponding to the trailing (r-k) dimensions of `data`.  Thus, the shape of `updates`"
+  "must equal indices.shape[0:q-1] ++ data.shape[k:r-1], where ++ denotes the concatenation"
+  "of shapes."
+  ""
+  "The `output` is calculated via the following equation:"
+  ""
+  "    output = np.copy(data)"
+  "    update_indices = indices.shape[:-1]"
+  "    for idx in np.ndindex(update_indices):"
+  "        output[indices[idx]] = updates[idx]"
+  ""
+  "The order of iteration in the above loop is not specified."
+  "In particular, indices should not have duplicate entries: that is, if idx1 != idx2, then indices[idx1] != indices[idx2]."
+  "This ensures that the output value does not depend on the iteration order."
+  ""
+  "This operator is the inverse of GatherND."
+  ""
+  "Example 1:"
+  "```"
+  "  data    = [1, 2, 3, 4, 5, 6, 7, 8]"
+  "  indices = [[4], [3], [1], [7]]"
+  "  updates = [9, 10, 11, 12]"
+  "  output  = [1, 11, 3, 10, 9, 6, 7, 12]"
+  "```"
+  ""
+  "Example 2:"
+  "```"
+  "  data    = [[[1, 2, 3, 4], [5, 6, 7, 8], [8, 7, 6, 5], [4, 3, 2, 1]],"
+  "             [[1, 2, 3, 4], [5, 6, 7, 8], [8, 7, 6, 5], [4, 3, 2, 1]],"
+  "             [[8, 7, 6, 5], [4, 3, 2, 1], [1, 2, 3, 4], [5, 6, 7, 8]],"
+  "             [[8, 7, 6, 5], [4, 3, 2, 1], [1, 2, 3, 4], [5, 6, 7, 8]]]"
+  "  indices = [[0], [2]]"
+  "  updates = [[[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]],"
+  "             [[1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 3, 3], [4, 4, 4, 4]]]"
+  "  output  = [[[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]],"
+  "             [[1, 2, 3, 4], [5, 6, 7, 8], [8, 7, 6, 5], [4, 3, 2, 1]],"
+  "             [[1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 3, 3], [4, 4, 4, 4]],"
+  "             [[8, 7, 6, 5], [4, 3, 2, 1], [1, 2, 3, 4], [5, 6, 7, 8]]]"
+  "```"
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$data,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$indices,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$updates);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$indices,
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$updates);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXSeluOp:ONNX_Op<"Selu", 
-    [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+def ONNXSeluOp:ONNX_Op<"Selu",
+  [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
   let summary = "ONNX Selu operation";
   let description = [{
-    "Selu takes one input data (Tensor<T>) and produces one output data"
-    "(Tensor<T>) where the scaled exponential linear unit function,"
-    "`y = gamma * (alpha * e^x - alpha) for x <= 0`, `y = gamma * x for x > 0`,"
-    "is applied to the tensor elementwise."
+  "Selu takes one input data (Tensor<T>) and produces one output data"
+  "(Tensor<T>) where the scaled exponential linear unit function,"
+  "`y = gamma * (alpha * e^x - alpha) for x <= 0`, `y = gamma * x for x > 0`,"
+  "is applied to the tensor elementwise."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$X,
-           DefaultValuedAttr<F32Attr, "1.67326">:$alpha,
-           DefaultValuedAttr<F32Attr, "1.0507">:$gamma);
+    DefaultValuedAttr<F32Attr, "1.67326">:$alpha,
+    DefaultValuedAttr<F32Attr, "1.0507">:$gamma);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y);
 }
 
-def ONNXSequenceAtOp:ONNX_Op<"SequenceAt", 
-    [NoSideEffect]> {
+def ONNXSequenceAtOp:ONNX_Op<"SequenceAt",
+  [NoSideEffect]> {
   let summary = "ONNX SequenceAt operation";
   let description = [{
-    "Outputs a tensor copy from the tensor at 'position' in 'input_sequence'."
-    "Accepted range for 'position' is in `[-n, n - 1]`, where `n` is the number of tensors in 'input_sequence'."
-    "Negative value means counting positions from the back."
+  "Outputs a tensor copy from the tensor at 'position' in 'input_sequence'."
+  "Accepted range for 'position' is in `[-n, n - 1]`, where `n` is the number of tensors in 'input_sequence'."
+  "Negative value means counting positions from the back."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$input_sequence,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$position);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$position);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$tensor);
 }
 
-def ONNXSequenceConstructOp:ONNX_Op<"SequenceConstruct", 
-    [NoSideEffect]> {
+def ONNXSequenceConstructOp:ONNX_Op<"SequenceConstruct",
+  [NoSideEffect]> {
   let summary = "ONNX SequenceConstruct operation";
   let description = [{
-    "Construct a tensor sequence containing 'inputs' tensors."
-    "All tensors in 'inputs' must have the same data type."
+  "Construct a tensor sequence containing 'inputs' tensors."
+  "All tensors in 'inputs' must have the same data type."
   }];
   let arguments = (ins Variadic<AnyTypeOf<[AnyMemRef, AnyTensor]>>:$inputs);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output_sequence);
 }
 
-def ONNXSequenceEmptyOp:ONNX_Op<"SequenceEmpty", 
-    [NoSideEffect]> {
+def ONNXSequenceEmptyOp:ONNX_Op<"SequenceEmpty",
+  [NoSideEffect]> {
   let summary = "ONNX SequenceEmpty operation";
   let description = [{
-    "Construct an empty tensor sequence, with given data type."
+  "Construct an empty tensor sequence, with given data type."
   }];
   let arguments = (ins OptionalAttr<I64Attr>:$dtype);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXSequenceEraseOp:ONNX_Op<"SequenceErase", 
-    [NoSideEffect]> {
+def ONNXSequenceEraseOp:ONNX_Op<"SequenceErase",
+  [NoSideEffect]> {
   let summary = "ONNX SequenceErase operation";
   let description = [{
-    "Outputs a tensor sequence that removes the tensor at 'position' from 'input_sequence'."
-    "Accepted range for 'position' is in `[-n, n - 1]`, where `n` is the number of tensors in 'input_sequence'."
-    "Negative value means counting positions from the back."
-    "'position' is optional, by default it erases the last tensor from 'input_sequence'."
+  "Outputs a tensor sequence that removes the tensor at 'position' from 'input_sequence'."
+  "Accepted range for 'position' is in `[-n, n - 1]`, where `n` is the number of tensors in 'input_sequence'."
+  "Negative value means counting positions from the back."
+  "'position' is optional, by default it erases the last tensor from 'input_sequence'."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$input_sequence,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$position);
+    AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$position);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output_sequence);
 }
 
-def ONNXSequenceInsertOp:ONNX_Op<"SequenceInsert", 
-    [NoSideEffect]> {
+def ONNXSequenceInsertOp:ONNX_Op<"SequenceInsert",
+  [NoSideEffect]> {
   let summary = "ONNX SequenceInsert operation";
   let description = [{
-    "Outputs a tensor sequence that inserts 'tensor' into 'input_sequence' at 'position'."
-    "'tensor' must have the same data type as 'input_sequence'."
-    "Accepted range for 'position' is in `[-n, n]`, where `n` is the number of tensors in 'input_sequence'."
-    "Negative value means counting positions from the back."
-    "'position' is optional, by default it inserts 'tensor' to the back of 'input_sequence'."
+  "Outputs a tensor sequence that inserts 'tensor' into 'input_sequence' at 'position'."
+  "'tensor' must have the same data type as 'input_sequence'."
+  "Accepted range for 'position' is in `[-n, n]`, where `n` is the number of tensors in 'input_sequence'."
+  "Negative value means counting positions from the back."
+  "'position' is optional, by default it inserts 'tensor' to the back of 'input_sequence'."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$input_sequence,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$tensor,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$position);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$tensor,
+    AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$position);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output_sequence);
 }
 
-def ONNXSequenceLengthOp:ONNX_Op<"SequenceLength", 
-    [NoSideEffect]> {
+def ONNXSequenceLengthOp:ONNX_Op<"SequenceLength",
+  [NoSideEffect]> {
   let summary = "ONNX SequenceLength operation";
   let description = [{
-    "Produces a scalar(tensor of empty shape) containing the number of tensors in 'input_sequence'."
+  "Produces a scalar(tensor of empty shape) containing the number of tensors in 'input_sequence'."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$input_sequence);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$length);
 }
 
-def ONNXShapeOp:ONNX_Op<"Shape", 
-    [NoSideEffect]> {
+def ONNXShapeOp:ONNX_Op<"Shape",
+  [NoSideEffect]> {
   let summary = "ONNX Shape operation";
   let description = [{
-    "Takes a tensor as input and outputs an 1D int64 tensor containing the shape of the input tensor."
+  "Takes a tensor as input and outputs an 1D int64 tensor containing the shape of the input tensor."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$data);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$shape);
 }
 
-def ONNXShrinkOp:ONNX_Op<"Shrink", 
-    [NoSideEffect]> {
+def ONNXShrinkOp:ONNX_Op<"Shrink",
+  [NoSideEffect]> {
   let summary = "ONNX Shrink operation";
   let description = [{
-    "Shrink takes one input data (Tensor<numeric>) and produces one Tensor output,"
-    "having same datatype and shape with input. It has two attributes, lambd and"
-    "bias. The formula of this operator is: If x < -lambd, y = x + bias;"
-    "If x > lambd, y = x - bias; Otherwise, y = 0."
+  "Shrink takes one input data (Tensor<numeric>) and produces one Tensor output,"
+  "having same datatype and shape with input. It has two attributes, lambd and"
+  "bias. The formula of this operator is: If x < -lambd, y = x + bias;"
+  "If x > lambd, y = x - bias; Otherwise, y = 0."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$input,
-           DefaultValuedAttr<F32Attr, "0.0">:$bias,
-           DefaultValuedAttr<F32Attr, "0.5">:$lambd);
+    DefaultValuedAttr<F32Attr, "0.0">:$bias,
+    DefaultValuedAttr<F32Attr, "0.5">:$lambd);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXSigmoidOp:ONNX_Op<"Sigmoid", 
-    [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+def ONNXSigmoidOp:ONNX_Op<"Sigmoid",
+  [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
   let summary = "ONNX Sigmoid operation";
   let description = [{
-    "Sigmoid takes one input data (Tensor<T>) and produces one output data"
-    "(Tensor<T>) where the sigmoid function, y = 1 / (1 + exp(-x)), is applied to the"
-    "tensor elementwise."
+  "Sigmoid takes one input data (Tensor<T>) and produces one output data"
+  "(Tensor<T>) where the sigmoid function, y = 1 / (1 + exp(-x)), is applied to the"
+  "tensor elementwise."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$X);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y);
 }
 
-def ONNXSignOp:ONNX_Op<"Sign", 
-    [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+def ONNXSignOp:ONNX_Op<"Sign",
+  [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
   let summary = "ONNX Sign operation";
   let description = [{
-    "Calculate the sign of the given input tensor element-wise."
-    "If input > 0, output 1. if input < 0, output -1. if input == 0, output 0."
+  "Calculate the sign of the given input tensor element-wise."
+  "If input > 0, output 1. if input < 0, output -1. if input == 0, output 0."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$input);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXSinOp:ONNX_Op<"Sin", 
-    [NoSideEffect]> {
+def ONNXSinOp:ONNX_Op<"Sin",
+  [NoSideEffect]> {
   let summary = "ONNX Sin operation";
   let description = [{
-    "Calculates the sine of the given input tensor, element-wise."
+  "Calculates the sine of the given input tensor, element-wise."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$input);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXSinhOp:ONNX_Op<"Sinh", 
-    [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+def ONNXSinhOp:ONNX_Op<"Sinh",
+  [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
   let summary = "ONNX Sinh operation";
   let description = [{
-    "Calculates the hyperbolic sine of the given input tensor element-wise."
+  "Calculates the hyperbolic sine of the given input tensor element-wise."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$input);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXSizeOp:ONNX_Op<"Size", 
-    [NoSideEffect]> {
+def ONNXSizeOp:ONNX_Op<"Size",
+  [NoSideEffect]> {
   let summary = "ONNX Size operation";
   let description = [{
-    "Takes a tensor as input and outputs a int64 scalar that equals to the total number of elements of the input tensor."
+  "Takes a tensor as input and outputs a int64 scalar that equals to the total number of elements of the input tensor."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$data);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$size);
 }
 
-def ONNXSliceOp:ONNX_Op<"Slice", 
-    [NoSideEffect]> {
+def ONNXSliceOp:ONNX_Op<"Slice",
+  [NoSideEffect]> {
   let summary = "ONNX Slice operation";
   let description = [{
-    "Produces a slice of the input tensor along multiple axes. Similar to numpy:"
-    "https://docs.scipy.org/doc/numpy/reference/arrays.indexing.html"
-    "Slices uses `starts`, `ends`, `axes` and `steps` inputs to specify the start and end"
-    "dimension and step for each axis in the list of axes, it uses this information to"
-    "slice the input `data` tensor. If a negative value is passed for any of the"
-    "start or end indices, it represent number of elements before the end of that"
-    "dimension. If the value passed to start or end is larger than the `n` (the"
-    "number of elements in this dimension), it represents `n`. For slicing to the"
-    "end of a dimension with unknown size, it is recommended to pass in `INT_MAX`."
-    "If a negative value is passed for step, it represents slicing backward."
-    "If `axes` are omitted, they are set to `[0, ..., ndim-1]`."
-    "If `steps` are omitted, they are set to `[1, ..., 1]` of length `len(starts)`"
-    "Example 1:"
-    "  data = ["
-    "      [1, 2, 3, 4],"
-    "      [5, 6, 7, 8],"
-    "  ]"
-    "  axes = [0, 1]"
-    "  starts = [1, 0]"
-    "  ends = [2, 3]"
-    "  steps = [1, 2]"
-    "  result = ["
-    "      [5, 7],"
-    "  ]"
-    "Example 2:"
-    "  data = ["
-    "      [1, 2, 3, 4],"
-    "      [5, 6, 7, 8],"
-    "  ]"
-    "  starts = [0, 1]"
-    "  ends = [-1, 1000]"
-    "  result = ["
-    "      [2, 3, 4],"
-    "  ]"
+  "Produces a slice of the input tensor along multiple axes. Similar to numpy:"
+  "https://docs.scipy.org/doc/numpy/reference/arrays.indexing.html"
+  "Slices uses `starts`, `ends`, `axes` and `steps` inputs to specify the start and end"
+  "dimension and step for each axis in the list of axes, it uses this information to"
+  "slice the input `data` tensor. If a negative value is passed for any of the"
+  "start or end indices, it represent number of elements before the end of that"
+  "dimension. If the value passed to start or end is larger than the `n` (the"
+  "number of elements in this dimension), it represents `n`. For slicing to the"
+  "end of a dimension with unknown size, it is recommended to pass in `INT_MAX`."
+  "If a negative value is passed for step, it represents slicing backward."
+  "If `axes` are omitted, they are set to `[0, ..., ndim-1]`."
+  "If `steps` are omitted, they are set to `[1, ..., 1]` of length `len(starts)`"
+  "Example 1:"
+  "  data = ["
+  "      [1, 2, 3, 4],"
+  "      [5, 6, 7, 8],"
+  "  ]"
+  "  axes = [0, 1]"
+  "  starts = [1, 0]"
+  "  ends = [2, 3]"
+  "  steps = [1, 2]"
+  "  result = ["
+  "      [5, 7],"
+  "  ]"
+  "Example 2:"
+  "  data = ["
+  "      [1, 2, 3, 4],"
+  "      [5, 6, 7, 8],"
+  "  ]"
+  "  starts = [0, 1]"
+  "  ends = [-1, 1000]"
+  "  result = ["
+  "      [2, 3, 4],"
+  "  ]"
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$data,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$starts,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$ends,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$axes,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$steps);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$starts,
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$ends,
+    AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$axes,
+    AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$steps);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXSoftmaxOp:ONNX_Op<"Softmax", 
-    [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+def ONNXSoftmaxOp:ONNX_Op<"Softmax",
+  [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
   let summary = "ONNX Softmax operation";
   let description = [{
-    "The operator computes the softmax (normalized exponential) values for each layer in the batch"
-    " of the given input."
-    ""
-    "The input does not need to explicitly be a 2D vector; rather, it will be"
-    "coerced into one. For an arbitrary n-dimensional tensor"
-    "input \in [a_0, a_1, ..., a_{k-1}, a_k, ..., a_{n-1\}\] and k is"
-    "the axis provided, then input will be coerced into a 2-dimensional tensor with"
-    "dimensions [a_0 * ... * a_{k-1}, a_k * ... * a_{n-1\}\]. For the default"
-    "case where axis=1, this means the input tensor will be coerced into a 2D tensor"
-    "of dimensions [a_0, a_1 * ... * a_{n-1\}\], where a_0 is often the batch size."
-    "In this situation, we must have a_0 = N and a_1 * ... * a_{n-1} = D."
-    "Each of these dimensions must be matched correctly, or else the operator"
-    "will throw errors. The output tensor has the same shape"
-    "and contains the softmax values of the corresponding input."
+  "The operator computes the softmax (normalized exponential) values for each layer in the batch"
+  " of the given input."
+  ""
+  "The input does not need to explicitly be a 2D vector; rather, it will be"
+  "coerced into one. For an arbitrary n-dimensional tensor"
+  "input \in [a_0, a_1, ..., a_{k-1}, a_k, ..., a_{n-1\}\] and k is"
+  "the axis provided, then input will be coerced into a 2-dimensional tensor with"
+  "dimensions [a_0 * ... * a_{k-1}, a_k * ... * a_{n-1\}\]. For the default"
+  "case where axis=1, this means the input tensor will be coerced into a 2D tensor"
+  "of dimensions [a_0, a_1 * ... * a_{n-1\}\], where a_0 is often the batch size."
+  "In this situation, we must have a_0 = N and a_1 * ... * a_{n-1} = D."
+  "Each of these dimensions must be matched correctly, or else the operator"
+  "will throw errors. The output tensor has the same shape"
+  "and contains the softmax values of the corresponding input."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$input,
-           DefaultValuedAttr<I64Attr, "1">:$axis);
+    DefaultValuedAttr<I64Attr, "1">:$axis);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXSoftplusOp:ONNX_Op<"Softplus", 
-    [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+def ONNXSoftplusOp:ONNX_Op<"Softplus",
+  [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
   let summary = "ONNX Softplus operation";
   let description = [{
-    "Softplus takes one input data (Tensor<T>) and produces one output data"
-    "(Tensor<T>) where the softplus function, y = ln(exp(x) + 1), is applied to"
-    "the tensor elementwise."
+  "Softplus takes one input data (Tensor<T>) and produces one output data"
+  "(Tensor<T>) where the softplus function, y = ln(exp(x) + 1), is applied to"
+  "the tensor elementwise."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$X);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y);
 }
 
-def ONNXSoftsignOp:ONNX_Op<"Softsign", 
-    [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+def ONNXSoftsignOp:ONNX_Op<"Softsign",
+  [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
   let summary = "ONNX Softsign operation";
   let description = [{
-    "Calculates the softsign (x/(1+|x|)) of the given input tensor element-wise."
+  "Calculates the softsign (x/(1+|x|)) of the given input tensor element-wise."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$input);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXSpaceToDepthOp:ONNX_Op<"SpaceToDepth", 
-    [NoSideEffect]> {
+def ONNXSpaceToDepthOp:ONNX_Op<"SpaceToDepth",
+  [NoSideEffect]> {
   let summary = "ONNX SpaceToDepth operation";
   let description = [{
-    "SpaceToDepth rearranges blocks of spatial data into depth. More specifically,"
-    "this op outputs a copy of the input tensor where values from the height and width dimensions"
-    "are moved to the depth dimension."
+  "SpaceToDepth rearranges blocks of spatial data into depth. More specifically,"
+  "this op outputs a copy of the input tensor where values from the height and width dimensions"
+  "are moved to the depth dimension."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$input,
-           I64Attr:$blocksize);
+    I64Attr:$blocksize);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXSplitOp:ONNX_Op<"Split", 
-    [NoSideEffect]> {
+def ONNXSplitOp:ONNX_Op<"Split",
+  [NoSideEffect]> {
   let summary = "ONNX Split operation";
   let description = [{
-    "Split a tensor into a list of tensors, along the specified"
-    "'axis'. Lengths of the parts can be specified using argument 'split'."
-    "Otherwise, the tensor is split to equal sized parts."
+  "Split a tensor into a list of tensors, along the specified"
+  "'axis'. Lengths of the parts can be specified using argument 'split'."
+  "Otherwise, the tensor is split to equal sized parts."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$input,
-           DefaultValuedAttr<I64Attr, "0">:$axis,
-           OptionalAttr<I64ArrayAttr>:$split);
-  let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$outputs);
+    DefaultValuedAttr<I64Attr, "0">:$axis,
+    OptionalAttr<I64ArrayAttr>:$split);
+  let results = (outs Variadic<AnyTypeOf<[AnyMemRef, AnyTensor]>>:$outputs);
 }
 
-def ONNXSplitToSequenceOp:ONNX_Op<"SplitToSequence", 
-    [NoSideEffect]> {
+def ONNXSplitToSequenceOp:ONNX_Op<"SplitToSequence",
+  [NoSideEffect]> {
   let summary = "ONNX SplitToSequence operation";
   let description = [{
-    "Split a tensor into a sequence of tensors, along the specified"
-    "'axis'. Lengths of the parts can be specified using argument 'split'."
-    "'split' must contain only positive numbers."
-    "'split' is either a scalar (tensor of empty shape), or a 1-D tensor."
-    "If 'split' is a scalar, then 'input' will be split into equally sized chunks(if possible)."
-    "Last chunk will be smaller if the 'input' size along the given axis 'axis' is not divisible"
-    "by 'split'."
-    "Otherwise, the tensor is split into 'size(split)' chunks, with lengths of the parts on 'axis'"
-    "specified in 'split'. In this scenario, the sum of entries in 'split' must be equal to the"
-    "dimension size of input tensor on 'axis'."
+  "Split a tensor into a sequence of tensors, along the specified"
+  "'axis'. Lengths of the parts can be specified using argument 'split'."
+  "'split' must contain only positive numbers."
+  "'split' is either a scalar (tensor of empty shape), or a 1-D tensor."
+  "If 'split' is a scalar, then 'input' will be split into equally sized chunks(if possible)."
+  "Last chunk will be smaller if the 'input' size along the given axis 'axis' is not divisible"
+  "by 'split'."
+  "Otherwise, the tensor is split into 'size(split)' chunks, with lengths of the parts on 'axis'"
+  "specified in 'split'. In this scenario, the sum of entries in 'split' must be equal to the"
+  "dimension size of input tensor on 'axis'."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$input,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$split,
-           DefaultValuedAttr<I64Attr, "0">:$axis,
-           DefaultValuedAttr<I64Attr, "1">:$keepdims);
+    AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$split,
+    DefaultValuedAttr<I64Attr, "0">:$axis,
+    DefaultValuedAttr<I64Attr, "1">:$keepdims);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output_sequence);
 }
 
-def ONNXSqrtOp:ONNX_Op<"Sqrt", 
-    [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+def ONNXSqrtOp:ONNX_Op<"Sqrt",
+  [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
   let summary = "ONNX Sqrt operation";
   let description = [{
-    "Square root takes one input data (Tensor<T>) and produces one output data"
-    "(Tensor<T>) where the square root is, y = x^0.5, is applied to"
-    "the tensor elementwise. If x is negative, then it will return NaN."
+  "Square root takes one input data (Tensor<T>) and produces one output data"
+  "(Tensor<T>) where the square root is, y = x^0.5, is applied to"
+  "the tensor elementwise. If x is negative, then it will return NaN."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$X);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y);
 }
 
-def ONNXSqueezeOp:ONNX_Op<"Squeeze", 
-    [NoSideEffect]> {
+def ONNXSqueezeOp:ONNX_Op<"Squeeze",
+  [NoSideEffect]> {
   let summary = "ONNX Squeeze operation";
   let description = [{
-    "Remove single-dimensional entries from the shape of a tensor."
-    "Takes a  parameter `axes` with a list of axes to squeeze."
-    "If `axes` is not provided, all the single dimensions will be removed from"
-    "the shape. If an axis is selected with shape entry not equal to one, an error is raised."
+  "Remove single-dimensional entries from the shape of a tensor."
+  "Takes a  parameter `axes` with a list of axes to squeeze."
+  "If `axes` is not provided, all the single dimensions will be removed from"
+  "the shape. If an axis is selected with shape entry not equal to one, an error is raised."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$data,
-           OptionalAttr<I64ArrayAttr>:$axes);
+    OptionalAttr<I64ArrayAttr>:$axes);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$squeezed);
 }
 
-def ONNXStringNormalizerOp:ONNX_Op<"StringNormalizer", 
-    [NoSideEffect]> {
+def ONNXStringNormalizerOp:ONNX_Op<"StringNormalizer",
+  [NoSideEffect]> {
   let summary = "ONNX StringNormalizer operation";
   let description = [{
-    "StringNormalization performs string operations for basic cleaning."
-    "This operator has only one input (denoted by X) and only one output"
-    "(denoted by Y). This operator first examines the elements in the X,"
-    "and removes elements specified in "stopwords" attribute."
-    "After removing stop words, the intermediate result can be further lowercased,"
-    "uppercased, or just returned depending the "case_change_action" attribute."
-    "This operator only accepts [C]- and [1, C]-tensor."
-    "If all elements in X are dropped, the output will be the empty value of string tensor with shape [1]"
-    "if input shape is [C] and shape [1, 1] if input shape is [1, C]."
+  "StringNormalization performs string operations for basic cleaning."
+  "This operator has only one input (denoted by X) and only one output"
+  "(denoted by Y). This operator first examines the elements in the X,"
+  "and removes elements specified in \"stopwords\" attribute."
+  "After removing stop words, the intermediate result can be further lowercased,"
+  "uppercased, or just returned depending the \"case_change_action\" attribute."
+  "This operator only accepts [C]- and [1, C]-tensor."
+  "If all elements in X are dropped, the output will be the empty value of string tensor with shape [1]"
+  "if input shape is [C] and shape [1, 1] if input shape is [1, C]."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$X,
-           DefaultValuedAttr<StrAttr, "NONE">:$case_change_action,
-           DefaultValuedAttr<I64Attr, "0">:$is_case_sensitive,
-           OptionalAttr<StrAttr>:$locale,
-           OptionalAttr<StrArrayAttr>:$stopwords);
+    DefaultValuedAttr<StrAttr, "NONE">:$case_change_action,
+    DefaultValuedAttr<I64Attr, "0">:$is_case_sensitive,
+    OptionalAttr<StrAttr>:$locale,
+    OptionalAttr<StrArrayAttr>:$stopwords);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y);
 }
 
-def ONNXSubOp:ONNX_Op<"Sub", 
-    [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+def ONNXSubOp:ONNX_Op<"Sub",
+  [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
   let summary = "ONNX Sub operation";
   let description = [{
-    "Performs element-wise binary subtraction (with Numpy-style broadcasting support)."
-    ""
-    "This operator supports **multidirectional (i.e., Numpy-style) broadcasting**; for more details please check [the doc](Broadcasting.md)."
+  "Performs element-wise binary subtraction (with Numpy-style broadcasting support)."
+  ""
+  "This operator supports **multidirectional (i.e., Numpy-style) broadcasting**; for more details please check [the doc](Broadcasting.md)."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$A,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$B);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$B);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$C);
 }
 
-def ONNXSumOp:ONNX_Op<"Sum", 
-    [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+def ONNXSumOp:ONNX_Op<"Sum",
+  [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
   let summary = "ONNX Sum operation";
   let description = [{
-    "Element-wise sum of each of the input tensors (with Numpy-style broadcasting support)."
-    "All inputs and outputs must have the same data type."
-    "This operator supports **multidirectional (i.e., Numpy-style) broadcasting**; for more details please check [the doc](Broadcasting.md)."
+  "Element-wise sum of each of the input tensors (with Numpy-style broadcasting support)."
+  "All inputs and outputs must have the same data type."
+  "This operator supports **multidirectional (i.e., Numpy-style) broadcasting**; for more details please check [the doc](Broadcasting.md)."
   }];
   let arguments = (ins Variadic<AnyTypeOf<[AnyMemRef, AnyTensor]>>:$data_0);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$sum);
 }
 
-def ONNXTanOp:ONNX_Op<"Tan", 
-    [NoSideEffect]> {
+def ONNXTanOp:ONNX_Op<"Tan",
+  [NoSideEffect]> {
   let summary = "ONNX Tan operation";
   let description = [{
-    "Calculates the tangent of the given input tensor, element-wise."
+  "Calculates the tangent of the given input tensor, element-wise."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$input);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXTanhOp:ONNX_Op<"Tanh", 
-    [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+def ONNXTanhOp:ONNX_Op<"Tanh",
+  [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
   let summary = "ONNX Tanh operation";
   let description = [{
-    "Calculates the hyperbolic tangent of the given input tensor element-wise."
+  "Calculates the hyperbolic tangent of the given input tensor element-wise."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$input);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXTfIdfVectorizerOp:ONNX_Op<"TfIdfVectorizer", 
-    [NoSideEffect]> {
+def ONNXTfIdfVectorizerOp:ONNX_Op<"TfIdfVectorizer",
+  [NoSideEffect]> {
   let summary = "ONNX TfIdfVectorizer operation";
   let description = [{
-    "This transform extracts n-grams from the input sequence and save them as a vector. Input can"
-    "be either a 1-D or 2-D tensor. For 1-D input, output is the n-gram representation of that input."
-    "For 2-D input, the output is also a  2-D tensor whose i-th row is the n-gram representation of the i-th input row."
-    "More specifically, if input shape is [C], the corresponding output shape would be [max(ngram_indexes) + 1]."
-    "If input shape is [N, C], this operator produces a [N, max(ngram_indexes) + 1]-tensor."
-    ""
-    "In contrast to standard n-gram extraction, here, the indexes of extracting an n-gram from the original"
-    "sequence are not necessarily consecutive numbers. The discontinuity between indexes are controlled by the number of skips."
-    "If the number of skips is 2, we should skip two tokens when scanning through the original sequence."
-    "Let's consider an example. Assume that input sequence is [94, 17, 36, 12, 28] and the number of skips is 2."
-    "The associated 2-grams are [94, 12] and [17, 28] respectively indexed by [0, 3] and [1, 4]."
-    "If the number of skips becomes 0, the 2-grams generated are [94, 17], [17, 36], [36, 12], [12, 28]"
-    "indexed by [0, 1], [1, 2], [2, 3], [3, 4], respectively."
-    ""
-    "The output vector (denoted by Y) stores the count of each n-gram;"
-    "Y[ngram_indexes[i]] indicates the times that the i-th n-gram is found. The attribute ngram_indexes is used to determine the mapping"
-    "between index i and the corresponding n-gram's output coordinate. If pool_int64s is [94, 17, 17, 36], ngram_indexes is [1, 0],"
-    "ngram_counts=[0, 0], then the Y[0] (first element in Y) and Y[1] (second element in Y) are the counts of [17, 36] and [94, 17],"
-    "respectively. An n-gram which cannot be found in pool_strings/pool_int64s should be ignored and has no effect on the output."
-    "Note that we may consider all skips up to S when generating the n-grams."
-    ""
-    "The examples used above are true if mode is "TF". If mode is "IDF", all the counts larger than 1 would be truncated to 1 and"
-    "the i-th element in weights would be used to scale (by multiplication) the count of the i-th n-gram in pool. If mode is "TFIDF","
-    "this operator first computes the counts of all n-grams and then scale them by the associated values in the weights attribute."
-    ""
-    "Only one of pool_strings and pool_int64s can be set. If pool_int64s is set, the input should be an integer tensor."
-    "If pool_strings is set, the input must be a string tensor."
+  "This transform extracts n-grams from the input sequence and save them as a vector. Input can"
+  "be either a 1-D or 2-D tensor. For 1-D input, output is the n-gram representation of that input."
+  "For 2-D input, the output is also a  2-D tensor whose i-th row is the n-gram representation of the i-th input row."
+  "More specifically, if input shape is [C], the corresponding output shape would be [max(ngram_indexes) + 1]."
+  "If input shape is [N, C], this operator produces a [N, max(ngram_indexes) + 1]-tensor."
+  ""
+  "In contrast to standard n-gram extraction, here, the indexes of extracting an n-gram from the original"
+  "sequence are not necessarily consecutive numbers. The discontinuity between indexes are controlled by the number of skips."
+  "If the number of skips is 2, we should skip two tokens when scanning through the original sequence."
+  "Let's consider an example. Assume that input sequence is [94, 17, 36, 12, 28] and the number of skips is 2."
+  "The associated 2-grams are [94, 12] and [17, 28] respectively indexed by [0, 3] and [1, 4]."
+  "If the number of skips becomes 0, the 2-grams generated are [94, 17], [17, 36], [36, 12], [12, 28]"
+  "indexed by [0, 1], [1, 2], [2, 3], [3, 4], respectively."
+  ""
+  "The output vector (denoted by Y) stores the count of each n-gram;"
+  "Y[ngram_indexes[i]] indicates the times that the i-th n-gram is found. The attribute ngram_indexes is used to determine the mapping"
+  "between index i and the corresponding n-gram's output coordinate. If pool_int64s is [94, 17, 17, 36], ngram_indexes is [1, 0],"
+  "ngram_counts=[0, 0], then the Y[0] (first element in Y) and Y[1] (second element in Y) are the counts of [17, 36] and [94, 17],"
+  "respectively. An n-gram which cannot be found in pool_strings/pool_int64s should be ignored and has no effect on the output."
+  "Note that we may consider all skips up to S when generating the n-grams."
+  ""
+  "The examples used above are true if mode is \"TF\". If mode is \"IDF\", all the counts larger than 1 would be truncated to 1 and"
+  "the i-th element in weights would be used to scale (by multiplication) the count of the i-th n-gram in pool. If mode is \"TFIDF\","
+  "this operator first computes the counts of all n-grams and then scale them by the associated values in the weights attribute."
+  ""
+  "Only one of pool_strings and pool_int64s can be set. If pool_int64s is set, the input should be an integer tensor."
+  "If pool_strings is set, the input must be a string tensor."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$X,
-           I64Attr:$max_gram_length,
-           I64Attr:$max_skip_count,
-           I64Attr:$min_gram_length,
-           StrAttr:$mode,
-           I64ArrayAttr:$ngram_counts,
-           I64ArrayAttr:$ngram_indexes,
-           OptionalAttr<I64ArrayAttr>:$pool_int64s,
-           OptionalAttr<StrArrayAttr>:$pool_strings,
-           OptionalAttr<F32ArrayAttr>:$weights);
+    I64Attr:$max_gram_length,
+    I64Attr:$max_skip_count,
+    I64Attr:$min_gram_length,
+    StrAttr:$mode,
+    I64ArrayAttr:$ngram_counts,
+    I64ArrayAttr:$ngram_indexes,
+    OptionalAttr<I64ArrayAttr>:$pool_int64s,
+    OptionalAttr<StrArrayAttr>:$pool_strings,
+    OptionalAttr<F32ArrayAttr>:$weights);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y);
 }
 
-def ONNXThresholdedReluOp:ONNX_Op<"ThresholdedRelu", 
-    [NoSideEffect]> {
+def ONNXThresholdedReluOp:ONNX_Op<"ThresholdedRelu",
+  [NoSideEffect]> {
   let summary = "ONNX ThresholdedRelu operation";
   let description = [{
-    "ThresholdedRelu takes one input data (Tensor<T>) and produces one output data"
-    "(Tensor<T>) where the rectified linear function, y = x for x > alpha, y = 0 otherwise,"
-    "is applied to the tensor elementwise."
+  "ThresholdedRelu takes one input data (Tensor<T>) and produces one output data"
+  "(Tensor<T>) where the rectified linear function, y = x for x > alpha, y = 0 otherwise,"
+  "is applied to the tensor elementwise."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$X,
-           DefaultValuedAttr<F32Attr, "1.0">:$alpha);
+    DefaultValuedAttr<F32Attr, "1.0">:$alpha);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y);
 }
 
-def ONNXTileOp:ONNX_Op<"Tile", 
-    [NoSideEffect]> {
+def ONNXTileOp:ONNX_Op<"Tile",
+  [NoSideEffect]> {
   let summary = "ONNX Tile operation";
   let description = [{
-    "Constructs a tensor by tiling a given tensor."
-    "This is the same as function `tile` in Numpy, but no broadcast."
-    "For example A = [[1, 2], [3, 4]], B = [1, 2], tile(A, B) = [[1, 2, 1, 2], [3, 4, 3, 4]]"
+  "Constructs a tensor by tiling a given tensor."
+  "This is the same as function `tile` in Numpy, but no broadcast."
+  "For example A = [[1, 2], [3, 4]], B = [1, 2], tile(A, B) = [[1, 2, 1, 2], [3, 4, 3, 4]]"
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$input,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$repeats);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$repeats);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXTopKOp:ONNX_Op<"TopK", 
-    [NoSideEffect]> {
+def ONNXTopKOp:ONNX_Op<"TopK",
+  [NoSideEffect]> {
   let summary = "ONNX TopK operation";
   let description = [{
-    "Retrieve the top-K largest or smallest elements along a specified axis. Given an input tensor of"
-    "shape [a_1, a_2, ..., a_n, r] and integer argument k, return two outputs:"
-    "  -Value tensor of shape [a_1, a_2, ..., a_{axis-1}, k, a_{axis+1}, ... a_n]"
-    "    which contains the values of the top k elements along the specified axis"
-    "  -Index tensor of shape [a_1, a_2, ..., a_{axis-1}, k, a_{axis+1}, ... a_n] which"
-    "   contains the indices of the top k elements (original indices from the input"
-    "   tensor)."
-    ""
-    "If "largest" is 1 (the default value) then the k largest elements are returned."
-    "If "sorted" is 1 (the default value) then the resulting k elements will be sorted."
-    "If "sorted" is 0, order of returned 'Values' and 'Indices' are undefined."
-    ""
-    "Given two equivalent values, this operator uses the indices along the axis as"
-    " a tiebreaker. That is, the element with the lower index will appear first."
+  "Retrieve the top-K largest or smallest elements along a specified axis. Given an input tensor of"
+  "shape [a_1, a_2, ..., a_n, r] and integer argument k, return two outputs:"
+  "  -Value tensor of shape [a_1, a_2, ..., a_{axis-1}, k, a_{axis+1}, ... a_n]"
+  "    which contains the values of the top k elements along the specified axis"
+  "  -Index tensor of shape [a_1, a_2, ..., a_{axis-1}, k, a_{axis+1}, ... a_n] which"
+  "   contains the indices of the top k elements (original indices from the input"
+  "   tensor)."
+  ""
+  "If \"largest\" is 1 (the default value) then the k largest elements are returned."
+  "If \"sorted\" is 1 (the default value) then the resulting k elements will be sorted."
+  "If \"sorted\" is 0, order of returned 'Values' and 'Indices' are undefined."
+  ""
+  "Given two equivalent values, this operator uses the indices along the axis as"
+  " a tiebreaker. That is, the element with the lower index will appear first."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$X,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$K,
-           DefaultValuedAttr<I64Attr, "-1">:$axis,
-           DefaultValuedAttr<I64Attr, "1">:$largest,
-           DefaultValuedAttr<I64Attr, "1">:$sorted);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$K,
+    DefaultValuedAttr<I64Attr, "-1">:$axis,
+    DefaultValuedAttr<I64Attr, "1">:$largest,
+    DefaultValuedAttr<I64Attr, "1">:$sorted);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$Values,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$Indices);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$Indices);
 }
 
-def ONNXTransposeOp:ONNX_Op<"Transpose", 
-    [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+def ONNXTransposeOp:ONNX_Op<"Transpose",
+  [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
   let summary = "ONNX Transpose operation";
   let description = [{
-    "Transpose the input tensor similar to numpy.transpose. For example, when"
-    "perm=(1, 0, 2), given an input tensor of shape (1, 2, 3), the output shape"
-    "will be (2, 1, 3)."
+  "Transpose the input tensor similar to numpy.transpose. For example, when"
+  "perm=(1, 0, 2), given an input tensor of shape (1, 2, 3), the output shape"
+  "will be (2, 1, 3)."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$data,
-           OptionalAttr<I64ArrayAttr>:$perm);
+    OptionalAttr<I64ArrayAttr>:$perm);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$transposed);
 }
 
-def ONNXUniqueOp:ONNX_Op<"Unique", 
-    [NoSideEffect]> {
+def ONNXUniqueOp:ONNX_Op<"Unique",
+  [NoSideEffect]> {
   let summary = "ONNX Unique operation";
   let description = [{
-    "Find the unique elements of a tensor. When an optional attribute 'axis' is provided, unique subtensors sliced along the 'axis' are returned. "
-    "Otherwise the input tensor is flattened and unique values of the flattened tensor are returned. "
-    ""
-    "This operator returns the unique values or sliced unique subtensors of the input tensor and three optional outputs. "
-    "The first output tensor 'Y' contains all unique values or subtensors of the input. "
-    "The second optional output tensor 'indices' contains indices of 'Y' elements' first occurance in 'X'.. "
-    "The third optional output tensor 'inverse_indices' contains, for elements of 'X', its corresponding indices in 'Y'. ". "
-    "The fourth optional output tensor 'counts' contains the count of each element of 'Y' in the input. "
-    ""
-    "Outputs are either sorted in ascending order or optionally in the order of the first occurrence of the values in the input. "
-    ""
-    "https://docs.scipy.org/doc/numpy/reference/generated/numpy.unique.html"
-    ""
-    "Example 1:"
-    "  input_X = [2, 1, 1, 3, 4, 3]"
-    "  attribute_sorted = 0"
-    "  attribute_axis = None"
-    "  output_Y = [2, 1, 3, 4]"
-    "  output_indices = [0, 1, 3, 4]"
-    "  output_inverse_indices = [0, 1, 1, 2, 3, 2]"
-    "  output_counts = [1, 2, 2, 1]"
-    ""
-    "Example 2:"
-    "  input_X = [[1, 3], [2, 3]]"
-    "  attribute_sorted = 1"
-    "  attribute_axis = None"
-    "  output_Y = [1, 2, 3]"
-    "  output_indices = [0, 2, 1]"
-    "  output_inverse_indices = [0, 2, 1, 2]"
-    "  output_counts = [1, 1, 2]"
-    ""
-    "Example 3:"
-    "  input_X = [[1, 0, 0], [1, 0, 0], [2, 3, 4]]"
-    "  attribute_sorted = 1"
-    "  attribute_axis = 0"
-    "  output_Y = [[1, 0, 0], [2, 3, 4]]"
-    "  output_indices = [0, 2]"
-    "  output_inverse_indices = [0, 0, 1]"
-    "  output_counts = [2, 1]"
-    ""
-    "Example 4:"
-    "  input_x = [[[1., 1.], [0., 1.], [2., 1.], [0., 1.]], "
-    "             [[1., 1.], [0., 1.], [2., 1.], [0., 1.]]]"
-    "  attribute_sorted = 1"
-    "  attribute_axis = 1"
-    ""
-    "  intermediate data are presented below for better understanding: "
-    "  "
-    "  there are 4 subtensors sliced along axis 1 of input_x (shape = (2, 4, 2)):"
-    "  A: [[1, 1], [1, 1]], "
-    "     [[0, 1], [0, 1]], "
-    "     [[2, 1], [2, 1]], "
-    "     [[0, 1], [0, 1]]."
-    "  "
-    "  there are 3 unique subtensors: "
-    "  [[1, 1], [1, 1]], "
-    "  [[0, 1], [0, 1]], "
-    "  [[2, 1], [2, 1]]."
-    "  "
-    "  sorted unique subtensors:"
-    "  B: [[0, 1], [0, 1]], "
-    "     [[1, 1], [1, 1]], "
-    "     [[2, 1], [2, 1]]."
-    "  "
-    "  output_Y is constructed from B:"
-    "  [[[0. 1.], [1. 1.], [2. 1.]], "
-    "   [[0. 1.], [1. 1.], [2. 1.]]]"
-    ""
-    "  output_indices is to map from B to A:"
-    "  [1, 0, 2]"
-    "  "
-    "  output_inverse_indices is to map from A to B:"
-    "  [1, 0, 2, 0]"
-    ""
-    "  output_counts = [2 1 1]"
+  "Find the unique elements of a tensor. When an optional attribute 'axis' is provided, unique subtensors sliced along the 'axis' are returned. "
+  "Otherwise the input tensor is flattened and unique values of the flattened tensor are returned. "
+  ""
+  "This operator returns the unique values or sliced unique subtensors of the input tensor and three optional outputs. "
+  "The first output tensor 'Y' contains all unique values or subtensors of the input. "
+  "The second optional output tensor 'indices' contains indices of 'Y' elements' first occurance in 'X'.. "
+  "The third optional output tensor 'inverse_indices' contains, for elements of 'X', its corresponding indices in 'Y'. \". "
+  "The fourth optional output tensor 'counts' contains the count of each element of 'Y' in the input. "
+  ""
+  "Outputs are either sorted in ascending order or optionally in the order of the first occurrence of the values in the input. "
+  ""
+  "https://docs.scipy.org/doc/numpy/reference/generated/numpy.unique.html"
+  ""
+  "Example 1:"
+  "  input_X = [2, 1, 1, 3, 4, 3]"
+  "  attribute_sorted = 0"
+  "  attribute_axis = None"
+  "  output_Y = [2, 1, 3, 4]"
+  "  output_indices = [0, 1, 3, 4]"
+  "  output_inverse_indices = [0, 1, 1, 2, 3, 2]"
+  "  output_counts = [1, 2, 2, 1]"
+  ""
+  "Example 2:"
+  "  input_X = [[1, 3], [2, 3]]"
+  "  attribute_sorted = 1"
+  "  attribute_axis = None"
+  "  output_Y = [1, 2, 3]"
+  "  output_indices = [0, 2, 1]"
+  "  output_inverse_indices = [0, 2, 1, 2]"
+  "  output_counts = [1, 1, 2]"
+  ""
+  "Example 3:"
+  "  input_X = [[1, 0, 0], [1, 0, 0], [2, 3, 4]]"
+  "  attribute_sorted = 1"
+  "  attribute_axis = 0"
+  "  output_Y = [[1, 0, 0], [2, 3, 4]]"
+  "  output_indices = [0, 2]"
+  "  output_inverse_indices = [0, 0, 1]"
+  "  output_counts = [2, 1]"
+  ""
+  "Example 4:"
+  "  input_x = [[[1., 1.], [0., 1.], [2., 1.], [0., 1.]], "
+  "             [[1., 1.], [0., 1.], [2., 1.], [0., 1.]]]"
+  "  attribute_sorted = 1"
+  "  attribute_axis = 1"
+  ""
+  "  intermediate data are presented below for better understanding: "
+  "  "
+  "  there are 4 subtensors sliced along axis 1 of input_x (shape = (2, 4, 2)):"
+  "  A: [[1, 1], [1, 1]], "
+  "     [[0, 1], [0, 1]], "
+  "     [[2, 1], [2, 1]], "
+  "     [[0, 1], [0, 1]]."
+  "  "
+  "  there are 3 unique subtensors: "
+  "  [[1, 1], [1, 1]], "
+  "  [[0, 1], [0, 1]], "
+  "  [[2, 1], [2, 1]]."
+  "  "
+  "  sorted unique subtensors:"
+  "  B: [[0, 1], [0, 1]], "
+  "     [[1, 1], [1, 1]], "
+  "     [[2, 1], [2, 1]]."
+  "  "
+  "  output_Y is constructed from B:"
+  "  [[[0. 1.], [1. 1.], [2. 1.]], "
+  "   [[0. 1.], [1. 1.], [2. 1.]]]"
+  ""
+  "  output_indices is to map from B to A:"
+  "  [1, 0, 2]"
+  "  "
+  "  output_inverse_indices is to map from A to B:"
+  "  [1, 0, 2, 0]"
+  ""
+  "  output_counts = [2 1 1]"
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$X,
-           OptionalAttr<I64Attr>:$axis,
-           DefaultValuedAttr<I64Attr, "1">:$sorted);
+    OptionalAttr<I64Attr>:$axis,
+    DefaultValuedAttr<I64Attr, "1">:$sorted);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$indices,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$inverse_indices,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$counts);
+    AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$indices,
+    AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$inverse_indices,
+    AnyTypeOf<[AnyMemRef, AnyTensor, NoneType]>:$counts);
 }
 
-def ONNXUnsqueezeOp:ONNX_Op<"Unsqueeze", 
-    [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+def ONNXUnsqueezeOp:ONNX_Op<"Unsqueeze",
+  [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
   let summary = "ONNX Unsqueeze operation";
   let description = [{
-    "Insert single-dimensional entries to the shape of an input tensor (`data`)."
-    "Takes one required argument `axes` - which contains a list of dimension indices and this operator will insert a dimension of value `1` into the corresponding index of the output tensor (`expanded`)."
-    ""
-    "For example:"
-    "  Given an input tensor (`data`) of shape [3, 4, 5], then"
-    "  Unsqueeze(data, axes=[0, 4]) outputs a tensor (`expanded`) containing same data as `data` but with shape [1, 3, 4, 5, 1]."
-    ""
-    "The attribute `axes` should not contain any duplicate entries. It is an error if it contains duplicates."
-    "The rank of the output tensor (`output_rank`) is the rank of the input tensor (`data`) plus the number of values in `axes`."
-    "Each value in `axes` should be within the (inclusive) range [-output_rank , output_rank - 1]. "
-    "The order of values in `axes` does not matter and can come in any order. "
-    ""
+  "Insert single-dimensional entries to the shape of an input tensor (`data`)."
+  "Takes one required argument `axes` - which contains a list of dimension indices and this operator will insert a dimension of value `1` into the corresponding index of the output tensor (`expanded`)."
+  ""
+  "For example:"
+  "  Given an input tensor (`data`) of shape [3, 4, 5], then"
+  "  Unsqueeze(data, axes=[0, 4]) outputs a tensor (`expanded`) containing same data as `data` but with shape [1, 3, 4, 5, 1]."
+  ""
+  "The attribute `axes` should not contain any duplicate entries. It is an error if it contains duplicates."
+  "The rank of the output tensor (`output_rank`) is the rank of the input tensor (`data`) plus the number of values in `axes`."
+  "Each value in `axes` should be within the (inclusive) range [-output_rank , output_rank - 1]. "
+  "The order of values in `axes` does not matter and can come in any order. "
+  ""
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$data,
-           I64ArrayAttr:$axes);
+    I64ArrayAttr:$axes);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$expanded);
 }
 
-def ONNXUpsampleOp:ONNX_Op<"Upsample", 
-    [NoSideEffect]> {
+def ONNXUpsampleOp:ONNX_Op<"Upsample",
+  [NoSideEffect]> {
   let summary = "ONNX Upsample operation";
   let description = [{
-    "Upsample the input tensor."
-    "Each dimension value of the output tensor is:"
-    "  output_dimension = floor(input_dimension * scale)."
+  "Upsample the input tensor."
+  "Each dimension value of the output tensor is:"
+  "  output_dimension = floor(input_dimension * scale)."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$X,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$scales,
-           DefaultValuedAttr<StrAttr, "nearest">:$mode);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$scales,
+    DefaultValuedAttr<StrAttr, "nearest">:$mode);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y);
 }
 
-def ONNXWhereOp:ONNX_Op<"Where", 
-    [NoSideEffect]> {
+def ONNXWhereOp:ONNX_Op<"Where",
+  [NoSideEffect]> {
   let summary = "ONNX Where operation";
   let description = [{
-    "Return elements, either from X or Y, depending on condition"
-    "    (with Numpy-style broadcasting support)."
-    "    Where behaves like numpy.where with three parameters:"
-    "    https://docs.scipy.org/doc/numpy/reference/generated/numpy.where.html"
+  "Return elements, either from X or Y, depending on condition"
+  "    (with Numpy-style broadcasting support)."
+  "    Where behaves like numpy.where with three parameters:"
+  "    https://docs.scipy.org/doc/numpy/reference/generated/numpy.where.html"
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$condition,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$X,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$X,
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$Y);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$output);
 }
 
-def ONNXXorOp:ONNX_Op<"Xor", 
-    [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+def ONNXXorOp:ONNX_Op<"Xor",
+  [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
   let summary = "ONNX Xor operation";
   let description = [{
-    "Returns the tensor resulted from performing the `xor` logical operation"
-    "elementwise on the input tensors `A` and `B` (with Numpy-style broadcasting support)."
-    ""
-    "This operator supports **multidirectional (i.e., Numpy-style) broadcasting**; for more details please check [the doc](Broadcasting.md)."
+  "Returns the tensor resulted from performing the `xor` logical operation"
+  "elementwise on the input tensors `A` and `B` (with Numpy-style broadcasting support)."
+  ""
+  "This operator supports **multidirectional (i.e., Numpy-style) broadcasting**; for more details please check [the doc](Broadcasting.md)."
   }];
   let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$A,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$B);
+    AnyTypeOf<[AnyMemRef, AnyTensor]>:$B);
   let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$C);
 }
+
diff --git a/src/main.cpp b/src/main.cpp
index e3a36c5..e99329b 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -127,6 +127,10 @@ int main(int argc, char *argv[]) {
 
   if (emissionTarget >= EmitMLIR) {
     pm.addPass(mlir::createLowerToKrnlPass());
+    // An additional pass of canonicalization is helpful because lowering
+    // from ONNX dialect to Standard dialect exposes additional canonicalization
+    // opportunities.
+    pm.addPass(mlir::createCanonicalizerPass());
     pm.addPass(mlir::createLowerKrnlPass());
   }
 
diff --git a/src/pass/onnx_combine.cpp b/src/pass/onnx_combine.cpp
index 31eb2d6..2382cc2 100644
--- a/src/pass/onnx_combine.cpp
+++ b/src/pass/onnx_combine.cpp
@@ -28,6 +28,11 @@ void ONNXAddOp::getCanonicalizationPatterns(
     OwningRewritePatternList& results, MLIRContext* context) {
   results.insert<MulAddToGemmOptPattern>(context);
 }
+
+void ONNXGemmOp::getCanonicalizationPatterns(
+    OwningRewritePatternList& results, MLIRContext* context) {
+  results.insert<FuseGemmFollowedByAddition>(context); // Add(Gemm) -> Gemm
+}
 /// on the ONNXIdentityOp.
 void ONNXIdentityOp::getCanonicalizationPatterns(
     OwningRewritePatternList& results, MLIRContext* context) {
diff --git a/src/pass/onnx_combine.td b/src/pass/onnx_combine.td
index efcc34b..3674fd3 100644
--- a/src/pass/onnx_combine.td
+++ b/src/pass/onnx_combine.td
@@ -26,6 +26,7 @@ include "dialect/onnx/onnx.td"
 
 def HasOneUse : Constraint<CPred<"$0.hasOneUse()">>;
 class HasRankOf<int rank> : Constraint<CPred<"$0.getType().isa<ShapedType>() && $0.getType().cast<ShapedType>().getRank() == " # rank>>;
+def HasNoneType : Constraint<CPred<"$0.getType().isa<NoneType>()">>; // holds when the value's type is NoneType
 
 //===----------------------------------------------------------------------===//
 // Pattern-Match and Rewrite
@@ -41,6 +42,11 @@ def MulAddToGemmOptPattern : Pat<(ONNXAddOp (ONNXMatMulOp:$res $m1, $m2), $m3),
                                  (ONNXGemmOp $m1, $m2, $m3, (GemmAlpha), (GemmBeta), (GemmTransA), (GemmTransB)),
                                  [(HasOneUse $res), (HasRankOf<2> $m1), (HasRankOf<2> $m2)]>;
 
+// onnx.Add(onnx.Gemm(%X, %Y, None), %Z) = onnx.Gemm(%X, %Y, %Z) with beta rebuilt by (GemmBeta): %Z is added unscaled, so the matched $beta must not be applied to it. NOTE(review): assumes GemmBeta builds 1.0, as its use in MulAddToGemmOptPattern implies - confirm.
+def FuseGemmFollowedByAddition : Pat<(ONNXAddOp (ONNXGemmOp:$res $m1, $m2, $none, $alpha, $beta, $transA, $transB), $bias),
+                                     (ONNXGemmOp $m1, $m2, $bias, $alpha, (GemmBeta), $transA, $transB),
+                                     [(HasOneUse $res), (HasRankOf<2> $m1), (HasRankOf<2> $m2), (HasNoneType $none)]>;
+
 // ONNX_Op (onnx.Identity (%X)) = ONNX_Op (%X)
 def IdentityEliminationPattern : Pat<(ONNXIdentityOp $arg),
                                      (replaceWithValue $arg)>;
diff --git a/test/mlir/onnx/onnx_canonicalization.mlir b/test/mlir/onnx/onnx_canonicalization.mlir
index 61e11c5..840cd7d 100644
--- a/test/mlir/onnx/onnx_canonicalization.mlir
+++ b/test/mlir/onnx/onnx_canonicalization.mlir
@@ -101,3 +101,14 @@ func @test_conv_split(%arg0 : tensor<1x9x32x64xf32>, %arg1 : tensor<5x9x6x7xf32>
   // CHECK-NEXT: %1 = "onnx.ConvNoBias"(%0, %arg1) {auto_pad = "NOTSET", group = 1 : i64, pads = [0, 0, 0, 0]} : (tensor<1x9x38x72xf32>, tensor<5x9x6x7xf32>) -> tensor<*xf32>
   // CHECK-NEXT: return %1 : tensor<*xf32>
 }
+
+//CHECK-LABEL: @test_gemm_add_fusion(%{{.*}}: tensor<128x128xf32>, %{{.*}}: tensor<128x128xf32>, %{{.*}}: tensor<128xf32>) -> tensor<*xf32> {
+func @test_gemm_add_fusion(%arg0: tensor<128x128xf32>, %arg1: tensor<128x128xf32>, %arg2: tensor<128xf32>) -> tensor<*xf32> {
+  %cst = constant unit
+  %0 = "onnx.Gemm"(%arg0, %arg1, %cst) : (tensor<128x128xf32>, tensor<128x128xf32>, none) -> tensor<*xf32>
+  %1 = "onnx.Add"(%0, %arg2) : (tensor<*xf32>, tensor<128xf32>) -> tensor<*xf32>
+  return %1 : tensor<*xf32>
+
+  // CHECK-NEXT: [[GEMM:%.+]] = "onnx.Gemm"(%{{.*}}, %{{.*}}, %{{.*}}) {alpha = 1.000000e+00 : f32, beta = 1.000000e+00 : f32, transA = 0 : i64, transB = 0 : i64} : (tensor<128x128xf32>, tensor<128x128xf32>, tensor<128xf32>) -> tensor<*xf32>
+  // CHECK-NEXT: return [[GEMM]] : tensor<*xf32>
+}
\ No newline at end of file

From fcb5f35993ba5b7fce51a33f7faafebc62cedd74 Mon Sep 17 00:00:00 2001
From: Alexandre Eichenberger <alexe@us.ibm.com>
Date: Mon, 24 Feb 2020 17:20:15 -0500
Subject: [PATCH 02/10] Introduce helper class to generate KRNL code and apply
 it to Convolution (#93)

* helper to gen krnl code, applied to conv

* suggested changes, name, removed set insertion point

* format

* suggested changes

* added comments and made a small name change
---
 .clang-format                                 |   1 +
 .../onnx_to_krnl/rewrite_patterns/nn/conv.inc | 146 +++++++-----------
 src/dialect/krnl/krnl_helper.cpp              | 146 ++++++++++++++++--
 src/dialect/krnl/krnl_helper.hpp              | 141 ++++++++++++++---
 4 files changed, 303 insertions(+), 131 deletions(-)

diff --git a/.clang-format b/.clang-format
index a74fda4..b3276c6 100644
--- a/.clang-format
+++ b/.clang-format
@@ -1,2 +1,3 @@
 BasedOnStyle: LLVM
 AlwaysBreakTemplateDeclarations: Yes
+AlignAfterOpenBracket: DontAlign
diff --git a/src/conversion/onnx_to_krnl/rewrite_patterns/nn/conv.inc b/src/conversion/onnx_to_krnl/rewrite_patterns/nn/conv.inc
index 20ac5e8..6e3afe1 100644
--- a/src/conversion/onnx_to_krnl/rewrite_patterns/nn/conv.inc
+++ b/src/conversion/onnx_to_krnl/rewrite_patterns/nn/conv.inc
@@ -12,9 +12,8 @@ struct ONNXConvNoBiasOpLowering : public ConversionPattern {
   ONNXConvNoBiasOpLowering(MLIRContext *ctx)
       : ConversionPattern(mlir::ONNXConvNoBiasOp::getOperationName(), 1, ctx) {}
 
-  PatternMatchResult
-  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
-                  ConversionPatternRewriter &rewriter) const final {
+  PatternMatchResult matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+      ConversionPatternRewriter &rewriter) const final {
     auto loc = op->getLoc();
     // Insert an allocation and deallocation for the result of this operation.
     auto memRefType = convertToMemRefType(*op->result_type_begin());
@@ -25,12 +24,14 @@ struct ONNXConvNoBiasOpLowering : public ConversionPattern {
     if (hasAllConstantDimensions(memRefType))
       alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
     else
-      alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc,
-                                    {operands[0]});
+      alloc = insertAllocAndDealloc(
+          memRefType, loc, rewriter, insertDealloc, {operands[0]});
 
     auto resultShape = memRefType.getShape();
-    auto inputShape = operands[0].getType().cast<MemRefType>().getShape();
-    auto kernelShape = operands[1].getType().cast<MemRefType>().getShape();
+    auto &inputOperand = operands[0];
+    auto inputShape = inputOperand.getType().cast<MemRefType>().getShape();
+    auto &kernelOperand = operands[1];
+    auto kernelShape = kernelOperand.getType().cast<MemRefType>().getShape();
 
     // R = ConvNoBias(D, K)
     //
@@ -91,123 +92,82 @@ struct ONNXConvNoBiasOpLowering : public ConversionPattern {
         loc, FloatAttr::get(memRefType.getElementType(), 0));
     Value subchannels;
     if (kernelShape[1] < 0) {
-      subchannels =
-          rewriter.create<DimOp>(loc, operands[1], 1).getResult();
+      subchannels = rewriter.create<DimOp>(loc, kernelOperand, 1).getResult();
     } else {
-      subchannels = rewriter.create<ConstantIndexOp>(
-          loc, kernelShape[1]);
+      subchannels = rewriter.create<ConstantIndexOp>(loc, kernelShape[1]);
     }
 
     // 1. Define outer loops and emit empty optimization block:
     int64_t nOuterLoops = (group > 1) ? 3 : 2;
-    std::vector<Value> outerLoops;
-    std::vector<Value> optimizedOuterLoops;
-    Block *optimizationBlock = defineLoops(rewriter, loc, outerLoops,
-        optimizedOuterLoops, nOuterLoops);
-
-    // Prepare iteration arguments over outer loop nest.
-    KrnlIterateOperandPack pack(
-        rewriter, outerLoops, optimizedOuterLoops);
+    BuildKrnlLoop outerLoops(rewriter, loc, nOuterLoops);
+    outerLoops.createDefineAndOptimizeOp();
     //   for n = 0 .. N:
-    pack.pushConstantBound(0);
-    if (inputShape[0] < 0)
-      pack.pushOperandBound(
-          rewriter.create<DimOp>(loc, operands[0], 0).getResult());
-    else
-      pack.pushConstantBound(inputShape[0]);
+    int nIndex = outerLoops.pushBounds(0, inputOperand, 0);
     //   for g = 0 .. N:
-    if (group > 1) {
-      pack.pushConstantBound(0);
-      pack.pushConstantBound(group);
-    }
+    int gIndex = -1;
+    if (group > 1)
+      gIndex = outerLoops.pushBounds(0, group);
     //   for m = 0 .. kernelsPerGroup:
-    pack.pushConstantBound(0);
-    pack.pushConstantBound(kernelsPerGroup);
-    // Outer loop iteration.
-    auto iterateOp = rewriter.create<KrnlIterateOp>(loc, pack);
-    Block &outerIterationBlock = iterateOp.bodyRegion().front();
-    // Emit optimizations for outer loops:
-    rewriter.setInsertionPointToEnd(optimizationBlock);
-    rewriter.create<KrnlReturnLoopsOp>(loc, outerLoops);
-    rewriter.setInsertionPointToStart(&outerIterationBlock);
+    int mIndex = outerLoops.pushBounds(0, kernelsPerGroup);
+    // Outer loop iteration
+    outerLoops.createIterateOp();
+    rewriter.setInsertionPointToStart(outerLoops.getIterateBlock());
     {
       // 2. Emit the body of the outer loop nest.
 
       // 2.1 Compute kernel order number: kernel = g * kernelsPerGroup + m;
       // If group is not set then the value of the kernel ID is
       // identical to that of the loop over kernels.
-      Value kernel = outerIterationBlock.getArguments()[1];
+      Value kernel = outerLoops.getInductionVar(mIndex);
       if (group > 1) {
         // Middle loop is over groups and third loop is over the
         // kernel identifiers in the current group.
-        auto kernelsOffset = rewriter.create<MulIOp>(loc,
-            outerIterationBlock.getArguments()[1],
-            kernelsPerGroupValue);
-        kernel = rewriter.create<AddIOp>(loc, kernelsOffset,
-            outerIterationBlock.getArguments()[2]);
+        auto kernelsOffset = rewriter.create<MulIOp>(
+            loc, outerLoops.getInductionVar(gIndex), kernelsPerGroupValue);
+        kernel = rewriter.create<AddIOp>(
+            loc, kernelsOffset, outerLoops.getInductionVar(mIndex));
       }
 
       // 2.2 Define spatial loops
       int64_t nSpatialLoops = resultShape.size() - 2;
-      std::vector<Value> spatialLoops;
-      std::vector<Value> optimizedSpatialLoops;
-      Block *optSpatialLoopBlock = defineLoops(rewriter, loc, spatialLoops,
-        optimizedSpatialLoops, nSpatialLoops);
-
-      // 2.3 Prepare iteration arguments for spatial loop nest.
-      KrnlIterateOperandPack spatialPack(
-        rewriter, spatialLoops, optimizedSpatialLoops);
+      BuildKrnlLoop spatialLoops(rewriter, loc, nSpatialLoops);
+      spatialLoops.createDefineAndOptimizeOp();
       for (int i = 2; i < resultShape.size(); ++i)
-        addDimensionToPack(rewriter, loc, spatialPack, alloc, i);
+        spatialLoops.pushBounds(0, alloc, i);
 
       // 2.4 Emit loop nest over output spatial dimensions.
       //   for rX = 0 .. RX
-      auto spatialIterateOp =
-          rewriter.create<KrnlIterateOp>(loc, spatialPack);
-      Block &spatialIterationBlock = spatialIterateOp.bodyRegion().front();
-      // 2.5 Emit optimizations for outer loops:
-      rewriter.setInsertionPointToEnd(optSpatialLoopBlock);
-      rewriter.create<KrnlReturnLoopsOp>(loc, spatialLoops);
-      rewriter.setInsertionPointToStart(&spatialIterationBlock);
+      spatialLoops.createIterateOp();
+      rewriter.setInsertionPointToStart(spatialLoops.getIterateBlock());
+
       {
         // 3. Emit the body of the spatial loop nest.
         // 3.1 Emit: R[n][kernel][r1][r2] = 0;
         SmallVector<Value, 4> resultIndices;
         // n
-        resultIndices.emplace_back(outerIterationBlock.getArguments()[0]);
+        resultIndices.emplace_back(outerLoops.getInductionVar(nIndex));
         // kernel
         resultIndices.emplace_back(kernel);
         // rX
-        for (auto arg : spatialIterationBlock.getArguments())
+        for (auto arg : spatialLoops.getIterateBlock()->getArguments())
           resultIndices.emplace_back(arg);
         // Store initializer value into output location.
         rewriter.create<StoreOp>(loc, zero, alloc, resultIndices);
 
         // 3.2 Define inner loops.
         int64_t nInnerLoops = 1 + (kernelShape.size() - 2);
-        std::vector<Value> innerLoops;
-        std::vector<Value> optimizedInnerLoops;
-        Block *optInnerLoopBlock = defineLoops(rewriter, loc, innerLoops,
-            optimizedInnerLoops, nInnerLoops);
-
-        // 3.3 Prepare iteration arguments for inner loop nest.
-        KrnlIterateOperandPack innerPack(
-            rewriter, innerLoops, optimizedInnerLoops);
+        BuildKrnlLoop innerLoops(rewriter, loc, nInnerLoops);
+        innerLoops.createDefineAndOptimizeOp();
         //   for c = 0 .. C/group
-        innerPack.pushConstantBound(0);
-        innerPack.pushConstantBound(kernelShape[1]);
+        int cIndex = innerLoops.pushBounds(0, kernelShape[1]);
         //   for Kx = 0 .. KX
         for (int i = 2; i < kernelShape.size(); ++i)
-          addDimensionToPack(rewriter, loc, innerPack, operands[1], i);
+          innerLoops.pushBounds(0, kernelOperand, i);
 
         // 3.4 Emit inner loop nest.
-        auto innerIterateOp =
-            rewriter.create<KrnlIterateOp>(loc, innerPack);
-        Block &innerIterationBlock = innerIterateOp.bodyRegion().front();
-        // 3.5 Emit optimizations for outer loops:
-        rewriter.setInsertionPointToEnd(optInnerLoopBlock);
-        rewriter.create<KrnlReturnLoopsOp>(loc, innerLoops);
-        rewriter.setInsertionPointToStart(&innerIterationBlock);
+        innerLoops.createIterateOp();
+        rewriter.setInsertionPointToStart(innerLoops.getIterateBlock());
+
         {
           // 4. Emit inner loop body
           // R[n][kernel][r1][r2] =
@@ -217,13 +177,13 @@ struct ONNXConvNoBiasOpLowering : public ConversionPattern {
           // 4.1 Prepare indices for accesing the data tensor.
           SmallVector<Value, 4> dataIndices;
           // n
-          dataIndices.emplace_back(outerIterationBlock.getArguments()[0]);
+          dataIndices.emplace_back(outerLoops.getInductionVar(nIndex));
           // g * (C / group) + c
-          Value channelDepth = innerIterationBlock.getArguments()[0];
+          Value channelDepth = innerLoops.getInductionVar(cIndex);
           if (group > 1)
             channelDepth = rewriter.create<AddIOp>(loc, channelDepth,
-                rewriter.create<MulIOp>(loc, subchannels,
-                    outerIterationBlock.getArguments()[1]));
+                rewriter.create<MulIOp>(
+                    loc, subchannels, outerLoops.getInductionVar(gIndex)));
           dataIndices.emplace_back(channelDepth);
           // sX * rX + kX
           auto stridesAttribute = convOp.stridesAttr();
@@ -233,15 +193,14 @@ struct ONNXConvNoBiasOpLowering : public ConversionPattern {
             for (auto stride : stridesAttribute.getValue())
               strides.emplace_back(stride.cast<IntegerAttr>().getInt());
           for (int i = 0; i < kernelShape.size() - 2; ++i) {
-            Value spatialIndex = spatialIterationBlock.getArguments()[i];
+            Value spatialIndex = spatialLoops.getInductionVar(i);
             // If strides are present then emit the correct access index.
             if (stridesAttribute && strides[i] > 1)
               spatialIndex = rewriter.create<MulIOp>(loc,
                   rewriter.create<ConstantIndexOp>(loc, strides[i]),
-                  spatialIterationBlock.getArguments()[i]);
-            dataIndices.emplace_back(
-                rewriter.create<AddIOp>(loc, spatialIndex,
-                    innerIterationBlock.getArguments()[i+1]));
+                  spatialLoops.getInductionVar(i));
+            dataIndices.emplace_back(rewriter.create<AddIOp>(
+                loc, spatialIndex, innerLoops.getInductionVar(i + 1)));
           }
 
           // 4.2 Prepare indices for accessing the kernel tensor.
@@ -249,17 +208,16 @@ struct ONNXConvNoBiasOpLowering : public ConversionPattern {
           // kernel
           kernelIndices.emplace_back(kernel);
           // c
-          kernelIndices.emplace_back(innerIterationBlock.getArguments()[0]);
+          kernelIndices.emplace_back(innerLoops.getInductionVar(cIndex));
           // kX
           for (int i = 0; i < kernelShape.size() - 2; ++i)
-            kernelIndices.emplace_back(
-                innerIterationBlock.getArguments()[i+1]);
+            kernelIndices.emplace_back(innerLoops.getInductionVar(i + 1));
 
           // 4.3 Compute convolution.
           auto loadData =
-              rewriter.create<LoadOp>(loc, operands[0], dataIndices);
+              rewriter.create<LoadOp>(loc, inputOperand, dataIndices);
           auto loadKernel =
-              rewriter.create<LoadOp>(loc, operands[1], kernelIndices);
+              rewriter.create<LoadOp>(loc, kernelOperand, kernelIndices);
           auto loadPartialSum =
               rewriter.create<LoadOp>(loc, alloc, resultIndices);
           Value result = rewriter.create<AddFOp>(loc, loadPartialSum,
diff --git a/src/dialect/krnl/krnl_helper.cpp b/src/dialect/krnl/krnl_helper.cpp
index 72edb92..4f75a43 100644
--- a/src/dialect/krnl/krnl_helper.cpp
+++ b/src/dialect/krnl/krnl_helper.cpp
@@ -1,4 +1,5 @@
 #include "mlir/Dialect/AffineOps/AffineOps.h"
+#include "mlir/Dialect/StandardOps/Ops.h"
 #include "mlir/IR/AffineExpr.h"
 
 #include "src/dialect/krnl/krnl_ops.hpp"
@@ -9,9 +10,8 @@ namespace onnf {
 
 using namespace mlir;
 
-ParseResult
-KrnlDialectOperandParser::ParseOptionalOperand(const Type &operandType,
-                                               Value &operand) {
+ParseResult KrnlDialectOperandParser::ParseOptionalOperand(
+    const Type &operandType, Value &operand) {
   // If operand queue is empty, parse more operands and cache them.
   if (_operandRefQueue.empty()) {
     // Parse operand types:
@@ -19,7 +19,7 @@ KrnlDialectOperandParser::ParseOptionalOperand(const Type &operandType,
     _parser.parseOperandList(operand_refs);
 
     // Record operands:
-    for (auto& operand_ref : operand_refs)
+    for (auto &operand_ref : operand_refs)
       _operandRefQueue.emplace(operand_ref);
   }
 
@@ -48,8 +48,8 @@ ParseResult KrnlDialectOperandParser::ParseOptionalOperand(
   return success();
 }
 
-ParseResult KrnlDialectOperandParser::ParseOperand(const Type &operandType,
-                                                   Value &operand) {
+ParseResult KrnlDialectOperandParser::ParseOperand(
+    const Type &operandType, Value &operand) {
   if (ParseOptionalOperand(operandType, operand))
     return _parser.emitError(
         _parser.getCurrentLocation(), "Expecting an operand.");
@@ -65,8 +65,8 @@ ParseResult KrnlDialectOperandParser::ParseOperand(
   return success();
 }
 
-void printDimAndSymbolList(Operation::operand_iterator& begin, unsigned numDims,
-    unsigned numSymbols, OpAsmPrinter& p) {
+void printDimAndSymbolList(Operation::operand_iterator &begin, unsigned numDims,
+    unsigned numSymbols, OpAsmPrinter &p) {
   p << '(';
   p.printOperands(begin, begin + numDims);
   p << ')';
@@ -81,8 +81,8 @@ void printDimAndSymbolList(Operation::operand_iterator& begin, unsigned numDims,
 }
 
 void printBound(AffineMapAttr boundMap,
-    Operation::operand_iterator& boundOperandsBeg, const char* prefix,
-    OpAsmPrinter& p) {
+    Operation::operand_iterator &boundOperandsBeg, const char *prefix,
+    OpAsmPrinter &p) {
   AffineMap map = boundMap.getValue();
 
   // Check if this bound should be printed using custom assembly form.
@@ -120,9 +120,10 @@ void printBound(AffineMapAttr boundMap,
   printDimAndSymbolList(
       boundOperandsBeg, map.getNumDims(), map.getNumSymbols(), p);
 }
-}  // namespace onnf
+} // namespace onnf
 
 namespace mlir {
+
 void KrnlIterateOperandPack::pushConstantBound(int64_t bound) {
   if (boundMaps.size() % 2 == 0)
     _operands.emplace_back(inputLoops[boundMaps.size() / 2]);
@@ -137,4 +138,125 @@ void KrnlIterateOperandPack::pushOperandBound(mlir::Value operand) {
   boundMaps.emplace_back(AffineMapAttr::get(map));
   _operands.emplace_back(operand);
 }
-}  // namespace mlir
+
+BuildKrnlLoop::BuildKrnlLoop(
+    ConversionPatternRewriter &rewriter, Location loc, int loopNum)
+    : rewriter(rewriter), loc(loc), originalLoopNum(loopNum), pack(nullptr),
+      pushCount(0), createdDefineOp(false), createdOptimizeOp(false),
+      createdIterateOp(false) { // no op has been created yet
+  if (originalLoopNum <= 0)
+    emitError(loc, "expected positive number of original loops");
+}
+
+BuildKrnlLoop::BuildKrnlLoop(
+    ConversionPatternRewriter &rewriter, Location loc, Value memRefOperand)
+    : BuildKrnlLoop(rewriter, loc,
+          memRefOperand.getType().cast<MemRefType>().getShape().size()) {}
+
+BuildKrnlLoop::~BuildKrnlLoop() {
+  if (!createdDefineOp)
+    emitError(loc, "expected to create define op");
+  if (!createdIterateOp)
+    emitError(loc, "expected to create iteration op");
+  // pack is allocated with `new`: release it with `delete`, never free().
+  delete pack;
+}
+
+void BuildKrnlLoop::createDefineAndOptimizeOp(bool withEmptyOptimization) {
+  // Insert the define-loops op and record the loops it returns.
+  auto loopsOp = rewriter.create<KrnlDefineLoopsOp>(loc, originalLoopNum);
+  originalLoops.reserve(originalLoopNum);
+  for (auto result : loopsOp.getResults())
+    originalLoops.push_back(result);
+  // Insert the optimize-loops op.
+  auto optimizedLoopsOp =
+      rewriter.create<KrnlOptimizeLoopsOp>(loc, originalLoopNum);
+  optLoops.reserve(originalLoopNum);
+  // Empty optimization: the optimize block just returns the original loops.
+  if (withEmptyOptimization) {
+    for (auto result : optimizedLoopsOp.getResults())
+      optLoops.push_back(result);
+    optBlock = &optimizedLoopsOp.region().front();
+    auto ip = rewriter.saveInsertionPoint();
+    rewriter.setInsertionPointToEnd(optBlock);
+    rewriter.create<KrnlReturnLoopsOp>(loc, originalLoops);
+    rewriter.restoreInsertionPoint(ip);
+  }
+  // Prepare the operand pack that the pushBounds() overloads fill in.
+  pack = new KrnlIterateOperandPack(rewriter, originalLoops, optLoops);
+  createdDefineOp = createdOptimizeOp = true; // checked by createIterateOp
+}
+
+// Push (lower, upper) bounds for the next loop and return that loop's index.
+int BuildKrnlLoop::pushBounds(int64_t lowerBound, int64_t upperBound) {
+  pack->pushConstantBound(lowerBound);
+  pack->pushConstantBound(upperBound);
+  return pushCount++; // index of the loop just described
+}
+
+int BuildKrnlLoop::pushBounds(int64_t lowerBound, Value upperBound) {
+  pack->pushConstantBound(lowerBound);
+  pack->pushOperandBound(upperBound);
+  return pushCount++;
+}
+
+int BuildKrnlLoop::pushBounds(int64_t lowerBound, Value upperBoundMemRefOperand,
+    int upperBoundMemRefIndex, bool upperBoundMustBeConstant) {
+  pack->pushConstantBound(lowerBound);
+  // Upper bound is a memref dimension: constant if static, else a DimOp value.
+  auto shape = upperBoundMemRefOperand.getType().cast<MemRefType>().getShape();
+  if (shape[upperBoundMemRefIndex] < 0) {
+    if (upperBoundMustBeConstant)
+      emitError(loc, "bound expected to be constant");
+    pack->pushOperandBound(
+        rewriter
+            .create<DimOp>(loc, upperBoundMemRefOperand, upperBoundMemRefIndex)
+            .getResult());
+  } else
+    pack->pushConstantBound(shape[upperBoundMemRefIndex]);
+  return pushCount++;
+}
+
+int BuildKrnlLoop::pushBounds(Value lowerBound, Value upperBound) {
+  pack->pushOperandBound(lowerBound);
+  pack->pushOperandBound(upperBound);
+  return pushCount++;
+}
+
+// Create the iterate op once all bounds are pushed; records its body block.
+void BuildKrnlLoop::createIterateOp() {
+  if (!createdDefineOp)
+    emitError(loc, "must create define op before iterate op");
+  // Right now, optimize (possibly empty) is mandatory. This may change.
+  if (!createdOptimizeOp)
+    emitError(loc, "must create optimize op before iterate op");
+  // Every original loop needs its bounds pushed before iterating.
+  if (pushCount != originalLoopNum)
+    emitError(loc, "must push bounds for all original loops (pushed ")
+        << pushCount << " of " << originalLoopNum << ")";
+  // Create the iterate op from the accumulated bounds and remember the first
+  // block of its body region, which getInductionVar() later reads from.
+  auto iterateOp = rewriter.create<KrnlIterateOp>(loc, *pack);
+  iterBlock = &iterateOp.bodyRegion().front();
+  createdIterateOp = true;
+}
+
+void BuildKrnlLoop::createDefineOptimizeAndIterateOp(
+    Value memRefOperand, bool withEmptyOptimization) {
+  int rank = memRefOperand.getType().cast<MemRefType>().getShape().size();
+  if (rank != originalLoopNum)
+    emitError(loc, "mismatch in loop numbers from constructor and define");
+  createDefineAndOptimizeOp(withEmptyOptimization); // define + optimize ops
+  for (int dim = 0; dim < originalLoopNum; ++dim)
+    pushBounds(0, memRefOperand, dim); // one [0, size) loop per memref dim
+  createIterateOp();
+}
+
+// Induction variable for use in the iterate body; a bad index is only reported.
+BlockArgument &BuildKrnlLoop::getInductionVar(int originalLoopIndex) {
+  if (originalLoopIndex < 0 || originalLoopIndex >= originalLoopNum)
+    emitError(loc, "original loop index is out of bound");
+  return iterBlock->getArguments()[originalLoopIndex];
+}
+
+} // namespace mlir
diff --git a/src/dialect/krnl/krnl_helper.hpp b/src/dialect/krnl/krnl_helper.hpp
index 41a141b..cfe1787 100644
--- a/src/dialect/krnl/krnl_helper.hpp
+++ b/src/dialect/krnl/krnl_helper.hpp
@@ -8,39 +8,38 @@
 #include "mlir/IR/OpDefinition.h"
 #include "mlir/IR/OpImplementation.h"
 #include "mlir/IR/StandardTypes.h"
+#include "mlir/Transforms/DialectConversion.h"
 
 namespace onnf {
 
 class KrnlDialectOperandParser {
- public:
-  explicit KrnlDialectOperandParser(mlir::OpAsmParser& parser)
+public:
+  explicit KrnlDialectOperandParser(mlir::OpAsmParser &parser)
       : _parser(parser), _builder(parser.getBuilder()){};
 
   // Parse an optional operand.
-  mlir::ParseResult ParseOptionalOperand(const mlir::Type &operandType,
-                                         mlir::Value &operand);
+  mlir::ParseResult ParseOptionalOperand(
+      const mlir::Type &operandType, mlir::Value &operand);
 
   // Parse an optional operand and push it to an operand list.
-  mlir::ParseResult
-  ParseOptionalOperand(const mlir::Type &operandType,
-                       llvm::SmallVectorImpl<mlir::Value> &operandList);
+  mlir::ParseResult ParseOptionalOperand(const mlir::Type &operandType,
+      llvm::SmallVectorImpl<mlir::Value> &operandList);
 
   // Parse a required operand.
-  mlir::ParseResult ParseOperand(const mlir::Type &operandType,
-                                 mlir::Value &operand);
+  mlir::ParseResult ParseOperand(
+      const mlir::Type &operandType, mlir::Value &operand);
 
   // Parse a required operand and push it to an operand list.
-  mlir::ParseResult
-  ParseOperand(const mlir::Type &operandType,
-               llvm::SmallVectorImpl<mlir::Value> &operandList);
+  mlir::ParseResult ParseOperand(const mlir::Type &operandType,
+      llvm::SmallVectorImpl<mlir::Value> &operandList);
 
   // Do we have more operands to parse?
   bool hasOperandLeft() { return !_operandRefQueue.empty(); }
 
- private:
-  mlir::OpAsmParser& _parser;
+private:
+  mlir::OpAsmParser &_parser;
 
-  mlir::Builder& _builder;
+  mlir::Builder &_builder;
 
   // A queue storing the parsed SSA id references.
   std::queue<mlir::OpAsmParser::OperandType> _operandRefQueue;
@@ -50,24 +49,24 @@ class KrnlDialectOperandParser {
 // https://github.com/tensorflow/mlir/blob/6a150d70c7e06fb37cddd7188fa48cde9a90fe59/lib/Dialect/StandardOps/Ops.cpp#L197
 // Main difference is that it advances the iterator `begin` as it consumes
 // dimension and symbol operands.
-void printDimAndSymbolList(mlir::Operation::operand_iterator& begin,
-    unsigned numDims, unsigned numSymbols, mlir::OpAsmPrinter& p);
+void printDimAndSymbolList(mlir::Operation::operand_iterator &begin,
+    unsigned numDims, unsigned numSymbols, mlir::OpAsmPrinter &p);
 
 // Adapted from:
 // https://github.com/tensorflow/mlir/blob/5cb42c914fed14cebbbe5c170b4e2784d2628304/lib/Dialect/AffineOps/AffineOps.cpp#L1272
 // Main difference is that it advances the iterator `boundOperandsBeg` as it
 // prints bound.
 void printBound(mlir::AffineMapAttr boundMap,
-    mlir::Operation::operand_iterator& boundOperandsBeg, const char* prefix,
-    mlir::OpAsmPrinter& p);
-}  // namespace onnf
+    mlir::Operation::operand_iterator &boundOperandsBeg, const char *prefix,
+    mlir::OpAsmPrinter &p);
+} // namespace onnf
 
 namespace mlir {
 
 struct KrnlIterateOperandPack {
   KrnlIterateOperandPack(mlir::Builder &builder,
-                         llvm::ArrayRef<mlir::Value> inputLoops,
-                         llvm::ArrayRef<mlir::Value> optimizedLoops)
+      llvm::ArrayRef<mlir::Value> inputLoops,
+      llvm::ArrayRef<mlir::Value> optimizedLoops)
       : builder(builder), inputLoops(inputLoops),
         optimizedLoops(optimizedLoops) {
     _operands.insert(
@@ -88,7 +87,7 @@ struct KrnlIterateOperandPack {
 
   size_t getNumInputLoops() const { return inputLoops.size(); }
 
- private:
+private:
   int _boundIdx = 0;
 
   llvm::SmallVector<mlir::Value, 8> _operands;
@@ -97,7 +96,99 @@ struct KrnlIterateOperandPack {
 
   llvm::ArrayRef<mlir::Value> inputLoops, optimizedLoops;
 
-  mlir::Builder& builder;
+  mlir::Builder &builder;
 };
 
-}  // namespace mlir
+// Helper function to write kernel loops. This class will let us build a single
+// define/optimize/iterate operation combo. We can then insert optimizations in
+// the body of the optimization operation, and operations in the body of the
+// iterate operation.
+//
+// The sequence is as follow:
+//
+//   1) Create a object giving the rewriter, location, and number of loop in the
+//   original (non optimized) loop.
+//
+//   2) Create define & optimize ops (currently paired). Optimizations can then
+//   be added to the inner block of the optimize operation. Make sure to set the
+//   insertion point to that block for optimizations to go in the right place.
+//
+//   3) Push the bounds for each of the original loops. Bounds are pushed in
+//   pairs (lower & upper bounds). THere are a few methods to do it depending on
+//   the type of the bounds. When pushing bounds, the method returns a number
+//   that represent the index associated with that iteration (induction variable
+//   and bounds). That index can be used later to extract the induction variable
+//   for reference in computation and/or index calculations of mem refs.
+//
+//   4) Once all the bounds are pushed, create the iterate operation. Once this
+//   is done, we can add operations within the iterate blocks by setting the
+//   insertion point to it. Value of the induction variables can be retrieved
+//   using the proper index (determined when pushing the bounds).
+
+class BuildKrnlLoop {
+public:
+  // Create a build kernel loop for the given location and loop number.
+  BuildKrnlLoop(ConversionPatternRewriter &rewriter, Location loc, int loopNum);
+  // Do the same, but where the loop number corresponds to the dimensionality of
+  // the mem ref operand.
+  BuildKrnlLoop(
+      ConversionPatternRewriter &rewriter, Location loc, Value memRefOperand);
+  ~BuildKrnlLoop();
+
+  // Create define and optimize loop with loopNum original loops. If
+  // withEmptyOptimization, the optimization is simply the identity function (no
+  // optimizations).
+  void createDefineAndOptimizeOp(bool withEmptyOptimization = true);
+
+  // Push bounds (lower and upper) for each of the loops, in order. It returns
+  // the index associated with the loop iteration. This index is in the range
+  // from zero to original loop number -1, and is monotonally increasing from
+  // call to call. This index is later used in the getInductionVar call.
+  int pushBounds(int64_t lowerBound, int64_t upperBound);
+  int pushBounds(int64_t lowerBound, Value upperBound);
+  int pushBounds(Value lowerBound, Value upperBound);
+  // same, where the lower bound is an integer, and the uppoer bound is given by
+  // the size of the mem ref operand along the upperBoundMemRefIndex dimension.
+  int pushBounds(int64_t lowerBound, Value upperBoundMemRefOperand,
+      int upperBoundMemRefIndex, bool upperBoundMustBeConstant = false);
+
+  // Create an iterate op.
+  void createIterateOp();
+  // Create an define, optimize and iterate op, with the same loop nummber as
+  // the rank of the memRefOperand. The lower bound of each loops is zero, and
+  // the upper bound of each loops is the dimension given by the mem refs
+  void createDefineOptimizeAndIterateOp(
+      Value memRefOperand, bool withEmptyOptimization = true);
+
+  // Get the (original loop) induction variable associated with the given index.
+  // Use the index returned when pushing the bounds.
+  BlockArgument &getInductionVar(int originalLoopIndex);
+
+  // Get blocks. This allow us to set the insertion point to the inner block of
+  // the optimize and the iterate Operation
+  Block *getOptimizationBlock() { return optBlock; }
+  Block *getIterateBlock() { return iterBlock; }
+
+  // get original or optimized loops
+  std::vector<Value> &getOriginalLoops() { return originalLoops; }
+  std::vector<Value> &getOptimizedLoops() { return optLoops; }
+
+private:
+  // inputs
+  ConversionPatternRewriter &rewriter;
+  Location loc;
+  int originalLoopNum;
+  // track loops and bounds
+  std::vector<Value> originalLoops;
+  std::vector<Value> optLoops;
+  KrnlIterateOperandPack *pack;
+  int pushCount;
+  bool createdDefineOp;
+  bool createdOptimizeOp;
+  bool createdIterateOp;
+  // insertion points (opt block, iterate)
+  Block *optBlock;
+  Block *iterBlock;
+};
+
+} // namespace mlir

From 732317cd5a767c1dcc112a4f21eb7039ae92c0b9 Mon Sep 17 00:00:00 2001
From: Tian Jin <tjingrant@gmail.com>
Date: Tue, 25 Feb 2020 13:04:15 +0800
Subject: [PATCH 03/10] Transition to ONNX-1.6.0. (#95)

* Transition to ONNX-1.6.0.

* Use the version of ONNX inside ONNF when running backend tests.

* Install quietly and with sudo privilege.
---
 .circleci/config.yml                         | 2 +-
 src/builder/frontend_dialect_transformer.cpp | 5 +++--
 third_party/onnx                             | 2 +-
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 48fda88..3863f72 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -38,7 +38,7 @@ jobs:
       - run:
           name: Run End-To-End Tests
           command: |
-            sudo pip install -q onnx
+            sudo pip install -q -e ./ONNF/third_party/onnx
             cd ONNF/build
             cmake --build . --target run-onnx-backend-test
       - run:
diff --git a/src/builder/frontend_dialect_transformer.cpp b/src/builder/frontend_dialect_transformer.cpp
index cd23e8c..0efca22 100644
--- a/src/builder/frontend_dialect_transformer.cpp
+++ b/src/builder/frontend_dialect_transformer.cpp
@@ -189,8 +189,9 @@ private:
       }
     }
 
-    mlir::Type elementType =
-        convertONNXTypeToMLIRType(input.type().tensor_type().elem_type());
+    auto elementOnnxType =
+        (onnx::TensorProto_DataType)input.type().tensor_type().elem_type();
+    mlir::Type elementType = convertONNXTypeToMLIRType(elementOnnxType);
     llvm::ArrayRef<int64_t> tensor_dims(dims.data(), dims.size());
     arg_types.emplace_back(
         mlir::RankedTensorType::get(tensor_dims, elementType));
diff --git a/third_party/onnx b/third_party/onnx
index 1439eab..553df22 160000
--- a/third_party/onnx
+++ b/third_party/onnx
@@ -1 +1 @@
-Subproject commit 1439eab5542c625bb3da49860f0cd68c3eafdc18
+Subproject commit 553df22c67bee5f0fe6599cff60f1afc6748c635

From a720f9a7b20811bfd7a2c2fad71e577b7e20587b Mon Sep 17 00:00:00 2001
From: "Tung D. Le" <tung@jp.ibm.com>
Date: Tue, 25 Feb 2020 14:20:43 +0900
Subject: [PATCH 04/10] Remove special GemmNoBias since we can handle it using
 NoneType bias (#100)

* Remove special GemmNoBias since we can handle it using NoneType bias

* Remove GemmNoBias from onnx.md

Co-authored-by: Tian Jin <tjingrant@gmail.com>
---
 doc/Dialects/onnx.md                          | 27 -----------------
 .../rewrite_patterns/math/gemm.inc            |  5 +---
 src/dialect/onnx/onnx.td                      | 19 ------------
 src/dialect/onnx/onnx_ops.cpp                 | 26 -----------------
 src/pass/shape_inference_pass.cpp             |  1 -
 test/mlir/onnx/onnx_lowering.mlir             | 29 -------------------
 6 files changed, 1 insertion(+), 106 deletions(-)

diff --git a/doc/Dialects/onnx.md b/doc/Dialects/onnx.md
index 95746f6..69349aa 100644
--- a/doc/Dialects/onnx.md
+++ b/doc/Dialects/onnx.md
@@ -1558,33 +1558,6 @@ ONNX Gather operation
 
 1. `output`: memref of any type values or tensor of any type values
 
-### onnx.GemmNoBias (ONNXGemmNoBiasOp)
-ONNX general matrix multiply operation without bias.
-
-#### Description:
-
-
-The "onnx.Gemm" generic matrix multiplication without bias.
-
-
-#### Operands:
-
-1. `A`: memref of any type values or tensor of any type values
-1. `B`: memref of any type values or tensor of any type values
-
-#### Attributes:
-
-| Attribute | MLIR Type | Description |
-| :-------: | :-------: | ----------- |
-| `alpha` | `FloatAttr` | 32-bit float attribute attribute |
-| `beta` | `FloatAttr` | 32-bit float attribute attribute |
-| `transA` | `IntegerAttr` | 64-bit integer attribute attribute |
-| `transB` | `IntegerAttr` | 64-bit integer attribute attribute |
-
-#### Results:
-
-1. `o_Y`: memref of any type values or tensor of any type values
-
 ### onnx.Gemm (ONNXGemmOp)
 ONNX Gemm operation
 
diff --git a/src/conversion/onnx_to_krnl/rewrite_patterns/math/gemm.inc b/src/conversion/onnx_to_krnl/rewrite_patterns/math/gemm.inc
index 8a9bf8e..ee395b5 100644
--- a/src/conversion/onnx_to_krnl/rewrite_patterns/math/gemm.inc
+++ b/src/conversion/onnx_to_krnl/rewrite_patterns/math/gemm.inc
@@ -17,9 +17,7 @@ struct ONNXGemmOpLowering : public ConversionPattern {
   matchAndRewrite(Operation *op, ArrayRef<Value> operands,
                   ConversionPatternRewriter &rewriter) const final {
     auto loc = op->getLoc();
-    // The first predicate is unnecessary when we remove ONXGemmNoBiasOp.
-    bool hasBias = (operands.size() == 3) &&
-                   (!op->getOperand(2).getType().isa<NoneType>());
+    bool hasBias = !op->getOperand(2).getType().isa<NoneType>();
 
     Value A, B, C;
     A = operands[0];
@@ -215,5 +213,4 @@ struct ONNXGemmOpLowering : public ConversionPattern {
 void populateLoweringONNXGemmOpPattern(OwningRewritePatternList &patterns,
                                        MLIRContext *ctx) {
   patterns.insert<ONNXGemmOpLowering<ONNXGemmOp>>(ctx);
-  patterns.insert<ONNXGemmOpLowering<ONNXGemmNoBiasOp>>(ctx);
 }
diff --git a/src/dialect/onnx/onnx.td b/src/dialect/onnx/onnx.td
index 43d4a10..1cc88c3 100644
--- a/src/dialect/onnx/onnx.td
+++ b/src/dialect/onnx/onnx.td
@@ -90,25 +90,6 @@ def ONNXEntryPointOp: ONNX_Op<"EntryPoint"> {
 // or outputs. This decision affects only ONNX operations with optional
 // arguments not ONNX operations with variadic operands.
 
-def ONNXGemmNoBiasOp: ONNX_Op<"GemmNoBias",
-    [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
-  let summary = "ONNX general matrix multiply operation without bias.";
-  let description = [{
-
-    The "onnx.Gemm" generic matrix multiplication without bias.
-
-  }];
-
-  let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor]>:$A,
-           AnyTypeOf<[AnyMemRef, AnyTensor]>:$B,
-           DefaultValuedAttr<F32Attr, "1.0">:$alpha,
-           DefaultValuedAttr<F32Attr, "1.0">:$beta,
-           DefaultValuedAttr<I64Attr, "0">:$transA,
-           DefaultValuedAttr<I64Attr, "0">:$transB);
-
-  let results = (outs AnyTypeOf<[AnyMemRef, AnyTensor]>:$o_Y);
-}
-
 def ONNXConvNoBiasOp:ONNX_Op<"ConvNoBias",
     [NoSideEffect, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
   let hasCanonicalizer = 1;
diff --git a/src/dialect/onnx/onnx_ops.cpp b/src/dialect/onnx/onnx_ops.cpp
index 4de481a..5d93020 100644
--- a/src/dialect/onnx/onnx_ops.cpp
+++ b/src/dialect/onnx/onnx_ops.cpp
@@ -565,32 +565,6 @@ void ONNXGemmOp::inferShapes() {
   getResult().setType(RankedTensorType::get(dims, lhsTy.getElementType()));
 }
 
-// GemmNoBias
-
-void ONNXGemmNoBiasOp::inferShapes() {
-  // Cannot infer shape if no shape exists.
-  if (!getOperand(0).getType().isa<RankedTensorType>() ||
-      !getOperand(1).getType().isa<RankedTensorType>())
-    return;
-  auto lhsTy = getOperand(0).getType().cast<RankedTensorType>();
-  auto rhsTy = getOperand(1).getType().cast<RankedTensorType>();
-
-  int64_t M, N, K_A, K_B;
-  M = (transA() == 0) ? lhsTy.getShape()[0] : lhsTy.getShape()[1];
-  K_A = (transA() == 0) ? lhsTy.getShape()[1] : lhsTy.getShape()[0];
-  N = (transB() == 0) ? rhsTy.getShape()[1] : rhsTy.getShape()[0];
-  K_B = (transB() == 0) ? rhsTy.getShape()[0] : rhsTy.getShape()[1];
-
-  if ((K_A != -1) and (K_B != -1) and (K_A != K_B)) {
-    emitError("Tensor shapes mismatched.");
-  }
-
-  SmallVector<int64_t, 2> dims;
-  dims.emplace_back(M);
-  dims.emplace_back(N);
-  getResult().setType(RankedTensorType::get(dims, lhsTy.getElementType()));
-}
-
 /// BatchNormalizationTestMode
 void ONNXBatchNormalizationTestModeOp::inferShapes() {
   // Cannot infer shape if no shape exists.
diff --git a/src/pass/shape_inference_pass.cpp b/src/pass/shape_inference_pass.cpp
index 7ff0374..4038ec3 100644
--- a/src/pass/shape_inference_pass.cpp
+++ b/src/pass/shape_inference_pass.cpp
@@ -118,7 +118,6 @@ public:
         op->getName().getStringRef() != "onnx.Identity" &&
         op->getName().getStringRef() != "onnx.MatMul" &&
         op->getName().getStringRef() != "onnx.Gemm" &&
-        op->getName().getStringRef() != "onnx.GemmNoBias" &&
         op->getName().getStringRef() != "onnx.Reshape" &&
         op->getName().getStringRef() != "onnx.Transpose" &&
         op->getName().getStringRef() != "onnx.ReduceMax" &&
diff --git a/test/mlir/onnx/onnx_lowering.mlir b/test/mlir/onnx/onnx_lowering.mlir
index 9da12ac..c35536d 100644
--- a/test/mlir/onnx/onnx_lowering.mlir
+++ b/test/mlir/onnx/onnx_lowering.mlir
@@ -806,35 +806,6 @@ func @test_gemm(%arg0 : tensor<5x10xf32>, %arg1 : tensor<5x10xf32>, %arg2: tenso
   // CHECK: }
 }
 
-func @test_gemm_no_bias(%arg0 : tensor<5x10xf32>, %arg1 : tensor<5x10xf32>) -> tensor<*xf32> {
-  %0 ="onnx.GemmNoBias"(%arg0, %arg1) {alpha = 1.0 : f32, beta = 5.0 : f32, transA = 1, transB = 0} : (tensor<5x10xf32>, tensor<5x10xf32>) -> tensor<*xf32>
-  "std.return"(%0) : (tensor<*xf32>) -> ()
-
-  // CHECK-LABEL: test_gemm_no_bias
-  // CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
-  // CHECK: [[ALPHA:%.+]] = constant 1.000000e+00 : f32
-  // CHECK: [[BETA:%.+]] = constant 5.000000e+00 : f32
-  // CHECK: [[DEF_LOOPS:%.+]]:3 = krnl.define_loops 3
-  // CHECK: [[OPT_LOOPS:%.+]]:3 = krnl.optimize_loops  {
-  // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1, [[DEF_LOOPS]]#2
-  // CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop)
-  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
-  // CHECK: krnl.iterate([[OPT_LOOPS]]#2) with ([[DEF_LOOPS]]#2 -> %arg4 = 0 to 5) {
-  // CHECK: [[A:%.+]] = load %arg0[%arg4, %arg2] : memref<5x10xf32>
-  // CHECK: [[B:%.+]] = load %arg1[%arg4, %arg3] : memref<5x10xf32>
-  // CHECK: [[Y:%.+]] = load [[RES]][%arg2, %arg3] : memref<10x10xf32>
-  // CHECK: [[AB:%.+]] = mulf [[A]], [[B]] : f32
-  // CHECK: [[SUM:%.+]] = addf [[Y]], [[AB]] : f32
-  // CHECK: store [[SUM]], [[RES]][%arg2, %arg3] : memref<10x10xf32>
-  // CHECK: }
-  // CHECK: [[LOAD_Y:%.+]] = load [[RES]][%arg2, %arg3] : memref<10x10xf32>
-  // CHECK: [[ALPHA_AB:%.+]] = mulf [[ALPHA]], [[LOAD_Y]] : f32
-  // CHECK: store [[ALPHA_AB]], [[RES]][%arg2, %arg3] : memref<10x10xf32>
-  // CHECK: }
-  // CHECK: return [[RES]] : memref<10x10xf32>
-  // CHECK: }
-}
-
 func @test_sqrt(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
   %0 = "onnx.Sqrt"(%arg0) : (tensor<?x10xf32>) -> tensor<*xf32>
   "std.return"(%0) : (tensor<*xf32>) -> ()

From 0d307d11835be0450d1c10a44e5d959e3a7ef791 Mon Sep 17 00:00:00 2001
From: Gheorghe-Teodor Bercea <gheorghe-teod.bercea@ibm.com>
Date: Tue, 25 Feb 2020 09:47:42 -0500
Subject: [PATCH 05/10] Set flag to true when definition is emitted. (#97)

---
 src/dialect/krnl/krnl_helper.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/dialect/krnl/krnl_helper.cpp b/src/dialect/krnl/krnl_helper.cpp
index 4f75a43..87a5f80 100644
--- a/src/dialect/krnl/krnl_helper.cpp
+++ b/src/dialect/krnl/krnl_helper.cpp
@@ -168,6 +168,8 @@ void BuildKrnlLoop::createDefineAndOptimizeOp(bool withEmptyOptimization) {
   originalLoops.reserve(originalLoopNum);
   for (auto result : loopsOp.getResults())
     originalLoops.push_back(result);
+  createdDefineOp = true;
+
   // inserte optimize loop op.
   auto optimizedLoopsOp =
       rewriter.create<KrnlOptimizeLoopsOp>(loc, originalLoopNum);
@@ -182,9 +184,10 @@ void BuildKrnlLoop::createDefineAndOptimizeOp(bool withEmptyOptimization) {
     rewriter.create<KrnlReturnLoopsOp>(loc, originalLoops);
     rewriter.restoreInsertionPoint(ip);
   }
+  createdOptimizeOp = true;
+
   // prepare data structure to push bounds
   pack = new KrnlIterateOperandPack(rewriter, originalLoops, optLoops);
-  createdOptimizeOp = true;
 }
 
 // push bounds (lower and upper) and return index for loop info

From 32f08bcf0c8c85d5d1a20d4d60d8050d22d5344a Mon Sep 17 00:00:00 2001
From: Gheorghe-Teodor Bercea <gheorghe-teod.bercea@ibm.com>
Date: Tue, 25 Feb 2020 09:54:29 -0500
Subject: [PATCH 06/10] Clean-up code. (#98)

---
 src/dialect/krnl/krnl_helper.cpp | 58 ++++++++++++---------
 src/dialect/krnl/krnl_helper.hpp | 89 ++++++++++++++++++++------------
 2 files changed, 90 insertions(+), 57 deletions(-)

diff --git a/src/dialect/krnl/krnl_helper.cpp b/src/dialect/krnl/krnl_helper.cpp
index 87a5f80..91e9825 100644
--- a/src/dialect/krnl/krnl_helper.cpp
+++ b/src/dialect/krnl/krnl_helper.cpp
@@ -131,7 +131,7 @@ void KrnlIterateOperandPack::pushConstantBound(int64_t bound) {
   boundMaps.emplace_back(AffineMapAttr::get(map));
 }
 
-void KrnlIterateOperandPack::pushOperandBound(mlir::Value operand) {
+void KrnlIterateOperandPack::pushOperandBound(Value operand) {
   if (boundMaps.size() % 2 == 0)
     _operands.emplace_back(inputLoops[boundMaps.size() / 2]);
   AffineMap map = builder.getSymbolIdentityMap();
@@ -145,7 +145,7 @@ BuildKrnlLoop::BuildKrnlLoop(
       pushCount(0), createdDefineOp(false), createdOptimizeOp(false),
       createdIterateOp(false) {
   if (originalLoopNum <= 0)
-    emitError(loc, "expected positive number of original loops");
+    emitError(loc, "Expected positive number of original loops.");
 }
 
 BuildKrnlLoop::BuildKrnlLoop(
@@ -154,27 +154,24 @@ BuildKrnlLoop::BuildKrnlLoop(
           memRefOperand.getType().cast<MemRefType>().getShape().size()) {}
 
 BuildKrnlLoop::~BuildKrnlLoop() {
-  if (!createdDefineOp)
-    emitError(loc, "expected to create define op");
-  if (!createdIterateOp)
-    emitError(loc, "expected to create iteration op");
   if (pack)
     free(pack);
 }
 
 void BuildKrnlLoop::createDefineAndOptimizeOp(bool withEmptyOptimization) {
-  // insert define loop op
+  // Insert define loop operation.
   auto loopsOp = rewriter.create<KrnlDefineLoopsOp>(loc, originalLoopNum);
   originalLoops.reserve(originalLoopNum);
   for (auto result : loopsOp.getResults())
     originalLoops.push_back(result);
   createdDefineOp = true;
 
-  // inserte optimize loop op.
+  // Insert optimize loop operation.
   auto optimizedLoopsOp =
       rewriter.create<KrnlOptimizeLoopsOp>(loc, originalLoopNum);
   optLoops.reserve(originalLoopNum);
-  // Emit empty optimizations
+
+  // Emit empty optimizations if flag is set.
   if (withEmptyOptimization) {
     for (auto result : optimizedLoopsOp.getResults())
       optLoops.push_back(result);
@@ -190,7 +187,6 @@ void BuildKrnlLoop::createDefineAndOptimizeOp(bool withEmptyOptimization) {
   pack = new KrnlIterateOperandPack(rewriter, originalLoops, optLoops);
 }
 
-// push bounds (lower and upper) and return index for loop info
 int BuildKrnlLoop::pushBounds(int64_t lowerBound, int64_t upperBound) {
   pack->pushConstantBound(lowerBound);
   pack->pushConstantBound(upperBound);
@@ -206,17 +202,20 @@ int BuildKrnlLoop::pushBounds(int64_t lowerBound, Value upperBound) {
 int BuildKrnlLoop::pushBounds(int64_t lowerBound, Value upperBoundMemRefOperand,
     int upperBoundMemRefIndex, bool upperBoundMustBeConstant) {
   pack->pushConstantBound(lowerBound);
-  // process upperBound as a dimension of mem ref, possibly non-constant
+
+  // Process upperBound as a dimension of the MemRef. Non-constant dimensions
+  // are supported.
   auto shape = upperBoundMemRefOperand.getType().cast<MemRefType>().getShape();
   if (shape[upperBoundMemRefIndex] < 0) {
     if (upperBoundMustBeConstant)
-      emitError(loc, "bound expected to be constant");
+      emitError(loc, "Bound expected to be constant.");
     pack->pushOperandBound(
         rewriter
             .create<DimOp>(loc, upperBoundMemRefOperand, upperBoundMemRefIndex)
             .getResult());
   } else
     pack->pushConstantBound(shape[upperBoundMemRefIndex]);
+
   return pushCount++;
 }
 
@@ -226,19 +225,20 @@ int BuildKrnlLoop::pushBounds(Value lowerBound, Value upperBound) {
   return pushCount++;
 }
 
-// create iter
 void BuildKrnlLoop::createIterateOp() {
+  // Loop definition operation is mandatory.
   if (!createdDefineOp)
-    emitError(loc, "must create define op before iterate op");
-  // Tight now, optimize (possibly empty) is mandatory. This may change
+    emitError(loc, "Must create define op before iterate op.");
+
+  // Loop optimization operation is mandatory (for now).
   if (!createdOptimizeOp)
-    emitError(loc, "must create optimize op before iterate op");
-  // have to have defined all bounds
-  if (pushCount != originalLoopNum) {
-    printf(" push count %d, original loop %d\n", pushCount, originalLoopNum);
-    emitError(loc, "must push bounds for all original loops");
-  }
-  // create iterate op
+    emitError(loc, "Must create optimize op before iterate op.");
+
+  // Check if all bounds have been defined.
+  if (pushCount != originalLoopNum)
+    emitError(loc, "Must push bounds for all original loops.");
+
+  // Emit iteration operation.
   auto iterateOp = rewriter.create<KrnlIterateOp>(loc, *pack);
   iterBlock = &iterateOp.bodyRegion().front();
   createdIterateOp = true;
@@ -246,19 +246,27 @@ void BuildKrnlLoop::createIterateOp() {
 
 void BuildKrnlLoop::createDefineOptimizeAndIterateOp(
     Value memRefOperand, bool withEmptyOptimization) {
+  // Rank of the MemRef operand. We will emit a loop for each dimension.
   int loopNum = memRefOperand.getType().cast<MemRefType>().getShape().size();
   if (originalLoopNum != loopNum)
-    emitError(loc, "mismatch in loop numbers from constructor and define");
+    emitError(loc, "Mismatch in loop numbers from constructor and define.");
+
+  // Emit the definition and the optimization operations for the loop nest.
   createDefineAndOptimizeOp(withEmptyOptimization);
+
+  // Push a lower-upper bound pair for each dimension of the MemRef operand.
+  // The lower bound in this case is always zero.
   for (int i = 0; i < originalLoopNum; ++i)
     pushBounds(0, memRefOperand, i);
+
+  // Emit the iteration operation over the current loop nest.
   createIterateOp();
 }
 
-// get induction variable to be use within iter
 BlockArgument &BuildKrnlLoop::getInductionVar(int originalLoopIndex) {
+  // Check if loop iteration variable is within bounds.
   if (originalLoopIndex < 0 || originalLoopIndex >= originalLoopNum)
-    emitError(loc, "original loop index is out of bound");
+    emitError(loc, "Original loop index is out of bounds.");
   return iterBlock->getArguments()[originalLoopIndex];
 }
 
diff --git a/src/dialect/krnl/krnl_helper.hpp b/src/dialect/krnl/krnl_helper.hpp
index cfe1787..aebbe0b 100644
--- a/src/dialect/krnl/krnl_helper.hpp
+++ b/src/dialect/krnl/krnl_helper.hpp
@@ -106,19 +106,21 @@ private:
 //
 // The sequence is as follow:
 //
-//   1) Create a object giving the rewriter, location, and number of loop in the
-//   original (non optimized) loop.
+//   1) Create an object giving the rewriter, location, and number of loop in
+//   the original (non optimized) loop.
 //
 //   2) Create define & optimize ops (currently paired). Optimizations can then
-//   be added to the inner block of the optimize operation. Make sure to set the
-//   insertion point to that block for optimizations to go in the right place.
+//   be added to the inner block of the optimize operation. Make sure to set
+//   the insertion point to that block for optimizations to go in the right
+//   place.
 //
 //   3) Push the bounds for each of the original loops. Bounds are pushed in
-//   pairs (lower & upper bounds). THere are a few methods to do it depending on
-//   the type of the bounds. When pushing bounds, the method returns a number
-//   that represent the index associated with that iteration (induction variable
-//   and bounds). That index can be used later to extract the induction variable
-//   for reference in computation and/or index calculations of mem refs.
+//   pairs (lower & upper bounds). There are a few methods to do it depending
+//   on the type of the bounds. When pushing bounds, the method returns a
+//   number that represent the index associated with that iteration (induction
+//   variable and bounds). That index can be used later to extract the
+//   induction variable for reference in computation and/or index calculations
+//   of mem refs.
 //
 //   4) Once all the bounds are pushed, create the iterate operation. Once this
 //   is done, we can add operations within the iterate blocks by setting the
@@ -127,67 +129,90 @@ private:
 
 class BuildKrnlLoop {
 public:
-  // Create a build kernel loop for the given location and loop number.
+  // Create kernel loop builder for a loop nest of depth loopNum.
   BuildKrnlLoop(ConversionPatternRewriter &rewriter, Location loc, int loopNum);
-  // Do the same, but where the loop number corresponds to the dimensionality of
-  // the mem ref operand.
+
+  // Create kernel loop builder for a loop nest of depth equal to the
+  // dimensionality of the operand. An operand of MemRef type is required.
   BuildKrnlLoop(
       ConversionPatternRewriter &rewriter, Location loc, Value memRefOperand);
   ~BuildKrnlLoop();
 
   // Create define and optimize loop with loopNum original loops. If
-  // withEmptyOptimization, the optimization is simply the identity function (no
-  // optimizations).
+  // withEmptyOptimization is true, the optimization is simply the identity
+  // function (no optimizations).
   void createDefineAndOptimizeOp(bool withEmptyOptimization = true);
 
-  // Push bounds (lower and upper) for each of the loops, in order. It returns
-  // the index associated with the loop iteration. This index is in the range
-  // from zero to original loop number -1, and is monotonally increasing from
-  // call to call. This index is later used in the getInductionVar call.
+  // Push bounds (lower and upper) for each of the loops (order matters).
+  // The function returns the order number associated with the loop iteration.
+  // This index is used by the getInductionVar call. Non-constant operands
+  // must be of MemRef type.
   int pushBounds(int64_t lowerBound, int64_t upperBound);
   int pushBounds(int64_t lowerBound, Value upperBound);
   int pushBounds(Value lowerBound, Value upperBound);
-  // same, where the lower bound is an integer, and the uppoer bound is given by
-  // the size of the mem ref operand along the upperBoundMemRefIndex dimension.
   int pushBounds(int64_t lowerBound, Value upperBoundMemRefOperand,
       int upperBoundMemRefIndex, bool upperBoundMustBeConstant = false);
 
-  // Create an iterate op.
+  // Create the KrnlIterateOp associated with this loop nest. The loops
+  // iteration will be created if the definition and the optimization
+  // operations associated with this loop nest have been emitted already.
   void createIterateOp();
-  // Create an define, optimize and iterate op, with the same loop nummber as
-  // the rank of the memRefOperand. The lower bound of each loops is zero, and
-  // the upper bound of each loops is the dimension given by the mem refs
+
+  // Create the loop nest definition, optimization and iteration operations
+  // for a given operand of MemRef type. The loop nest has a depth equal to the
+  // rank of the MemRef operand. The lower bound of each loop is zero. The
+  // upper bound of each loop is given by the corresponding dimension of the
+  // MemRef operand.
   void createDefineOptimizeAndIterateOp(
       Value memRefOperand, bool withEmptyOptimization = true);
 
-  // Get the (original loop) induction variable associated with the given index.
-  // Use the index returned when pushing the bounds.
+  // Get the (original loop) induction variable associated with the given
+  // index. Use the index returned when pushing the bounds.
   BlockArgument &getInductionVar(int originalLoopIndex);
 
-  // Get blocks. This allow us to set the insertion point to the inner block of
-  // the optimize and the iterate Operation
+  // Get a reference to the code region of the optimization operation.
+  // This allows us to set the insertion point to the inner block of the
+  // loop nest optimization operation.
   Block *getOptimizationBlock() { return optBlock; }
+
+  // Get a reference to the code region of the iteration operation.
+  // This allows us to set the insertion point to the inner block of the
+  // loop nest iteration operation.
   Block *getIterateBlock() { return iterBlock; }
 
-  // get original or optimized loops
+  // Get original loop nest.
   std::vector<Value> &getOriginalLoops() { return originalLoops; }
+
+  // Get optimized loop nest.
   std::vector<Value> &getOptimizedLoops() { return optLoops; }
 
 private:
-  // inputs
+  // Required for emitting operations.
   ConversionPatternRewriter &rewriter;
   Location loc;
   int originalLoopNum;
-  // track loops and bounds
+
+  // List of original, un-optimized loops.
   std::vector<Value> originalLoops;
+
+  // List of optimized loops.
   std::vector<Value> optLoops;
+
+  // List of lower-upper bound pairs needed by the KrnlIterateOp.
   KrnlIterateOperandPack *pack;
+
+  // Number of lower-upper bound pairs pushed.
   int pushCount;
+
+  // Flags that keep track of emitted operations.
   bool createdDefineOp;
   bool createdOptimizeOp;
   bool createdIterateOp;
-  // insertion points (opt block, iterate)
+
+  // Saved insertion point in the code region of the KrnlOptimizeLoopsOp.
   Block *optBlock;
+
+  // Saved insertion point in the code region of the KrnlIterateOp.
   Block *iterBlock;
 };
 

From ee3e140ddb2ac2886e845e900434d071130119e6 Mon Sep 17 00:00:00 2001
From: Gheorghe-Teodor Bercea <gheorghe-teod.bercea@ibm.com>
Date: Tue, 25 Feb 2020 10:38:08 -0500
Subject: [PATCH 07/10] [NFC] Change structure of conversion folder. (#96)

* Change structure of conversion folder.

* Fix comments.

Co-authored-by: Tian Jin <tjingrant@gmail.com>
---
 src/CMakeLists.txt                            |  16 +-
 .../onnx_to_krnl/convert_onnx_to_krnl.cpp     | 428 +-----------------
 .../elementwise.inc => math/elementwise.cpp}  |   6 +-
 .../math/gemm.inc => math/gemm.cpp}           |   6 +-
 .../math/matmul.inc => math/matmul.cpp}       |   6 +-
 .../math/reduction.inc => math/reduction.cpp} |   6 +-
 .../math/softmax.inc => math/softmax.cpp}     |   6 +-
 .../nn/conv.inc => nn/conv.cpp}               |   6 +-
 .../normalization.cpp}                        |   6 +-
 .../onnx_to_krnl/onnx_to_krnl_common.cpp      | 324 +++++++++++++
 .../onnx_to_krnl/onnx_to_krnl_common.hpp      | 217 +++++++++
 .../identity.inc => tensor/identity.cpp}      |   6 +-
 .../tensor/reshape.inc => tensor/reshape.cpp} |   6 +-
 .../transpose.inc => tensor/transpose.cpp}    |   6 +-
 .../unsqueeze.inc => tensor/unsqueeze.cpp}    |   6 +-
 15 files changed, 612 insertions(+), 439 deletions(-)
 rename src/conversion/onnx_to_krnl/{rewrite_patterns/math/elementwise.inc => math/elementwise.cpp} (99%)
 rename src/conversion/onnx_to_krnl/{rewrite_patterns/math/gemm.inc => math/gemm.cpp} (98%)
 rename src/conversion/onnx_to_krnl/{rewrite_patterns/math/matmul.inc => math/matmul.cpp} (98%)
 rename src/conversion/onnx_to_krnl/{rewrite_patterns/math/reduction.inc => math/reduction.cpp} (98%)
 rename src/conversion/onnx_to_krnl/{rewrite_patterns/math/softmax.inc => math/softmax.cpp} (98%)
 rename src/conversion/onnx_to_krnl/{rewrite_patterns/nn/conv.inc => nn/conv.cpp} (98%)
 rename src/conversion/onnx_to_krnl/{rewrite_patterns/nn/normalization.inc => nn/normalization.cpp} (97%)
 create mode 100644 src/conversion/onnx_to_krnl/onnx_to_krnl_common.cpp
 create mode 100644 src/conversion/onnx_to_krnl/onnx_to_krnl_common.hpp
 rename src/conversion/onnx_to_krnl/{rewrite_patterns/tensor/identity.inc => tensor/identity.cpp} (85%)
 rename src/conversion/onnx_to_krnl/{rewrite_patterns/tensor/reshape.inc => tensor/reshape.cpp} (97%)
 rename src/conversion/onnx_to_krnl/{rewrite_patterns/tensor/transpose.inc => tensor/transpose.cpp} (96%)
 rename src/conversion/onnx_to_krnl/{rewrite_patterns/tensor/unsqueeze.inc => tensor/unsqueeze.cpp} (95%)

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index d895be5..b210275 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -62,7 +62,21 @@ target_include_directories(onnf_shape_inference
 target_link_libraries(onnf_shape_inference ${MLIRLibs})
 add_dependencies(onnf_shape_inference gen_krnl_ops)
 
-add_library(onnf_lower_frontend conversion/onnx_to_krnl/convert_onnx_to_krnl.cpp)
+add_library(onnf_lower_frontend
+        conversion/onnx_to_krnl/onnx_to_krnl_common.cpp
+        conversion/onnx_to_krnl/onnx_to_krnl_common.hpp
+        conversion/onnx_to_krnl/math/elementwise.cpp
+        conversion/onnx_to_krnl/math/gemm.cpp
+        conversion/onnx_to_krnl/math/matmul.cpp
+        conversion/onnx_to_krnl/math/reduction.cpp
+        conversion/onnx_to_krnl/math/softmax.cpp
+        conversion/onnx_to_krnl/nn/conv.cpp
+        conversion/onnx_to_krnl/nn/normalization.cpp
+        conversion/onnx_to_krnl/tensor/identity.cpp
+        conversion/onnx_to_krnl/tensor/reshape.cpp
+        conversion/onnx_to_krnl/tensor/transpose.cpp
+        conversion/onnx_to_krnl/tensor/unsqueeze.cpp
+        conversion/onnx_to_krnl/convert_onnx_to_krnl.cpp)
 target_include_directories(onnf_lower_frontend
         PRIVATE ${ONNF_SRC_ROOT} ${ONNF_BIN_ROOT}
         ${ONNF_SRC_ROOT})
diff --git a/src/conversion/onnx_to_krnl/convert_onnx_to_krnl.cpp b/src/conversion/onnx_to_krnl/convert_onnx_to_krnl.cpp
index 84d4be8..ffc7219 100644
--- a/src/conversion/onnx_to_krnl/convert_onnx_to_krnl.cpp
+++ b/src/conversion/onnx_to_krnl/convert_onnx_to_krnl.cpp
@@ -8,404 +8,11 @@
 // Krnl IR and standard operations.
 //
 //===----------------------------------------------------------------------===//
-#include <map>
 
-#include "mlir/Dialect/AffineOps/AffineOps.h"
-#include "mlir/Dialect/StandardOps/Ops.h"
-#include "mlir/Pass/Pass.h"
-#include "mlir/Transforms/DialectConversion.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/Sequence.h"
-
-#include "src/dialect/krnl/krnl_helper.hpp"
-#include "src/dialect/krnl/krnl_ops.hpp"
-#include "src/dialect/onnx/onnx_ops.hpp"
-#include "src/pass/passes.hpp"
+#include "src/conversion/onnx_to_krnl/onnx_to_krnl_common.hpp"
 
 using namespace mlir;
 
-//===----------------------------------------------------------------------===//
-// FrontendToAffine RewritePatterns
-//===----------------------------------------------------------------------===//
-
-/// Check is all dimensions are known at compile time.
-static bool hasAllConstantDimensions(MemRefType type) {
-  auto memRefShape = type.getShape();
-  for (int i = 0; i < memRefShape.size(); ++i)
-    if (memRefShape[i] < 0)
-      return false;
-  return true;
-}
-
-/// Get the corresponding MemRefType of a given TensorType/MemRefType.
-static MemRefType convertToMemRefType(Type type) {
-  MemRefType memRefType;
-  auto tensorType = type.dyn_cast<TensorType>();
-  if (tensorType) {
-    assert(tensorType.hasRank() && "expected only ranked shapes");
-    memRefType =
-        MemRefType::get(tensorType.getShape(), tensorType.getElementType());
-  } else {
-    memRefType = type.dyn_cast<MemRefType>();
-  }
-  return memRefType;
-}
-
-/// Insert an allocation and deallocation for the given MemRefType.
-static Value insertAllocAndDealloc(MemRefType type, Location loc,
-                                   PatternRewriter &rewriter,
-                                   bool insertDealloc,
-                                   ArrayRef<Value> operands = {}) {
-  // Put together alloc operands for any dynamic dimensions of the memref.
-  AllocOp alloc;
-  if (!operands.empty()) {
-    auto memRefShape = type.getShape();
-    auto rank = memRefShape.size();
-
-    std::map<int, Value> fromOperands;
-    for (int reversedIdx = 0; reversedIdx < rank; ++reversedIdx) {
-      int memRefDimIdx = rank - 1 - reversedIdx;
-      if (memRefShape[memRefDimIdx] < 0) { // unknown dimension
-        Value maxDim = nullptr;
-        for (int i = 0; i < operands.size(); i++) {
-          auto operandShape =
-              operands[i].getType().cast<MemRefType>().getShape();
-          int operandDimIdx = operandShape.size() - 1 - reversedIdx;
-
-          if (operandDimIdx < 0)
-            continue;
-
-          // In case of operations with broadcasting, the dimension of the
-          // alloc result is the maximum size along each dimension of the
-          // operands.
-          auto operandDim =
-              rewriter.create<DimOp>(loc, operands[i], operandDimIdx);
-          if (maxDim) {
-            auto maxCondition = rewriter.create<CmpIOp>(loc, CmpIPredicate::sgt,
-                                                        operandDim, maxDim);
-            maxDim = rewriter.create<SelectOp>(loc, maxCondition, operandDim,
-                                               maxDim);
-          } else {
-            maxDim = operandDim;
-          }
-        }
-        fromOperands.insert(std::make_pair(memRefDimIdx, maxDim));
-      }
-    }
-
-    SmallVector<Value, 4> allocOperands;
-    for (int i = 0; i < rank; ++i)
-      if (memRefShape[i] < 0)
-        allocOperands.push_back(fromOperands[i]);
-    alloc = rewriter.create<AllocOp>(loc, type, allocOperands);
-  } else {
-    alloc = rewriter.create<AllocOp>(loc, type);
-  }
-
-  // Make sure to allocate at the beginning of the block if
-  // all dimensions are known.
-  auto *parentBlock = alloc.getOperation()->getBlock();
-  if (hasAllConstantDimensions(type))
-    alloc.getOperation()->moveBefore(&parentBlock->front());
-
-  if (insertDealloc) {
-    auto dealloc = rewriter.create<DeallocOp>(loc, alloc);
-    dealloc.getOperation()->moveBefore(&parentBlock->back());
-  }
-
-  return alloc;
-}
-
-// Determine if current function returns the result value of the
-// current op being lowered. If it does then dealloc should not be
-// inserted.
-static bool checkInsertDealloc(Operation *currentOp) {
-  auto parentBlock = currentOp->getBlock();
-
-  bool insertDealloc = true;
-  parentBlock->walk([&insertDealloc, currentOp](ReturnOp op) {
-    assert(currentOp->getNumResults() < 2 &&
-           "No more than one result supported (for now).");
-    // If there is at least one result to investigate.
-    if (currentOp->getNumResults() > 0) {
-      auto result = currentOp->getResult(0);
-      for (const auto &operand : op.getOperands())
-        if (operand == result)
-          insertDealloc = false;
-    }
-  });
-
-  return insertDealloc;
-}
-
-// Create a mapping from result type's dimensions to input type's dimensions,
-// given that the result type is the result of a reduction op over the input
-// type.
-std::map<int64_t, int64_t>
-getReductionMapping(MemRefType inputTy, ArrayRef<int64_t> axes, bool keepdims) {
-  std::map<int64_t, int64_t> OutInDimMap;
-  int64_t rank = inputTy.getRank();
-
-  // Mark reduction axes.
-  std::vector<bool> isReductionAxis;
-  for (decltype(rank) i = 0; i < rank; ++i) {
-    if (std::find(axes.begin(), axes.end(), i) != axes.end())
-      isReductionAxis.push_back(true);
-    else
-      isReductionAxis.push_back(false);
-  }
-
-  for (decltype(rank) inIndex = 0, outIndex = 0; inIndex < rank; ++inIndex) {
-    // If it is a reduction axis, there is no relationship among dimensions.
-    if (isReductionAxis[inIndex]) {
-      if (keepdims)
-        outIndex++;
-    } else {
-      OutInDimMap.insert(std::make_pair(outIndex, inIndex));
-      outIndex++;
-    }
-  }
-
-  return OutInDimMap;
-}
-
-// Add bounds associated with the op operand to the KRNL iteration pack.
-// Dynamic dimenions are supported.
-static void addDimensionToPack(ConversionPatternRewriter &rewriter,
-                               Location loc, KrnlIterateOperandPack &pack,
-                               Value operand, int index) {
-  auto shape = operand.getType().cast<MemRefType>().getShape();
-  if (shape[index] < 0) {
-    pack.pushConstantBound(0);
-    pack.pushOperandBound(
-        rewriter.create<DimOp>(loc, operand, index).getResult());
-  } else {
-    pack.pushConstantBound(0);
-    pack.pushConstantBound(shape[index]);
-  }
-}
-
-// Function that defines the KRNL dialect loops and their respective
-// optimized version.
-static KrnlOptimizeLoopsOp
-emitOptimizedLoops(ConversionPatternRewriter &rewriter, Location loc,
-                   std::vector<Value> &loops,
-                   std::vector<Value> &optimizedLoops, int64_t numLoops) {
-  // Define loops.
-  auto loopsOp = rewriter.create<KrnlDefineLoopsOp>(loc, numLoops);
-  loops.reserve(numLoops);
-  for (auto result : loopsOp.getResults())
-    loops.push_back(result);
-
-  // Define optimized version of the loops.
-  auto optimizedLoopsOp = rewriter.create<KrnlOptimizeLoopsOp>(loc, numLoops);
-  optimizedLoops.reserve(numLoops);
-  for (auto result : optimizedLoopsOp.getResults())
-    optimizedLoops.push_back(result);
-
-  return optimizedLoopsOp;
-}
-
-// Function that emits the loops and their optimized version.
-// The function returns a reference to the inner optimization block.
-static Block *defineLoops(ConversionPatternRewriter &rewriter, Location loc,
-                          std::vector<Value> &loops,
-                          std::vector<Value> &optimizedLoops,
-                          int64_t numLoops) {
-  KrnlOptimizeLoopsOp optimizedLoopsOp =
-      emitOptimizedLoops(rewriter, loc, loops, optimizedLoops, numLoops);
-  return &optimizedLoopsOp.region().front();
-}
-
-// Function which emits a basic set of loops and optimized loops
-// for a given operation argument. A reference to the loop optimization
-// block is returned in the last argument of the function.
-static void emitKrnlLoopsAndIterationForOperand(
-    ConversionPatternRewriter &rewriter, Location loc, Value operand,
-    std::vector<Value> &originalLoops, KrnlOptimizeLoopsOp &optimizedLoopsOp,
-    KrnlIterateOp &iterateOp) {
-  // Operand shape.
-  auto shape = operand.getType().cast<MemRefType>().getShape();
-
-  // Number of loops.
-  int64_t rank = shape.size();
-
-  // Define loops and optimized loops.
-  std::vector<Value> optimizedLoops;
-  optimizedLoopsOp =
-      emitOptimizedLoops(rewriter, loc, originalLoops, optimizedLoops, rank);
-
-  KrnlIterateOperandPack pack(rewriter, originalLoops, optimizedLoops);
-  // Iterate over the loop nest.
-  for (int i = 0; i < rank; ++i)
-    addDimensionToPack(rewriter, loc, pack, operand, i);
-
-  iterateOp = rewriter.create<KrnlIterateOp>(loc, pack);
-}
-
-unsigned getMemRefEltSizeInBytes(MemRefType memRefType) {
-  auto elementType = memRefType.getElementType();
-
-  unsigned sizeInBits;
-  if (elementType.isIntOrFloat()) {
-    sizeInBits = elementType.getIntOrFloatBitWidth();
-  } else {
-    auto vectorType = elementType.cast<VectorType>();
-    sizeInBits =
-        vectorType.getElementTypeBitWidth() * vectorType.getNumElements();
-  }
-  return llvm::divideCeil(sizeInBits, 8);
-}
-
-// Get run-time dimension information for unknown dimensions used for
-// broadcasting.
-std::map<int, std::map<int, Value>>
-getBroadcastedDimInfo(Location loc, ConversionPatternRewriter &rewriter,
-                      MemRefType memRefType, ArrayRef<Value> operands) {
-  auto memRefShape = memRefType.getShape();
-  int64_t rank = memRefShape.size();
-  // For unknown dimensions, we need to get dimension values at runtime in
-  // order to do broadcasting.
-  std::map<int, std::map<int, Value>> DimInfo;
-  // For each result dimension, compute the number of sharing operands.
-  // Sharing operands are operands sharing the same index (counting from the
-  // rightmost to the leftmost) for a given dimension.
-  std::map<int, int> sharedDimCount;
-  for (int reversedIdx = 0; reversedIdx < rank; ++reversedIdx) {
-    int dimIdx = rank - 1 - reversedIdx;
-    sharedDimCount[dimIdx] = 0;
-    for (int i = 0; i < operands.size(); ++i) {
-      auto shape = operands[i].getType().cast<MemRefType>().getShape();
-      if (reversedIdx <= shape.size() - 1)
-        sharedDimCount[dimIdx]++;
-    }
-  }
-  // An unknown dimension can have a value of 1 or N (N > 1).
-  // If its value is 1, it is broadcasted dimension.
-  // Otherwise, non-broadcasted dimension.
-  // We only care about unknown dimensions whose number of sharing operands is
-  // more than one, since they are potentially broadcasted dimensions.
-  for (int i = 0; i < operands.size(); ++i) {
-    std::map<int, Value> broadcastedDims;
-    auto shape = operands[i].getType().cast<MemRefType>().getShape();
-    int size = shape.size();
-    for (int j = 0; j < shape.size(); ++j) {
-      if (shape[j] < 0 and sharedDimCount[rank - size + j] > 1) {
-        auto dim = rewriter.create<DimOp>(loc, operands[i], j).getResult();
-        auto one = rewriter.create<ConstantIndexOp>(loc, 1);
-        auto isBroadcasted =
-            rewriter.create<CmpIOp>(loc, CmpIPredicate::eq, dim, one);
-        broadcastedDims.insert(std::make_pair(j, isBroadcasted));
-      }
-    }
-    DimInfo.insert(std::make_pair(i, broadcastedDims));
-  }
-  return DimInfo;
-}
-
-// Extract induction variables that are used for broadcasting values of a
-// given operand.
-std::vector<Value>
-getLoopIVsForBroadcasting(Location loc, ConversionPatternRewriter &rewriter,
-                          ArrayRef<Value> loopIVs, Value operand,
-                          std::map<int, Value> broadcastedDims) {
-  // `operand` must has a ranked type. This should have been checked by the
-  // shape inference pass.
-  auto operandShape = operand.getType().cast<MemRefType>().getShape();
-  auto rank = operandShape.size();
-  auto loopCount = loopIVs.size();
-
-  std::vector<Value> newLoopIVs;
-  for (unsigned reversedIdx = 0; reversedIdx < rank; ++reversedIdx) {
-    auto dimIdx = rank - 1 - reversedIdx;
-    auto loopIdx = loopCount - 1 - reversedIdx;
-    if (operandShape[dimIdx] == 1) {
-      // Broadcasted dimension
-      auto zero = rewriter.create<ConstantIndexOp>(loc, 0);
-      newLoopIVs.insert(newLoopIVs.begin(), zero);
-    } else if ((operandShape[dimIdx] == -1) &&
-               (broadcastedDims.find(dimIdx) != broadcastedDims.end())) {
-      // Unknown dimension, it can have a value of 1 or N (N > 1).
-      // If its value is 1, it is broadcasted dimension.
-      // Otherwise, non-broadcasted dimension.
-      auto zero = rewriter.create<ConstantIndexOp>(loc, 0);
-      auto idx = rewriter.create<SelectOp>(loc, broadcastedDims[dimIdx], zero,
-                                           loopIVs[loopIdx]);
-      newLoopIVs.insert(newLoopIVs.begin(), idx);
-    } else {
-      // Non-broadcasted dimension
-      newLoopIVs.insert(newLoopIVs.begin(), loopIVs[loopIdx]);
-    }
-  }
-  return newLoopIVs;
-}
-
-namespace {
-
-// This is to get a scalar operation of a given type for a specific operation.
-template <typename Op>
-struct ScalarOp {
-  using FOp = void;
-  using IOp = void;
-};
-
-template <typename FOp>
-using ScalarFOp = typename ScalarOp<FOp>::FOp;
-template <typename IOp>
-using ScalarIOp = typename ScalarOp<IOp>::IOp;
-
-// Get the identity element of a operation.
-// Return NULL if the function does not have identity.
-template <typename DataType, typename Op>
-DataType getIdentityValue() {
-  return NULL;
-}
-
-//===----------------------------------------------------------------------===//
-// This is used in the innermost loop of a KrnlIterateOp to insert computation
-// composed of one or many scalar ops.
-// Use template specialization for each of different ONNX operations.
-//===----------------------------------------------------------------------===//
-template <typename Op>
-Value mapToLowerScalarOp(Operation *op, ArrayRef<Type> result_types,
-                         ArrayRef<Value> operands,
-                         ConversionPatternRewriter &rewriter) {
-  auto loc = op->getLoc();
-  Type element_type = operands.front().getType();
-  if (element_type.isa<IntegerType>()) {
-    return rewriter.create<ScalarIOp<Op>>(loc, result_types, operands,
-                                          mlir::None);
-  } else if (element_type.isa<FloatType>()) {
-    return rewriter.create<ScalarFOp<Op>>(loc, result_types, operands,
-                                          mlir::None);
-  } else {
-    emitError(loc, "unsupported element type");
-    return nullptr;
-  }
-}
-
-// We divide the operator lowering into different categories.
-// These categories are mostly similar to the operator categories in ONNX:
-// https://github.com/onnx/onnx/tree/master/onnx/defs.
-// Besides, it is better to put operators with the same computation pattern into
-// the same category, e.g. element-wise operators will belong to the elementwise
-// category.
-
-// Math
-#include "src/conversion/onnx_to_krnl/rewrite_patterns/math/elementwise.inc"
-#include "src/conversion/onnx_to_krnl/rewrite_patterns/math/gemm.inc"
-#include "src/conversion/onnx_to_krnl/rewrite_patterns/math/reduction.inc"
-#include "src/conversion/onnx_to_krnl/rewrite_patterns/math/softmax.inc"
-#include "src/conversion/onnx_to_krnl/rewrite_patterns/math/matmul.inc"
-// Tensor
-#include "src/conversion/onnx_to_krnl/rewrite_patterns/tensor/identity.inc"
-#include "src/conversion/onnx_to_krnl/rewrite_patterns/tensor/reshape.inc"
-#include "src/conversion/onnx_to_krnl/rewrite_patterns/tensor/transpose.inc"
-#include "src/conversion/onnx_to_krnl/rewrite_patterns/tensor/unsqueeze.inc"
-// Neural network
-#include "src/conversion/onnx_to_krnl/rewrite_patterns/nn/conv.inc"
-#include "src/conversion/onnx_to_krnl/rewrite_patterns/nn/normalization.inc"
-
 //===----------------------------------------------------------------------===//
 // EntryPoint Op lowering to Krnl Entry Point.
 //===----------------------------------------------------------------------===//
@@ -427,39 +34,6 @@ public:
   }
 };
 
-//===----------------------------------------------------------------------===//
-// Conversion from Tensor type to the Standard dialect MemRef type.
-//===----------------------------------------------------------------------===//
-
-struct TensorTypeConverter : public TypeConverter {
-  using TypeConverter::TypeConverter;
-
-  TensorTypeConverter() {
-    addConversion(convertType);
-  }
-
-  static LogicalResult convertType(Type t, SmallVectorImpl<Type> &results) {
-    if (auto type = convertToMemRefType(t)) {
-      results.push_back(type);
-      return success();
-    }
-
-    results.push_back(t);
-    return success();
-  }
-
-  /// Return true if the inputs and outputs of the given function type are
-  /// legal. [Taken from MLIR and adapted to only check the legality of the
-  /// inputs. Once unranked results can be handled gracefully this
-  /// override needs to be removed in favour of the original MLIR one.]
-  bool isSignatureLegal(FunctionType funcType) {
-    return llvm::all_of(funcType.getInputs(),
-                        [this](Type type) { return isLegal(type); });
-  }
-};
-
-} // end anonymous namespace.
-
 //===----------------------------------------------------------------------===//
 // Frontend to Krnl Dialect lowering pass
 //===----------------------------------------------------------------------===//
diff --git a/src/conversion/onnx_to_krnl/rewrite_patterns/math/elementwise.inc b/src/conversion/onnx_to_krnl/math/elementwise.cpp
similarity index 99%
rename from src/conversion/onnx_to_krnl/rewrite_patterns/math/elementwise.inc
rename to src/conversion/onnx_to_krnl/math/elementwise.cpp
index 945d4da..b397281 100644
--- a/src/conversion/onnx_to_krnl/rewrite_patterns/math/elementwise.inc
+++ b/src/conversion/onnx_to_krnl/math/elementwise.cpp
@@ -1,4 +1,4 @@
-//===----- elementwise.inc - Elementwise Ops ------------------------------===//
+//===----- elementwise.cpp - Elementwise Ops ------------------------------===//
 //
 // Copyright 2019 The IBM Research Authors.
 //
@@ -8,6 +8,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "src/conversion/onnx_to_krnl/onnx_to_krnl_common.hpp"
+
+using namespace mlir;
+
 template <>
 struct ScalarOp<ONNXAddOp> {
   using FOp = AddFOp;
diff --git a/src/conversion/onnx_to_krnl/rewrite_patterns/math/gemm.inc b/src/conversion/onnx_to_krnl/math/gemm.cpp
similarity index 98%
rename from src/conversion/onnx_to_krnl/rewrite_patterns/math/gemm.inc
rename to src/conversion/onnx_to_krnl/math/gemm.cpp
index ee395b5..0eed272 100644
--- a/src/conversion/onnx_to_krnl/rewrite_patterns/math/gemm.inc
+++ b/src/conversion/onnx_to_krnl/math/gemm.cpp
@@ -1,4 +1,4 @@
-//===----- gemm.inc - Lowering Gemm Op ------------------------------------===//
+//===----- gemm.cpp - Lowering Gemm Op ------------------------------------===//
 //
 // Copyright 2019 The IBM Research Authors.
 //
@@ -8,6 +8,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "src/conversion/onnx_to_krnl/onnx_to_krnl_common.hpp"
+
+using namespace mlir;
+
 template <typename GemmOp>
 struct ONNXGemmOpLowering : public ConversionPattern {
   ONNXGemmOpLowering(MLIRContext *ctx)
diff --git a/src/conversion/onnx_to_krnl/rewrite_patterns/math/matmul.inc b/src/conversion/onnx_to_krnl/math/matmul.cpp
similarity index 98%
rename from src/conversion/onnx_to_krnl/rewrite_patterns/math/matmul.inc
rename to src/conversion/onnx_to_krnl/math/matmul.cpp
index 1af1f1b..a3cb26a 100644
--- a/src/conversion/onnx_to_krnl/rewrite_patterns/math/matmul.inc
+++ b/src/conversion/onnx_to_krnl/math/matmul.cpp
@@ -1,4 +1,4 @@
-//===----- matmul.inc - Lowering Matmul Op --------------------------------===//
+//===----- matmul.cpp - Lowering Matmul Op --------------------------------===//
 //
 // Copyright 2019 The IBM Research Authors.
 //
@@ -8,6 +8,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "src/conversion/onnx_to_krnl/onnx_to_krnl_common.hpp"
+
+using namespace mlir;
+
 struct ONNXMatMulOpLowering : public ConversionPattern {
   ONNXMatMulOpLowering(MLIRContext *ctx)
       : ConversionPattern(mlir::ONNXMatMulOp::getOperationName(), 1, ctx) {}
diff --git a/src/conversion/onnx_to_krnl/rewrite_patterns/math/reduction.inc b/src/conversion/onnx_to_krnl/math/reduction.cpp
similarity index 98%
rename from src/conversion/onnx_to_krnl/rewrite_patterns/math/reduction.inc
rename to src/conversion/onnx_to_krnl/math/reduction.cpp
index 9b94861..42b074a 100644
--- a/src/conversion/onnx_to_krnl/rewrite_patterns/math/reduction.inc
+++ b/src/conversion/onnx_to_krnl/math/reduction.cpp
@@ -1,4 +1,4 @@
-//===----- reduction.inc - Lowering Reduction Ops -------------------------===//
+//===----- reduction.cpp - Lowering Reduction Ops -------------------------===//
 //
 // Copyright 2019 The IBM Research Authors.
 //
@@ -8,6 +8,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "src/conversion/onnx_to_krnl/onnx_to_krnl_common.hpp"
+
+using namespace mlir;
+
 // Identity values
 template <>
 float getIdentityValue<float, ONNXReduceMaxOp>(){
diff --git a/src/conversion/onnx_to_krnl/rewrite_patterns/math/softmax.inc b/src/conversion/onnx_to_krnl/math/softmax.cpp
similarity index 98%
rename from src/conversion/onnx_to_krnl/rewrite_patterns/math/softmax.inc
rename to src/conversion/onnx_to_krnl/math/softmax.cpp
index 3f24a6e..3277635 100644
--- a/src/conversion/onnx_to_krnl/rewrite_patterns/math/softmax.inc
+++ b/src/conversion/onnx_to_krnl/math/softmax.cpp
@@ -1,4 +1,4 @@
-//===----- softmax.inc - Softmax Op ---------------------------------------===//
+//===----- softmax.cpp - Softmax Op ---------------------------------------===//
 //
 // Copyright 2019 The IBM Research Authors.
 //
@@ -8,6 +8,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "src/conversion/onnx_to_krnl/onnx_to_krnl_common.hpp"
+
+using namespace mlir;
+
 struct ONNXSoftmaxOpLowering : public ConversionPattern {
   ONNXSoftmaxOpLowering(MLIRContext *ctx)
       : ConversionPattern(mlir::ONNXSoftmaxOp::getOperationName(), 1, ctx) {}
diff --git a/src/conversion/onnx_to_krnl/rewrite_patterns/nn/conv.inc b/src/conversion/onnx_to_krnl/nn/conv.cpp
similarity index 98%
rename from src/conversion/onnx_to_krnl/rewrite_patterns/nn/conv.inc
rename to src/conversion/onnx_to_krnl/nn/conv.cpp
index 6e3afe1..851668a 100644
--- a/src/conversion/onnx_to_krnl/rewrite_patterns/nn/conv.inc
+++ b/src/conversion/onnx_to_krnl/nn/conv.cpp
@@ -1,4 +1,4 @@
-//===----- conv.inc - Lowering Convolution Op -----------------------------===//
+//===----- conv.cpp - Lowering Convolution Op -----------------------------===//
 //
 // Copyright 2019 The IBM Research Authors.
 //
@@ -8,6 +8,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "src/conversion/onnx_to_krnl/onnx_to_krnl_common.hpp"
+
+using namespace mlir;
+
 struct ONNXConvNoBiasOpLowering : public ConversionPattern {
   ONNXConvNoBiasOpLowering(MLIRContext *ctx)
       : ConversionPattern(mlir::ONNXConvNoBiasOp::getOperationName(), 1, ctx) {}
diff --git a/src/conversion/onnx_to_krnl/rewrite_patterns/nn/normalization.inc b/src/conversion/onnx_to_krnl/nn/normalization.cpp
similarity index 97%
rename from src/conversion/onnx_to_krnl/rewrite_patterns/nn/normalization.inc
rename to src/conversion/onnx_to_krnl/nn/normalization.cpp
index cb98b13..d151f0a 100644
--- a/src/conversion/onnx_to_krnl/rewrite_patterns/nn/normalization.inc
+++ b/src/conversion/onnx_to_krnl/nn/normalization.cpp
@@ -1,4 +1,4 @@
-//===----- normalization.inc - Lowering Normalization Ops -----------------===//
+//===----- normalization.cpp - Lowering Normalization Ops -----------------===//
 //
 // Copyright 2019 The IBM Research Authors.
 //
@@ -8,6 +8,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "src/conversion/onnx_to_krnl/onnx_to_krnl_common.hpp"
+
+using namespace mlir;
+
 struct ONNXBatchNormalizationTestModeOpLowering : public ConversionPattern {
   ONNXBatchNormalizationTestModeOpLowering(MLIRContext *ctx)
       : ConversionPattern(
diff --git a/src/conversion/onnx_to_krnl/onnx_to_krnl_common.cpp b/src/conversion/onnx_to_krnl/onnx_to_krnl_common.cpp
new file mode 100644
index 0000000..16bc499
--- /dev/null
+++ b/src/conversion/onnx_to_krnl/onnx_to_krnl_common.cpp
@@ -0,0 +1,324 @@
+//====-- onnx_to_krnl_common.cpp - ONNX dialects to Krnl lowering ---------===//
+//
+// Copyright 2019 The IBM Research Authors.
+//
+// =============================================================================
+//
+// This file contains common code shared by the functions performing the
+// lowering to the KRNL dialect.
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/conversion/onnx_to_krnl/onnx_to_krnl_common.hpp"
+
+/// Check if all dimensions are known at compile time.
+bool hasAllConstantDimensions(MemRefType type) {
+  auto memRefShape = type.getShape();
+  for (int i = 0; i < memRefShape.size(); ++i)
+    if (memRefShape[i] < 0)
+      return false;
+  return true;
+}
+
+/// Get the corresponding MemRefType of a given TensorType/MemRefType.
+MemRefType convertToMemRefType(Type type) {
+  MemRefType memRefType;
+  auto tensorType = type.dyn_cast<TensorType>();
+  if (tensorType) {
+    assert(tensorType.hasRank() && "expected only ranked shapes");
+    memRefType =
+        MemRefType::get(tensorType.getShape(), tensorType.getElementType());
+  } else {
+    memRefType = type.dyn_cast<MemRefType>();
+  }
+  return memRefType;
+}
+
+/// Insert an allocation and deallocation for the given MemRefType.
+Value insertAllocAndDealloc(MemRefType type, Location loc,
+                                   PatternRewriter &rewriter,
+                                   bool insertDealloc,
+                                   ArrayRef<Value> operands) {
+  // Put together alloc operands for any dynamic dimensions of the memref.
+  AllocOp alloc;
+  if (!operands.empty()) {
+    auto memRefShape = type.getShape();
+    auto rank = memRefShape.size();
+
+    std::map<int, Value> fromOperands;
+    for (int reversedIdx = 0; reversedIdx < rank; ++reversedIdx) {
+      int memRefDimIdx = rank - 1 - reversedIdx;
+      if (memRefShape[memRefDimIdx] < 0) { // unknown dimension
+        Value maxDim = nullptr;
+        for (int i = 0; i < operands.size(); i++) {
+          auto operandShape =
+              operands[i].getType().cast<MemRefType>().getShape();
+          int operandDimIdx = operandShape.size() - 1 - reversedIdx;
+
+          if (operandDimIdx < 0)
+            continue;
+
+          // In case of operations with broadcasting, the dimension of the
+          // alloc result is the maximum size along each dimension of the
+          // operands.
+          auto operandDim =
+              rewriter.create<DimOp>(loc, operands[i], operandDimIdx);
+          if (maxDim) {
+            auto maxCondition = rewriter.create<CmpIOp>(loc, CmpIPredicate::sgt,
+                                                        operandDim, maxDim);
+            maxDim = rewriter.create<SelectOp>(loc, maxCondition, operandDim,
+                                               maxDim);
+          } else {
+            maxDim = operandDim;
+          }
+        }
+        fromOperands.insert(std::make_pair(memRefDimIdx, maxDim));
+      }
+    }
+
+    SmallVector<Value, 4> allocOperands;
+    for (int i = 0; i < rank; ++i)
+      if (memRefShape[i] < 0)
+        allocOperands.push_back(fromOperands[i]);
+    alloc = rewriter.create<AllocOp>(loc, type, allocOperands);
+  } else {
+    alloc = rewriter.create<AllocOp>(loc, type);
+  }
+
+  // Make sure to allocate at the beginning of the block if
+  // all dimensions are known.
+  auto *parentBlock = alloc.getOperation()->getBlock();
+  if (hasAllConstantDimensions(type))
+    alloc.getOperation()->moveBefore(&parentBlock->front());
+
+  if (insertDealloc) {
+    auto dealloc = rewriter.create<DeallocOp>(loc, alloc);
+    dealloc.getOperation()->moveBefore(&parentBlock->back());
+  }
+
+  return alloc;
+}
+
+// Determine if current function returns the result value of the
+// current op being lowered. If it does then dealloc should not be
+// inserted.
+bool checkInsertDealloc(Operation *currentOp) {
+  auto parentBlock = currentOp->getBlock();
+
+  bool insertDealloc = true;
+  parentBlock->walk([&insertDealloc, currentOp](ReturnOp op) {
+    assert(currentOp->getNumResults() < 2 &&
+           "No more than one result supported (for now).");
+    // If there is at least one result to investigate.
+    if (currentOp->getNumResults() > 0) {
+      auto result = currentOp->getResult(0);
+      for (const auto &operand : op.getOperands())
+        if (operand == result)
+          insertDealloc = false;
+    }
+  });
+
+  return insertDealloc;
+}
+
+// Create a mapping from result type's dimensions to input type's dimensions,
+// given that the result type is the result of a reduction op over the input
+// type.
+std::map<int64_t, int64_t>
+getReductionMapping(MemRefType inputTy, ArrayRef<int64_t> axes, bool keepdims) {
+  std::map<int64_t, int64_t> OutInDimMap;
+  int64_t rank = inputTy.getRank();
+
+  // Mark reduction axes.
+  std::vector<bool> isReductionAxis;
+  for (decltype(rank) i = 0; i < rank; ++i) {
+    if (std::find(axes.begin(), axes.end(), i) != axes.end())
+      isReductionAxis.push_back(true);
+    else
+      isReductionAxis.push_back(false);
+  }
+
+  for (decltype(rank) inIndex = 0, outIndex = 0; inIndex < rank; ++inIndex) {
+    // If it is a reduction axis, there is no relationship among dimensions.
+    if (isReductionAxis[inIndex]) {
+      if (keepdims)
+        outIndex++;
+    } else {
+      OutInDimMap.insert(std::make_pair(outIndex, inIndex));
+      outIndex++;
+    }
+  }
+
+  return OutInDimMap;
+}
+
+// Add bounds associated with the op operand to the KRNL iteration pack.
+// Dynamic dimensions are supported.
+void addDimensionToPack(ConversionPatternRewriter &rewriter,
+                               Location loc, KrnlIterateOperandPack &pack,
+                               Value operand, int index) {
+  auto shape = operand.getType().cast<MemRefType>().getShape();
+  if (shape[index] < 0) {
+    pack.pushConstantBound(0);
+    pack.pushOperandBound(
+        rewriter.create<DimOp>(loc, operand, index).getResult());
+  } else {
+    pack.pushConstantBound(0);
+    pack.pushConstantBound(shape[index]);
+  }
+}
+
+// Create a KrnlDefineLoopsOp and a KrnlOptimizeLoopsOp from `numLoops`, append
+// their results to `loops` and `optimizedLoops`, and return the optimize op.
+KrnlOptimizeLoopsOp
+emitOptimizedLoops(ConversionPatternRewriter &rewriter, Location loc,
+                   std::vector<Value> &loops,
+                   std::vector<Value> &optimizedLoops, int64_t numLoops) {
+  // Define loops.
+  auto loopsOp = rewriter.create<KrnlDefineLoopsOp>(loc, numLoops);
+  loops.reserve(numLoops);
+  for (auto result : loopsOp.getResults())
+    loops.push_back(result); // Appended: existing entries are kept.
+
+  // Define optimized version of the loops.
+  auto optimizedLoopsOp = rewriter.create<KrnlOptimizeLoopsOp>(loc, numLoops);
+  optimizedLoops.reserve(numLoops);
+  for (auto result : optimizedLoopsOp.getResults())
+    optimizedLoops.push_back(result); // Appended: existing entries are kept.
+
+  return optimizedLoopsOp;
+}
+
+// Function that emits the loops and their optimized version.
+// It returns a pointer to the first block of the optimize op's region.
+Block *defineLoops(ConversionPatternRewriter &rewriter, Location loc,
+                          std::vector<Value> &loops,
+                          std::vector<Value> &optimizedLoops,
+                          int64_t numLoops) {
+  KrnlOptimizeLoopsOp optimizedLoopsOp =
+      emitOptimizedLoops(rewriter, loc, loops, optimizedLoops, numLoops);
+  return &optimizedLoopsOp.region().front();
+}
+
+// Function which emits a basic set of loops and optimized loops covering
+// every dimension of `operand`. The optimize op and the iterate op that were
+// created are returned through `optimizedLoopsOp` and `iterateOp`.
+void emitKrnlLoopsAndIterationForOperand(
+    ConversionPatternRewriter &rewriter, Location loc, Value operand,
+    std::vector<Value> &originalLoops, KrnlOptimizeLoopsOp &optimizedLoopsOp,
+    KrnlIterateOp &iterateOp) {
+  // Operand shape.
+  auto shape = operand.getType().cast<MemRefType>().getShape();
+
+  // Number of loops: one per operand dimension.
+  int64_t rank = shape.size();
+
+  // Define loops and optimized loops.
+  std::vector<Value> optimizedLoops;
+  optimizedLoopsOp =
+      emitOptimizedLoops(rewriter, loc, originalLoops, optimizedLoops, rank);
+
+  KrnlIterateOperandPack pack(rewriter, originalLoops, optimizedLoops);
+  // Add the bounds of every operand dimension to the pack.
+  for (int i = 0; i < rank; ++i)
+    addDimensionToPack(rewriter, loc, pack, operand, i);
+
+  iterateOp = rewriter.create<KrnlIterateOp>(loc, pack);
+}
+
+unsigned getMemRefEltSizeInBytes(MemRefType memRefType) {
+  auto elementType = memRefType.getElementType();
+
+  unsigned sizeInBits; // Assigned by exactly one of the branches below.
+  if (elementType.isIntOrFloat()) {
+    sizeInBits = elementType.getIntOrFloatBitWidth();
+  } else {
+    auto vectorType = elementType.cast<VectorType>(); // Only other case.
+    sizeInBits =
+        vectorType.getElementTypeBitWidth() * vectorType.getNumElements();
+  }
+  return llvm::divideCeil(sizeInBits, 8); // Bits to bytes, rounded up.
+}
+
+// For each operand index, map every unknown dimension that may be broadcast
+// to a run-time value holding the result of comparing that dimension to 1.
+std::map<int, std::map<int, Value>>
+getBroadcastedDimInfo(Location loc, ConversionPatternRewriter &rewriter,
+                      MemRefType memRefType, ArrayRef<Value> operands) {
+  auto memRefShape = memRefType.getShape();
+  int64_t rank = memRefShape.size();
+  // For unknown dimensions, we need to get dimension values at runtime in
+  // order to do broadcasting.
+  std::map<int, std::map<int, Value>> DimInfo;
+  // For each result dimension, compute the number of sharing operands.
+  // Sharing operands are operands sharing the same index (counting from the
+  // rightmost to the leftmost) for a given dimension.
+  std::map<int, int> sharedDimCount;
+  for (int reversedIdx = 0; reversedIdx < rank; ++reversedIdx) {
+    int dimIdx = rank - 1 - reversedIdx;
+    sharedDimCount[dimIdx] = 0;
+    for (int i = 0; i < operands.size(); ++i) {
+      auto shape = operands[i].getType().cast<MemRefType>().getShape();
+      if (reversedIdx < shape.size()) // `size() - 1` would wrap for rank 0.
+        sharedDimCount[dimIdx]++;
+    }
+  }
+  // An unknown dimension can have a value of 1 or N (N > 1).
+  // If its value is 1, it is a broadcasted dimension.
+  // Otherwise, it is a non-broadcasted dimension.
+  // We only care about unknown dimensions whose number of sharing operands is
+  // more than one, since they are potentially broadcasted dimensions.
+  for (int i = 0; i < operands.size(); ++i) {
+    std::map<int, Value> broadcastedDims;
+    auto shape = operands[i].getType().cast<MemRefType>().getShape();
+    int size = shape.size();
+    for (int j = 0; j < shape.size(); ++j) {
+      if (shape[j] < 0 && sharedDimCount[rank - size + j] > 1) {
+        auto dim = rewriter.create<DimOp>(loc, operands[i], j).getResult();
+        auto one = rewriter.create<ConstantIndexOp>(loc, 1);
+        auto isBroadcasted =
+            rewriter.create<CmpIOp>(loc, CmpIPredicate::eq, dim, one);
+        broadcastedDims.insert(std::make_pair(j, isBroadcasted));
+      }
+    }
+    DimInfo.insert(std::make_pair(i, broadcastedDims));
+  }
+  return DimInfo;
+}
+
+// Pick, per operand dimension, the index to use under broadcasting: constant
+// 0, a run-time select between 0 and the loop IV, or the loop IV itself.
+std::vector<Value>
+getLoopIVsForBroadcasting(Location loc, ConversionPatternRewriter &rewriter,
+                          ArrayRef<Value> loopIVs, Value operand,
+                          std::map<int, Value> broadcastedDims) {
+  // `operand` must have a ranked type. This should have been checked by the
+  // shape inference pass.
+  auto operandShape = operand.getType().cast<MemRefType>().getShape();
+  auto rank = operandShape.size();
+  auto loopCount = loopIVs.size();
+
+  std::vector<Value> newLoopIVs;
+  for (unsigned reversedIdx = 0; reversedIdx < rank; ++reversedIdx) {
+    auto dimIdx = rank - 1 - reversedIdx;
+    auto loopIdx = loopCount - 1 - reversedIdx; // Assumes loopCount >= rank.
+    if (operandShape[dimIdx] == 1) {
+      // Broadcasted dimension
+      auto zero = rewriter.create<ConstantIndexOp>(loc, 0);
+      newLoopIVs.insert(newLoopIVs.begin(), zero);
+    } else if ((operandShape[dimIdx] == -1) &&
+               (broadcastedDims.find(dimIdx) != broadcastedDims.end())) {
+      // Unknown dimension, it can have a value of 1 or N (N > 1).
+      // If its value is 1, it is a broadcasted dimension.
+      // Otherwise, it is a non-broadcasted dimension.
+      auto zero = rewriter.create<ConstantIndexOp>(loc, 0);
+      auto idx = rewriter.create<SelectOp>(loc, broadcastedDims[dimIdx], zero,
+                                           loopIVs[loopIdx]);
+      newLoopIVs.insert(newLoopIVs.begin(), idx);
+    } else {
+      // Non-broadcasted dimension
+      newLoopIVs.insert(newLoopIVs.begin(), loopIVs[loopIdx]);
+    }
+  }
+  return newLoopIVs;
+}
diff --git a/src/conversion/onnx_to_krnl/onnx_to_krnl_common.hpp b/src/conversion/onnx_to_krnl/onnx_to_krnl_common.hpp
new file mode 100644
index 0000000..bd22d95
--- /dev/null
+++ b/src/conversion/onnx_to_krnl/onnx_to_krnl_common.hpp
@@ -0,0 +1,217 @@
+//====-- onnx_to_krnl_common.hpp - ONNX dialects to Krnl lowering ---------===//
+//
+// Copyright 2019 The IBM Research Authors.
+//
+// =============================================================================
+//
+// This file contains common code shared by the functions performing the
+// lowering to the KRNL dialect.
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include <map>
+
+#include "mlir/Dialect/AffineOps/AffineOps.h"
+#include "mlir/Dialect/StandardOps/Ops.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Transforms/DialectConversion.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Sequence.h"
+#include "mlir/IR/PatternMatch.h"
+
+#include "src/dialect/krnl/krnl_helper.hpp"
+#include "src/dialect/krnl/krnl_ops.hpp"
+#include "src/dialect/onnx/onnx_ops.hpp"
+#include "src/pass/passes.hpp"
+
+using namespace mlir;
+
+//===----------------------------------------------------------------------===//
+// Common functions used when lowering the ONNX frontend dialect to KRNL.
+//===----------------------------------------------------------------------===//
+
+/// Check if all dimensions are known at compile time.
+bool hasAllConstantDimensions(MemRefType type);
+
+/// Get the corresponding MemRefType of a given TensorType/MemRefType.
+MemRefType convertToMemRefType(Type type);
+
+/// Insert an allocation and deallocation for the given MemRefType.
+Value insertAllocAndDealloc(MemRefType type, Location loc,
+                                   PatternRewriter &rewriter,
+                                   bool insertDealloc,
+                                   ArrayRef<Value> operands = {});
+
+// Determine if current function returns the result value of the
+// current op being lowered. If it does then dealloc should not be
+// inserted.
+bool checkInsertDealloc(Operation *currentOp);
+
+// Create a mapping from result type's dimensions to input type's dimensions,
+// given that the result type is the result of a reduction op over the input
+// type.
+std::map<int64_t, int64_t>
+getReductionMapping(MemRefType inputTy, ArrayRef<int64_t> axes, bool keepdims);
+
+// Add bounds associated with the op operand to the KRNL iteration pack.
+// Dynamic dimensions are supported.
+void addDimensionToPack(ConversionPatternRewriter &rewriter,
+                               Location loc, KrnlIterateOperandPack &pack,
+                               Value operand, int index);
+
+// Function that defines the KRNL dialect loops and their respective
+// optimized version.
+KrnlOptimizeLoopsOp
+emitOptimizedLoops(ConversionPatternRewriter &rewriter, Location loc,
+                   std::vector<Value> &loops,
+                   std::vector<Value> &optimizedLoops, int64_t numLoops);
+
+// Function that emits the loops and their optimized version.
+// It returns a pointer to the first block of the optimize op's region.
+Block *defineLoops(ConversionPatternRewriter &rewriter, Location loc,
+                          std::vector<Value> &loops,
+                          std::vector<Value> &optimizedLoops,
+                          int64_t numLoops);
+
+// Function which emits a basic set of loops and optimized loops covering
+// every dimension of `operand`. The optimize op and the iterate op that were
+// created are returned through `optimizedLoopsOp` and `iterateOp`.
+void emitKrnlLoopsAndIterationForOperand(
+    ConversionPatternRewriter &rewriter, Location loc, Value operand,
+    std::vector<Value> &originalLoops, KrnlOptimizeLoopsOp &optimizedLoopsOp,
+    KrnlIterateOp &iterateOp);
+
+unsigned getMemRefEltSizeInBytes(MemRefType memRefType);
+
+// Get run-time dimension information for unknown dimensions used for
+// broadcasting.
+std::map<int, std::map<int, Value>>
+getBroadcastedDimInfo(Location loc, ConversionPatternRewriter &rewriter,
+                      MemRefType memRefType, ArrayRef<Value> operands);
+
+// Extract induction variables that are used for broadcasting values of a
+// given operand.
+std::vector<Value>
+getLoopIVsForBroadcasting(Location loc, ConversionPatternRewriter &rewriter,
+                          ArrayRef<Value> loopIVs, Value operand,
+                          std::map<int, Value> broadcastedDims);
+
+//===----------------------------------------------------------------------===//
+// This is to get a scalar operation of a given type for a specific operation.
+//===----------------------------------------------------------------------===//
+template <typename Op>
+struct ScalarOp {
+  using FOp = void; // Float-element op; void in this primary template.
+  using IOp = void; // Integer-element op; void in this primary template.
+};
+
+template <typename FOp>
+using ScalarFOp = typename ScalarOp<FOp>::FOp;
+template <typename IOp>
+using ScalarIOp = typename ScalarOp<IOp>::IOp;
+
+// Get the identity element of an operation.
+// Return NULL if the operation does not have an identity.
+template <typename DataType, typename Op>
+DataType getIdentityValue() {
+  return NULL;
+}
+
+//===----------------------------------------------------------------------===//
+// This is used in the innermost loop of a KrnlIterateOp to insert computation
+// composed of one or many scalar ops.
+// Use template specialization for each of different ONNX operations.
+//===----------------------------------------------------------------------===//
+template <typename Op>
+Value mapToLowerScalarOp(Operation *op, ArrayRef<Type> result_types,
+                         ArrayRef<Value> operands,
+                         ConversionPatternRewriter &rewriter) {
+  auto loc = op->getLoc();
+  Type element_type = operands.front().getType(); // First operand decides.
+  if (element_type.isa<IntegerType>()) {
+    return rewriter.create<ScalarIOp<Op>>(loc, result_types, operands,
+                                          mlir::None);
+  } else if (element_type.isa<FloatType>()) {
+    return rewriter.create<ScalarFOp<Op>>(loc, result_types, operands,
+                                          mlir::None);
+  } else {
+    emitError(loc, "unsupported element type");
+    return nullptr; // Null Value after the error has been emitted.
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// Conversion from Tensor type to the Standard dialect MemRef type.
+//===----------------------------------------------------------------------===//
+
+struct TensorTypeConverter : public TypeConverter {
+  using TypeConverter::TypeConverter;
+
+  TensorTypeConverter() {
+    addConversion(convertType); // Register the static callback below.
+  }
+
+  static LogicalResult convertType(Type t, SmallVectorImpl<Type> &results) {
+    if (auto type = convertToMemRefType(t)) {
+      results.push_back(type);
+      return success();
+    }
+
+    results.push_back(t); // No MemRef type was produced: keep `t` as is.
+    return success();
+  }
+
+  /// Return true if the inputs and outputs of the given function type are
+  /// legal. [Taken from MLIR and adapted to only check the legality of the
+  /// inputs. Once unranked results can be handled gracefully this
+  /// override needs to be removed in favour of the original MLIR one.]
+  bool isSignatureLegal(FunctionType funcType) {
+    return llvm::all_of(funcType.getInputs(),
+                        [this](Type type) { return isLegal(type); });
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// Functions to add lowering patterns for frontend operations.
+//===----------------------------------------------------------------------===//
+
+// `math` directory methods:
+
+void populateLoweringONNXElementwiseOpPattern(
+    OwningRewritePatternList &patterns, MLIRContext *ctx);
+
+void populateLoweringONNXGemmOpPattern(OwningRewritePatternList &patterns,
+                                       MLIRContext *ctx);
+
+void populateLoweringONNXMatMulOpPattern(
+    OwningRewritePatternList &patterns, MLIRContext *ctx);
+
+void populateLoweringONNXReductionOpPattern(
+    OwningRewritePatternList &patterns, MLIRContext *ctx);
+
+void populateLoweringONNXSoftmaxOpPattern(
+    OwningRewritePatternList &patterns, MLIRContext *ctx);
+
+// `nn` directory methods:
+
+void populateLoweringONNXConvOpPattern(
+    OwningRewritePatternList &patterns, MLIRContext *ctx);
+
+void populateLoweringONNXNormalizationOpPattern(
+    OwningRewritePatternList &patterns, MLIRContext *ctx);
+
+// `tensor` directory methods:
+
+void populateLoweringONNXUnsqueezeOpPattern(
+    OwningRewritePatternList &patterns, MLIRContext *ctx);
+
+void populateLoweringONNXTransposeOpPattern(
+    OwningRewritePatternList &patterns, MLIRContext *ctx);
+
+void populateLoweringONNXReshapeOpPattern(
+    OwningRewritePatternList &patterns, MLIRContext *ctx);
+
+void populateLoweringONNXIdentityOpPattern(
+    OwningRewritePatternList &patterns, MLIRContext *ctx);
diff --git a/src/conversion/onnx_to_krnl/rewrite_patterns/tensor/identity.inc b/src/conversion/onnx_to_krnl/tensor/identity.cpp
similarity index 85%
rename from src/conversion/onnx_to_krnl/rewrite_patterns/tensor/identity.inc
rename to src/conversion/onnx_to_krnl/tensor/identity.cpp
index 2ff1633..45985af 100644
--- a/src/conversion/onnx_to_krnl/rewrite_patterns/tensor/identity.inc
+++ b/src/conversion/onnx_to_krnl/tensor/identity.cpp
@@ -1,4 +1,4 @@
-//===----- identity.inc - Lowering Identity Op ----------------------------===//
+//===----- identity.cpp - Lowering Identity Op ----------------------------===//
 //
 // Copyright 2019 The IBM Research Authors.
 //
@@ -8,6 +8,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "src/conversion/onnx_to_krnl/onnx_to_krnl_common.hpp"
+
+using namespace mlir;
+
 struct ONNXIdentityOpLowering : public ConversionPattern {
   ONNXIdentityOpLowering(MLIRContext *ctx)
       : ConversionPattern(mlir::ONNXIdentityOp::getOperationName(), 1, ctx) {}
diff --git a/src/conversion/onnx_to_krnl/rewrite_patterns/tensor/reshape.inc b/src/conversion/onnx_to_krnl/tensor/reshape.cpp
similarity index 97%
rename from src/conversion/onnx_to_krnl/rewrite_patterns/tensor/reshape.inc
rename to src/conversion/onnx_to_krnl/tensor/reshape.cpp
index b64494f..6489a71 100644
--- a/src/conversion/onnx_to_krnl/rewrite_patterns/tensor/reshape.inc
+++ b/src/conversion/onnx_to_krnl/tensor/reshape.cpp
@@ -1,4 +1,4 @@
-//===----- reshape.inc - Lowering Reshape Op ------------------------------===//
+//===----- reshape.cpp - Lowering Reshape Op ------------------------------===//
 //
 // Copyright 2019 The IBM Research Authors.
 //
@@ -8,6 +8,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "src/conversion/onnx_to_krnl/onnx_to_krnl_common.hpp"
+
+using namespace mlir;
+
 struct ONNXReshapeOpLowering : public ConversionPattern {
   ONNXReshapeOpLowering(MLIRContext *ctx)
       : ConversionPattern(mlir::ONNXReshapeOp::getOperationName(), 1, ctx) {}
diff --git a/src/conversion/onnx_to_krnl/rewrite_patterns/tensor/transpose.inc b/src/conversion/onnx_to_krnl/tensor/transpose.cpp
similarity index 96%
rename from src/conversion/onnx_to_krnl/rewrite_patterns/tensor/transpose.inc
rename to src/conversion/onnx_to_krnl/tensor/transpose.cpp
index 3bb897a..0a6c8f4 100644
--- a/src/conversion/onnx_to_krnl/rewrite_patterns/tensor/transpose.inc
+++ b/src/conversion/onnx_to_krnl/tensor/transpose.cpp
@@ -1,4 +1,4 @@
-//===----- transpose.inc - Lowering Transpose Op --------------------------===//
+//===----- transpose.cpp - Lowering Transpose Op --------------------------===//
 //
 // Copyright 2019 The IBM Research Authors.
 //
@@ -8,6 +8,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "src/conversion/onnx_to_krnl/onnx_to_krnl_common.hpp"
+
+using namespace mlir;
+
 struct ONNXTransposeOpLowering : public ConversionPattern {
   ONNXTransposeOpLowering(MLIRContext *ctx)
       : ConversionPattern(mlir::ONNXTransposeOp::getOperationName(), 1, ctx) {}
diff --git a/src/conversion/onnx_to_krnl/rewrite_patterns/tensor/unsqueeze.inc b/src/conversion/onnx_to_krnl/tensor/unsqueeze.cpp
similarity index 95%
rename from src/conversion/onnx_to_krnl/rewrite_patterns/tensor/unsqueeze.inc
rename to src/conversion/onnx_to_krnl/tensor/unsqueeze.cpp
index 6d5289d..070a91c 100644
--- a/src/conversion/onnx_to_krnl/rewrite_patterns/tensor/unsqueeze.inc
+++ b/src/conversion/onnx_to_krnl/tensor/unsqueeze.cpp
@@ -1,4 +1,4 @@
-//===----- unsqueeze.inc - Lowering Unsqueeze Op --------------------------===//
+//===----- unsqueeze.cpp - Lowering Unsqueeze Op --------------------------===//
 //
 // Copyright 2019 The IBM Research Authors.
 //
@@ -8,6 +8,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "src/conversion/onnx_to_krnl/onnx_to_krnl_common.hpp"
+
+using namespace mlir;
+
 struct ONNXUnsqueezeOpLowering : public ConversionPattern {
   ONNXUnsqueezeOpLowering(MLIRContext *ctx)
       : ConversionPattern(mlir::ONNXUnsqueezeOp::getOperationName(), 1, ctx) {}

From e02aa877480960aebafbe1f3015e17ed44ce982a Mon Sep 17 00:00:00 2001
From: Tian Jin <tjingrant@gmail.com>
Date: Wed, 26 Feb 2020 00:18:37 +0800
Subject: [PATCH 08/10] Update gitignore file to ignore Filesystem artifacts
 and python related temporary files. (#103)

Co-authored-by: Gheorghe-Teodor Bercea <gt.bercea@gmail.com>
---
 .gitignore | 142 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 142 insertions(+)

diff --git a/.gitignore b/.gitignore
index 259148f..7f8814f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -30,3 +30,145 @@
 *.exe
 *.out
 *.app
+
+# Filesystem
+.DS_Store
+
+# The following .gitignore content is taken from
+# https://github.com/github/gitignore/blob/master/Python.gitignore
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/

From 3b1c29c0785772781b92be7a0c32ad89e92e941f Mon Sep 17 00:00:00 2001
From: Alexandre Eichenberger <alexe@us.ibm.com>
Date: Tue, 25 Feb 2020 14:33:48 -0500
Subject: [PATCH 09/10] Using attribute setters for maxpool (#105)

* using attribute setters for maxpool

* fix typos, added handling of storage order, simplified code
---
 src/dialect/onnx/onnx_ops.cpp                 | 182 +++++++++---------
 .../onnx/onnx_shape_inference_maxpool.mlir    |  20 +-
 2 files changed, 104 insertions(+), 98 deletions(-)

diff --git a/src/dialect/onnx/onnx_ops.cpp b/src/dialect/onnx/onnx_ops.cpp
index 5d93020..3666318 100644
--- a/src/dialect/onnx/onnx_ops.cpp
+++ b/src/dialect/onnx/onnx_ops.cpp
@@ -24,12 +24,29 @@
 using namespace mlir;
 using namespace mlir::OpTrait::util;
 
+//===----------------------------------------------------------------------===//
+// ONNX Helper functions
+//===----------------------------------------------------------------------===//
+
+static size_t ArrayAttrSize(ArrayAttr a) { return a.size(); }
+
+static size_t ArrayAttrSize(Optional<ArrayAttr> a) {
+  return a.getValue().size(); // Unwraps `a`; assumes it holds a value.
+}
+
+static int64_t ArrayAttrIntVal(ArrayAttr a, int i) {
+  return (a.getValue()[i]).cast<IntegerAttr>().getInt(); // i-th element.
+}
+
+static int64_t ArrayAttrIntVal(Optional<ArrayAttr> a, int i) {
+  return (a.getValue().getValue()[i]).cast<IntegerAttr>().getInt();
+}
+
 //===----------------------------------------------------------------------===//
 // Get reduction type
 //===----------------------------------------------------------------------===//
-RankedTensorType getReductionOutputType(RankedTensorType operandTy,
-                                        Optional<ArrayAttr> axesAttrs,
-                                        APInt keepdims) {
+RankedTensorType getReductionOutputType(
+    RankedTensorType operandTy, Optional<ArrayAttr> axesAttrs, APInt keepdims) {
   int64_t rank = operandTy.getRank();
 
   SmallVector<int64_t, 4> axes;
@@ -87,19 +104,18 @@ ONNXOpsDialect::ONNXOpsDialect(mlir::MLIRContext *ctx)
 }
 
 void ONNXEntryPointOp::build(mlir::Builder *builder,
-                             mlir::OperationState &state, mlir::FuncOp function,
-                             int numInputs, int numOutputs) {
+    mlir::OperationState &state, mlir::FuncOp function, int numInputs,
+    int numOutputs) {
   state.addAttribute(ONNXEntryPointOp::getEntryPointFuncAttrName(),
-                     builder->getSymbolRefAttr(function));
+      builder->getSymbolRefAttr(function));
   state.addAttribute(ONNXEntryPointOp::getNumInputsAttrName(),
-                     builder->getI32IntegerAttr(numInputs));
+      builder->getI32IntegerAttr(numInputs));
   state.addAttribute(ONNXEntryPointOp::getNumOutputsAttrName(),
-                     builder->getI32IntegerAttr(numOutputs));
+      builder->getI32IntegerAttr(numOutputs));
 }
 
 ONNXEntryPointOp ONNXEntryPointOp::create(mlir::Location location,
-                                          mlir::FuncOp &func, int numInputs,
-                                          int numOutputs) {
+    mlir::FuncOp &func, int numInputs, int numOutputs) {
   mlir::OperationState state(location, "onnx.EntryPoint");
   Builder builder(location->getContext());
   mlir::ONNXEntryPointOp::build(&builder, state, func, numInputs, numOutputs);
@@ -552,9 +568,9 @@ void ONNXGemmOp::inferShapes() {
     int rank = shape.size();
     if ((rank > 2) ||
         (rank >= 1 && shape[rank - 1] != -1 && N != -1 &&
-         N != shape[rank - 1] && shape[rank - 1] != 1) ||
+            N != shape[rank - 1] && shape[rank - 1] != 1) ||
         (rank == 2 && shape[rank - 2] != -1 && M != -1 &&
-         M != shape[rank - 2] && shape[rank - 2] != 1)) {
+            M != shape[rank - 2] && shape[rank - 2] != 1)) {
       emitError("Bias shape mismatched.");
     }
   }
@@ -885,111 +901,103 @@ void ONNXConvNoBiasOp::inferShapes() {
 //===----------------------------------------------------------------------===//
 
 // MaxPoolSingleOut
+// Infer shape attributes output:
+//   -  auto_pad set to NOTSET;
+//   -  dilations, strides: set to 1 if not defined by user;
+//   -  pads: set to proper value, 0 if not defined by user.
 
 void ONNXMaxPoolSingleOutOp::inferShapes() {
   // Cannot infer shape if no shape exists.
   if (!X().getType().isa<RankedTensorType>())
     return;
+  auto builder = mlir::Builder(this->getContext());
 
-  // 1) get shape of input
+  // 1) Get shape of input.
   auto xTy = X().getType().cast<RankedTensorType>();
   auto xShape = xTy.getShape();
   auto xRank = xShape.size();
 
-  // 2) analyse parameters
-  // get kernel sizes from kernel_shape attribute
+  // 2) Analyse parameters. Get kernel sizes from kernel_shape attribute.
   auto kernelShape = kernel_shape();
   if (!kernelShape)
     emitError(
-        "kernel_shape is a mandatory attribute for which there is no default.");
-  auto kernelShapeArray = kernelShape.getValue();
-  auto kernelRank = kernelShape.size();
+        "kernel_shape is a mandatory attribute for which there is no default");
+  auto kernelRank = ArrayAttrSize(kernelShape);
   if (kernelRank > xRank)
-    emitError("kernel_shape spatial dimension is too large.");
+    emitError("kernel_shape spatial dimension is too large");
   auto kernelOffset = xRank - kernelRank;
 
-  // ceil mode
+  // Ceil mode.
   auto ceilMode = ceil_mode().getSExtValue();
 
-  // dilatation
-  SmallVector<int64_t, 4> actualDilations;
+  // Dilatation.
   auto dilationsOpt = dilations();
   if (dilationsOpt.hasValue()) {
-    auto dilationsArray =
-        dilationsOpt.getValue().getValue(); // opt -> attr -> array
-    if (dilationsArray.size() != kernelRank)
-      emitError("dialation rank is not the same as the spatial rank.");
-    // fill in the actual values
+    if (ArrayAttrSize(dilationsOpt) != kernelRank)
+      emitError("dialation rank is not the same as the spatial rank");
+    // Test values.
     for (int i = 0; i < kernelRank; ++i) {
-      int64_t d = (dilationsArray[i]).cast<IntegerAttr>().getInt();
-      if (d < 1)
-        emitError("dialation value must be nonzero positive.");
-      actualDilations.emplace_back(d);
+      if (ArrayAttrIntVal(dilationsOpt, i) < 1)
+        emitError("dialation value must be nonzero positive");
     }
   } else {
-    for (int i = 0; i < kernelRank; ++i) {
-      actualDilations.emplace_back(1);
-    }
+    // Default dilatation is needed.
+    SmallVector<int64_t, 4> defaultVals(kernelRank, 1);
+    // Convert to ArrayRef, then build attribute, then store attribute.
+    ArrayRef<int64_t> defaultRefs(defaultVals);
+    auto defaultAttr = builder.getI64ArrayAttr(defaultRefs);
+    dilationsAttr(defaultAttr);
+    dilationsOpt = dilations();
   }
 
-  // storage order
+  // Storage order.
+  auto storageOrder = storage_order().getSExtValue();
+  if (storageOrder != 0)
+    emitError("column major storage order not supported at this time");
 
-  // strides
-  SmallVector<int64_t, 4> actualStrides;
+  // Strides.
   auto stridesOpt = strides();
   if (stridesOpt.hasValue()) {
-    auto stridesArray = stridesOpt.getValue().getValue();
-    if (stridesArray.size() != kernelRank)
-      emitError("strides rank is not the same as the spatial rank.");
-    // fill in the actual values
+    if (ArrayAttrSize(stridesOpt) != kernelRank)
+      emitError("strides rank is not the same as the spatial rank");
+    // Check values.
     for (int i = 0; i < kernelRank; ++i) {
-      int64_t s = (stridesArray[i]).cast<IntegerAttr>().getInt();
-      if (s < 1)
-        emitError("strides value must be nonzero positive.");
-      actualStrides.emplace_back(s);
+      if (ArrayAttrIntVal(stridesOpt, i) < 1)
+        emitError("strides value must be nonzero positive");
     }
   } else {
-    for (int i = 0; i < kernelRank; ++i) {
-      actualStrides.emplace_back(1);
-    }
+    SmallVector<int64_t, 4> defaultVals(kernelRank, 1);
+    // Convert to ArrayRef, then build attribute, then store attribute.
+    ArrayRef<int64_t> defaultRefs(defaultVals);
+    auto defaultAttr = builder.getI64ArrayAttr(defaultRefs);
+    stridesAttr(defaultAttr);
+    stridesOpt = strides();
   }
 
-  // now try to find padding, getting auto_pad attribute first
+  // Now try to find padding, getting auto_pad attribute first.
   auto autoPad = auto_pad();
-  // and then investigate the various different cases
-  SmallVector<int64_t, 4> actualPads;
-  auto defaultPads = false;
+  // And then investigate the various different cases.
+  SmallVector<int64_t, 4> actualPads(2 * kernelRank, 0);
   if (autoPad == "NOTSET") {
     auto padsOpt = pads();
     if (padsOpt.hasValue()) {
-      auto padsArray = padsOpt.getValue().getValue();
-      // pads consists of two entries for each spatial axis.
-      if (padsArray.size() != 2 * kernelRank)
-        emitError("pads rank is not twice the spatial rank.");
-      // fill in the actual values
+      // Pads consists of two entries for each spatial axis.
+      if (ArrayAttrSize(padsOpt) != 2 * kernelRank)
+        emitError("pads rank is not twice the spatial rank");
+      // Check values
       for (int i = 0; i < 2 * kernelRank; ++i) {
-        int64_t p = (padsArray[i]).cast<IntegerAttr>().getInt();
+        int64_t p = ArrayAttrIntVal(padsOpt, i);
         if (p < 0)
-          emitError("pads value must be nonnegative.");
-        actualPads.emplace_back(p);
+          emitError("pads value must be nonnegative");
+        actualPads[i] = p;
       }
-    } else {
-      // pads are not defined, default to value 0
-      defaultPads = true;
     }
-  } else if (autoPad == "VALID") {
-    defaultPads = true;
   } else if (autoPad == "SAME_UPPER" || autoPad == "SAME_LOWER") {
-    // init pad with zero
-    for (int i = 0; i < 2 * kernelRank; ++i) {
-      actualPads.emplace_back(0);
-    }
     for (int i = 0; i < kernelRank; ++i) {
       auto inputSpatialShape = xShape[kernelOffset + i];
-      auto kernelSpatialShape =
-          (kernelShapeArray[i]).cast<IntegerAttr>().getInt();
-      auto dilations = actualDilations[i];
-      auto strideSpatialShape = actualStrides[i];
+      auto kernelSpatialShape = ArrayAttrIntVal(kernelShape, i);
+      auto dilations = ArrayAttrIntVal(dilationsOpt, i);
+      auto strideSpatialShape = ArrayAttrIntVal(stridesOpt, i);
       int64_t outputSpatialShape =
           ceil((1.0 * inputSpatialShape) / (1.0 * strideSpatialShape));
       auto sumOfPad = (outputSpatialShape - 1) * strideSpatialShape +
@@ -1004,29 +1012,27 @@ void ONNXMaxPoolSingleOutOp::inferShapes() {
         }
       }
     }
-  } else {
+  } else if (autoPad != "VALID") {
     emitError("auto_pad of unknown / unsupported value.");
   }
-  // handle case where default pad values must be used
-  if (defaultPads) {
-    for (int i = 0; i < 2 * kernelRank; ++i) {
-      actualPads.emplace_back(0);
-    }
+  // Set pads values in attributes.
+  {
+    ArrayRef<int64_t> defaultRefs(actualPads);
+    auto defaultAttr = builder.getI64ArrayAttr(defaultRefs);
+    padsAttr(defaultAttr);
+    auto defaultAutoPadAttr = builder.getStringAttr("NOTSET");
+    auto_padAttr(defaultAutoPadAttr);
   }
 
-  // initialize output shape
+  // Initialize output shape.
   SmallVector<int64_t, 4> yShape(xShape.begin(), xShape.end());
-  // for all kernel dimensions
+  // Process for all kernel dimensions.
   for (int i = 0; i < kernelRank; ++i) {
     auto inputSpatialShape = xShape[kernelOffset + i];
     auto padShape = actualPads[i] + actualPads[kernelRank + i];
-    auto kernelSpatialShape =
-        (kernelShapeArray[i]).cast<IntegerAttr>().getInt();
-    auto dilations = actualDilations[i];
-    auto strideSpatialShape = actualStrides[i];
-    /// output_spatial_shape[i] = ceil( (input_spatial_shape[i] + pad_shape[i] -
-    //  ((kernel_spatial_shape[i] - 1) * dilations[i] + 1)) /
-    //  strides_spatial_shape[i] + 1)
+    auto kernelSpatialShape = ArrayAttrIntVal(kernelShape, i);
+    auto dilations = ArrayAttrIntVal(dilationsOpt, i);
+    auto strideSpatialShape = ArrayAttrIntVal(stridesOpt, i);
     double numerator = inputSpatialShape + padShape -
                        ((kernelSpatialShape - 1) * dilations + 1);
     double denominator = strideSpatialShape;
diff --git a/test/mlir/onnx/onnx_shape_inference_maxpool.mlir b/test/mlir/onnx/onnx_shape_inference_maxpool.mlir
index 3ebaf34..1d83b8b 100644
--- a/test/mlir/onnx/onnx_shape_inference_maxpool.mlir
+++ b/test/mlir/onnx/onnx_shape_inference_maxpool.mlir
@@ -6,7 +6,7 @@ func @test_default_maxpoolsingleout(%arg0 : tensor<5x5x32x32xf32>) -> tensor<*xf
   "std.return"(%0) : (tensor<*xf32>) -> ()
 }
 // CHECK-LABEL: test_default_maxpoolsingleout
-// CHECK: [[RES:%.+]] = "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "VALID", ceil_mode = 0 : i64, kernel_shape = [3, 3], pads = [1, 1, 1, 1]} : (tensor<5x5x32x32xf32>) -> tensor<5x5x30x30xf32>
+// CHECK: [[RES:%.+]] = "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "NOTSET", ceil_mode = 0 : i64, dilations = [1, 1], kernel_shape = [3, 3], pads = [0, 0, 0, 0], strides = [1, 1]} : (tensor<5x5x32x32xf32>) -> tensor<5x5x30x30xf32>
 // CHECK: return [[RES]] : tensor<5x5x30x30xf32>
 
 
@@ -16,7 +16,7 @@ func @test_default_maxpoolsingleout_defpad(%arg0 : tensor<5x5x32x32xf32>) -> ten
   "std.return"(%0) : (tensor<*xf32>) -> ()
 }
 // CHECK-LABEL: test_default_maxpoolsingleout_defpad
-// CHECK: [[RES:%.+]] = "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "NOTSET", ceil_mode = 0 : i64, kernel_shape = [3, 3]} : (tensor<5x5x32x32xf32>) -> tensor<5x5x30x30xf32>
+// CHECK: [[RES:%.+]] = "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "NOTSET", ceil_mode = 0 : i64, dilations = [1, 1], kernel_shape = [3, 3], pads = [0, 0, 0, 0], strides = [1, 1]} : (tensor<5x5x32x32xf32>) -> tensor<5x5x30x30xf32>
 // CHECK: return [[RES]] : tensor<5x5x30x30xf32>
 
 
@@ -26,7 +26,7 @@ func @test_default_maxpoolsingleout_pad(%arg0 : tensor<5x5x32x32xf32>) -> tensor
   "std.return"(%0) : (tensor<*xf32>) -> ()
 }
 // CHECK-LABEL: test_default_maxpoolsingleout_pad
-// CHECK: [[RES:%.+]] = "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "NOTSET", ceil_mode = 0 : i64, kernel_shape = [3, 3], pads = [1, 1, 1, 1]} : (tensor<5x5x32x32xf32>) -> tensor<5x5x32x32xf32>
+// CHECK: [[RES:%.+]] = "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "NOTSET", ceil_mode = 0 : i64, dilations = [1, 1], kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [1, 1]} : (tensor<5x5x32x32xf32>) -> tensor<5x5x32x32xf32>
 // CHECK: return [[RES]] : tensor<5x5x32x32xf32>
 
 
@@ -36,7 +36,7 @@ func @test_default_maxpoolsingleout_pad_nonunif(%arg0 : tensor<5x5x32x32xf32>) -
   "std.return"(%0) : (tensor<*xf32>) -> ()
 }
 // CHECK-LABEL: test_default_maxpoolsingleout_pad_nonunif
-// CHECK: [[RES:%.+]] =  "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "NOTSET", ceil_mode = 0 : i64, kernel_shape = [5, 3], pads = [2, 1, 1, 0]} : (tensor<5x5x32x32xf32>) -> tensor<5x5x31x31xf32>
+// CHECK: [[RES:%.+]] = "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "NOTSET", ceil_mode = 0 : i64, dilations = [1, 1], kernel_shape = [5, 3], pads = [2, 1, 1, 0], strides = [1, 1]} : (tensor<5x5x32x32xf32>) -> tensor<5x5x31x31xf32>
 // CHECK: return [[RES]] : tensor<5x5x31x31xf32>
 
 
@@ -46,7 +46,7 @@ func @test_default_maxpoolsingleout_strides(%arg0 : tensor<5x5x32x32xf32>) -> te
   "std.return"(%0) : (tensor<*xf32>) -> ()
 }
 // CHECK-LABEL: test_default_maxpoolsingleout_strides
-// CHECK: [[RES:%.+]] =  "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "NOTSET", ceil_mode = 0 : i64, kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [2, 2]} : (tensor<5x5x32x32xf32>) -> tensor<5x5x16x16xf32>
+// CHECK: [[RES:%.+]] = "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "NOTSET", ceil_mode = 0 : i64, dilations = [1, 1], kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [2, 2]} : (tensor<5x5x32x32xf32>) -> tensor<5x5x16x16xf32>
 // CHECK: return [[RES]] : tensor<5x5x16x16xf32>
 
 
@@ -56,7 +56,7 @@ func @test_default_maxpoolsingleout_strides_nonunifpad(%arg0 : tensor<5x5x30x32x
   "std.return"(%0) : (tensor<*xf32>) -> ()
 }
 // CHECK-LABEL: test_default_maxpoolsingleout_strides_nonunifpad
-// CHECK: [[RES:%.+]] =  "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "NOTSET", ceil_mode = 0 : i64, kernel_shape = [2, 2], pads = [1, 0, 0, 0], strides = [2, 2]} : (tensor<5x5x30x32xf32>) -> tensor<5x5x15x16xf32>
+// CHECK: [[RES:%.+]] = "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "NOTSET", ceil_mode = 0 : i64, dilations = [1, 1], kernel_shape = [2, 2], pads = [1, 0, 0, 0], strides = [2, 2]} : (tensor<5x5x30x32xf32>) -> tensor<5x5x15x16xf32>
 // CHECK: return [[RES]] : tensor<5x5x15x16xf32>
 
 
@@ -66,7 +66,7 @@ func @test_default_maxpoolsingleout_strides_nonunifpad_ceil(%arg0 : tensor<5x5x3
   "std.return"(%0) : (tensor<*xf32>) -> ()
 }
 // CHECK-LABEL: test_default_maxpoolsingleout_strides_nonunifpad_ceil
-// CHECK: [[RES:%.+]] =  "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "NOTSET", ceil_mode = 1 : i64, kernel_shape = [2, 2], pads = [1, 0, 0, 0], strides = [2, 2]} : (tensor<5x5x30x32xf32>) -> tensor<5x5x16x16xf32>
+// CHECK: [[RES:%.+]] = "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "NOTSET", ceil_mode = 1 : i64, dilations = [1, 1], kernel_shape = [2, 2], pads = [1, 0, 0, 0], strides = [2, 2]} : (tensor<5x5x30x32xf32>) -> tensor<5x5x16x16xf32>
 // CHECK: return [[RES]] : tensor<5x5x16x16xf32>
 
 
@@ -76,7 +76,7 @@ func @test_default_maxpoolsingleout_strides_dilatation(%arg0 : tensor<5x5x8x8xf3
   "std.return"(%0) : (tensor<*xf32>) -> ()
 }
 // CHECK-LABEL: test_default_maxpoolsingleout_strides_dilatation
-// CHECK: [[RES:%.+]] =  "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "NOTSET", ceil_mode = 0 : i64, dilations = [2, 2], kernel_shape = [2, 2], strides = [3, 3]} : (tensor<5x5x8x8xf32>) -> tensor<5x5x2x2xf32>
+// CHECK: [[RES:%.+]] = "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "NOTSET", ceil_mode = 0 : i64, dilations = [2, 2], kernel_shape = [2, 2], pads = [0, 0, 0, 0], strides = [3, 3]} : (tensor<5x5x8x8xf32>) -> tensor<5x5x2x2xf32>
 // CHECK: return [[RES]] : tensor<5x5x2x2xf32>
 
 /// Test the default behavior of Max Pool with dilatation
@@ -85,7 +85,7 @@ func @test_default_maxpoolsingleout_upper(%arg0 : tensor<5x5x16x13xf32>) -> tens
   "std.return"(%0) : (tensor<*xf32>) -> ()
 }
 // CHECK-LABEL: test_default_maxpoolsingleout_upper
-// CHECK: [[RES:%.+]] = "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "SAME_UPPER", ceil_mode = 0 : i64, kernel_shape = [4, 4], strides = [4, 4]} : (tensor<5x5x16x13xf32>) -> tensor<5x5x4x4xf32>
+// CHECK: [[RES:%.+]] = "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "NOTSET", ceil_mode = 0 : i64, dilations = [1, 1], kernel_shape = [4, 4], pads = [0, 1, 0, 2], strides = [4, 4]} : (tensor<5x5x16x13xf32>) -> tensor<5x5x4x4xf32>
 // CHECK: return [[RES]] : tensor<5x5x4x4xf32>
 
 
@@ -95,6 +95,6 @@ func @test_default_maxpoolsingleout_lower(%arg0 : tensor<5x5x16x13xf32>) -> tens
   "std.return"(%0) : (tensor<*xf32>) -> ()
 }
 // CHECK-LABEL: test_default_maxpoolsingleout_lower
-// CHECK: [[RES:%.+]] = "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "SAME_LOWER", ceil_mode = 0 : i64, kernel_shape = [4, 4], strides = [4, 4]} : (tensor<5x5x16x13xf32>) -> tensor<5x5x4x4xf32>
+// CHECK: [[RES:%.+]] = "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "NOTSET", ceil_mode = 0 : i64, dilations = [1, 1], kernel_shape = [4, 4], pads = [0, 2, 0, 1], strides = [4, 4]} : (tensor<5x5x16x13xf32>) -> tensor<5x5x4x4xf32>
 // CHECK: return [[RES]] : tensor<5x5x4x4xf32>
 

From 3a88361b17db5ce421ac4494bd866bdceadbc81f Mon Sep 17 00:00:00 2001
From: Alexandre Eichenberger <alexe@us.ibm.com>
Date: Tue, 25 Feb 2020 15:46:11 -0500
Subject: [PATCH 10/10] use input/output operation names, use helper for
 attribute function and int values (#106)

---
 src/dialect/onnx/onnx_ops.cpp | 173 +++++++++++++++++-----------------
 1 file changed, 84 insertions(+), 89 deletions(-)

diff --git a/src/dialect/onnx/onnx_ops.cpp b/src/dialect/onnx/onnx_ops.cpp
index 3666318..ed18086 100644
--- a/src/dialect/onnx/onnx_ops.cpp
+++ b/src/dialect/onnx/onnx_ops.cpp
@@ -406,12 +406,12 @@ void ONNXIdentityOp::inferShapes() {
 
 void ONNXMatMulOp::inferShapes() {
   // Cannot infer shape if no shape exists.
-  if (!getOperand(0).getType().isa<RankedTensorType>() ||
-      !getOperand(1).getType().isa<RankedTensorType>())
+  if (!A().getType().isa<RankedTensorType>() ||
+      !B().getType().isa<RankedTensorType>())
     return;
 
-  auto lhsTy = getOperand(0).getType().cast<RankedTensorType>();
-  auto rhsTy = getOperand(1).getType().cast<RankedTensorType>();
+  auto lhsTy = A().getType().cast<RankedTensorType>();
+  auto rhsTy = B().getType().cast<RankedTensorType>();
 
   SmallVector<int64_t, 2> dims;
   auto lhsShape = lhsTy.getShape();
@@ -419,14 +419,14 @@ void ONNXMatMulOp::inferShapes() {
 
   if (lhsShape.size() < 1 && rhsShape.size() < 1) {
     // Multiplication by scalars is not allowed.
-    emitError("Multiplication by scalar arguments not allowed.");
+    emitError("Multiplication by scalar arguments not allowed");
   } else if (lhsShape.size() == 1 && rhsShape.size() == 1) {
     // Special case when both arrays are 1-dimensional and according to
     // numpy rules the types need to be extended to 1xN and Nx1. Helper sizes
     // need to be removed after the multiplication but cannot be removed if all
     // sizes are 1.
     if (lhsShape[0] != -1 && rhsShape[0] != -1 && lhsShape[0] != rhsShape[0])
-      emitError("Attempt to multiply incompatible matrices.");
+      emitError("Attempt to multiply incompatible matrices");
     dims.emplace_back(1);
   } else if (lhsShape.size() == 1 && rhsShape.size() >= 2) {
     // If the first argument is 1-D, it is promoted to a matrix by prepending a
@@ -441,7 +441,7 @@ void ONNXMatMulOp::inferShapes() {
     unsigned rhsRank = rhsShape.size();
     if (lhsShape[0] != -1 && rhsShape[rhsRank - 2] != -1 &&
         lhsShape[0] != rhsShape[rhsRank - 2])
-      emitError("Attempt to multiply incompatible matrices.");
+      emitError("Attempt to multiply incompatible matrices");
 
     for (decltype(rhsRank) i = 0; i < rhsRank - 2; ++i)
       dims.emplace_back(rhsShape[i]);
@@ -459,7 +459,7 @@ void ONNXMatMulOp::inferShapes() {
     unsigned lhsRank = lhsShape.size();
     if (lhsShape[lhsRank - 1] != -1 && rhsShape[0] != -1 &&
         lhsShape[lhsRank - 1] != rhsShape[0])
-      emitError("Attempt to multiply incompatible matrices.");
+      emitError("Attempt to multiply incompatible matrices");
 
     for (decltype(lhsRank) i = 0; i < lhsRank - 2; ++i)
       dims.emplace_back(lhsShape[i]);
@@ -473,7 +473,7 @@ void ONNXMatMulOp::inferShapes() {
     unsigned lhsRank = lhsShape.size();
     if (lhsShape[lhsRank - 1] != -1 && rhsShape[0] != -1 &&
         lhsShape[lhsRank - 1] != rhsShape[0])
-      emitError("Attempt to multiply incompatible matrices.");
+      emitError("Attempt to multiply incompatible matrices");
 
     for (decltype(lhsRank) i = 0; i < lhsRank - 1; ++i)
       dims.emplace_back(lhsShape[i]);
@@ -487,7 +487,7 @@ void ONNXMatMulOp::inferShapes() {
     unsigned rhsRank = rhsShape.size();
     if (lhsShape[1] != -1 && rhsShape[rhsRank - 2] != -1 &&
         lhsShape[1] != rhsShape[rhsRank - 2])
-      emitError("Attempt to multiply incompatible matrices.");
+      emitError("Attempt to multiply incompatible matrices");
 
     for (decltype(rhsRank) i = 0; i < rhsRank - 2; ++i)
       dims.emplace_back(rhsShape[i]);
@@ -503,7 +503,7 @@ void ONNXMatMulOp::inferShapes() {
     unsigned rhsRank = rhsShape.size();
     if (lhsShape[lhsRank - 1] != -1 && rhsShape[rhsRank - 2] != -1 &&
         lhsShape[lhsRank - 1] != rhsShape[rhsRank - 2])
-      emitError("Attempt to multiply incompatible matrices.");
+      emitError("Attempt to multiply incompatible matrices");
 
     // Check and perform broadcasting for the shapes.
     SmallVector<int64_t, 2> lhsBcastShape;
@@ -513,7 +513,7 @@ void ONNXMatMulOp::inferShapes() {
     for (decltype(rhsRank) i = 0; i < rhsRank - 2; ++i)
       rhsBcastShape.emplace_back(rhsShape[i]);
     if (!getBroadcastedShape(lhsBcastShape, rhsBcastShape, dims))
-      emitError("Broadcasted dimensions are incompatible.");
+      emitError("Broadcasted dimensions are incompatible");
 
     dims.emplace_back(lhsShape[lhsRank - 2]);
     dims.emplace_back(rhsShape[rhsRank - 1]);
@@ -528,7 +528,7 @@ void ONNXMatMulOp::inferShapes() {
 
     // Check legality of matrix multiplication.
     if (lhsDim != -1 && rhsDim != -1 && lhsDim != rhsDim)
-      emitError("Attempt to multiply incompatible matrices.");
+      emitError("Attempt to multiply incompatible matrices");
 
     if (rhsShape.size() > 1)
       dims.emplace_back(rhsShape[1]);
@@ -542,14 +542,14 @@ void ONNXMatMulOp::inferShapes() {
 // Gemm
 
 void ONNXGemmOp::inferShapes() {
-  bool hasBias = !getOperand(2).getType().isa<NoneType>();
+  bool hasBias = !C().getType().isa<NoneType>();
   // Cannot infer shape if no shape exists.
-  if (!getOperand(0).getType().isa<RankedTensorType>() ||
-      !getOperand(1).getType().isa<RankedTensorType>() ||
-      (hasBias && !getOperand(2).getType().isa<RankedTensorType>()))
+  if (!A().getType().isa<RankedTensorType>() ||
+      !B().getType().isa<RankedTensorType>() ||
+      (hasBias && !C().getType().isa<RankedTensorType>()))
     return;
-  auto lhsTy = getOperand(0).getType().cast<RankedTensorType>();
-  auto rhsTy = getOperand(1).getType().cast<RankedTensorType>();
+  auto lhsTy = A().getType().cast<RankedTensorType>();
+  auto rhsTy = B().getType().cast<RankedTensorType>();
 
   int64_t M, N, K_A, K_B;
   M = (transA() == 0) ? lhsTy.getShape()[0] : lhsTy.getShape()[1];
@@ -558,12 +558,12 @@ void ONNXGemmOp::inferShapes() {
   K_B = (transB() == 0) ? rhsTy.getShape()[0] : rhsTy.getShape()[1];
 
   if ((K_A != -1) and (K_B != -1) and (K_A != K_B)) {
-    emitError("Tensor shapes mismatched.");
+    emitError("Tensor shapes mismatched");
   }
 
   if (hasBias) {
     // Check whether bias is unidirectional broadcasting or not.
-    auto biasTy = getOperand(2).getType().cast<RankedTensorType>();
+    auto biasTy = C().getType().cast<RankedTensorType>();
     auto shape = biasTy.getShape();
     int rank = shape.size();
     if ((rank > 2) ||
@@ -571,7 +571,7 @@ void ONNXGemmOp::inferShapes() {
             N != shape[rank - 1] && shape[rank - 1] != 1) ||
         (rank == 2 && shape[rank - 2] != -1 && M != -1 &&
             M != shape[rank - 2] && shape[rank - 2] != 1)) {
-      emitError("Bias shape mismatched.");
+      emitError("Bias shape mismatched");
     }
   }
 
@@ -584,50 +584,50 @@ void ONNXGemmOp::inferShapes() {
 /// BatchNormalizationTestMode
 void ONNXBatchNormalizationTestModeOp::inferShapes() {
   // Cannot infer shape if no shape exists.
-  if (!getOperand(0).getType().isa<RankedTensorType>() ||
-      !getOperand(1).getType().isa<RankedTensorType>() ||
-      !getOperand(2).getType().isa<RankedTensorType>() ||
-      !getOperand(3).getType().isa<RankedTensorType>() ||
-      !getOperand(4).getType().isa<RankedTensorType>())
+  if (!X().getType().isa<RankedTensorType>() ||
+      !scale().getType().isa<RankedTensorType>() ||
+      !B().getType().isa<RankedTensorType>() ||
+      !mean().getType().isa<RankedTensorType>() ||
+      !var().getType().isa<RankedTensorType>())
     return;
 
-  auto input = getOperand(0).getType().cast<RankedTensorType>();
-  auto scale = getOperand(1).getType().cast<RankedTensorType>();
-  auto bias = getOperand(2).getType().cast<RankedTensorType>();
-  auto mean = getOperand(3).getType().cast<RankedTensorType>();
-  auto variance = getOperand(4).getType().cast<RankedTensorType>();
+  auto inputTensorTy = X().getType().cast<RankedTensorType>();
+  auto scaleTensorTy = scale().getType().cast<RankedTensorType>();
+  auto biasTensorTy = B().getType().cast<RankedTensorType>();
+  auto meanTensorTy = mean().getType().cast<RankedTensorType>();
+  auto varianceTensorTy = var().getType().cast<RankedTensorType>();
 
   // Check whether the shapes of scale, bias, mean and variance are valid.
   // Operand's dimensions can be in the form of NxCxD1xD2x...xDn or N.
   // In case of N, C is assumed to be 1.
   // Shapes of scale, bias, mean and variance must be C.
   int64_t c = -1;
-  if (input.getShape().size() == 1) {
+  if (inputTensorTy.getShape().size() == 1) {
     c = 1;
-  } else if (input.getShape().size() > 2) {
-    c = (input.getShape()[1] != -1) ? input.getShape()[1] : -1;
+  } else if (inputTensorTy.getShape().size() > 2) {
+    c = (inputTensorTy.getShape()[1] != -1) ? inputTensorTy.getShape()[1] : -1;
   } else {
-    emitError("Wrong rank for the input.");
+    emitError("Wrong rank for the input");
   }
 
   if (c != -1) {
-    auto s = scale.getShape();
-    auto b = bias.getShape();
-    auto m = mean.getShape();
-    auto v = variance.getShape();
+    auto s = scaleTensorTy.getShape();
+    auto b = biasTensorTy.getShape();
+    auto m = meanTensorTy.getShape();
+    auto v = varianceTensorTy.getShape();
 
     if ((s.size() != 1) || (s[0] != -1 && s[0] != c))
-      emitError("Wrong rank for the scale.");
+      emitError("Wrong rank for the scale");
     if ((b.size() != 1) || (b[0] != -1 && b[0] != c))
-      emitError("Wrong rank for the bias.");
+      emitError("Wrong rank for the bias");
     if ((m.size() != 1) || (m[0] != -1 && m[0] != c))
-      emitError("Wrong rank for the mean.");
+      emitError("Wrong rank for the mean");
     if ((v.size() != 1) || (v[0] != -1 && v[0] != c))
-      emitError("Wrong rank for the variance.");
+      emitError("Wrong rank for the variance");
   }
 
   // The output tensor of the same shape as the input.
-  getResult().setType(getOperand(0).getType());
+  getResult().setType(X().getType());
 }
 
 // TODO:
@@ -640,21 +640,21 @@ void ONNXBatchNormalizationTestModeOp::inferShapes() {
 
 void ONNXReshapeOp::inferShapes() {
   // Cannot infer shape if no shape tensor is specified.
-  if (!getOperand(1).getType().isa<RankedTensorType>())
-    emitError("Shape tensor not ranked.");
+  if (!shape().getType().isa<RankedTensorType>())
+    emitError("Shape tensor not ranked");
 
-  auto inputTensorTy = getOperand(0).getType().cast<RankedTensorType>();
-  auto shapeTensorTy = getOperand(1).getType().cast<RankedTensorType>();
+  auto inputTensorTy = data().getType().cast<RankedTensorType>();
+  auto shapeTensorTy = shape().getType().cast<RankedTensorType>();
 
   // Only rank 1 shape tensors are supported.
   if (shapeTensorTy.getShape().size() != 1)
-    emitError("Shape tensor must have rank one.");
+    emitError("Shape tensor must have rank one");
 
   int64_t outputRank = shapeTensorTy.getShape()[0];
 
   // Shape tensor must have constant shape.
   if (outputRank < 0)
-    emitError("Shape tensor must have constant shape.");
+    emitError("Shape tensor must have constant shape");
 
   SmallVector<int64_t, 2> dims;
   for (int i = 0; i < outputRank; ++i)
@@ -670,12 +670,12 @@ void ONNXReshapeOp::inferShapes() {
 
 void ONNXTransposeOp::inferShapes() {
   // Cannot infer shape if no shape exists.
-  if (!getOperand().getType().isa<RankedTensorType>())
+  if (!data().getType().isa<RankedTensorType>())
     return;
 
   // Naive transposition which handles the default case of
   // reversing the shape of the tensor (similar to numpy.transpose).
-  auto arrayTy = getOperand().getType().cast<RankedTensorType>();
+  auto arrayTy = data().getType().cast<RankedTensorType>();
   SmallVector<int64_t, 2> dims;
   auto permutation = ONNXTransposeOp::permAttr();
   if (permutation) {
@@ -697,7 +697,7 @@ void ONNXTransposeOp::inferShapes() {
 
 void ONNXReduceMaxOp::inferShapes() {
   if (!getOperand().getType().isa<RankedTensorType>()) {
-    emitError("Shape tensor not ranked.");
+    emitError("Shape tensor not ranked");
     return;
   }
 
@@ -711,7 +711,7 @@ void ONNXReduceMaxOp::inferShapes() {
 
 void ONNXReduceMinOp::inferShapes() {
   if (!getOperand().getType().isa<RankedTensorType>()) {
-    emitError("Shape tensor not ranked.");
+    emitError("Shape tensor not ranked");
     return;
   }
 
@@ -725,7 +725,7 @@ void ONNXReduceMinOp::inferShapes() {
 
 void ONNXReduceProdOp::inferShapes() {
   if (!getOperand().getType().isa<RankedTensorType>()) {
-    emitError("Shape tensor not ranked.");
+    emitError("Shape tensor not ranked");
     return;
   }
 
@@ -739,7 +739,7 @@ void ONNXReduceProdOp::inferShapes() {
 
 void ONNXReduceSumOp::inferShapes() {
   if (!getOperand().getType().isa<RankedTensorType>()) {
-    emitError("Shape tensor not ranked.");
+    emitError("Shape tensor not ranked");
     return;
   }
 
@@ -758,22 +758,22 @@ void ONNXConvNoBiasOp::inferShapes() {
   // W: (M x C/group x k1 x k2 x ... x kn)
 
   // Cannot infer shape if no shape exists.
-  if (!getOperand(0).getType().isa<RankedTensorType>() ||
-      !getOperand(1).getType().isa<RankedTensorType>())
+  if (!X().getType().isa<RankedTensorType>() ||
+      !W().getType().isa<RankedTensorType>())
     return;
 
-  auto dataTy = getOperand(0).getType().cast<RankedTensorType>();
-  auto weightTy = getOperand(1).getType().cast<RankedTensorType>();
+  auto dataTy = X().getType().cast<RankedTensorType>();
+  auto weightTy = W().getType().cast<RankedTensorType>();
   auto dataShape = dataTy.getShape();
   auto weightShape = weightTy.getShape();
 
   // Lowest supported convolution is a one dimensional convolution.
   if (dataShape.size() < 3)
-    emitError("Data input shape must be at least (NxCxD1).");
+    emitError("Data input shape must be at least (NxCxD1)");
 
   // Check that shape of weight and data have same length.
   if (dataShape.size() != weightShape.size())
-    emitError("Weight size not compatible with data size.");
+    emitError("Weight size not compatible with data size");
 
   // Required attribute auto_pad defaults to NOTSET.
   auto autoPad = auto_pad();
@@ -782,7 +782,7 @@ void ONNXConvNoBiasOp::inferShapes() {
       ONNXConvNoBiasOp::group().getSExtValue(); //.getLimitedValue();
   // Check that the X.shape[1] == (W.shape[1] * group) == C condition holds.
   if (dataShape[1] != (weightShape[1] * group))
-    emitError("Channel dimension mismatch.");
+    emitError("Channel dimension mismatch");
 
   // Note: the value of the group attribut only impacts the way the
   // computation is carried out and not the actual output size.
@@ -812,11 +812,10 @@ void ONNXConvNoBiasOp::inferShapes() {
   // argument.
   SmallVector<int64_t, 2> kernelDims;
   if (auto kernelShape = kernel_shapeAttr()) {
-    if (kernelShape.getValue().size() != nDims)
-      emitError("kernel_shape length incompatible with spatial dimensions.");
+    if (ArrayAttrSize(kernelShape) != nDims)
+      emitError("kernel_shape length incompatible with spatial dimensions");
     for (int i = 0; i < nDims; ++i)
-      kernelDims.emplace_back(
-          (kernelShape.getValue()[i]).cast<IntegerAttr>().getInt());
+      kernelDims.emplace_back(ArrayAttrIntVal(kernelShape, i));
   } else {
     for (int i = 0; i < nDims; ++i)
       kernelDims.emplace_back(weightShape[i + 2]);
@@ -834,13 +833,11 @@ void ONNXConvNoBiasOp::inferShapes() {
   // From a dimensionality perspective the kernel size becomes the dilated
   // kernel size.
   if (auto dilations = dilationsAttr()) {
-    if (dilations.getValue().size() != nDims)
-      emitError("dilations length incompatible with spatial dimensions.");
+    if (ArrayAttrSize(dilations) != nDims)
+      emitError("dilations length incompatible with spatial dimensions");
     for (int i = 0; i < nDims; ++i)
       kernelDims[i] =
-          (kernelDims[i] + 1) *
-              (dilations.getValue()[i]).cast<IntegerAttr>().getInt() -
-          1;
+          (kernelDims[i] + 1) * ArrayAttrIntVal(dilations, i) - 1;
   }
 
   // Subtract kernel dimensions from input data dimensions.
@@ -853,16 +850,14 @@ void ONNXConvNoBiasOp::inferShapes() {
     // present then pads is considered to be all zeros (no padding).
     if (auto pads = padsAttr()) {
       // pads consists of two entries for each spatial axis.
-      if (pads.getValue().size() != 2 * nDims)
-        emitError("pads size is not twice the spatial size.");
+      if (ArrayAttrSize(pads) != 2 * nDims)
+        emitError("pads size is not twice the spatial size");
 
       for (int i = 0; i < nDims; ++i) {
         // Padding for beginning of axis.
-        int32_t p = (pads.getValue()[i]).cast<IntegerAttr>().getInt();
-        outSpatialDims[i] += p;
+        outSpatialDims[i] += ArrayAttrIntVal(pads, i);
         // Padding for end of axis.
-        p = (pads.getValue()[i + nDims]).cast<IntegerAttr>().getInt();
-        outSpatialDims[i] += p;
+        outSpatialDims[i] += ArrayAttrIntVal(pads, i + nDims);
       }
     }
   } else if (autoPad == "SAME_UPPER" || autoPad == "SAME_LOWER") {
@@ -878,15 +873,15 @@ void ONNXConvNoBiasOp::inferShapes() {
   } else if (autoPad == "VALID") {
     // No padding
   } else {
-    emitError("Unexpected attribute value for auto_pad.");
+    emitError("Unexpected attribute value for auto_pad");
   }
 
   // Strides
   if (auto strides = ONNXConvNoBiasOp::stridesAttr()) {
-    if (strides.getValue().size() != nDims)
-      emitError("strides length incompatible with spatial dimensions.");
+    if (ArrayAttrSize(strides) != nDims)
+      emitError("strides length incompatible with spatial dimensions");
     for (int i = 0; i < nDims; ++i) {
-      int64_t stride = strides.getValue()[i].cast<IntegerAttr>().getInt();
+      int64_t stride = ArrayAttrIntVal(strides, i);
       outSpatialDims[i] = floor(outSpatialDims[i] / stride);
     }
   }
@@ -1013,7 +1008,7 @@ void ONNXMaxPoolSingleOutOp::inferShapes() {
       }
     }
   } else if (autoPad != "VALID") {
-    emitError("auto_pad of unknown / unsupported value.");
+    emitError("auto_pad of unknown / unsupported value");
   }
   // Set pads values in attributes.
   {
@@ -1044,7 +1039,7 @@ void ONNXMaxPoolSingleOutOp::inferShapes() {
     }
     yShape[kernelOffset + i] = res;
   }
-  auto arrayTy = getOperand().getType().cast<RankedTensorType>();
+  auto arrayTy = X().getType().cast<RankedTensorType>();
   getResult().setType(RankedTensorType::get(yShape, arrayTy.getElementType()));
 }
 
@@ -1053,10 +1048,10 @@ void ONNXMaxPoolSingleOutOp::inferShapes() {
 // Unsqueeze
 
 void ONNXUnsqueezeOp::inferShapes() {
-  if (!getOperand().getType().isa<RankedTensorType>())
+  if (!data().getType().isa<RankedTensorType>())
     return;
 
-  auto operandTy = getOperand().getType().cast<RankedTensorType>();
+  auto operandTy = data().getType().cast<RankedTensorType>();
   int inRank = operandTy.getRank();
 
   ArrayAttr axisAttrs = axesAttr();
@@ -1072,10 +1067,10 @@ void ONNXUnsqueezeOp::inferShapes() {
       if (std::find(axes.begin(), axes.end(), axis) == axes.end())
         axes.emplace_back(axis);
       else
-        emitError("Duplicated axes.");
+        emitError("Duplicated axes");
     }
   } else {
-    emitError("Axes attribute is required.");
+    emitError("Axes attribute is required");
   }
 
   SmallVector<int64_t, 4> dims;