From cde1157d6267b62cd641689f6d7ebba4927ca483 Mon Sep 17 00:00:00 2001
From: Tian Jin
Date: Mon, 8 Jun 2020 10:18:55 +0800
Subject: [PATCH] Rapid check test (#141)

* Call llc, ld from within onnx-mlir.
* Rename EmitLLVMBC -> EmitLib, reorder header files.
* Edit comment.
* Checkpoint, debug.py works.
* Automatically generate inputs in debug.py.
* Use float.
* Initial support for rapidcheck tests.
* Convolution test case works.
* Format code.
* Link library with MainUtils.
* Fix CMake script error.
* Fast implementation of array assertion, more detailed error analysis.
* More utility for DynMemRef.
* Fix linking issue.
* Uncomment unit test.
* Refactor to separate C++/Python ExecutionSession, enable unit test.
* Format code.
* Verbose build.
* Enable PIC option for ExecutionSession.
* Fix CMake error.
* Build all targets.
* Fix doc to build all targets.
* Clean up.
* Clean up, debug.
* Use type alias consistently.
* Move definitions to DynMemRef.cpp.
* Include algorithm.
* pyruntime -> PyRuntime.
* Format code.
* Free memory.
* Add comments.
* Copyright notice.
* Improve stylistic consistency.
* Add comment.
* Revert irrelevant changes.
* Disambiguate.
* Refactor test case generator out from test case implementation, implement
  example exhaustive test driver.
* Add documentation for testing.
---
 .circleci/config.yml                        |   9 +-
 .gitmodules                                 |   3 +
 CMakeLists.txt                              |   2 +
 README.md                                   |   2 +-
 docs/README.md                              |   2 +-
 docs/Testing.md                             |  60 +++++++
 docs/_data/navigation.yml                   |   8 +-
 src/CMakeLists.txt                          |  18 +-
 src/MainUtils.cpp                           |  24 ++-
 src/MainUtils.hpp                           |   3 +
 src/Runtime/CMakeLists.txt                  |  41 +++--
 src/Runtime/DataType.h                      |  10 ++
 src/Runtime/DynMemRef.cpp                   |  81 +++++++++
 src/Runtime/DynMemRef.h                     | 160 ++++++++++++++++-
 src/Runtime/ExecusionSession.cpp            |  64 +++++++
 src/Runtime/ExecusionSession.hpp            |  40 +++++
 .../{Runtime.cpp => PyExecutionSession.cpp} |  40 ++---
 src/Runtime/PyExecutionSession.hpp          |  35 ++++
 src/Runtime/Runtime.hpp                     |  37 ----
 src/Transform/LowerToLLVM.cpp               |   1 +
 src/main.cpp                                |  20 +--
 test/CMakeLists.txt                         |   3 +-
 test/backend/CMakeLists.txt                 |   2 +-
 test/backend/test.py                        |   2 +-
 test/numerical/CMakeLists.txt               |  28 +++
 test/numerical/TestConv.cpp                 | 162 ++++++++++++++++++
 third_party/rapidcheck                      |   1 +
 utils/debug.py                              |   2 +-
 utils/install-onnx-mlir.sh                  |   2 +-
 29 files changed, 747 insertions(+), 115 deletions(-)
 create mode 100644 docs/Testing.md
 create mode 100644 src/Runtime/ExecusionSession.cpp
 create mode 100644 src/Runtime/ExecusionSession.hpp
 rename src/Runtime/{Runtime.cpp => PyExecutionSession.cpp} (61%)
 create mode 100644 src/Runtime/PyExecutionSession.hpp
 delete mode 100644 src/Runtime/Runtime.hpp
 create mode 100644 test/numerical/CMakeLists.txt
 create mode 100644 test/numerical/TestConv.cpp
 create mode 160000 third_party/rapidcheck

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 4df5769..c590cd2 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -40,7 +40,14 @@ jobs:
           command: |
             sudo pip install -q -e ./onnx-mlir/third_party/onnx
             cd onnx-mlir/build
-            cmake --build . --target check-onnx-backend
+            VERBOSE=1 cmake --build . --target check-onnx-backend
+      - run:
+          name: Run Unit Tests
+          command: |
+            cd onnx-mlir/build
+            # Need to include the bin directory in $PATH,
+            # otherwise CTest fails to find the test executables.
+            PATH=$(pwd)/bin:$PATH make test -j$(nproc)
       - run:
           name: Run DocCheck
           command: cd onnx-mlir/build && cmake --build . --target check-doc
diff --git a/.gitmodules b/.gitmodules
index cd8c28a..5305d49 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -10,3 +10,6 @@
 [submodule "third_party/variant"]
 	path = third_party/variant
 	url = https://github.com/mpark/variant.git
+[submodule "third_party/rapidcheck"]
+	path = third_party/rapidcheck
+	url = https://github.com/emil-e/rapidcheck.git
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9084b78..29aa571 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -18,6 +18,7 @@ set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
 set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
 
+include(CTest)
 include(MLIR.cmake)
 
 if (MSVC)
@@ -29,6 +30,7 @@ add_subdirectory(third_party/onnx)
 add_subdirectory(third_party/benchmark)
 add_subdirectory(third_party/pybind11)
 add_subdirectory(third_party/variant)
+add_subdirectory(third_party/rapidcheck)
 
 set(CMAKE_CXX_STANDARD 14)
diff --git a/README.md b/README.md
index 377b87b..7c78209 100644
--- a/README.md
+++ b/README.md
@@ -60,7 +60,7 @@ export LLVM_PROJ_BUILD=$(pwd)/llvm-project/build
 
 mkdir onnx-mlir/build && cd onnx-mlir/build
 cmake ..
-cmake --build . --target onnx-mlir
+cmake --build .
 
 # Run FileCheck tests:
 export LIT_OPTS=-v
diff --git a/docs/README.md b/docs/README.md
index 5d5b2d1..bac5809 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -56,7 +56,7 @@ export LLVM_PROJ_BUILD=$(pwd)/llvm-project/build
 
 mkdir onnx-mlir/build && cd onnx-mlir/build
 cmake ..
-cmake --build . --target onnx-mlir
+cmake --build .
 
 # Run FileCheck tests:
 export LIT_OPTS=-v
diff --git a/docs/Testing.md b/docs/Testing.md
new file mode 100644
index 0000000..2f01c2e
--- /dev/null
+++ b/docs/Testing.md
@@ -0,0 +1,60 @@
+# Testing
+
+In onnx-mlir, there are three types of tests to ensure the correctness of the implementation:
+
+## ONNX Backend Tests
+
+TODO.
+
+## LLVM FileCheck Tests
+
+TODO.
+
+## Numerical Tests
+
+Numerical tests check for numerical correctness in addition to the tests provided by the ONNX package.
+The goal is to provide extensive numerical value-based unit tests; this is very important for ensuring that
+optimization transformations are valid and correct: more corner cases will arise as we specialize for specific
+architecture parameters (like vector width). Numerical tests generate a large number of value-based unit
+tests from a simple, naive (and extremely slow) reference implementation of the operation being tested, which
+is used to verify the correctness of our operation lowering and optimization.
+
+Numerical tests should be structured such that the following two components are independent and separate:
+- Generation of test case parameters (for instance, the dimensions of convolutions N, C, H, W, kH, kW ...).
+- Checking that the values produced by onnx-mlir are consistent with those produced by the naive implementation.
+
+The motivation is that there are two ways we want to generate test case parameters:
+- Exhaustive generation of test case parameters, where we want to exhaustively verify correctness over a small
+range of parameters (for instance, to verify that a 3x3 convolution is correctly implemented for all valid
+padding configurations).
+- When the possible parameter space is extremely large, we rely on RapidCheck to randomly generate test cases
+that become increasingly large as smaller test cases succeed; RapidCheck also automatically shrinks a failing
+test case down to a minimal counterexample.
+For example, the following RapidCheck test case automatically generates test
+case parameters (N between 1 and 10, C between 1 and 20, etc.). By default,
+rc::check draws 100 sets of test case parameters and invokes the value-checking
+function `isOMConvTheSameAsNaiveImplFor`. (See test/numerical/TestConv.cpp for
+the companion exhaustive driver, which sweeps every padding configuration of a
+fixed 3x3 convolution.)
+
+```cpp
+  // RapidCheck test case generation.
+  rc::check("convolution implementation correctness", []() {
+    const auto N = *rc::gen::inRange(1, 10);
+    const auto C = *rc::gen::inRange(1, 20);
+    const auto H = *rc::gen::inRange(5, 20);
+    const auto W = *rc::gen::inRange(5, 20);
+
+    const auto kH = *rc::gen::inRange(1, 15);
+    const auto kW = *rc::gen::inRange(1, 15);
+
+    // We don't want an entire window of padding.
+    const auto pHBegin = *rc::gen::inRange(0, kH - 1);
+    const auto pHEnd = *rc::gen::inRange(0, kH - 1);
+    const auto pWBegin = *rc::gen::inRange(0, kW - 1);
+    const auto pWEnd = *rc::gen::inRange(0, kW - 1);
+
+    // Make sure we have at least 1 output per dimension.
+    RC_PRE((H >= kH) && (W >= kW));
+
+    RC_ASSERT(isOMConvTheSameAsNaiveImplFor(
+        N, C, H, W, kH, kW, pHBegin, pHEnd, pWBegin, pWEnd));
+  });
+```
\ No newline at end of file
diff --git a/docs/_data/navigation.yml b/docs/_data/navigation.yml
index 222d8ef..5b1c1bc 100644
--- a/docs/_data/navigation.yml
+++ b/docs/_data/navigation.yml
@@ -23,10 +23,10 @@ toc:
       url: /ImportONNXDefs.html
     - page: About Documentation
       url: /Documentation.html
-#  - title: Discussions
-#    subfolderitems:
-#      - page: Placeholder
-#        url: /piece1.html
+  - title: Discussions
+    subfolderitems:
+      - page: Testing Guidelines
+        url: /Testing.html
   - title: Tools
     subfolderitems:
       - page: debug.py - Debug Numerical Errors
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index ca16cdd..9f93e29 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -6,10 +6,18 @@ add_subdirectory(Tool)
 add_subdirectory(Builder)
 add_subdirectory(Runtime)
 
-add_executable(onnx-mlir
+add_library(MainUtils
   MainUtils.hpp
-  MainUtils.cpp
+  MainUtils.cpp)
+target_link_libraries(MainUtils onnx)
+
+target_include_directories(MainUtils PRIVATE ${ONNX_MLIR_SRC_ROOT})
+target_include_directories(MainUtils PRIVATE ${CMAKE_BINARY_DIR})
+target_include_directories(MainUtils PRIVATE ${ONNX_MLIR_BIN_ROOT})
+
+add_executable(onnx-mlir
   main.cpp)
+target_link_libraries(onnx-mlir MainUtils)
 
 # Locate llc, which is needed for translating LLVM bitcode
 # to object file.
@@ -38,8 +46,7 @@ endif()
 # So it's better not to use target_link_libraries for the add_subdirectory
 # targets, or only use it for libraries that have no further dependencies
 # (except system libraries such as libc).
-target_link_libraries(onnx-mlir
-  onnx
+target_link_libraries(MainUtils
   OMBuilder
   OMKrnlOps
   OMONNXOps
@@ -58,10 +65,9 @@ target_link_libraries(onnx-mlir
   ${CMAKE_DL_LIBS})
 
 if (INCLUDE_ONNX_ML)
-  target_link_libraries(onnx-mlir OMMLONNXOps)
+  target_link_libraries(MainUtils OMMLONNXOps)
 endif()
 
-
 target_include_directories(onnx-mlir PRIVATE ${ONNX_MLIR_SRC_ROOT})
 target_include_directories(onnx-mlir PRIVATE ${CMAKE_BINARY_DIR})
 target_include_directories(onnx-mlir PRIVATE ${ONNX_MLIR_BIN_ROOT})
diff --git a/src/MainUtils.cpp b/src/MainUtils.cpp
index 0e59994..2052008 100644
--- a/src/MainUtils.cpp
+++ b/src/MainUtils.cpp
@@ -176,7 +176,7 @@ void emitOutputFiles(string outputBaseName, EmissionTargetType emissionTarget,
   if (emissionTarget == EmitLib) {
     // Write LLVM bitcode to disk, compile & link.
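+    // (The call below emits the module as LLVM bitcode, then invokes the llc
+    // and linker binaries located at build time to produce the shared
+    // library <outputBaseName>.so.)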
     compileModuleToSharedLibrary(module, outputBaseName);
-    printf("Shared library %s.so has been compiled.", outputBaseName.c_str());
+    printf("Shared library %s.so has been compiled.\n", outputBaseName.c_str());
   } else {
     // Emit the version with all constants included.
     outputCode(module, outputBaseName, ".onnx.mlir");
@@ -203,3 +203,25 @@ void emitOutputFiles(string outputBaseName, EmissionTargetType emissionTarget,
     }
   }
 }
+
+int compileModule(mlir::OwningModuleRef &module, mlir::MLIRContext &context,
+    std::string outputBaseName, EmissionTargetType emissionTarget) {
+  mlir::PassManager pm(&context);
+  if (emissionTarget >= EmitONNXIR) {
+    addONNXToMLIRPasses(pm);
+  }
+
+  if (emissionTarget >= EmitMLIR) {
+    addONNXToKrnlPasses(pm);
+    addKrnlToAffinePasses(pm);
+  }
+
+  if (emissionTarget >= EmitLLVMIR)
+    addKrnlToLLVMPasses(pm);
+
+  if (mlir::failed(pm.run(*module)))
+    return 4;
+
+  emitOutputFiles(outputBaseName, emissionTarget, context, module);
+  return 0;
+}
diff --git a/src/MainUtils.hpp b/src/MainUtils.hpp
index c6c015d..2ccb60f 100644
--- a/src/MainUtils.hpp
+++ b/src/MainUtils.hpp
@@ -71,3 +71,6 @@ void outputCode(
 
 void emitOutputFiles(std::string outputBaseName,
     EmissionTargetType emissionTarget, mlir::MLIRContext &context,
     mlir::OwningModuleRef &module);
+
+int compileModule(mlir::OwningModuleRef &module, mlir::MLIRContext &context,
+    std::string outputBaseName, EmissionTargetType targetType);
diff --git a/src/Runtime/CMakeLists.txt b/src/Runtime/CMakeLists.txt
index 75f3c78..a5f2a55 100644
--- a/src/Runtime/CMakeLists.txt
+++ b/src/Runtime/CMakeLists.txt
@@ -4,19 +4,36 @@ add_library(cruntime SHARED
   DynMemRef.cpp
   DynMemRef.h
   DataType.h)
-target_include_directories(cruntime
-  PRIVATE ${ONNX_MLIR_SRC_ROOT} ${ONNX_MLIR_BIN_ROOT}
+
+add_library(DynMemRefUtils
+  DynMemRef.h
+  DynMemRef.cpp
+  DataType.h)
+
+add_library(ExecutionSession
+  ExecusionSession.hpp
+  ExecusionSession.cpp)
+target_link_libraries(ExecutionSession
+  ${CMAKE_DL_LIBS})
+target_include_directories(ExecutionSession PRIVATE
+  ${ONNX_MLIR_SRC_ROOT}
+  ${ONNX_MLIR_BIN_ROOT}
+  ${ONNX_MLIR_SRC_ROOT})
+set_target_properties(ExecutionSession PROPERTIES
+  POSITION_INDEPENDENT_CODE TRUE)
+
+pybind11_add_module(PyRuntime
+  PyExecutionSession.cpp
+  PyExecutionSession.hpp)
+target_link_libraries(PyRuntime PRIVATE
+  ${CMAKE_DL_LIBS}
+  ExecutionSession
+  DynMemRefUtils)
+target_include_directories(PyRuntime PRIVATE
+  ${ONNX_MLIR_SRC_ROOT}
+  ${ONNX_MLIR_BIN_ROOT}
   ${ONNX_MLIR_SRC_ROOT})
-pybind11_add_module(pyruntime
-  DynMemRef.cpp
-  DynMemRef.h
-  Runtime.cpp
-  Runtime.hpp)
-target_link_libraries(pyruntime PRIVATE ${CMAKE_DL_LIBS})
-target_include_directories(pyruntime
-  PRIVATE ${ONNX_MLIR_SRC_ROOT} ${ONNX_MLIR_BIN_ROOT}
-  ${ONNX_MLIR_SRC_ROOT})
-add_dependencies(pyruntime cruntime)
+add_dependencies(PyRuntime cruntime)
 
 install(FILES Runtime/DynMemRef.h DESTINATION include)
 install(TARGETS cruntime DESTINATION lib)
diff --git a/src/Runtime/DataType.h b/src/Runtime/DataType.h
index c9cc174..a6ec1f6 100644
--- a/src/Runtime/DataType.h
+++ b/src/Runtime/DataType.h
@@ -1,3 +1,13 @@
+//===---------------------- DataType.h - ONNX DataTypes -------------------===//
+//
+// Copyright 2019-2020 The IBM Research Authors.
+//
+// =============================================================================
+//
+// This file contains declarations of ONNX data types.
+//
+//===----------------------------------------------------------------------===//
+
 enum DYN_MEMREF_DATA_TYPE {
   UNDEFINED = 0;
   // Basic types.
diff --git a/src/Runtime/DynMemRef.cpp b/src/Runtime/DynMemRef.cpp
index b6c9b8c..9fa12cd 100644
--- a/src/Runtime/DynMemRef.cpp
+++ b/src/Runtime/DynMemRef.cpp
@@ -1,3 +1,14 @@
+//===----------- DynMemRef.cpp - Dynamic MemRef Implementation ------------===//
+//
+// Copyright 2019-2020 The IBM Research Authors.
+//
+// =============================================================================
+//
+// This file contains implementations of Dynamic MemRef data structures and
+// helper functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include <algorithm>
 #include <map>
+#include <numeric>
 #include <string>
 #include <vector>
@@ -5,12 +16,78 @@
 
 #include "DynMemRef.h"
 
+namespace {
+// Helper function to compute the cartesian product.
+inline std::vector<std::vector<INDEX_TYPE>> CartProduct(
+    const std::vector<std::vector<INDEX_TYPE>> &v) {
+  std::vector<std::vector<INDEX_TYPE>> s = {{}};
+  for (const auto &u : v) {
+    std::vector<std::vector<INDEX_TYPE>> r;
+    for (const auto &x : s) {
+      for (const auto y : u) {
+        r.push_back(x);
+        r.back().push_back(y);
+      }
+    }
+    s = std::move(r);
+  }
+  return s;
+}
+} // namespace
+
 DynMemRef::DynMemRef(int _rank) {
   rank = _rank;
   sizes = (INDEX_TYPE *)malloc(rank * sizeof(INDEX_TYPE));
   strides = (int64_t *)malloc(rank * sizeof(int64_t));
 }
 
+INDEX_TYPE DynMemRef::size() const {
+  return std::accumulate(sizes, sizes + rank, 1, std::multiplies<>());
+}
+
+std::vector<std::vector<INDEX_TYPE>> DynMemRef::indexSet() const {
+  // First, we create the index set of each dimension separately.
+  // i.e., for a tensor/DMR of shape (2, 3), its dimWiseIdxSet will be:
+  // {{0,1}, {0,1,2}};
+  std::vector<std::vector<INDEX_TYPE>> dimWiseIdxSet;
+  for (auto dimSize : std::vector<INDEX_TYPE>(sizes, sizes + rank)) {
+    std::vector<INDEX_TYPE> dimIdxSet(dimSize);
+    std::iota(std::begin(dimIdxSet), std::end(dimIdxSet), 0);
+    dimWiseIdxSet.emplace_back(dimIdxSet);
+  }
+  // Then, the cartesian product of vectors within dimWiseIdxSet will be the
+  // index set for the whole DMR.
+  return CartProduct(dimWiseIdxSet);
+}
+
+INDEX_TYPE DynMemRef::computeOffset(std::vector<INDEX_TYPE> &idxs) const {
+  auto dimStrides = std::vector<int64_t>(strides, strides + rank);
+  INDEX_TYPE elemOffset = std::inner_product(
+      idxs.begin(), idxs.end(), dimStrides.begin(), (INDEX_TYPE)0);
+  return elemOffset;
+}
+
+std::vector<int64_t> DynMemRef::computeStridesFromSizes() const {
+  // Shift dimension sizes one to the left, fill in the vacated rightmost
+  // element with 1; this gets us a vector that'll be more useful for computing
+  // strides of memory access along each dimension using prefix product (aka
+  // partial_sum with a multiply operator below). The intuition is that the
+  // size of the leading dimension does not matter when computing strides.
+  // E.g., sizes (2, 3, 4) shift to (3, 4, 1), whose right-to-left running
+  // products give the row-major strides (12, 4, 1).
+  std::vector<int64_t> sizesVec(sizes + 1, sizes + rank);
+  sizesVec.push_back(1);
+
+  std::vector<int64_t> dimStrides(rank);
+  std::partial_sum(sizesVec.rbegin(), sizesVec.rend(), dimStrides.rbegin(),
+      std::multiplies<>());
+  return dimStrides;
+}
+
+DynMemRef::~DynMemRef() {
+  free(data);
+  free(sizes);
+  free(strides);
+}
+
 // An ordered dynamic MemRef dictionary.
 // The goal is to support accessing dynamic memory refs by name and by index.
 // Currently, only accessing by index is supported.
@@ -68,6 +145,10 @@ void setSizes(DynMemRef *dynMemRef, INDEX_TYPE *sizes) {
 
 int64_t *getStrides(DynMemRef *dynMemRef) { return dynMemRef->strides; }
 
+int64_t getSize(OrderedDynMemRefDict *dict) {
+  return dict->orderedNames.size();
+}
+
 void setStrides(DynMemRef *dynMemRef, int64_t *strides) {
   for (int i = 0; i < dynMemRef->rank; i++)
     dynMemRef->sizes[i] = strides[i];
diff --git a/src/Runtime/DynMemRef.h b/src/Runtime/DynMemRef.h
index 0ccae32..a384786 100644
--- a/src/Runtime/DynMemRef.h
+++ b/src/Runtime/DynMemRef.h
@@ -1,7 +1,23 @@
+//===------------ DynMemRef.h - Dynamic MemRef Implementation -------------===//
+//
+// Copyright 2019-2020 The IBM Research Authors.
+//
+// =============================================================================
+//
+// This file contains declarations of Dynamic MemRef data structures and helper
+// functions.
+//
+//===----------------------------------------------------------------------===//
+
 #ifdef __cplusplus
 #pragma once
 
+#include <algorithm>
 #include <cstdint>
+#include <iostream>
+#include <random>
+#include <string>
+#include <vector>
 #else
 #include <stdint.h>
 #endif
@@ -11,6 +27,7 @@ typedef int64_t INDEX_TYPE;
 // This is a dynamic version of memref.
 // The same struct can be used to represent memrefs of
 // all ranks and type combinations.
+// We will refer to it as a DMR (Dynamic MemRef).
 struct DynMemRef {
   void *data;
   void *alignedData;
@@ -21,7 +38,64 @@ struct DynMemRef {
   int64_t *strides;
 
 #ifdef __cplusplus
-  DynMemRef(int _rank);
+  explicit DynMemRef(int _rank);
+
+  // Create a full DMR of type T and shape _sizes, with all data fields
+  // initialized to proper values and data pointers malloc'ed.
+  template <typename T>
+  static DynMemRef *create(std::vector<INDEX_TYPE> _sizes) {
+    auto dmr = new DynMemRef(_sizes.size());
+    dmr->offset = 0;
+    dmr->rank = _sizes.size();
+    dmr->sizes = (INDEX_TYPE *)malloc(dmr->rank * sizeof(INDEX_TYPE));
+    std::copy(_sizes.begin(), _sizes.end(), dmr->sizes);
+
+    dmr->strides = (int64_t *)malloc(dmr->rank * sizeof(int64_t));
+    auto computedStrides = dmr->computeStridesFromSizes();
+    std::copy(computedStrides.begin(), computedStrides.end(), dmr->strides);
+
+    dmr->data = malloc(dmr->size() * sizeof(T));
+    dmr->alignedData = dmr->data;
+
+    return dmr;
+  }
+
+  // Access an element (by reference) at index position idxs.
+  template <typename T>
+  T &elem(std::vector<INDEX_TYPE> idxs) {
+    INDEX_TYPE elemOffset = computeOffset(idxs);
+    T *typedPtr = (T *)data;
+    return typedPtr[elemOffset];
+  }
+
+  // Access an element (by reference) at *flattened* index position idx.
+  template <typename T>
+  T &elem(INDEX_TYPE idx) {
+    T *typedPtr = (T *)data;
+    return typedPtr[idx];
+  }
+
+  // Get a typed ptr to the data content of the DMR.
+  template <typename T>
+  T *typedPtr() {
+    return (T *)data;
+  }
+
+  // Get how many elements are stored in the DMR, as implied by its shape.
+  INDEX_TYPE size() const;
+
+  // Helper function to compute strides of access along each dimension from
+  // its shape.
+  std::vector<int64_t> computeStridesFromSizes() const;
+
+  // Compute flattened array idx from a multi-dimensional array idx.
+  INDEX_TYPE computeOffset(std::vector<INDEX_TYPE> &idxs) const;
+
+  // Get the index set (i.e., all valid multi-dimensional array indexes that
+  // can be used to access this DMR's constituent elements).
+  std::vector<std::vector<INDEX_TYPE>> indexSet() const;
+
+  ~DynMemRef();
 #endif
 };
 
@@ -42,9 +116,12 @@ extern "C" {
 // Get number of dynamic memrefs in OrderedDynMemRefDict dict.
 int numDynMemRefs(OrderedDynMemRefDict *dict);
 
-// Create an ordered dynmemref dictionary.
+// Create an ordered dynamic memref dictionary.
 OrderedDynMemRefDict *createOrderedDynMemRefDict();
 
+// Get how many dynamic memrefs are in dict.
+int64_t getSize(OrderedDynMemRefDict *dict);
+
 // Create a dynmemref with a certain rank.
 DynMemRef *createDynMemRef(int rank);
 
@@ -75,4 +152,83 @@ int64_t *getStrides(DynMemRef *);
 
 #ifdef __cplusplus
 }
+
+template <typename T>
+void printVector(std::vector<T> vec, std::string _delimiter = ",",
+    std::ostream &stream = std::cout) {
+  std::string delimiter;
+  for (const auto &elem : vec) {
+    stream << delimiter << elem;
+    delimiter = _delimiter;
+  }
+}
+
+template <typename T>
+DynMemRef *getRndRealDmr(
+    std::vector<INDEX_TYPE> sizes, T lb = -1.0, T ub = 1.0) {
+  // Will be used to obtain a seed for the random number engine.
+  std::random_device rd;
+  // Standard mersenne_twister_engine seeded with rd().
+  std::mt19937 gen(rd());
+  std::uniform_real_distribution<> dis(lb, ub);
+  auto dmr = DynMemRef::create<T>(sizes);
+  auto ptr = (T *)dmr->data;
+  std::generate(ptr, ptr + dmr->size(), [&]() { return dis(gen); });
+  return dmr;
+}
+
+template <typename T>
+inline bool isDmrClose(
+    DynMemRef *a, DynMemRef *b, float rtol = 1e-5, float atol = 1e-5) {
+
+  // Compare shapes.
+  auto aShape = std::vector<INDEX_TYPE>(a->sizes, a->sizes + a->rank);
+  auto bShape = std::vector<INDEX_TYPE>(b->sizes, b->sizes + b->rank);
+  if (aShape != bShape) {
+    std::cerr << "Shape mismatch ";
+    printVector(aShape, ",", std::cerr);
+    std::cerr << " != ";
+    printVector(bShape, ",", std::cerr);
+    return false;
+  }
+
+  // Compute the absolute difference, verify it's within tolerable range.
+  std::vector<T> absoluteDiff(a->size());
+  std::transform(a->typedPtr<T>(), a->typedPtr<T>() + a->size(),
+      b->typedPtr<T>(), absoluteDiff.begin(), std::minus<>());
+  std::transform(absoluteDiff.begin(), absoluteDiff.end(), absoluteDiff.begin(),
+      static_cast<T (*)(T)>(&std::abs));
+  bool atolSatisfied = std::all_of(
+      absoluteDiff.begin(), absoluteDiff.end(), [&](T a) { return a < atol; });
+
+  // Compute the relative difference, verify it's within tolerable range.
+  std::vector<T> relativeDiff(a->size());
+  std::transform(absoluteDiff.begin(), absoluteDiff.end(), a->typedPtr<T>(),
+      relativeDiff.begin(), std::divides<>());
+  bool rtolSatisfied = std::all_of(
+      relativeDiff.begin(), relativeDiff.end(), [&](T a) { return a < rtol; });
+
+  if (atolSatisfied && rtolSatisfied) {
+    return true;
+  } else {
+    // Figure out where and what went wrong; this can be slow, but hopefully
+    // we don't need it often.
+    for (const auto &idx : a->indexSet()) {
+      T aElem = a->elem<T>(idx);
+      T bElem = b->elem<T>(idx);
+      auto elmAbsDiff = std::abs(aElem - bElem);
+      auto withinRtol = (elmAbsDiff / aElem < rtol);
+      auto withinAtol = (elmAbsDiff < atol);
+      if (!withinRtol || !withinAtol) {
+        std::cerr << "a[";
+        printVector(idx, ",", std::cerr);
+        std::cerr << "] = " << aElem << " != ";
+        std::cerr << "b[";
+        printVector(idx, ",", std::cerr);
+        std::cerr << "] = " << bElem << std::endl;
+      }
+    }
+    return false;
+  }
+}
 #endif
diff --git a/src/Runtime/ExecusionSession.cpp b/src/Runtime/ExecusionSession.cpp
new file mode 100644
index 0000000..33855d2
--- /dev/null
+++ b/src/Runtime/ExecusionSession.cpp
@@ -0,0 +1,64 @@
+//===------- ExecusionSession.cpp - ExecutionSession Implementation -------===//
+//
+// Copyright 2019-2020 The IBM Research Authors.
+//
+// =============================================================================
+//
+// This file contains implementations of the ExecutionSession class, which
+// helps C++ programs interact with compiled binary model libraries.
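+// A session dlopen()s the compiled model library, resolves the entry point
+// with dlsym(), and exposes it as a typed run() method over DynMemRefs.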
+//
+//===----------------------------------------------------------------------===//
+
+#include <dlfcn.h>
+#include <memory>
+#include <sstream>
+#include <stdexcept>
+
+#include "ExecusionSession.hpp"
+
+namespace onnx_mlir {
+
+ExecutionSession::ExecutionSession(
+    std::string sharedLibPath, std::string entryPointName) {
+  // Adapted from https://www.tldp.org/HOWTO/html_single/C++-dlopen/.
+  _sharedLibraryHandle = dlopen(sharedLibPath.c_str(), RTLD_LAZY);
+  if (!_sharedLibraryHandle) {
+    std::stringstream errStr;
+    errStr << "Cannot open library: " << dlerror() << std::endl;
+    throw std::runtime_error(errStr.str());
+  }
+
+  // Reset errors.
+  dlerror();
+  _entryPointFunc =
+      (entryPointFuncType)dlsym(_sharedLibraryHandle, entryPointName.c_str());
+  auto *dlsymError = dlerror();
+  if (dlsymError) {
+    std::stringstream errStr;
+    errStr << "Cannot load symbol '" << entryPointName << "': " << dlsymError
+           << std::endl;
+    dlclose(_sharedLibraryHandle);
+    throw std::runtime_error(errStr.str());
+  }
+}
+
+std::vector<std::unique_ptr<DynMemRef>> ExecutionSession::run(
+    std::vector<std::unique_ptr<DynMemRef>> ins) {
+  auto *wrappedInput = createOrderedDynMemRefDict();
+  for (size_t i = 0; i < ins.size(); i++)
+    setDynMemRef(wrappedInput, i, ins.at(i).get());
+
+  auto *wrappedOutput = _entryPointFunc(wrappedInput);
+
+  std::vector<std::unique_ptr<DynMemRef>> outs;
+  auto outputSize = getSize(wrappedOutput);
+
+  for (int64_t i = 0; i < outputSize; i++) {
+    outs.emplace_back(
+        std::unique_ptr<DynMemRef>(getDynMemRef(wrappedOutput, i)));
+  }
+  return std::move(outs);
+}
+
+ExecutionSession::~ExecutionSession() { dlclose(_sharedLibraryHandle); }
+} // namespace onnx_mlir
\ No newline at end of file
diff --git a/src/Runtime/ExecusionSession.hpp b/src/Runtime/ExecusionSession.hpp
new file mode 100644
index 0000000..0aeb71a
--- /dev/null
+++ b/src/Runtime/ExecusionSession.hpp
@@ -0,0 +1,40 @@
+//===--------- ExecusionSession.hpp - ExecutionSession Declaration --------===//
+//
+// Copyright 2019-2020 The IBM Research Authors.
+//
+// =============================================================================
+//
+// This file contains declarations of the ExecutionSession class, which helps
+// C++ programs interact with compiled binary model libraries.
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "src/Runtime/DynMemRef.h"
+
+namespace onnx_mlir {
+
+typedef OrderedDynMemRefDict *(*entryPointFuncType)(OrderedDynMemRefDict *);
+
+class ExecutionSession {
+public:
+  ExecutionSession(std::string sharedLibPath, std::string entryPointName);
+
+  std::vector<std::unique_ptr<DynMemRef>> run(
+      std::vector<std::unique_ptr<DynMemRef>>);
+
+  ~ExecutionSession();
+
+protected:
+  // Handle to the shared library file being loaded.
+  void *_sharedLibraryHandle = nullptr;
+
+  // Entry point function.
+  entryPointFuncType _entryPointFunc = nullptr;
+};
+} // namespace onnx_mlir
\ No newline at end of file
diff --git a/src/Runtime/Runtime.cpp b/src/Runtime/PyExecutionSession.cpp
similarity index 61%
rename from src/Runtime/Runtime.cpp
rename to src/Runtime/PyExecutionSession.cpp
index bda80c0..9ac407c 100644
--- a/src/Runtime/Runtime.cpp
+++ b/src/Runtime/PyExecutionSession.cpp
@@ -1,30 +1,19 @@
-#include "Runtime.hpp"
+//===---- PyExecutionSession.cpp - PyExecutionSession Implementation ------===//
+//
+// Copyright 2019-2020 The IBM Research Authors.
+//
+// =============================================================================
+//
+// This file contains implementations of the PyExecutionSession class, which
+// helps Python programs interact with compiled binary model libraries.
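+// It inherits ExecutionSession and converts between py::array buffers and
+// DynMemRefs in both directions on each call.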
+//
+//===----------------------------------------------------------------------===//
 
-ExecutionSession::ExecutionSession(
-    std::string sharedLibPath, std::string entryPointName) {
-  // Adapted from https://www.tldp.org/HOWTO/html_single/C++-dlopen/.
-  _sharedLibraryHandle = dlopen(sharedLibPath.c_str(), RTLD_LAZY);
-  if (!_sharedLibraryHandle) {
-    std::stringstream errStr;
-    errStr << "Cannot open library: " << dlerror() << std::endl;
-    throw std::runtime_error(errStr.str());
-  }
+#include "PyExecutionSession.hpp"
 
-  // Reset errors.
-  dlerror();
-  _entryPointFunc =
-      (entryPointFuncType)dlsym(_sharedLibraryHandle, entryPointName.c_str());
-  auto *dlsymError = dlerror();
-  if (dlsymError) {
-    std::stringstream errStr;
-    errStr << "Cannot load symbol '" << entryPointName << "': " << dlsymError
-           << std::endl;
-    dlclose(_sharedLibraryHandle);
-    throw std::runtime_error(errStr.str());
-  }
-}
+namespace onnx_mlir {
 
-std::vector<py::array> ExecutionSession::run(
+std::vector<py::array> PyExecutionSession::pyRun(
     std::vector<py::array> inputsPyArray) {
   assert(_entryPointFunc && "Entry point not loaded.");
   auto *wrappedInput = createOrderedDynMemRefDict();
@@ -65,5 +54,4 @@ std::vector<py::array> ExecutionSession::run(
 
   return outputPyArrays;
 }
-
-ExecutionSession::~ExecutionSession() { dlclose(_sharedLibraryHandle); }
+} // namespace onnx_mlir
\ No newline at end of file
diff --git a/src/Runtime/PyExecutionSession.hpp b/src/Runtime/PyExecutionSession.hpp
new file mode 100644
index 0000000..c8fed79
--- /dev/null
+++ b/src/Runtime/PyExecutionSession.hpp
@@ -0,0 +1,35 @@
+//===------ PyExecutionSession.hpp - PyExecutionSession Declaration -------===//
+//
+// Copyright 2019-2020 The IBM Research Authors.
+//
+// =============================================================================
+//
+// This file contains declarations of the PyExecutionSession class, which helps
+// Python programs interact with compiled binary model libraries.
+//
+//===----------------------------------------------------------------------===//
+
+#include <string>
+#include <pybind11/numpy.h>
+#include <pybind11/pybind11.h>
+
+namespace py = pybind11;
+
+#include "src/Runtime/ExecusionSession.hpp"
+
+namespace onnx_mlir {
+
+class PyExecutionSession : public onnx_mlir::ExecutionSession {
+public:
+  PyExecutionSession(std::string sharedLibPath, std::string entryPointName)
+      : onnx_mlir::ExecutionSession(sharedLibPath, entryPointName){};
+
+  std::vector<py::array> pyRun(std::vector<py::array> inputsPyArray);
+};
+} // namespace onnx_mlir
+
+PYBIND11_MODULE(PyRuntime, m) {
+  py::class_<onnx_mlir::PyExecutionSession>(m, "ExecutionSession")
+      .def(py::init<std::string, std::string>())
+      .def("run", &onnx_mlir::PyExecutionSession::pyRun);
+}
\ No newline at end of file
diff --git a/src/Runtime/Runtime.hpp b/src/Runtime/Runtime.hpp
deleted file mode 100644
index 898d968..0000000
--- a/src/Runtime/Runtime.hpp
+++ /dev/null
@@ -1,37 +0,0 @@
-#pragma once
-
-#include <string>
-#include <vector>
-
-#include <dlfcn.h>
-#include <sstream>
-#include <pybind11/numpy.h>
-#include <pybind11/pybind11.h>
-
-#include "DynMemRef.h"
-
-namespace py = pybind11;
-
-typedef OrderedDynMemRefDict *(*entryPointFuncType)(OrderedDynMemRefDict *);
-
-class ExecutionSession {
-public:
-  ExecutionSession(std::string sharedLibPath, std::string entryPointName);
-
-  std::vector<py::array> run(std::vector<py::array> inputsPyArray);
-
-  ~ExecutionSession();
-
-private:
-  // Handler to the shared library file being loaded.
-  void *_sharedLibraryHandle = nullptr;
-
-  // Entry point function.
-  entryPointFuncType _entryPointFunc = nullptr;
-};
-
-PYBIND11_MODULE(pyruntime, m) {
-  py::class_<ExecutionSession>(m, "ExecutionSession")
-      .def(py::init<std::string, std::string>())
-      .def("run", &ExecutionSession::run);
-}
\ No newline at end of file
diff --git a/src/Transform/LowerToLLVM.cpp b/src/Transform/LowerToLLVM.cpp
index b03177d..a2a260b 100644
--- a/src/Transform/LowerToLLVM.cpp
+++ b/src/Transform/LowerToLLVM.cpp
@@ -646,6 +646,7 @@ void KrnlToLLVMLoweringPass::runOnOperation() {
   populateStdToLLVMConversionPatterns(typeConverter, patterns,
       /*emitCWrappers=*/true,
       /*useAlignedAlloc=*/false);
+  patterns.insert(&getContext(), typeConverter);
 
   // Lower from the `krnl` dialect i.e. the Reshape operation.
diff --git a/src/main.cpp b/src/main.cpp
index ebde6ed..69e115c 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -48,23 +48,5 @@ int main(int argc, char *argv[]) {
   string outputBaseName =
       inputFilename.substr(0, inputFilename.find_last_of("."));
 
-  mlir::PassManager pm(&context);
-  if (emissionTarget >= EmitONNXIR) {
-    addONNXToMLIRPasses(pm);
-  }
-
-  if (emissionTarget >= EmitMLIR) {
-    addONNXToKrnlPasses(pm);
-    addKrnlToAffinePasses(pm);
-  }
-
-  if (emissionTarget >= EmitLLVMIR)
-    addKrnlToLLVMPasses(pm);
-
-  if (mlir::failed(pm.run(*module)))
-    return 4;
-
-  emitOutputFiles(outputBaseName, emissionTarget, context, module);
-
-  return 0;
+  return compileModule(module, context, outputBaseName, emissionTarget);
 }
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 2e49add..40f342f 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -1,2 +1,3 @@
 add_subdirectory(mlir)
-add_subdirectory(backend)
\ No newline at end of file
+add_subdirectory(backend)
+add_subdirectory(numerical)
\ No newline at end of file
diff --git a/test/backend/CMakeLists.txt b/test/backend/CMakeLists.txt
index 109d94f..201f02d 100644
--- a/test/backend/CMakeLists.txt
+++ b/test/backend/CMakeLists.txt
@@ -16,4 +16,4 @@ add_custom_target(check-onnx-backend
         ${CMAKE_CURRENT_BINARY_DIR}/test.py)
 
 add_dependencies(check-onnx-backend onnx-mlir)
-add_dependencies(check-onnx-backend pyruntime)
+add_dependencies(check-onnx-backend PyRuntime)
diff --git a/test/backend/test.py b/test/backend/test.py
index e6d352d..62f6a7d 100644
--- a/test/backend/test.py
+++ b/test/backend/test.py
@@ -23,7 +23,7 @@ LLC = os.path.join(test_config.LLVM_PROJ_BUILD_PATH, "bin/llc")
 doc_check_base_dir = os.path.dirname(os.path.realpath(__file__))
 RUNTIME_DIR = os.path.join(test_config.ONNX_MLIR_BUILD_PATH, "lib")
 sys.path.append(RUNTIME_DIR)
-from pyruntime import ExecutionSession
+from PyRuntime import ExecutionSession
 
 
 def execute_commands(cmds):
diff --git a/test/numerical/CMakeLists.txt b/test/numerical/CMakeLists.txt
new file mode 100644
index 0000000..af72e58
--- /dev/null
+++ b/test/numerical/CMakeLists.txt
@@ -0,0 +1,28 @@
+add_executable(TestConv TestConv.cpp)
+target_link_libraries(TestConv
+  OMBuilder
+  OMKrnlOps
+  OMONNXOps
+  OMShapeInference
+  OMShapeInferenceOpInterface
+  OMAttributePromotion
+  OMPromotableConstOperandsOpInterface
+  OMElideConstants
+  OMElideKrnlGlobalConstants
+  OMKrnlToAffine
+  OMKrnlToLLVM
+  OMONNXToKrnl
+  OMONNXRewrite
+  ${MLIRLibs}
+  ${CMAKE_DL_LIBS}
+  rapidcheck
+  MainUtils
+  ExecutionSession
+  DynMemRefUtils)
+whole_archive_link_mlir(TestConv ${MLIRWholeArchiveLibs})
+target_include_directories(TestConv
+  PRIVATE
+  ${ONNX_MLIR_SRC_ROOT}
+  ${ONNX_MLIR_BIN_ROOT}
+  ${ONNX_MLIR_SRC_ROOT})
+add_test(NAME OMTestConv COMMAND TestConv)
\ No newline at end of file
diff --git a/test/numerical/TestConv.cpp b/test/numerical/TestConv.cpp
new file
mode 100644
index 0000000..c2cbd66
--- /dev/null
+++ b/test/numerical/TestConv.cpp
@@ -0,0 +1,162 @@
+#include <algorithm>
+#include <cassert>
+#include <cmath>
+#include <memory>
+#include <rapidcheck.h>
+#include <string>
+#include <vector>
+
+#include "mlir/IR/Module.h"
+#include "llvm/Support/FileSystem.h"
+
+#include "src/Dialect/ONNX/ONNXOps.hpp"
+#include "src/MainUtils.hpp"
+#include "src/Runtime/ExecusionSession.hpp"
+
+using namespace std;
+
+// Returns whether onnx-mlir compiled convolution is producing the same results
+// as a naive implementation of convolution for a specific set of convolution
+// parameters/configuration.
+bool isOMConvTheSameAsNaiveImplFor(const int N, const int C, const int H,
+    const int W, const int kH, const int kW, const int pHBegin,
+    const int pHEnd, const int pWBegin, const int pWEnd) {
+  registerDialects();
+  MLIRContext ctx;
+
+  auto module = ModuleOp::create(UnknownLoc::get(&ctx));
+  OpBuilder builder(&ctx);
+  llvm::SmallVector<int64_t, 4> xShape = {N, C, H, W};
+  llvm::SmallVector<int64_t, 1> bShape = {C};
+  llvm::SmallVector<int64_t, 4> wShape = {C, C, kH, kW};
+  auto xType = RankedTensorType::get(xShape, builder.getF32Type());
+  auto wType = RankedTensorType::get(wShape, builder.getF32Type());
+  auto yType = UnrankedTensorType::get(builder.getF32Type());
+
+  llvm::SmallVector<Type, 2> inputsType{xType, wType};
+  llvm::SmallVector<Type, 1> outputsType{yType};
+
+  auto funcType = builder.getFunctionType(inputsType, outputsType);
+  string funcName = "test_conv";
+  llvm::SmallVector<NamedAttribute, 1> attrs;
+  auto funcOp =
+      builder.create<FuncOp>(UnknownLoc::get(&ctx), funcName, funcType, attrs);
+
+  auto entryBlock = funcOp.addEntryBlock();
+  builder.setInsertionPointToStart(entryBlock);
+
+  auto xVal = entryBlock->getArgument(0);
+  auto wVal = entryBlock->getArgument(1);
+  auto bVal =
+      builder.create<ConstantOp>(UnknownLoc::get(&ctx), builder.getUnitAttr())
+          .getResult();
+
+  auto dilations = builder.getI64ArrayAttr({1, 1});
+  auto kernel_shape = builder.getI64ArrayAttr({kH, kW});
+  auto pads = builder.getI64ArrayAttr({pHBegin, pWBegin, pHEnd, pWEnd});
+  auto strides = builder.getI64ArrayAttr({1, 1});
+
+  auto convOp = builder.create<ONNXConvOp>(UnknownLoc::get(&ctx),
+      /*Y=*/yType,
+      /*X=*/xVal, /*W=*/wVal, /*B=*/bVal,
+      /*auto_pad=*/builder.getStringAttr("NOTSET"),
+      /*dilations=*/dilations, /*group=*/builder.getI64IntegerAttr(1),
+      /*kernel_shape=*/kernel_shape, /*pads=*/pads,
+      /*strides=*/strides);
+
+  // Use the convOp shape inference method to compute output shape, and unset
+  // the shape so that we don't leave IR in an inconsistent state.
+  convOp.inferShapes();
+  auto outputShape =
+      convOp.getResult().getType().cast<RankedTensorType>().getShape();
+  auto NOut = outputShape[0];
+  auto COut = outputShape[1];
+  auto HOut = outputShape[2];
+  auto WOut = outputShape[3];
+  convOp.getResult().setType(yType);
+
+  llvm::SmallVector<Value, 1> results = {convOp.getResult()};
+  builder.create<ReturnOp>(UnknownLoc::get(&ctx), results);
+  module.push_back(funcOp);
+
+  // Emit the entry point operation which specifies the number of user
+  // inputs and outputs.
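+  // (This is the symbol that the ExecutionSession below resolves with dlsym,
+  // under the name _dyn_entry_point_test_conv.)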
+  auto entryPoint = ONNXEntryPointOp::create(UnknownLoc::get(&ctx), funcOp,
+      /*numInputs=*/2,
+      /*numOutputs=*/1);
+  module.push_back(entryPoint);
+
+  OwningModuleRef moduleRef(module);
+
+  llvm::SmallVector<char, 10> path;
+  llvm::sys::fs::createTemporaryFile("_test_conv", "", path);
+  string pathStr(path.begin(), path.end());
+  llvm::FileRemover remover(path);
+
+  compileModule(moduleRef, ctx, pathStr, EmitLib);
+  onnx_mlir::ExecutionSession sess(
+      pathStr + ".so", "_dyn_entry_point_test_conv");
+
+  std::vector<std::unique_ptr<DynMemRef>> inputs;
+  auto xDmr = unique_ptr<DynMemRef>(getRndRealDmr<float>({N, C, H, W}));
+  inputs.emplace_back(move(xDmr));
+  auto wDmr = unique_ptr<DynMemRef>(getRndRealDmr<float>({C, C, kH, kW}));
+  inputs.emplace_back(move(wDmr));
+
+  auto ref = DynMemRef::create<float>({NOut, COut, HOut, WOut});
+  auto &img = inputs.at(0);
+  auto &filter = inputs.at(1);
+  for (int64_t n = 0; n < NOut; n++)
+    for (int64_t c = 0; c < COut; c++)
+      for (int64_t h = 0; h < HOut; h++)
+        for (int64_t w = 0; w < WOut; w++) {
+          ref->elem<float>({n, c, h, w}) = 0;
+          for (int64_t ci = 0; ci < C; ci++)
+            for (int64_t kh = 0; kh < kH; kh++)
+              for (int64_t kw = 0; kw < kW; kw++)
+                if ((h + kh - pHBegin >= 0 && h + kh - pHBegin < H) &&
+                    (w + kw - pWBegin >= 0 && w + kw - pWBegin < W))
+                  ref->elem<float>({n, c, h, w}) +=
+                      img->elem<float>(
+                          {n, ci, h + kh - pHBegin, w + kw - pWBegin}) *
+                      filter->elem<float>({c, ci, kh, kw});
+        }
+
+  auto outputs = sess.run(move(inputs));
+  auto &conv = outputs.at(0);
+
+  return isDmrClose<float>(conv.get(), ref);
+}
+
+int main() {
+  // RapidCheck test case generation.
+  rc::check("convolution implementation correctness", []() {
+    const auto N = *rc::gen::inRange(1, 10);
+    const auto C = *rc::gen::inRange(1, 20);
+    const auto H = *rc::gen::inRange(5, 20);
+    const auto W = *rc::gen::inRange(5, 20);
+
+    const auto kH = *rc::gen::inRange(1, 15);
+    const auto kW = *rc::gen::inRange(1, 15);
+
+    // We don't want an entire window of padding.
+    const auto pHBegin = *rc::gen::inRange(0, kH - 1);
+    const auto pHEnd = *rc::gen::inRange(0, kH - 1);
+    const auto pWBegin = *rc::gen::inRange(0, kW - 1);
+    const auto pWEnd = *rc::gen::inRange(0, kW - 1);
+
+    // Make sure we have at least 1 output per dimension.
+    RC_PRE((H >= kH) && (W >= kW));
+
+    RC_ASSERT(isOMConvTheSameAsNaiveImplFor(
+        N, C, H, W, kH, kW, pHBegin, pHEnd, pWBegin, pWEnd));
+  });
+
+  // Exhaustive test case generation.
+  for (int pHBegin = 0; pHBegin < 3; pHBegin++)
+    for (int pHEnd = 0; pHEnd < 3; pHEnd++)
+      for (int pWBegin = 0; pWBegin < 3; pWBegin++)
+        for (int pWEnd = 0; pWEnd < 3; pWEnd++)
+          assert(isOMConvTheSameAsNaiveImplFor(
+              2, 4, 5, 5, 3, 3, pHBegin, pHEnd, pWBegin, pWEnd));
+  return 0;
+}
diff --git a/third_party/rapidcheck b/third_party/rapidcheck
new file mode 160000
index 0000000..7bc7d30
--- /dev/null
+++ b/third_party/rapidcheck
@@ -0,0 +1 @@
+Subproject commit 7bc7d302191a4f3d0bf005692677126136e02f60
diff --git a/utils/debug.py b/utils/debug.py
index 456c3f7..d8d987d 100644
--- a/utils/debug.py
+++ b/utils/debug.py
@@ -63,7 +63,7 @@ def main(model_path):
 
     with tempfile.TemporaryDirectory() as temp_dir:
         print("Temporary directory has been created at {}".format(temp_dir))
-
+        # Save modified model & invoke onnx-mlir to compile it.
         temp_model_path = os.path.join(temp_dir, "model.onnx")
         onnx.save(model, temp_model_path)
diff --git a/utils/install-onnx-mlir.sh b/utils/install-onnx-mlir.sh
index afb9491..2e10e35 100755
--- a/utils/install-onnx-mlir.sh
+++ b/utils/install-onnx-mlir.sh
@@ -4,7 +4,7 @@ export LLVM_PROJ_BUILD=$(pwd)/llvm-project/build
 
 mkdir onnx-mlir/build && cd onnx-mlir/build
 cmake ..
-cmake --build . --target onnx-mlir
+cmake --build .
 
 # Run FileCheck tests:
 export LIT_OPTS=-v