feat(tensor): support external buffer when creating input/output tensors (#389)

* support external buffer when creating input/output tensors

* feat(tensor): add new map/unmap APIs
This commit is contained in:
Dahan Gong 2022-05-18 23:38:26 +08:00 committed by GitHub
parent a9764291b0
commit f8741b4704
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 140 additions and 6 deletions

View File

@ -45,6 +45,12 @@ class Graph {
virtual std::shared_ptr<Tensor> CreateTensor(const TensorSpec& spec,
const DmaBufferDesc& dmafd) = 0;
/// Create a tensor with given `TensorSpec`, optionally backed by an external buffer.
/// spec.attr_ must be TensorAttribute::INPUT or TensorAttribute::OUTPUT
virtual std::shared_ptr<Tensor> CreateIOTensor(const TensorSpec& spec,
void* data = nullptr) = 0;
/// Create a placeholder tensor for optional inputs of operations
virtual std::shared_ptr<Tensor> CreateTensorPlaceHolder() = 0;

View File

@ -134,6 +134,10 @@ class Tensor {
virtual uint32_t GetId() = 0;
virtual bool CopyDataToTensor(const void* data, uint32_t size_in_bytes = 0) = 0;
virtual bool CopyDataFromTensor(void* data) = 0;
virtual bool FlushCacheForHandle() = 0;
virtual bool InvalidateCacheForHandle() = 0;
virtual void* map(bool invalidate_cpu_cache = false) = 0;
virtual void unmap() = 0;
virtual bool IsPlaceHolder() = 0;
virtual bool IsConstTensor() = 0;
virtual const void* GetDataRef() const = 0;

View File

@ -141,6 +141,11 @@ std::shared_ptr<Tensor> GraphImpl::CreateTensor(const TensorSpec& spec,
return std::make_shared<TensorImpl>(this, spec, dmafd);
}
std::shared_ptr<Tensor> GraphImpl::CreateIOTensor(const TensorSpec& spec,
                                                  void* data) {
  // Forward to the TensorImpl ctor that accepts an external (mutable) buffer;
  // that ctor enforces the INPUT/OUTPUT attribute requirement.
  auto io_tensor = std::make_shared<TensorImpl>(this, spec, data);
  return io_tensor;
}
std::shared_ptr<Tensor> GraphImpl::CreateTensorPlaceHolder() {
if (!tensor_placeholder_) {
tensor_placeholder_ = std::make_shared<TensorPlaceholder>(this);

View File

@ -70,6 +70,8 @@ class GraphImpl : public Graph {
const void* data = nullptr) override;
std::shared_ptr<Tensor> CreateTensor(const TensorSpec& spec,
const DmaBufferDesc& dmafd) override;
std::shared_ptr<Tensor> CreateIOTensor(const TensorSpec& spec,
void* data = nullptr) override;
std::shared_ptr<Tensor> CreateTensorPlaceHolder() override;
bool Compile() override;

View File

@ -79,18 +79,35 @@ TensorImpl::TensorImpl(Graph* graph, const TensorSpec& spec, const void* data)
: graph_(reinterpret_cast<GraphImpl*>(graph)),
id_(VSI_NN_TENSOR_ID_NA),
spec_(spec),
data_(data) {
data_(const_cast<void *>(data)) {
Init();
if (spec_.attr_ & (TensorAttribute::INPUT | TensorAttribute::OUTPUT)) {
data_ = nullptr; // it's not needed to reset it in a constant tensor
}
}
// Construct a tensor backed by a DMA buffer. The caller's dmafd.fd is stored
// in fd_ and later passed to the driver inside Init(); no CPU-side pointer is
// kept (data_ stays null), so access goes through the handle APIs.
TensorImpl::TensorImpl(Graph* graph, const TensorSpec& spec, const DmaBufferDesc& dmafd)
: graph_(reinterpret_cast<GraphImpl*>(graph)),
id_(VSI_NN_TENSOR_ID_NA),  // assigned a real id by Init() on success
spec_(spec),
data_(nullptr),
fd_(dmafd.fd) {
Init();
}
// Construct an I/O tensor wrapping an externally owned buffer `data`.
// Only INPUT/OUTPUT tensors may use an external buffer; otherwise we log an
// error and leave the tensor uninitialized (id_ stays VSI_NN_TENSOR_ID_NA).
TensorImpl::TensorImpl(Graph* graph, const TensorSpec& spec, void* data)
    : graph_(reinterpret_cast<GraphImpl*>(graph)),
      id_(VSI_NN_TENSOR_ID_NA),
      spec_(spec),
      data_(nullptr) {
  const bool is_io_tensor =
      (spec_.attr_ & (TensorAttribute::INPUT | TensorAttribute::OUTPUT)) != 0;
  if (!is_io_tensor) {
    VSILOGE("TensorImpl with an external data got unexpected attr");
    return;
  }
  // Hand the external buffer to the driver as the tensor's handle, then
  // remember it so map()/unmap() can short-circuit to it.
  Init(data);
  data_ = data;
}
TensorImpl::~TensorImpl() {}
bool TensorImpl::CopyDataToTensor(const void* data, uint32_t size_in_bytes) {
@ -167,7 +184,95 @@ bool TensorImpl::CopyDataFromTensor(void* data) {
return retn;
}
bool TensorImpl::Init() {
// Flush CPU caches so data written through the handle becomes visible to the
// device. Only meaningful for INPUT tensors created from a handle.
// Returns true when the flush succeeded or there was nothing to flush yet;
// false for non-INPUT tensors, missing tensors, or non-handle tensors.
bool TensorImpl::FlushCacheForHandle() {
  if (!(spec_.attr_ & TensorAttribute::INPUT)) {
    return false;
  }
  if (VSI_NN_TENSOR_ID_NA == id_) {
    // Not materialized in the driver yet — nothing to flush.
    return true;
  }
  vsi_nn_tensor_t* tensor = vsi_nn_GetTensor(graph_->graph(), id_);
  if (!tensor || !tensor->attr.is_created_from_handle) {
    return false;
  }
  const bool flushed = (VSI_SUCCESS == vsi_nn_FlushHandle(tensor));
  if (!flushed) {
    VSILOGE("FlushHandle fail");
  }
  return flushed;
}
// Invalidate CPU caches before reading device-produced data through the
// handle. Only meaningful for OUTPUT tensors created from a handle.
// Mirrors FlushCacheForHandle(): true on success or when nothing is
// materialized yet; false otherwise.
bool TensorImpl::InvalidateCacheForHandle() {
  if (!(spec_.attr_ & TensorAttribute::OUTPUT)) {
    return false;
  }
  if (VSI_NN_TENSOR_ID_NA == id_) {
    return true;
  }
  vsi_nn_tensor_t* tensor = vsi_nn_GetTensor(graph_->graph(), id_);
  if (!tensor || !tensor->attr.is_created_from_handle) {
    return false;
  }
  // NOTE(review): the driver invalidation is performed via
  // vsi_nn_GetTensorHandle here; the fetched pointer itself is discarded.
  void* ptr = NULL;
  const bool ok = (VSI_SUCCESS == vsi_nn_GetTensorHandle(tensor, &ptr));
  if (!ok) {
    VSILOGE("GetTensorHandle fail");
  }
  return ok;
}
// Map the tensor's handle into CPU-visible memory. Returns nullptr for
// non-I/O tensors, unmaterialized tensors, or tensors not created from a
// handle.
void* TensorImpl::map(bool invalidate_cpu_cache) {
  if (!(spec_.attr_ & (TensorAttribute::INPUT | TensorAttribute::OUTPUT))) {
    return nullptr;
  }
  if (VSI_NN_TENSOR_ID_NA == id_) {
    return nullptr;
  }
  vsi_nn_tensor_t* tensor = vsi_nn_GetTensor(graph_->graph(), id_);
  if (!tensor || !tensor->attr.is_created_from_handle) {
    return nullptr;
  }
  // Here `cpu_cache` means L1/L2/... cache on a CPU chip.
  // If data_ has been updated by other devices like NPU,
  // then caches on CPU MUST be invalidated before reading.
  void* cpu_ptr = nullptr;
  if (data_ && !invalidate_cpu_cache) {
    // Fast path: reuse the known external buffer without touching the driver.
    cpu_ptr = data_;
  } else {
    vsi_nn_GetTensorHandle(tensor, &cpu_ptr);
    // TODO: what to do when fd_ != -1
  }
  if (!cpu_ptr) {
    VSILOGE("GetTensorHandle fail");
  }
  return cpu_ptr;
}
// Release a map() mapping. For an INPUT tensor backed by an external buffer
// (no dmabuf fd), the CPU may have written through the mapping, so flush the
// handle to publish those writes to the device.
void TensorImpl::unmap() {
  const bool is_io_tensor =
      (spec_.attr_ & (TensorAttribute::INPUT | TensorAttribute::OUTPUT)) != 0;
  if (!is_io_tensor || VSI_NN_TENSOR_ID_NA == id_) {
    return;
  }
  if (-1 != fd_) {
    // TODO: unmap fd_
    return;
  }
  if (!data_ || !(spec_.attr_ & TensorAttribute::INPUT)) {
    return;
  }
  // Here data_ is an external buffer and may have been updated
  vsi_nn_tensor_t* tensor = vsi_nn_GetTensor(graph_->graph(), id_);
  if (tensor && tensor->attr.is_created_from_handle) {
    const bool flushed = (VSI_SUCCESS == vsi_nn_FlushHandle(tensor));
    if (!flushed) {
      VSILOGE("FlushHandle fail");
    }
  }
}
bool TensorImpl::Init(void *external_cache) {
vsi_nn_tensor_attr_t attr;
memset(&attr, 0x00, sizeof(attr));
@ -198,11 +303,11 @@ bool TensorImpl::Init() {
graph_->graph(),
VSI_NN_TENSOR_ID_AUTO, // DMABUF's fd is created by TensorFromHandle as input or output,
&attr,
fd_ != -1 ? (uint8_t*)fd_ : nullptr); // and cannot be set to const
fd_ != -1 ? (uint8_t*)fd_ : (uint8_t*)external_cache); // and cannot be set to const
#else
if (-1 == fd_) {
id_ = vsi_nn_AddTensorFromHandle(graph_->graph(), VSI_NN_TENSOR_ID_AUTO,
&attr, nullptr);
&attr, (uint8_t*)external_cache);
} else {
id_ = 0xFFFFFFFF;
VSILOGE("Create tensor fail: low-level driver doesn't support dmabuffer");

View File

@ -34,9 +34,10 @@ class TensorImpl : public Tensor {
public:
TensorImpl(Graph* graph, const TensorSpec& spec, const void* data = nullptr);
TensorImpl(Graph* graph, const TensorSpec& spec, const DmaBufferDesc& dmafd);
TensorImpl(Graph* graph, const TensorSpec& spec, void* data = nullptr);
~TensorImpl();
bool Init();
bool Init(void *external_cache = nullptr);
bool IsWriteable();
bool IsReadable();
@ -47,6 +48,10 @@ class TensorImpl : public Tensor {
uint32_t GetId();
bool CopyDataToTensor(const void* data, uint32_t size = 0);
bool CopyDataFromTensor(void* data);
bool FlushCacheForHandle();
bool InvalidateCacheForHandle();
void* map(bool invalidate_cpu_cache = false);
void unmap();
bool IsPlaceHolder() { return false; }
bool IsConstTensor() {
return spec_.attr_ == tim::vx::TensorAttribute::CONSTANT;
@ -56,7 +61,7 @@ class TensorImpl : public Tensor {
GraphImpl* graph_;
vsi_nn_tensor_id_t id_;
TensorSpec spec_;
const void* data_;
void* data_;
int64_t fd_{-1};
};
@ -78,6 +83,13 @@ class TensorPlaceholder : public Tensor {
(void)data;
return false;
}
bool InvalidateCacheForHandle() { return false; }
bool FlushCacheForHandle() { return false; }
void* map(bool invalidate_cpu_cache = false) {
(void)invalidate_cpu_cache;
return nullptr;
}
void unmap() { return; }
bool IsPlaceHolder() { return true; }
bool IsConstTensor() {
return spec_.attr_ == tim::vx::TensorAttribute::CONSTANT;