diff --git a/imperative/python/megengine/core/tensor/megbrain_graph.py b/imperative/python/megengine/core/tensor/megbrain_graph.py
index 3f9a897309f2c078f7395030e5206365b2458bdd..c12fee0a795506069283dcd18ee48d151fe4b29b 100644
--- a/imperative/python/megengine/core/tensor/megbrain_graph.py
+++ b/imperative/python/megengine/core/tensor/megbrain_graph.py
@@ -529,7 +529,11 @@ class InputNode(OpNode):
 
     @property
     def device(self):
-        return self.outputs[0].device
+        out = self.outputs[0]
+        # non-VarNode outputs are read through comp_node instead of device
+        if not isinstance(out, VarNode):
+            return out.comp_node
+        return out.device
 
     @property
     def dtype(self):
diff --git a/imperative/python/megengine/device.py b/imperative/python/megengine/device.py
index 0ba39d47a8f734678c0a2c33acda0d8d490cbe41..853c3ec27b372c1bd146b372c0e8536f43caa58a 100644
--- a/imperative/python/megengine/device.py
+++ b/imperative/python/megengine/device.py
@@ -36,6 +36,10 @@ def _str2device_type(type_str: str, allow_unspec: bool = True):
         return DeviceType.CPU
     elif type_str == "GPU" or type_str == "CUDA":
         return DeviceType.CUDA
+    elif type_str == "CAMBRICON":
+        return DeviceType.CAMBRICON
+    elif type_str == "ATLAS":
+        return DeviceType.ATLAS
     else:
-        assert allow_unspec and str == "XPU", "device type can only be cpu, gpu or xpu"
+        assert allow_unspec and type_str == "XPU", "device type can only be cpu, gpu or xpu"
         return DeviceType.UNSPEC
@@ -65,6 +69,24 @@ def is_cuda_available() -> bool:
     return CompNode._get_device_count(t, False) > 0
 
 
+def is_cambricon_available() -> bool:
+    """
+    Checks for cambricon devices on this system.
+
+    :return: ``True`` if the cambricon device count is greater than zero.
+    """
+    return CompNode._get_device_count(_str2device_type("cambricon"), False) > 0
+
+
+def is_atlas_available() -> bool:
+    """
+    Checks for atlas devices on this system.
+
+    :return: ``True`` if the atlas device count is greater than zero.
+    """
+    return CompNode._get_device_count(_str2device_type("atlas"), False) > 0
+
+
 def set_default_device(device: str = "xpux"):
     r"""
     Sets default computing node.
diff --git a/imperative/python/megengine/functional/external.py b/imperative/python/megengine/functional/external.py
index c1e6373f62cd7ed664315fa49b41b98b7b9eb4f2..8fba2e0cccda6a3d2757b457ab66c23adfa2141b 100644
--- a/imperative/python/megengine/functional/external.py
+++ b/imperative/python/megengine/functional/external.py
@@ -20,3 +20,30 @@ def tensorrt_runtime_opr(inputs, *, data: bytes = None):
     op = builtin.TensorRTRuntime(data, len(data))
     # return sequence of outputs
     return apply(op, *inputs)
+
+
+def cambricon_runtime_opr(inputs, data, symbol, tensor_dim_mutable):
+    r"""
+    Run a serialized Cambricon model as a runtime operator on ``inputs``.
+
+    :param inputs: list of input tensors.
+    :param data: the serialized Cambricon model.
+    :param symbol: name of the function in Cambricon model.
+    :param tensor_dim_mutable: whether the input tensors' shapes are mutable
+        in ``cnrtModel_t``.
+    :return: the result of applying the runtime operator to ``inputs``.
+    """
+    runtime_op = builtin.CambriconRuntime(data, len(data), symbol, tensor_dim_mutable)
+    return apply(runtime_op, *inputs)
+
+
+def atlas_runtime_opr(inputs, data):
+    r"""
+    Run a serialized Atlas model as a runtime operator on ``inputs``.
+
+    :param inputs: list of input tensors.
+    :param data: the serialized Atlas model.
+    :return: the result of applying the runtime operator to ``inputs``.
+    """
+    runtime_op = builtin.AtlasRuntime(data, len(data))
+    return apply(runtime_op, *inputs)
diff --git a/imperative/python/megengine/jit/tracing.py b/imperative/python/megengine/jit/tracing.py
index fc8f12e8687b71532d91b2219f22460814389377..8fddd0420138f31ff2710dd6365e547d359752a2 100644
--- a/imperative/python/megengine/jit/tracing.py
+++ b/imperative/python/megengine/jit/tracing.py
@@ -786,7 +786,11 @@ class trace:
             )
         output_names = output_names or self._output_names
 
-        dumped_device = as_device("xpux")
+        def dumped_device(info):
+            # cpu/gpu/xpu devices are dumped as "xpux"; any other device is kept
+            if info.device.logical_name[:3] not in ("cpu", "gpu", "xpu"):
+                return info.device
+            return as_device("xpux")
 
         h2v = {}
         graph = G.Graph()
@@ -794,19 +798,21 @@ class trace:
         # apply graph_opt_level in dump
         if self._graph_opt_level is not None:
             graph.options.graph_opt_level = self._graph_opt_level
-
         for i, h in enumerate(self._arg_bindings):
             info = self._tinfo[h]
             h2v[h] = graph.make_h2d(
                 dtype=info.dtype,
-                device=dumped_device,
+                device=dumped_device(info),
                 shape=info.shape or (1,),
                 name=arg_names[i] if arg_names else None,
             )
         for k, h in self._kwarg_bindings.items():
             info = self._tinfo[h]
             h2v[h] = graph.make_h2d(
-                dtype=info.dtype, device=dumped_device, shape=info.shape or (1,), name=k
+                dtype=info.dtype,
+                device=dumped_device(info),
+                shape=info.shape or (1,),
+                name=k,
             )
 
         for op, ihandles, ohandles in self._seq:
@@ -833,7 +839,7 @@ class trace:
                     h2v[h] = graph.make_const(
                         info.bound_data.numpy(),
                         dtype=info.dtype,
-                        device=dumped_device,
+                        device=dumped_device(info),
                         name=info.name,
                     )
                 ivars.append(h2v[h])
diff --git a/imperative/python/megengine/module/external.py b/imperative/python/megengine/module/external.py
index c28595d9491288911b3707c59820124b7a242554..0bd94cc3bcd508ebe4b8a41ad9c246fb0a216856 100644
--- a/imperative/python/megengine/module/external.py
+++ b/imperative/python/megengine/module/external.py
@@ -9,7 +9,11 @@
 # pylint: disable=redefined-builtin
 import numpy as np
 
-from ..functional.external import tensorrt_runtime_opr
+from ..functional.external import (
+    atlas_runtime_opr,
+    cambricon_runtime_opr,
+    tensorrt_runtime_opr,
+)
 from .module import Module
 
 
@@ -33,3 +37,52 @@ class TensorrtRuntimeSubgraph(Module):
 
     def forward(self, *inputs):
         return tensorrt_runtime_opr(inputs, data=self._data)
+
+
+class CambriconRuntimeSubgraph(Module):
+    r"""Module wrapper around a serialized CambriconRuntime subgraph.
+
+    See :func:`~.cambricon_runtime_opr` for more details.
+    """
+
+    def __init__(self, data, symbol, tensor_dim_mutable, **kwargs):
+        super().__init__(**kwargs)
+        self._data = data  # stored as given; the ``data`` setter is not used here
+        self.symbol = symbol
+        self.tensor_dim_mutable = tensor_dim_mutable
+
+    @property
+    def data(self):
+        return self._data
+
+    @data.setter
+    def data(self, val):
+        self._data = np.frombuffer(val, np.uint8)  # reinterpret the buffer as uint8
+
+    def forward(self, *inputs):
+        # hand the stored model and its options to the functional operator
+        return cambricon_runtime_opr(
+            inputs, self._data, self.symbol, self.tensor_dim_mutable
+        )
+
+
+class AtlasRuntimeSubgraph(Module):
+    r"""Module wrapper around a serialized AtlasRuntime subgraph.
+
+    See :func:`~.atlas_runtime_opr` for more details.
+    """
+
+    def __init__(self, data, **kwargs):
+        super().__init__(**kwargs)
+        self._data = data  # stored as given; the ``data`` setter is not used here
+
+    @property
+    def data(self):
+        return self._data
+
+    @data.setter
+    def data(self, val):
+        self._data = np.frombuffer(val, np.uint8)  # reinterpret the buffer as uint8
+
+    def forward(self, *inputs):
+        return atlas_runtime_opr(inputs, self._data)
diff --git a/imperative/python/megengine/utils/comp_graph_tools.py b/imperative/python/megengine/utils/comp_graph_tools.py
index c7262e0a2a7ca907ba00692eaf149916d2fd4629..614a8812aae61140b914209ccbc49ef02e11ebce 100644
--- a/imperative/python/megengine/utils/comp_graph_tools.py
+++ b/imperative/python/megengine/utils/comp_graph_tools.py
@@ -427,8 +427,9 @@ class GraphInference:
             list(self._inp_dict.keys()), list(inputs.keys())
         )
         for key in self._inp_dict:
-            self._inp_dict[key].set_value(Tensor(inputs[key])._dev_tensor())
-
+            self._inp_dict[key].set_value(
+                Tensor(inputs[key], device=self._inp_dict[key].device)._dev_tensor()
+            )
         self._func.execute()
         self._func.wait()
 
diff --git a/imperative/python/src/common.cpp b/imperative/python/src/common.cpp
index 5cede8a9afd0996842cbaece1254322a930030b9..f08a7498b07dc8466c78577f684a977d72718c3b 100644
--- a/imperative/python/src/common.cpp
+++ b/imperative/python/src/common.cpp
@@ -171,6 +171,8 @@ void init_common(py::module m) {
             .value("UNSPEC", CompNode::DeviceType::UNSPEC)
             .value("CUDA", CompNode::DeviceType::CUDA)
             .value("CPU", CompNode::DeviceType::CPU)
+            .value("CAMBRICON", CompNode::DeviceType::CAMBRICON)
+            .value("ATLAS", CompNode::DeviceType::ATLAS)
             .value("MULTITHREAD", CompNode::DeviceType::MULTITHREAD)
             .value("MAX_DEVICE_ID", CompNode::DeviceType::MAX_DEVICE_ID);
 
diff --git a/imperative/src/impl/ops/atlas_runtime.cpp b/imperative/src/impl/ops/atlas_runtime.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..9afc758090af2d0a319d7d87638ed149a5a13ab8
--- /dev/null
+++ b/imperative/src/impl/ops/atlas_runtime.cpp
@@ -0,0 +1,36 @@
+/**
+ * \file imperative/src/impl/ops/atlas_runtime.cpp
+ * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+ *
+ * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ */
+#include "../op_trait.h"
+#include "megbrain/imperative/ops/autogen.h"
+
+#if MGB_ATLAS
+#include "megbrain/opr/atlas_runtime_op.h"
+namespace mgb::imperative {
+
+namespace {
+namespace atlas_runtime {
+
+// Creates an AtlasRuntimeOpr from the op's buffer and the given input vars.
+auto apply_on_var_node(const OpDef& def, const VarNodeArray& inputs) {
+    const auto& atlas_op = static_cast<const AtlasRuntime&>(def);
+    SymbolVarArray sym_inputs(inputs.begin(), inputs.end());
+    OperatorNodeConfig cfg{atlas_op.make_name()};
+    return opr::AtlasRuntimeOpr::make(atlas_op.buf.c_str(), atlas_op.buf_size,
+                                      sym_inputs, cfg);
+}
+OP_TRAIT_REG(AtlasRuntime, AtlasRuntime)
+        .apply_on_var_node(apply_on_var_node)
+        .fallback();
+}  // namespace atlas_runtime
+}  // namespace
+}  // namespace mgb::imperative
+#endif
diff --git a/imperative/src/impl/ops/cambricon_runtime.cpp b/imperative/src/impl/ops/cambricon_runtime.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..699c384abe0b5612ccfac58adffe39b4398117bc
--- /dev/null
+++ b/imperative/src/impl/ops/cambricon_runtime.cpp
@@ -0,0 +1,37 @@
+/**
+ * \file imperative/src/impl/ops/cambricon_runtime.cpp
+ * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+ *
+ * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ */
+#include "../op_trait.h"
+#include "megbrain/imperative/ops/autogen.h"
+
+#if MGB_CAMBRICON
+#include "megbrain/cambricon/cambricon_runtime_opr.h"
+namespace mgb::imperative {
+
+namespace {
+namespace cambricon_runtime {
+
+// Creates a CambriconRuntimeOpr from the op's fields and the given input vars.
+auto apply_on_var_node(const OpDef& def, const VarNodeArray& inputs) {
+    const auto& cam_op = static_cast<const CambriconRuntime&>(def);
+    SymbolVarArray sym_inputs(inputs.begin(), inputs.end());
+    OperatorNodeConfig cfg{cam_op.make_name()};
+    return opr::CambriconRuntimeOpr::make(
+            cam_op.buf.c_str(), cam_op.buf_size, cam_op.symbol, sym_inputs,
+            cam_op.tensor_dim_mutable, cfg);
+}
+OP_TRAIT_REG(CambriconRuntime, CambriconRuntime)
+        .apply_on_var_node(apply_on_var_node)
+        .fallback();
+}  // namespace cambricon_runtime
+}  // namespace
+}  // namespace mgb::imperative
+#endif
diff --git a/src/core/include/megbrain/ir/ops.td b/src/core/include/megbrain/ir/ops.td
index d4c117c7120329236ac759f4fc827d89b65fcf01..f8e15f92527ea248dd33a655b9220eef638c0fd4 100644
--- a/src/core/include/megbrain/ir/ops.td
+++ b/src/core/include/megbrain/ir/ops.td
@@ -266,6 +266,22 @@ def TensorRTRuntime: MgbHashableOp<"TensorRTRuntime"> {
   );
 }
 
+def AtlasRuntime: MgbHashableOp<"AtlasRuntime"> {
+  let extraArguments = (ins
+    MgbStringAttr:$buf,       // serialized model bytes
+    MgbSizeTAddr:$buf_size    // size passed alongside buf
+  );
+}
+
+def CambriconRuntime: MgbHashableOp<"CambriconRuntime"> {
+  let extraArguments = (ins
+    MgbStringAttr:$buf,                // serialized model bytes
+    MgbSizeTAddr:$buf_size,            // size passed alongside buf
+    MgbStringAttr:$symbol,             // function name within the model
+    MgbBoolAttr:$tensor_dim_mutable    // whether input shapes are mutable
+  );
+}
+
 def CvtColor: MgbHashableOp<"CvtColor", [CvtColorParam]>;
 
 #endif // MGB_OPS