提交 43d1ea28 编写于 作者: C ckey_Dou

init project

上级

要显示的变更太多。

To preserve performance only 1000 of 1000+ files are displayed.
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Minimum CMake 3.14: FetchContent-based dependency download is used below.
cmake_minimum_required(VERSION 3.14)
# CMP0054 NEW: only unquoted if() arguments are interpreted as variables.
if(POLICY CMP0054)
cmake_policy(SET CMP0054 NEW)
endif()
# CMP0072 NEW: prefer GLVND OpenGL libraries when both are available.
if(POLICY CMP0072)
cmake_policy(SET CMP0072 NEW)
endif()
project(akg C CXX)
set(AKG_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
# Two build modes: embedded in MindSpore (ENABLE_AKG set by the parent build)
# or standalone. Both run third_party/apply_patches.sh at configure time to
# patch the bundled TVM into the binary dir.
# NOTE(review): execute_process at configure time has side effects on every
# re-configure; confirm apply_patches.sh is idempotent.
if(ENABLE_AKG)
message("-- Build akg in Mindspore")
execute_process(COMMAND bash ${AKG_SOURCE_DIR}/third_party/apply_patches.sh ${CMAKE_CURRENT_BINARY_DIR} "1")
else()
message("-- Build akg alone")
include(cmake/utils.cmake)
include(cmake/external_libs/isl.cmake)
execute_process(COMMAND bash ${AKG_SOURCE_DIR}/third_party/apply_patches.sh ${CMAKE_CURRENT_BINARY_DIR} "akg")
endif()
# Patched TVM tree and FetchContent-downloaded ISL sources in the build dir.
set(TVM_DIR "${CMAKE_CURRENT_BINARY_DIR}/ktvm")
set(ISL_DIR "${CMAKE_CURRENT_BINARY_DIR}/_deps/isl-src")
# Copy the akg Python package next to the build outputs for in-place use.
file(COPY ${AKG_SOURCE_DIR}/python/akg DESTINATION
${CMAKE_CURRENT_BINARY_DIR})
# Utility functions (provides tvm_option and assign_source_group).
include(${TVM_DIR}/cmake/util/Util.cmake)
# Build feature toggles; the runtime ones are consumed in cmake/RT.cmake.
tvm_option(USE_CCE_RT "Build with cce with runtime support" OFF)
tvm_option(USE_CCE_RT_SIM "Build cce with simulate runtime support" OFF)
tvm_option(USE_KC_AIR "Build cce with kc air rpc support" OFF)
tvm_option(USE_ASAN "Build with AddressSanitizer" OFF)
# Typo fix in the user-visible help string ("Customize"/"defalut").
tvm_option(
  USE_DEFAULT_LOG
  "Use customized log to eliminate useless logs. If you want to enable default log, set this option to ON"
  OFF)
# Emit compile_commands.json for tooling (clang-tidy, IDEs).
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
# Include directories for the whole build.
# NOTE(review): these are directory-scoped include_directories() calls that
# leak into every target defined below; modern CMake would attach them to the
# akg target via target_include_directories().
include_directories(AFTER "${TVM_DIR}/include")
include_directories(AFTER "${TVM_DIR}")
include_directories(AFTER "${TVM_DIR}/src")
include_directories(AFTER "${TVM_DIR}/src/schedule")
include_directories(AFTER "${AKG_SOURCE_DIR}/src")
include_directories(AFTER "${AKG_SOURCE_DIR}/src/include")
# Ascend CCE runtime headers.
include_directories(AFTER "${AKG_SOURCE_DIR}/third_party/fwkacllib/inc")
include_directories(AFTER "${AKG_SOURCE_DIR}/third_party/fwkacllib/inc/toolchain")
# ISL wrapper and downloaded ISL sources (ISL_DIR set above).
include_directories(AFTER "${AKG_SOURCE_DIR}/third_party/isl_wrap/include")
include_directories(AFTER "${ISL_DIR}/include")
include_directories(AFTER "${ISL_DIR}")
# Third-party headers bundled with the patched TVM.
include_directories(AFTER "${TVM_DIR}/3rdparty/dmlc-core/include")
include_directories(AFTER "${TVM_DIR}/3rdparty/dlpack/include")
include_directories(AFTER "${TVM_DIR}/3rdparty/compiler-rt")
include_directories(AFTER "${TVM_DIR}/3rdparty/rang/include")
include_directories(AFTER "${TVM_DIR}/3rdparty/picojson")
# Builds the isl_fixed target consumed by TVM_LINKER_LIBS below.
add_subdirectory(${AKG_SOURCE_DIR}/third_party/isl_wrap isl_fixed)
# Initial linker variables: isl_fixed comes from the isl_wrap subdirectory.
set(TVM_LINKER_LIBS isl_fixed)
# ${CMAKE_DL_LIBS} provides dlopen/dlclose portably (-ldl where needed).
set(TVM_RUNTIME_LINKER_LIBS ${CMAKE_DL_LIBS})
# picojson with 64-bit integer support; route dmlc logging through a custom sink.
add_definitions(-DPICOJSON_USE_INT64=1)
add_definitions(-DDMLC_LOG_CUSTOMIZE=1)
if(USE_AKG_LOG)
add_definitions(-DUSE_AKG_LOG=1)
endif()
# Selects the CCE runtime flavor and may append to TVM_RUNTIME_LINKER_LIBS.
include(cmake/RT.cmake)
# Generic compilation options.
include(CheckCXXCompilerFlag)
# Hard requirement: the codebase is written against C++11.
check_cxx_compiler_flag("-std=c++11" SUPPORT_CXX11)
if(NOT SUPPORT_CXX11)
message(
FATAL_ERROR "-- please choose a compiler which support C++ 11 standard")
endif()
# NOTE(review): -march=native makes binaries non-portable across CPUs;
# confirm this is intended for release artifacts.
check_cxx_compiler_flag("-march=native" NATIVE_BUILD)
if(NATIVE_BUILD)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=native")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native")
endif()
set(CMAKE_SKIP_RPATH TRUE)
# Hardening flags: stack protector plus relro/now/noexecstack link options.
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pipe -Wall -fPIC -fstack-protector-all")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wl,-z,relro,-z,now,-z,noexecstack")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -pipe -Wall -fPIC -fstack-protector-all")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wl,-z,relro,-z,now,-z,noexecstack")
# Debug: no optimization, symbols, -rdynamic for backtraces.
# Release (default): -O2 with FORTIFY_SOURCE; -Werror only in release builds.
if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
message("-- Build in Debug mode")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O0 -g -rdynamic")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0 -g -rdynamic")
else()
message("-- Build in Release mode")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O2 -D_FORTIFY_SOURCE=2 -Werror")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2 -D_FORTIFY_SOURCE=2 -Werror")
endif()
# GCC 7+ supports -faligned-new (C++17 aligned operator new diagnostics).
if(CMAKE_CXX_COMPILER_ID MATCHES "GNU" AND CMAKE_CXX_COMPILER_VERSION
VERSION_GREATER 7.0)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -faligned-new")
endif()
# ASan flags are prepended so they take effect before optimization flags.
if(USE_ASAN)
set(CMAKE_C_FLAGS
"-fsanitize=address -fno-omit-frame-pointer ${CMAKE_C_FLAGS}")
set(CMAKE_CXX_FLAGS
"-fsanitize=address -fno-omit-frame-pointer ${CMAKE_CXX_FLAGS}")
endif()
# Add source groups for IDE project generators (folder layout in VS/Xcode).
# NOTE(review): file(GLOB_RECURSE) will not pick up newly added files until
# CMake is re-run.
file(GLOB_RECURSE GROUP_SOURCE "${TVM_DIR}/src/*.cc" "src/*.cc")
file(GLOB_RECURSE GROUP_INCLUDE "${TVM_DIR}/src/*.h"
"${TVM_DIR}/include/*.h" "src/*.h" "include/*.h")
# assumes assign_source_group is provided by Util.cmake included above -- TODO confirm
assign_source_group("Source" ${GROUP_SOURCE})
assign_source_group("Include" ${GROUP_INCLUDE})
# Source file lists: bundled dmlc-core I/O, the patched TVM compiler and
# runtime, plus all akg sources. The stub runtime sources globbed here are
# conditionally removed later when a real runtime is linked.
# NOTE(review): glob-based source lists miss new files until re-configure.
file(
GLOB
COMPILER_SRCS
${TVM_DIR}/3rdparty/dmlc-core/src/io/filesys.cc
${TVM_DIR}/3rdparty/dmlc-core/src/io/indexed_recordio_split.cc
${TVM_DIR}/3rdparty/dmlc-core/src/io/input_split_base.cc
${TVM_DIR}/3rdparty/dmlc-core/src/io/line_split.cc
${TVM_DIR}/3rdparty/dmlc-core/src/io/recordio_split.cc
${TVM_DIR}/3rdparty/dmlc-core/src/io/local_filesys.cc
${TVM_DIR}/3rdparty/dmlc-core/src/io.cc
${TVM_DIR}/src/api/*.cc
${TVM_DIR}/src/arithmetic/*.cc
${TVM_DIR}/src/codegen/*.cc
${TVM_DIR}/src/lang/*.cc
${TVM_DIR}/src/pass/*.cc
${TVM_DIR}/src/op/*.cc
${TVM_DIR}/src/node/*.cc
${TVM_DIR}/src/schedule/*.cc
${TVM_DIR}/src/runtime/*.cc
${TVM_DIR}/src/runtime/cce/*.cc
${TVM_DIR}/src/runtime/vm/*.cc
${TVM_DIR}/src/runtime/vm/profiler/*.cc
${TVM_DIR}/src/codegen/stackvm/*.cc
${AKG_SOURCE_DIR}/src/poly/*.cc
${AKG_SOURCE_DIR}/src/api/*.cc
${AKG_SOURCE_DIR}/src/pass/*.cc
${AKG_SOURCE_DIR}/src/rpc/*.cc
${AKG_SOURCE_DIR}/src/schedule/*.cc
${AKG_SOURCE_DIR}/src/emit_insn/*.cc
${AKG_SOURCE_DIR}/src/contrib/cce_parm/*.cc
${AKG_SOURCE_DIR}/src/contrib/parser/*.cc
${AKG_SOURCE_DIR}/src/runtime/stub/*.cc
${AKG_SOURCE_DIR}/src/codegen/*.cc
${AKG_SOURCE_DIR}/src/composite/*.cc
${AKG_SOURCE_DIR}/src/op/*.cc
${AKG_SOURCE_DIR}/src/common/*.cc)
# Custom datatype support from TVM's codegen.
file(GLOB DATATYPE_SRCS ${TVM_DIR}/src/codegen/datatype/*.cc)
list(APPEND COMPILER_SRCS ${DATATYPE_SRCS})
# TOPI operator library sources from the bundled TVM.
file(GLOB TOPI_SRCS ${TVM_DIR}/topi/src/*.cc)
# TVM runtime sources (the runtime/*.cc globs are also present in
# COMPILER_SRCS above; CMake de-duplicates identical paths per target).
file(
  GLOB
  RUNTIME_SRCS
  ${TVM_DIR}/src/runtime/*.cc
  ${TVM_DIR}/src/runtime/cce/*.cc
  ${TVM_DIR}/src/runtime/vm/*.cc
  ${TVM_DIR}/src/runtime/stackvm/*.cc)
# The stub runtime is only compiled in when no real CCE runtime is linked.
file(GLOB RUNTIME_STUB_SRC ${AKG_SOURCE_DIR}/src/runtime/stub/*.cc)
if(USE_CCE_RT
   OR USE_CCE_RT_SIM
   OR USE_KC_AIR)
  list(REMOVE_ITEM COMPILER_SRCS ${RUNTIME_STUB_SRC})
endif()
# Optional RPC support pulled from the TVM runtime.
if(USE_RPC)
  message(STATUS "Build with RPC support...")
  file(GLOB RUNTIME_RPC_SRCS ${TVM_DIR}/src/runtime/rpc/*.cc)
  list(APPEND RUNTIME_SRCS ${RUNTIME_RPC_SRCS})
endif()
# Single shared library bundling compiler, runtime and TOPI sources.
# Fix: ${RUNTIME_SRCS} was listed twice in the original add_library call.
add_library(akg SHARED ${COMPILER_SRCS} ${RUNTIME_SRCS} ${TOPI_SRCS})
# Build-order dependency on the ISL wrapper target.
add_dependencies(akg isl_fixed)
target_link_libraries(akg ${TVM_LINKER_LIBS} ${TVM_RUNTIME_LINKER_LIBS})
# TOPI headers are only needed to build akg itself.
target_include_directories(akg PRIVATE "${TVM_DIR}/topi/include")
# Installation rules.
install(TARGETS akg DESTINATION lib${LIB_SUFFIX})
# When embedded in MindSpore, ship the TVM/TOPI Python packages alongside akg.
if(ENABLE_AKG)
  install(
    DIRECTORY
      ${TVM_DIR}/python/tvm
      ${TVM_DIR}/topi/python/topi
    DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/akg)
endif()
# akg contributing guidelines
<!-- TOC -->
- [akg contributing guidelines](#akg-contributing-guidelines)
- [Contributor License Agreement](#contributor-license-agreement)
- [Getting Started](#getting-started)
- [Contribution workflow](#contribution-workflow)
- [Code style](#code-style)
- [Fork-Pull development model](#fork-pull-development-model)
- [Report issues](#report-issues)
- [Propose PRs](#propose-prs)
<!-- /TOC -->
## Contributor License Agreement
It's required to sign CLA before your first code submission to MindSpore community.
For individual contributor, please refer to [ICLA online document](https://www.mindspore.cn/icla) for the detailed information.
## Getting Started
- Fork the repository [Gitee](https://gitee.com/mindspore/akg).
- Read the [README.md](README.md) for project information and build instructions.
## Contribution Workflow
### Code style
Please follow this style to make akg easy to review, maintain and develop.
* Coding guidelines
The *Python* coding style suggested by [Python PEP 8 Coding Style](https://pep8.org/) and *C++* coding style suggested by [Google C++ Coding Guidelines](http://google.github.io/styleguide/cppguide.html) are used in MindSpore community.
* Unittest guidelines
The *Python* unittest style suggested by [pytest](http://www.pytest.org/en/latest/) and *C++* unittest style suggested by [Googletest Primer](https://github.com/google/googletest/blob/master/googletest/docs/primer.md) are used in MindSpore community.
### Fork-Pull development model
* Fork akg repository
Before submitting code to the akg project, please make sure that this project has been forked to your own repository. It means that there will be parallel development between the akg repository and your own repository, so be careful to avoid inconsistency between them.
* Clone the remote repository
If you want to download the code to the local machine, `git` is the best way:
```shell
# For Gitee
git clone https://gitee.com/{insert_your_forked_repo}/akg.git
git remote add upstream https://gitee.com/mindspore/akg.git
```
* Develop code locally
To avoid inconsistency between multiple branches, checking out to a new branch is `SUGGESTED`:
```shell
git checkout -b {new_branch_name} origin/master
```
Then you can change the code arbitrarily.
* Push the code to the remote repository
After updating the code, you should push the update in the formal way:
```shell
git add .
git status # Check the update status
git commit -m "Your commit title"
git commit -s --amend #Add the concrete description of your commit
git push origin {new_branch_name}
```
* Pull a request to MindSpore repository
In the last step, you need to create a compare request between your new branch and the MindSpore `master` branch. After finishing the pull request, the Jenkins CI will be automatically set up for building test.
### Report issues
A great way to contribute to the project is to send a detailed report when you encounter an issue. We always appreciate a well-written, thorough bug report, and will thank you for it!
When reporting issues, refer to this format:
- What version of env (mindspore, os, python etc) are you using?
- Is this a BUG REPORT or FEATURE REQUEST?
- What happened?
- What you expected to happen?
- How to reproduce it?(as minimally and precisely as possible)
- Special notes for your reviewers?
**Issues advisory:**
- **If you find an unclosed issue, which is exactly what you are going to solve,** please put some comments on that issue to tell others you would be in charge of it.
- **If an issue is opened for a while,** it's recommended for contributors to precheck before working on solving that issue.
- **If you resolve an issue which is reported by yourself,** it's also required to let others know before closing that issue.
### Propose PRs
* Raise your idea as an *issue* on [Gitee](https://gitee.com/mindspore/akg/issues)
* If it is a new feature that needs lots of design details, a design proposal should also be submitted.
* After reaching consensus in the issue discussions and design proposal reviews, complete the development on the forked repo and submit a PR.
* No PR may be merged until it receives **2+ LGTM** from approvers. Please NOTICE that an approver is NOT allowed to add *LGTM* to his own PR.
* After PR is sufficiently discussed, it will get merged, abandoned or rejected depending on the outcome of the discussion.
**PRs advisory:**
- Any irrelevant changes should be avoided.
- Make sure your commit history being ordered.
- Always keep your branch up with the master branch.
- For bug-fix PRs, make sure all related issues being linked.
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
MindSpore AKG
Copyright 2019-2020 Huawei Technologies Co., Ltd
- [What Is AKG?](#what-is-akg)
- [Hardware Backends Support](#hardware-backends-support)
- [Build](#build)
- [Build With MindSpore](#build-with-mindspore)
- [Build Standalone](#build-standalone)
- [Run](#run)
- [Contributing](#contributing)
- [Release Notes](#release-notes)
- [License](#license)
## What Is AKG
AKG(Auto Kernel Generator) is an optimizer for operators in Deep Learning Networks. It provides the ability to automatically fuse ops with specific patterns. AKG works with MindSpore-GraphKernel to improve the performance of networks running on different hardware backends.
AKG is composed of four basic optimization modules: normalization, auto schedule, instruction emit and backend optimization.
- **normalization.** The normalization module mainly includes optimizations such as three-address transform, common subexpression elimination, copy propagation and so on.
- **auto schedule.** The auto schedule module mainly has vectorization, loop tiling, mem promotion and loop distribution.
- **instruction emit.** The instruction emitting module has the optimization about loop normalization, auto pragma and emit instruction.
- **backend optimization.** The backend optimization module consists of double buffer optimization, storage rewrite optimization and inject sync optimization.
<img src="docs/akg-design.png" style="zoom:80%" div align=center/>
## Hardware Backends Support
At present, `Ascend910` is supported only. More Backends are on the list.
## Build
### Build With MindSpore
See [MindSpore README.md](https://gitee.com/mindspore/mindspore/blob/master/README.md) for details.
### Build Standalone
We suggest you build and run akg together with MindSpore. And we also provide a way to run case in standalone mode for convenience sake.
Refer to [MindSpore Installation](https://www.mindspore.cn/install/en) for more information about compilation dependencies.
```
bash build.sh
```
## Run Standalone
1. Set Environment
```
cd tests
source ./test_env.sh amd64
export RUNTIME_MODE='air_cloud'
export PATH=${PATH}:${YOUR_CCEC_COMPILER_PATH}
```
2. Run test
```
cd tests/operators/vector
pytest -s test_abs_001.py -m "level0" # run level0 testcases
```
## Contributing
Welcome contributions. See our [Contributor Wiki](CONTRIBUTING.md) for
more details.
## Release Notes
The release notes, see our [RELEASE](RELEASE.md).
## License
[Apache License 2.0](LICENSE)
此差异已折叠。
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Standalone build driver: configure with CMake (CCE runtime enabled) and
# build in ./build, starting from a clean build directory each run.
export AKG_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/" && pwd )"
BUILD_DIR="${AKG_DIR}/build"
# Quote all path expansions so paths containing spaces are handled safely.
if [ -d "$BUILD_DIR" ]; then
  rm -rf "$BUILD_DIR"
fi
echo "mkdir $BUILD_DIR"
mkdir -p "$BUILD_DIR"
# Abort if the build directory cannot be entered (previously unchecked).
cd "$BUILD_DIR" || exit 1
# Test the command's exit status directly instead of inspecting $? afterwards.
if ! cmake .. -DUSE_CCE_RT=1
then
  echo "[ERROR] CMake failed!!!"
  exit 1
fi
if ! make -j16
then
  echo "[ERROR] make failed!!!"
  exit 1
fi
cd -
# Runtime selection: decides which CCE runtime implementation libakg links
# against, driven by USE_CCE_RT / USE_CCE_RT_SIM / USE_KC_AIR (declared via
# tvm_option in the top-level CMakeLists). Sets TVM_RUNTIME_LINKER_LIBS and
# the matching USE_* compile definitions.
include(CheckSymbolExists)
# Detect host architecture via predefined compiler macros.
check_symbol_exists(__aarch64__ "" __CHECK_AARCH64)
check_symbol_exists(__x86_64__ "" __CHECK_X86_64)
# Internal server hosting prebuilt runtime binaries.
# NOTE(review): downloads below use plain http with no checksum or signature
# verification on the fetched .so files — confirm this is acceptable.
set(AUTODIFF_SITE "http://autodiff.huawei.com:8080")
if(USE_CCE_RT)
# Real Ascend CCE runtime installed on the host.
message("-- Build with cce runtime")
if(NOT __CHECK_AARCH64 AND NOT __CHECK_X86_64)
message(FATAL_ERROR "runtime only support aarch64 and x86_64")
endif()
add_definitions(-DUSE_CCE_RT=1)
set(TVM_RUNTIME_LINKER_LIBS libruntime.so)
# NOTE(review): hardcoded absolute path to the Ascend toolkit install.
link_directories(/usr/local/Ascend/fwkacllib/lib64)
elseif(USE_CCE_RT_SIM)
# Simulated runtime (camodel); x86_64 only, libraries are downloaded.
message("-- Build with cce runtime(camodel), Only Support AMD64")
if(NOT __CHECK_X86_64)
message(FATAL_ERROR "camodel only support x86_64")
endif()
add_definitions(-DUSE_CCE_RT_SIM=1)
set(TVM_RUNTIME_LINKER_LIBS libcamodel.so libslog.so libc_sec.so
libruntime_camodel.so libtsch_camodel.so)
foreach(LIB IN LISTS TVM_RUNTIME_LINKER_LIBS)
file(DOWNLOAD ${AUTODIFF_SITE}/x86_64/camodel/${LIB}
${CMAKE_CURRENT_BINARY_DIR}/x86_64/camodel/${LIB})
endforeach()
link_directories(
"${CMAKE_CURRENT_BINARY_DIR}/x86_64/camodel")
elseif(USE_KC_AIR)
# kc_air RPC runtime; a prebuilt library is downloaded per architecture.
message("-- Build with kc air")
if(NOT __CHECK_AARCH64 AND NOT __CHECK_X86_64)
message(FATAL_ERROR "-- now kc air only support amd64 and x86_64")
endif()
if(__CHECK_AARCH64)
file(DOWNLOAD ${AUTODIFF_SITE}/aarch64/kc_air/libkc_air.so
${CMAKE_CURRENT_BINARY_DIR}/aarch64/libkc_air.so)
link_directories("${CMAKE_CURRENT_BINARY_DIR}/aarch64")
endif()
if(__CHECK_X86_64)
file(DOWNLOAD ${AUTODIFF_SITE}/x86_64/kc_air/libkc_air.so
${CMAKE_CURRENT_BINARY_DIR}/x86_64/libkc_air.so)
link_directories("${CMAKE_CURRENT_BINARY_DIR}/x86_64")
endif()
add_definitions(-DUSE_CCE_RT=1)
add_definitions(-DUSE_KC_AIR=1)
set(TVM_RUNTIME_LINKER_LIBS kc_air)
else()
# No runtime: compile in the stub implementation instead.
message("-- Build without runtime support")
add_definitions(-DUSE_CCE_RT_STUB=1)
add_definitions(-DUSE_CCE_RT=1)
endif()
# isl (Integer Set Library), used by the polyhedral scheduler (src/poly).
# Harden the third-party build with the same FORTIFY/-O2 flags as akg itself.
set(isl_CXXFLAGS "-D_FORTIFY_SOURCE=2 -O2")
set(isl_CFLAGS "-D_FORTIFY_SOURCE=2 -O2")
# Download, patch and register isl 0.22 via the helper in cmake/utils.cmake.
# NOTE(review): plain http URL — the MD5 check guards against corruption but
# not tampering; consider https.
mindspore_add_pkg(isl
VER 0.22
URL http://isl.gforge.inria.fr/isl-0.22.tar.gz
MD5 671d0a5e10467a5c6db0893255278845
PATCHES ${CMAKE_SOURCE_DIR}/third_party/patch/isl/isl.patch)
# Helper utilities for downloading and registering third-party packages.
include(FetchContent)
# Show download progress instead of running FetchContent silently.
set(FETCHCONTENT_QUIET OFF)
# On Windows with CMake >= 3.17, also allow find_library to match .dll files.
if (CMAKE_SYSTEM_NAME MATCHES "Windows" AND ${CMAKE_VERSION} VERSION_GREATER_EQUAL 3.17.0)
set(CMAKE_FIND_LIBRARY_SUFFIXES .dll ${CMAKE_FIND_LIBRARY_SUFFIXES})
endif ()
# Add a subdirectory and collect the object files of the target it defines.
#
#   des_submodule_objs  name of the caller's list variable to extend
#   sub_dir             directory passed to add_subdirectory()
#   submodule_name_obj  target that must exist after the subdirectory is added
#
# Fails hard when the target is missing or was already collected once.
function(mindspore_add_submodule_obj des_submodule_objs sub_dir submodule_name_obj)
  add_subdirectory(${sub_dir})
  if(NOT TARGET ${submodule_name_obj})
    message(FATAL_ERROR "Can not find submodule '${submodule_name_obj}'. in ${CMAKE_CURRENT_LIST_FILE}")
  endif()
  # Generator expression resolving to the target's object files at generate time.
  set(_obj_genex "$<TARGET_OBJECTS:${submodule_name_obj}>")
  if("${_obj_genex}" IN_LIST ${des_submodule_objs})
    message(FATAL_ERROR "submodule '${submodule_name_obj}' added more than once. in ${CMAKE_CURRENT_LIST_FILE}")
  endif()
  list(APPEND ${des_submodule_objs} "${_obj_genex}")
  # Propagate the extended list back to the caller's scope.
  set(${des_submodule_objs} ${${des_submodule_objs}} PARENT_SCOPE)
endfunction()
# Shared download/build cache for third-party packages; overridable via the
# MSLIBS_CACHE_PATH environment variable (read at configure time only).
if (DEFINED ENV{MSLIBS_CACHE_PATH})
set(_MS_LIB_CACHE $ENV{MSLIBS_CACHE_PATH})
else()
set(_MS_LIB_CACHE ${CMAKE_BINARY_DIR}/.mslib)
endif ()
message("MS LIBS CACHE PATH: ${_MS_LIB_CACHE}")
if (NOT EXISTS ${_MS_LIB_CACHE})
file(MAKE_DIRECTORY ${_MS_LIB_CACHE})
endif ()
# Optional internal mirror server; when set, package URLs are rewritten to
# prefer it (see __download_pkg below).
if (DEFINED ENV{MSLIBS_SERVER})
set(LOCAL_LIBS_SERVER $ENV{MSLIBS_SERVER})
message("LOCAL_LIBS_SERVER: ${LOCAL_LIBS_SERVER}")
endif ()
# Decide the parallel job count (THNUM) for third-party package builds.
# An explicitly provided JOBS value is honored as-is; otherwise the default
# of 8 jobs is used, capped at the number of available processor cores.
include(ProcessorCount)
ProcessorCount(N)
if(JOBS)
  set(THNUM ${JOBS})
else()
  set(JOBS 8)
  if(${N} LESS ${JOBS})
    set(THNUM ${N})
  else()
    set(THNUM ${JOBS})
  endif()
endif()
message("set make thread num: ${THNUM}")
# Ensure the local library server bypasses any configured HTTP proxy by
# adding it to the no_proxy environment variable.
# Fix: the original tested `if (NOT ENV{no_proxy})`, which does not read the
# environment (it dereferences an ordinary variable literally named
# "ENV{no_proxy}", which never exists), so the else() branch always ran and
# `string(FIND $ENV{no_proxy} ...)` failed with a wrong-argument-count error
# whenever no_proxy was unset. `DEFINED ENV{...}` is the correct test, and
# the expansion is quoted so an empty value stays a single argument.
if(LOCAL_LIBS_SERVER)
  if(NOT DEFINED ENV{no_proxy})
    set(ENV{no_proxy} "${LOCAL_LIBS_SERVER}")
  else()
    # Append only if the server is not already listed.
    string(FIND "$ENV{no_proxy}" ${LOCAL_LIBS_SERVER} IP_POS)
    if(${IP_POS} EQUAL -1)
      set(ENV{no_proxy} "$ENV{no_proxy},${LOCAL_LIBS_SERVER}")
    endif()
  endif()
endif()
# Download and unpack an archive package via FetchContent.
#
#   pkg_name  package identifier (also used as the FetchContent content name)
#   pkg_url   upstream archive URL
#   pkg_md5   expected MD5 of the archive
#
# When LOCAL_LIBS_SERVER is set, the mirror URL is tried first with the
# upstream URL kept as a fallback (FetchContent accepts a URL list).
# Exports <pkg_name>_SOURCE_DIR to the caller's scope.
function(__download_pkg pkg_name pkg_url pkg_md5)
if(LOCAL_LIBS_SERVER)
get_filename_component(_URL_FILE_NAME ${pkg_url} NAME)
# Prepend the mirror URL; the original URL remains as second choice.
set(pkg_url "http://${LOCAL_LIBS_SERVER}:8081/libs/${pkg_name}/${_URL_FILE_NAME}" ${pkg_url})
endif()
FetchContent_Declare(
${pkg_name}
URL ${pkg_url}
URL_HASH MD5=${pkg_md5}
)
FetchContent_GetProperties(${pkg_name})
message("download: ${${pkg_name}_SOURCE_DIR} , ${pkg_name} , ${pkg_url}")
# Populate only once per configure; repeated calls reuse the prior download.
if(NOT ${pkg_name}_POPULATED)
FetchContent_Populate(${pkg_name})
set(${pkg_name}_SOURCE_DIR ${${pkg_name}_SOURCE_DIR} PARENT_SCOPE)
endif()
endfunction()
# Download a package pinned to a git commit, either as a prebuilt archive
# from the local mirror (with MD5 check) or by cloning the git repository.
#
#   pkg_name        package identifier
#   pkg_url         git repository URL
#   pkg_git_commit  commit/tag to check out (also the mirror archive name)
#   pkg_md5         expected MD5 of the mirror archive
#
# Exports <pkg_name>_SOURCE_DIR to the caller's scope.
function(__download_pkg_with_git pkg_name pkg_url pkg_git_commit pkg_md5)
if(LOCAL_LIBS_SERVER)
# Mirror path: fetch a pre-packaged archive named after the commit.
set(pkg_url "http://${LOCAL_LIBS_SERVER}:8081/libs/${pkg_name}/${pkg_git_commit}")
FetchContent_Declare(
${pkg_name}
URL ${pkg_url}
URL_HASH MD5=${pkg_md5}
)
else()
# No mirror: clone the repository at the pinned commit.
FetchContent_Declare(
${pkg_name}
GIT_REPOSITORY ${pkg_url}
GIT_TAG ${pkg_git_commit})
endif()
FetchContent_GetProperties(${pkg_name})
message("download: ${${pkg_name}_SOURCE_DIR} , ${pkg_name} , ${pkg_url}")
# Populate only once per configure.
if(NOT ${pkg_name}_POPULATED)
FetchContent_Populate(${pkg_name})
set(${pkg_name}_SOURCE_DIR ${${pkg_name}_SOURCE_DIR} PARENT_SCOPE)
endif()
endfunction()
# Look for an installed package under <pkg_name>_BASE_DIR and wrap what is
# found in IMPORTED targets named <pkg_name>::<lib> (and <pkg_name>::<exe>).
#
#   pkg_name  package identifier; ${pkg_name}_BASE_DIR must be set by caller
#   pkg_exe   optional executable name to import ("" or false-y to skip)
#   lib_path  subdirectory of BASE_DIR containing the libraries
#   ARGN      library names to locate
#
# Returns early (without setting ${pkg_name}_LIBS) as soon as any requested
# item is missing, which callers use as a "not cached, build it" signal.
function(__find_pkg_then_add_target pkg_name pkg_exe lib_path)
unset(${pkg_name}_LIBS)
message("_FIND:${${pkg_name}_BASE_DIR}")
if(pkg_exe)
# Only search the package's own bin dir, never the system paths.
find_program(${pkg_exe}_EXE ${pkg_exe} PATHS ${${pkg_name}_BASE_DIR}/bin NO_DEFAULT_PATH)
if(NOT ${pkg_exe}_EXE)
return()
endif()
add_executable(${pkg_name}::${pkg_exe} IMPORTED GLOBAL)
set_target_properties(${pkg_name}::${pkg_exe} PROPERTIES
IMPORTED_LOCATION ${${pkg_exe}_EXE}
)
message("found ${${pkg_exe}_EXE}")
endif()
foreach(_LIB_NAME ${ARGN})
set(_LIB_SEARCH_NAME ${_LIB_NAME})
set(_LIB_TYPE SHARED)
# <pkg_name>_USE_STATIC_LIBS switches the search to the static artifact name.
if (${pkg_name}_USE_STATIC_LIBS)
set(_LIB_SEARCH_NAME "${CMAKE_STATIC_LIBRARY_PREFIX}${_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}")
set(_LIB_TYPE STATIC)
endif ()
# Reset any cached result so a stale hit from a prior configure is ignored.
set(${_LIB_NAME}_LIB ${_LIB_NAME}_LIB-NOTFOUND)
find_library(${_LIB_NAME}_LIB ${_LIB_SEARCH_NAME} PATHS ${${pkg_name}_BASE_DIR}/${lib_path} NO_DEFAULT_PATH)
if(NOT ${_LIB_NAME}_LIB)
return()
endif()
add_library(${pkg_name}::${_LIB_NAME} ${_LIB_TYPE} IMPORTED GLOBAL)
# On Windows a SHARED import library goes into IMPORTED_IMPLIB, not LOCATION.
if (WIN32 AND ${_LIB_TYPE} STREQUAL "SHARED")
set_target_properties(${pkg_name}::${_LIB_NAME} PROPERTIES IMPORTED_IMPLIB_RELEASE ${${_LIB_NAME}_LIB})
else()
set_target_properties(${pkg_name}::${_LIB_NAME} PROPERTIES IMPORTED_LOCATION ${${_LIB_NAME}_LIB})
endif()
if (EXISTS ${${pkg_name}_BASE_DIR}/include)
set_target_properties(${pkg_name}::${_LIB_NAME} PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES "${${pkg_name}_BASE_DIR}/include")
endif ()
list(APPEND ${pkg_name}_LIBS ${pkg_name}::${_LIB_NAME})
message("found ${${_LIB_NAME}_LIB}")
# Strip the filename to cache the library's directory for later use.
STRING( REGEX REPLACE "(.+)/(.+)" "\\1" LIBPATH ${${_LIB_NAME}_LIB})
set(${pkg_name}_LIBPATH ${LIBPATH} CACHE STRING INTERNAL)
endforeach(_LIB_NAME)
# Publish the collected imported-target list to the caller.
set(${pkg_name}_LIBS ${${pkg_name}_LIBS} PARENT_SCOPE)
endfunction()
# Run an external command at configure time and abort configuration on failure.
#
# Keyword arguments:
#   COMMAND <cmd> [<arg>...]  - command line to execute (multi-value)
#   WORKING_DIRECTORY <dir>   - directory the command runs in (single value)
function(__exec_cmd)
    cmake_parse_arguments(EXEC "" "WORKING_DIRECTORY" "COMMAND" ${ARGN})
    execute_process(
        COMMAND ${EXEC_COMMAND}
        WORKING_DIRECTORY ${EXEC_WORKING_DIRECTORY}
        RESULT_VARIABLE _exec_rc)
    # Any non-zero exit status is fatal for the configure step.
    if(NOT _exec_rc EQUAL "0")
        message(FATAL_ERROR "error! when ${EXEC_COMMAND} in ${EXEC_WORKING_DIRECTORY}")
    endif()
endfunction()
# Detect whether the set of patch files for a package changed since the last
# configure. When the concatenated per-file MD5 differs from the cached value,
# the FetchContent subbuild directory is removed (forcing a fresh download) and
# the new MD5 list is written back to the cache file.
#
# NOTE(review): the declared parameter `pkg_patches` is never used; the body
# reads PKG_PATCHES, pkg_name and _MS_LIB_CACHE from the calling scope
# (function scopes inherit caller variables for reading) - confirm callers
# always set them before invoking this.
function(__check_patches pkg_patches)
    # check patches
    if (PKG_PATCHES)
        # Ensure the cache file exists before reading it on the first run.
        file(TOUCH ${_MS_LIB_CACHE}/${pkg_name}_patch.md5)
        file(READ ${_MS_LIB_CACHE}/${pkg_name}_patch.md5 ${pkg_name}_PATCHES_MD5)
        message("patches md5:${${pkg_name}_PATCHES_MD5}")
        set(${pkg_name}_PATCHES_NEW_MD5 )
        # Build a ",<md5>,<md5>..." fingerprint over all patch files.
        foreach(_PATCH ${PKG_PATCHES})
            file(MD5 ${_PATCH} _PF_MD5)
            set(${pkg_name}_PATCHES_NEW_MD5 "${${pkg_name}_PATCHES_NEW_MD5},${_PF_MD5}")
        endforeach(_PATCH)
        if (NOT ${pkg_name}_PATCHES_MD5 STREQUAL ${pkg_name}_PATCHES_NEW_MD5)
            set(${pkg_name}_PATCHES ${PKG_PATCHES})
            # Drop the stale subbuild so the package is re-fetched and re-patched.
            file(REMOVE_RECURSE "${_MS_LIB_CACHE}/${pkg_name}-subbuild")
            file(WRITE ${_MS_LIB_CACHE}/${pkg_name}_patch.md5 ${${pkg_name}_PATCHES_NEW_MD5})
            message("patches changed : ${${pkg_name}_PATCHES_NEW_MD5}")
        endif ()
    endif ()
endfunction()
# Options passed to find_package() to restrict the search to the paths this
# build provides, so system-installed copies of a dependency are never found.
set(MS_FIND_NO_DEFAULT_PATH NO_CMAKE_PATH NO_CMAKE_ENVIRONMENT_PATH NO_SYSTEM_ENVIRONMENT_PATH
                    NO_CMAKE_BUILDS_PATH NO_CMAKE_PACKAGE_REGISTRY NO_CMAKE_SYSTEM_PATH
                    NO_CMAKE_SYSTEM_PACKAGE_REGISTRY)
# Main entry point: make a third-party package available as imported targets,
# downloading, patching and locating it on first use. Results are cached under
# ${_MS_LIB_CACHE}/<name>_<config-hash> so a package is only rebuilt when its
# configuration (flags, patches, arguments) changes.
#
# Keyword arguments (parsed from ARGN):
#   URL/MD5 or GIT_REPOSITORY/GIT_TAG - download source
#   DIR        - use an existing source directory instead of downloading
#   VER        - version constraint for the find_package fallback
#   EXE        - executable to import as <name>::<exe> (optional)
#   LIBS       - libraries to locate and import as <name>::<lib>
#   LIB_PATH   - library sub-directory (default: lib)
#   HEAD_ONLY  - header-only package: only export <name>_INC
#   PATCHES    - patch files applied with "patch -p1"
#   SUBMODULES / SOURCEMODULES / CUSTOM_CMAKE / RELEASE / CMAKE_PATH - extras
#
# NOTE(review): CMAKE_OPTION, PRE_CONFIGURE_COMMAND, CONFIGURE_COMMAND,
# BUILD_OPTION, INSTALL_INCS and INSTALL_LIBS are declared but not consumed in
# this function body - presumably handled elsewhere; confirm.
function(mindspore_add_pkg pkg_name )
    set(options )
    set(oneValueArgs URL MD5 GIT_REPOSITORY GIT_TAG VER EXE DIR HEAD_ONLY CMAKE_PATH RELEASE LIB_PATH CUSTOM_CMAKE)
    set(multiValueArgs CMAKE_OPTION LIBS PRE_CONFIGURE_COMMAND CONFIGURE_COMMAND BUILD_OPTION INSTALL_INCS INSTALL_LIBS PATCHES SUBMODULES SOURCEMODULES)
    cmake_parse_arguments(PKG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN} )
    if (NOT PKG_LIB_PATH)
        set(PKG_LIB_PATH lib)
    endif ()
    if(NOT PKG_EXE)
        set(PKG_EXE 0)
    endif()
    # Keep the original casing for find_package(); use lower case internally.
    set(__FIND_PKG_NAME ${pkg_name})
    string(TOLOWER ${pkg_name} pkg_name)
    message("pkg name:${__FIND_PKG_NAME},${pkg_name}")
    # Fingerprint the patch set so patch changes invalidate the cache below.
    set(${pkg_name}_PATCHES_HASH )
    foreach(_PATCH ${PKG_PATCHES})
        file(MD5 ${_PATCH} _PF_MD5)
        set(${pkg_name}_PATCHES_HASH "${${pkg_name}_PATCHES_HASH},${_PF_MD5}")
    endforeach(_PATCH)
    # check options
    # The config text covers compiler versions, all arguments, link mode,
    # patches and per-package flags; its MD5 becomes the cache directory key.
    set(${pkg_name}_CONFIG_TXT
            "${CMAKE_CXX_COMPILER_VERSION}-${CMAKE_C_COMPILER_VERSION}
            ${ARGN} - ${${pkg_name}_USE_STATIC_LIBS}- ${${pkg_name}_PATCHES_HASH}
            ${${pkg_name}_CXXFLAGS}--${${pkg_name}_CFLAGS}--${${pkg_name}_LDFLAGS}")
    string(REPLACE ";" "-" ${pkg_name}_CONFIG_TXT ${${pkg_name}_CONFIG_TXT})
    string(MD5 ${pkg_name}_CONFIG_HASH ${${pkg_name}_CONFIG_TXT})
    message("${pkg_name} config hash: ${${pkg_name}_CONFIG_HASH}")
    set(${pkg_name}_BASE_DIR ${_MS_LIB_CACHE}/${pkg_name}_${${pkg_name}_CONFIG_HASH})
    # NOTE(review): same "CACHE STRING INTERNAL" quirk as in
    # __find_pkg_then_add_target - "INTERNAL" becomes the docstring.
    set(${pkg_name}_DIRPATH ${${pkg_name}_BASE_DIR} CACHE STRING INTERNAL)
    # Fast path for header-only packages already present in the cache.
    if(EXISTS ${${pkg_name}_BASE_DIR}/options.txt AND PKG_HEAD_ONLY)
        set(${pkg_name}_INC ${${pkg_name}_BASE_DIR}/${PKG_HEAD_ONLY} PARENT_SCOPE)
        add_library(${pkg_name} INTERFACE)
        target_include_directories(${pkg_name} INTERFACE ${${pkg_name}_INC})
        if (${PKG_RELEASE})
            __find_pkg_then_add_target(${pkg_name} ${PKG_EXE} ${PKG_LIB_PATH} ${PKG_LIBS})
        endif ()
        return()
    endif ()
    # <Name>_ROOT steers find_package() toward the cache directory.
    set(${__FIND_PKG_NAME}_ROOT ${${pkg_name}_BASE_DIR})
    set(${__FIND_PKG_NAME}_ROOT ${${pkg_name}_BASE_DIR} PARENT_SCOPE)
    # Fast path: the package was already built and its libs are in the cache.
    if (PKG_LIBS)
        __find_pkg_then_add_target(${pkg_name} ${PKG_EXE} ${PKG_LIB_PATH} ${PKG_LIBS})
        if(${pkg_name}_LIBS)
            set(${pkg_name}_INC ${${pkg_name}_BASE_DIR}/include PARENT_SCOPE)
            message("Found libs: ${${pkg_name}_LIBS}")
            return()
        endif()
    elseif(NOT PKG_HEAD_ONLY)
        find_package(${__FIND_PKG_NAME} ${PKG_VER} ${MS_FIND_NO_DEFAULT_PATH})
        if (${__FIND_PKG_NAME}_FOUND)
            set(${pkg_name}_INC ${${pkg_name}_BASE_DIR}/include PARENT_SCOPE)
            message("Found pkg: ${__FIND_PKG_NAME}")
            return()
        endif ()
    endif ()
    # Cache miss: obtain the sources (download, or use the caller-supplied DIR).
    if (NOT PKG_DIR)
        if (PKG_GIT_REPOSITORY)
            __download_pkg_with_git(${pkg_name} ${PKG_GIT_REPOSITORY} ${PKG_GIT_TAG} ${PKG_MD5})
        else()
            message("--__download_pkg")
            __download_pkg(${pkg_name} ${PKG_URL} ${PKG_MD5})
        endif()
        # Copy pre-fetched submodule trees into the source's 3rdparty directory.
        foreach(_SUBMODULE_FILE ${PKG_SUBMODULES})
            STRING( REGEX REPLACE "(.+)_(.+)" "\\1" _SUBMODEPATH ${_SUBMODULE_FILE})
            STRING( REGEX REPLACE "(.+)/(.+)" "\\2" _SUBMODENAME ${_SUBMODEPATH})
            file(GLOB ${pkg_name}_INSTALL_SUBMODULE ${_SUBMODULE_FILE}/*)
            file(COPY ${${pkg_name}_INSTALL_SUBMODULE} DESTINATION ${${pkg_name}_SOURCE_DIR}/3rdparty/${_SUBMODENAME})
        endforeach (_SUBMODULE_FILE)
    else()
        set(${pkg_name}_SOURCE_DIR ${PKG_DIR})
    endif ()
    # Record the configuration that produced this cache entry.
    file(WRITE ${${pkg_name}_BASE_DIR}/options.txt ${${pkg_name}_CONFIG_TXT})
    message("${pkg_name}_SOURCE_DIR : ${${pkg_name}_SOURCE_DIR}")
    # Apply each patch after normalizing it to LF line endings (patch(1) is
    # strict about newline style).
    foreach(_PATCH_FILE ${PKG_PATCHES})
        get_filename_component(_PATCH_FILE_NAME ${_PATCH_FILE} NAME)
        set(_LF_PATCH_FILE ${CMAKE_BINARY_DIR}/_ms_patch/${_PATCH_FILE_NAME})
        configure_file(${_PATCH_FILE} ${_LF_PATCH_FILE} NEWLINE_STYLE LF)
        message("patching ${${pkg_name}_SOURCE_DIR} -p1 < ${_LF_PATCH_FILE}")
        execute_process(COMMAND patch -p1 INPUT_FILE ${_LF_PATCH_FILE}
                WORKING_DIRECTORY ${${pkg_name}_SOURCE_DIR}
                RESULT_VARIABLE Result)
        if(NOT Result EQUAL "0")
            message(FATAL_ERROR "Failed patch: ${_LF_PATCH_FILE}")
        endif()
    endforeach(_PATCH_FILE)
    # Serialize concurrent configures of the same package; the lock is released
    # automatically when this function returns (GUARD FUNCTION).
    file(LOCK ${${pkg_name}_BASE_DIR} DIRECTORY GUARD FUNCTION RESULT_VARIABLE ${pkg_name}_LOCK_RET TIMEOUT 600)
    if(NOT ${pkg_name}_LOCK_RET EQUAL "0")
        message(FATAL_ERROR "error! when try lock ${${pkg_name}_BASE_DIR} : ${${pkg_name}_LOCK_RET}")
    endif()
    # Optionally overlay a replacement CMakeLists.txt onto the sources.
    if (PKG_CUSTOM_CMAKE)
        file(GLOB ${pkg_name}_cmake ${PKG_CUSTOM_CMAKE}/CMakeLists.txt)
        file(COPY ${${pkg_name}_cmake} DESTINATION ${${pkg_name}_SOURCE_DIR})
    endif ()
    # Final location step: the libs (or the package config) must now be present.
    if (PKG_LIBS)
        __find_pkg_then_add_target(${pkg_name} ${PKG_EXE} ${PKG_LIB_PATH} ${PKG_LIBS})
        set(${pkg_name}_INC ${${pkg_name}_BASE_DIR}/include PARENT_SCOPE)
        if(NOT ${pkg_name}_LIBS)
            message(FATAL_ERROR "Can not find pkg: ${pkg_name}")
        endif()
    else()
        find_package(${__FIND_PKG_NAME} ${PKG_VER} QUIET ${MS_FIND_NO_DEFAULT_PATH})
        if (${__FIND_PKG_NAME}_FOUND)
            set(${pkg_name}_INC ${${pkg_name}_BASE_DIR}/include PARENT_SCOPE)
            message("Found pkg: ${${__FIND_PKG_NAME}_LIBRARIES}")
            return()
        endif ()
    endif ()
endfunction()
#!/usr/bin/env python3
# coding: utf-8
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Additional IR Pass for CCE
"""
from __future__ import absolute_import as _abs
import sys
import os
import logging
def AKGAddPath():
    """Ensure this package's directory is the first entry on sys.path."""
    package_dir = os.path.realpath(os.path.dirname(os.path.realpath(__file__)))
    # Remove any existing occurrence first so the insert below always makes
    # this directory the highest-priority import location.
    if package_dir in sys.path:
        sys.path.remove(package_dir)
    sys.path.insert(0, package_dir)
class AKGMetaPathFinder:
    """sys.meta_path finder redirecting "akg.tvm.*" / "akg.topi.*" imports.

    Matching names are answered with an AKGMetaPathLoader that imports the
    underlying "tvm"/"topi" module and registers it under the aliased name.
    """

    def find_module(self, fullname, path=None):
        """Return a loader for aliased modules, or None for everything else.

        Args:
            fullname (str): fully qualified module name being imported.
            path: unused; accepted to satisfy the finder protocol.
        """
        # Bug fix: this deprecation check must precede the generic "akg.topi"
        # prefix test below, otherwise it is unreachable and the deprecated
        # module silently loads without any warning.
        if fullname == "akg.topi.cce.cce_extended_op_build":
            logging.warning("akg error: 'akg.topi.cce.cce_extended_op_build' has been deprecated, please using "
                            "'akg.topi.cce.te_op_build' instead ")
            return None
        if fullname.startswith("akg.tvm"):
            # Strip the leading "akg." to get the real module name.
            rname = fullname[4:]
            return AKGMetaPathLoader(rname)
        if fullname.startswith("akg.topi"):
            rname = fullname[4:]
            return AKGMetaPathLoader(rname)
        return None
class AKGMetaPathLoader:
    """Loader that imports the real module behind an "akg."-prefixed alias."""

    def __init__(self, rname):
        # Real module name (e.g. "tvm") to import when load_module is called.
        self.__rname = rname

    def load_module(self, fullname):
        """Freshly import the real module and also register it as `fullname`."""
        # Evict any cached copy so the import below starts from scratch.
        sys.modules.pop(self.__rname, None)
        AKGAddPath()
        __import__(self.__rname, globals(), locals())
        target = sys.modules[self.__rname]
        self.__target_module = target
        # Expose the real module under the aliased name as well.
        sys.modules[fullname] = target
        return target
# Install the finder at the highest priority so "akg.tvm.*" / "akg.topi.*"
# imports are intercepted before the normal import machinery sees them.
sys.meta_path.insert(0, AKGMetaPathFinder())
# Re-export the package's public API at the top level.
from . import autodiff
from .build_module import build, build_to_func, lower, build_config
from .autodiff import differentiate
from .autodiff import get_variables
from .autodiff import register_variables
from .lang.cce.te_compute.common import fargmax, fargmin, mad
from . import lang
__all__ = ["differentiate"]
#!/usr/bin/env python3
# coding: utf-8
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Automatic differentiation of tensor expressions."""
import akg
from akg.tvm._ffi.function import _init_api
from akg.tvm._ffi.node import NodeBase, register_node
from akg.utils.format_transform import get_shape
_init_api("akg.autodiff")
def collect_subtensors_by_name(tensor, name, result):
    """
    find all the subtensors with names matched the pattern `name`.

    Walks the compute graph rooted at `tensor` depth-first (children before
    the node itself) and records every tensor whose op name contains `name`
    as a substring.

    Args:
        tensor: An input tensor.
        name: the `name` pattern to be matched (plain substring, not a regex).
        result: list that matched subtensors are appended to; each match is
            wrapped in a single-element list.

    Returns:
        The same `result` list with all matches appended.
    """
    # Bug fix: the recursive call previously used the undefined name
    # `collect_by_name` (NameError on any tensor with inputs), and
    # `result.extend(child_result)` extended the list with an alias of itself,
    # duplicating entries. The recursion mutates `result` in place, so no
    # extend is needed.
    for child in tensor.op.input_tensors:
        collect_subtensors_by_name(child, name, result)
    if tensor.op.name.find(name) != -1:
        result.append([tensor])
    return result
@akg.tvm.register_func("akg.autodiff.export_to_DOT")
def export_to_dot(tensors, filename="test.dot"):
    """
    Export computation tree of tensors to a DOT file.

    Compute ops are drawn as grey ellipses (annotated with shape, dtype and
    opcode), placeholders and float constants as colored boxes, and data flow
    as edges labelled with the producing tensor's shape.

    Args:
        tensors: A single/list/array of input tensors.
        filename: the name of the DOT file to be generated.
    """
    def export_tensor_shape(a_shape):
        # Render a shape as "(d0, d1, ... )"; each dim is an IntImm-like
        # object exposing `.value`.
        result = "("
        for _, a_shp in enumerate(a_shape):
            result = result + str(a_shp.value) + ", "
        result = result + ")"
        return result
    def recursive_collect_nodes(tensor, exported_op_nodes, repeat_name):
        # Assign each tensor a unique DOT node name, de-duplicating repeated
        # op names with an "_r<counter>" suffix.
        if tensor in exported_op_nodes:
            return exported_op_nodes, repeat_name
        if not exported_op_nodes:
            exported_op_nodes = {tensor: tensor.op.name}
        else:
            if tensor.op.name in exported_op_nodes.values():
                exported_op_nodes[tensor] = tensor.op.name + '_r' + str(repeat_name)
                repeat_name = repeat_name + 1
            else:
                exported_op_nodes[tensor] = tensor.op.name
        # exported_op_nodes[tensor] contains the name in DOT for "tensor"
        # If name is duplicated, a postfix '-r' + number is add to the end
        for child in tensor.op.input_tensors:
            if child not in exported_op_nodes:
                exported_op_nodes, repeat_name = recursive_collect_nodes(child, exported_op_nodes, repeat_name)
        return exported_op_nodes, repeat_name
    def export_node_name(tensor):
        # Build the DOT node declaration for one tensor, choosing a short
        # opcode label from the op's first body expression.
        if isinstance(tensor.op, akg.tvm.tensor.ComputeOp):
            if isinstance(tensor.op.body[0], akg.tvm.expr.Reduce):
                tensor_opcode_name = 'Reduce'
            elif isinstance(tensor.op.body[0], akg.tvm.expr.Mul):
                tensor_opcode_name = '*'
            elif isinstance(tensor.op.body[0], akg.tvm.expr.Add):
                tensor_opcode_name = '+'
            elif isinstance(tensor.op.body[0], akg.tvm.expr.Sub):
                tensor_opcode_name = '-'
            elif isinstance(tensor.op.body[0], akg.tvm.expr.Div):
                tensor_opcode_name = '/'
            elif isinstance(tensor.op.body[0], akg.tvm.expr.Call):
                tensor_opcode_name = 'Call ' + tensor.op.body[0].name
            elif isinstance(tensor.op.body[0], akg.tvm.expr.Cast):
                tensor_opcode_name = 'Cast:' + tensor.op.input_tensors[0].dtype + '=>' + tensor.dtype
            else:
                tensor_opcode_name = 'Unsupported yet OP'
            tensor_node_name = ' "' + exported_op_nodes[tensor] + '" [label = "' + exported_op_nodes[tensor] +\
                               '\\n' + export_tensor_shape(tensor.shape) + '; ' + tensor.dtype + '\\n' +\
                               tensor_opcode_name + '"; shape = ellipse; style = filled; color = lightgrey];'
        else:  # isinstance(tensor.op,akg.tvm.tensor.PlaceholderOp):
            tensor_node_name = ' "' + exported_op_nodes[tensor] + '" [label = "' + exported_op_nodes[tensor] +\
                               '\\n' + export_tensor_shape(tensor.shape) +\
                               '"; shape = box; style = filled; color = lightseagreen];'
        return tensor_node_name
    def recursive_export_nodes_name(tensor, f, exported_op_nodes):
        # Emit node declarations bottom-up, adding pseudo-nodes for float
        # immediates used by binary ops that only have one tensor input.
        for child in tensor.op.input_tensors:
            recursive_export_nodes_name(child, f, exported_op_nodes)
        if isinstance(tensor.op, akg.tvm.tensor.ComputeOp):
            if isinstance(tensor.op.body[0], (akg.tvm.expr.Mul, akg.tvm.expr.Add, akg.tvm.expr.Sub, akg.tvm.expr.Div)):
                if len(tensor.op.input_tensors) < 2:
                    if isinstance(tensor.op.body[0].a, akg.tvm.expr.FloatImm):
                        tensor_node_name = ' "Const_a_' + exported_op_nodes[tensor] +\
                                           '" [label = "' + str(tensor.op.body[0].a.value) + '\\n' +\
                                           tensor.op.body[0].a.dtype +\
                                           '"; shape = box; style = filled; color = lightseagreen];'
                        f.write(tensor_node_name + "\n")
                    if isinstance(tensor.op.body[0].b, akg.tvm.expr.FloatImm):
                        tensor_node_name = ' "Const_b_' + exported_op_nodes[tensor] +\
                                           '" [label = "' + str(tensor.op.body[0].b.value) + '\\n' +\
                                           tensor.op.body[0].b.dtype +\
                                           '"; shape = box; style = filled; color = lightseagreen];'
                        f.write(tensor_node_name + "\n")
        f.write(export_node_name(tensor) + "\n")
    def recursive_export_edges(tensor, f, exported_op_nodes, exported_edges):
        # Emit each data-flow edge exactly once; `exported_edges` tracks the
        # (from, to) pairs already written.
        to_name = '"' + exported_op_nodes[tensor] + '"'
        for child in tensor.op.input_tensors:
            recursive_export_edges(child, f, exported_op_nodes, exported_edges)
            from_name = '"' + exported_op_nodes[child] + '"'
            if (from_name, to_name) not in exported_edges:
                exported_edges.add((from_name, to_name))
                f.write(' ' + from_name + " -> " + to_name
                        + ' [label = "' + export_tensor_shape(child.shape) + '"];\n')
        if isinstance(tensor.op, akg.tvm.tensor.ComputeOp):
            if isinstance(tensor.op.body[0], (akg.tvm.expr.Mul, akg.tvm.expr.Add, akg.tvm.expr.Sub, akg.tvm.expr.Div)):
                if len(tensor.op.input_tensors) < 2:
                    if isinstance(tensor.op.body[0].a, akg.tvm.expr.FloatImm):
                        from_name = '"Const_a_' + exported_op_nodes[tensor] + '"'
                        if (from_name, to_name) not in exported_edges:
                            exported_edges.add((from_name, to_name))
                            f.write(' ' + from_name + " -> " + to_name + ' [label = "(const)"];\n')
                    if isinstance(tensor.op.body[0].b, akg.tvm.expr.FloatImm):
                        from_name = '"Const_b_' + exported_op_nodes[tensor] + '"'
                        if (from_name, to_name) not in exported_edges:
                            exported_edges.add((from_name, to_name))
                            f.write(' ' + from_name + " -> " + to_name + ' [label = "(const)"];\n')
        return exported_edges
    with open(filename, "w+") as f_out:
        f_out.write('digraph G {\n ration = compress;\n nodesep = 0.1; rankdir = BT\n')
        exported_op_nodes = dict()  # dict of {tensor, tensor_name}
        exported_edges = set()
        repeat_name = 0
        # Normalize the input to a plain list of tensors.
        if isinstance(tensors, akg.tvm.container.Array):
            list_tensors = [x for x in tensors]
        else:
            if isinstance(tensors, akg.tvm.tensor.Tensor):
                list_tensors = [tensors]
            else:
                list_tensors = []
        for a_tensor in list_tensors:
            exported_op_nodes, repeat_name = recursive_collect_nodes(a_tensor, exported_op_nodes, repeat_name)
            recursive_export_nodes_name(a_tensor, f_out, exported_op_nodes)
            exported_edges = recursive_export_edges(a_tensor, f_out, exported_op_nodes, exported_edges)
        f_out.write("\n}\n")
# Module-level registry: name -> [output, input] pairs recorded by
# register_variables and read back by get_variables.
variable_map = {}
def register_variables(name, input, output):
    """Record an [output, input] pair under `name` in the module registry.

    Raises:
        ValueError: if `name` is not a string.
    """
    if isinstance(name, str):
        variable_map[name] = [output, input]
    else:
        raise ValueError("key {} is not str.".format(name))
def get_variables(name):
    """Look up the entry registered under `name` in the module registry.

    Returns:
        The [output, input] list stored by register_variables.

    Raises:
        ValueError: if `name` is not a string, or the stored value is empty.
    """
    if not isinstance(name, str):
        raise ValueError("key {} is not str.".format(name))
    if not variable_map[name]:
        raise ValueError("value to key {} is empty.".format(name))
    return variable_map[name]
@register_node
class DifferentiationResult(NodeBase):
    """
    Result of differentiation.

    Args:
        result (list[tvm.tensor.Tensor]):
            The requested adjoints, i.e. the Jacobians or gradients of the given output
            wrt to the given inputs.
        adjoints (dict[tvm.tensor.Tensor, tvm.tensor.Tensor]):
            A map from tensors to the corresponding adjoints (including internal nodes).
        adjoint_summands (dict[tvm.tensor.Tensor, dict[tvm.tensor.Tensor, tvm.tensor.Tensor]]):
            Single summands of the adjoints.
    """
    # Here we convert tvm Maps to dicts because Map compares keys by reference which is
    # wrong for tvm.tensor.Tensors. Hopefully, in the future Map gets fixed somehow, and these properties
    # may be removed then.
    @property
    def adjoints(self):
        # Fetch the raw tvm Map through NodeBase and rebuild a plain dict.
        res = NodeBase.__getattr__(self, 'adjoints')
        return dict(res.items())
    @property
    def adjoint_summands(self):
        # Same conversion, one level deeper (dict of dicts).
        res = NodeBase.__getattr__(self, 'adjoint_summands')
        return {k: dict(v.items()) for k, v in res.items()}
    def _check_not_empty(self):
        # Guard for the sequence protocol below: iterating an empty result is
        # almost certainly caller error, so fail loudly with guidance.
        if not self.result:
            raise ValueError("The result of differentiation does not contain any explicitly "
                             "requested results, so using it as an iterable is probably a mistake. "
                             "Please explicitly use res.adjoints to get adjoints or res.result to "
                             "get the empty list.")
    def __getitem__(self, i):
        self._check_not_empty()
        return self.result[i]
    def __len__(self):
        self._check_not_empty()
        return len(self.result)
def differentiate(output, inputs=None, head=None, ad_attrs=None, new_pld_array=None, override=None, fdiff=None):
    """
    Perform operator-level automatic differentiation.

    Args:
        output (tvm.tensor.Tensor): The tensor to differentiate.
        inputs (list[tvm.tensor.Tensor]): The list of input tensors.
            When the list is empty or None, will perform differentiation with respect to all tensors the output depends
            on (i.e. will compute all adjoints and populate the corresponding dict, but the list of results will be
            empty). Default: None.
        head (tvm.tensor.Tensor): The adjoint of the output.
            in other words, some tensors, by which the Jacobians will be multiplied. Its shape must be of the form
            `prefix + output.shape`. For example, if the shape of `output` is (2, 3), the shape of `head` could
            be (2, 3), (?, 2, 3) and etc.
            If `None` is passed, the identity tensor of shape `output.shape + output.shape` will be used.
            Default: None.
        ad_attrs (dict): The additional attributes for the auto-differentiate computation. Default: None.
        new_pld_array (list): List of additional variables which could be used in differentiation. Default: None.
        override (dict): A dictionary to override differentiation for certain tensors.
            Override is a dictionary with types: {tvm.tensor.Tensor: (list[tvm.tensor.Tensor],
            callable[tvm.tensor.Tensor, list[tvm.tensor.Tensor], tvm.tensor.Tensor, list[tvm.tensor.Tensor]])}.
            This dict maps tensors `t` to pairs `(dependencies, custom_diff)` where `dependencies` is a list of
            tensors which are considered to be inputs of `t` (which may differ from the immediate inputs),
            and `custom_diff` is a custom differentiation function which will be called as
            `custom_diff(t, dependencies, adjoint, new_pld_array)` and should return a list of adjoints
            corresponding to dependencies.
            Note that this function differs from the one required for `fdiff`
            in that it takes a list of inputs instead of a single input
            and returns a list of adjoints instead of a single adjoint. Default: None.
        fdiff (callable[tvm.tensor.Tensor, tvm.tensor.Tensor, tvm.tensor.Tensor, tvm.tensor.Tensor]): The default
            function performing differentiation and multiplication, by default `akg.autodiff.DiffBuildingBlock` is used.
            The function must accept parameters:

            - `output` - an output tensor

            - `input` - an input tensor

            - `head` - the adjoint of the output tensor

            - `ad_attrs` - the additional attributes for the auto-differentiate computation

            - `new_pld_array` - the additional tensors with information for the auto-differentiate computation

            The result should be `head` multiplied by the Jacobians of `output` wrt `input`. Default: None.

    Returns:
        DifferentiationResult.
        class DifferentiationResult is used to represent a differentiation result, including:

        - result (list[tvm.tensor.Tensor]):
            The requested adjoints, i.e. the Jacobians or gradients of the given output
            with respect to the given inputs.

        - adjoints (dict{tvm.tensor.Tensor: tvm.tensor.Tensor}):
            A dict from tensors to the corresponding adjoints (including internal nodes).

        - adjoint_summands (dict{tvm.tensor.Tensor: dict{tvm.tensor.Tensor: tvm.tensor.Tensor}}):
            Single summands of the adjoints.

    Raises:
        ValueError: If the shape of `head` is invalid.

    Examples:
        >>> x = akg.tvm.placeholder((32, 3, 28, 28), name='x')
        >>> w1 = akg.tvm.placeholder((10, 3, 3, 3), name='w1')
        >>> z1 = akg.topi.nn.conv2d(x, w1, 1, 0, 1)
        >>> z2 = akg.topi.nn.flatten(z1)
        >>> y = akg.topi.sum(z2)
        >>>
        >>> # produce gradients
        >>> [dw1, dw2] = akg.differentiate(y, [x, w1])
        >>>
        >>> # produce Jacobians
        >>> [jw1, jw2] = akg.differentiate(z2, [x, w1])
        >>>
        >>> # produce Jacobians, the head adjoint for z2 is provided manually
        >>> [dw1, dw2] = akg.differentiate(z2, [x, w1], akg.topi.full_like(z2, 1.0))
        >>>
        >>> # produce gradients wrt all inputs
        >>> res = akg.differentiate(y)
        >>> dw1 = res.adjoints[x]
        >>> dw2 = res.adjoints[w1]
        >>>
        >>> # a custom differentiation function
        >>> head = akg.tvm.placeholder((1,), name = 'head')
        >>> def my_fdiff(out, inp, head, ad_attrs, new_pld_array):
        >>>     return [akg.tvm.compute(inp[0].shape, lambda ax0, ax1, ax2, ax3: head[ax0, ax3 + ax2*26 + ax1*676])]
        >>>
        >>> # using a custom differentiation function only for z2
        >>> res = akg.differentiate(y, [x, w1], head, None, None, override={z2: ([z1], my_fdiff)})
    """
    # check whether head shape is compatible with output shape.
    if head is not None:
        output_shape = get_shape(output)
        head_shape = get_shape(head)
        output_dim = len(output_shape)
        # head must end with output's shape; any leading dims form the prefix.
        head_last_shape = head_shape[-output_dim:]
        if head_last_shape != output_shape:
            raise ValueError("operands could not be broadcast together with head shape %s and output shape %s" %
                             (str(head_shape), str(output_shape)))
    if inputs is None:
        inputs = []
    if override is not None:
        override_deps = []
    if fdiff is None:
        fdiff = DiffBuildingBlock
    if override is not None:
        # Wrap the base fdiff: for overridden tensors, call the custom
        # differentiation once per (out, head) pair and cache the resulting
        # adjoint list, then hand back the entry matching `inp`.
        def modified_fdiff(out, inp, head, ad_attrs, new_pld_array, override=override, old_fdiff=fdiff, cache=None):
            if cache is None:
                cache = {}
            if out in override:
                if (out, head) not in cache:
                    cache[(out, head)] = override[out][1](out, override[out][0], head, ad_attrs, new_pld_array)
                idx = override[out][0].index(inp)
                return cache[(out, head)][idx]
            return old_fdiff(out, inp, head, ad_attrs, new_pld_array)
        fdiff = modified_fdiff
        # The FFI call receives only the dependency lists from the override map.
        override_deps = {t: deps for t, (deps, _) in override.items()}
        return akg.autodiff.Differentiate(output, inputs, head, ad_attrs, None, fdiff, override_deps)
    if new_pld_array is None:
        return akg.autodiff.Differentiate(output, inputs, head, ad_attrs, [], fdiff)
    return akg.autodiff.Differentiate(output, inputs, head, ad_attrs, new_pld_array, fdiff)
#!/usr/bin/env python3
# coding: utf-8
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""__init__"""
from __future__ import absolute_import as _abs
import logging
from akg.tvm._ffi.function import _init_api
if __name__ == "platform":
    # This package can shadow Python's built-in "platform" module when its
    # directory ends up on sys.path. If we were imported under that name,
    # undo the damage: drop the offending path entries, evict the bogus
    # "platform" entry from sys.modules, and restore a sane search order so
    # the stdlib module wins on the next import.
    import sys
    import os
    logging.info("Using python build-in 'platform'")
    # Parent directory of this package - the entry that caused the shadowing.
    tp_ = os.path.realpath(os.path.join(os.path.dirname(__file__), ".."))
    bak_path = sys.path[:]
    for item in bak_path:
        if (item == '' or os.path.realpath(item) == tp_) and item in sys.path:
            sys.path.remove(item)
    sys.modules.pop('platform')
    # Re-add cwd first and push the package's parent to the very end.
    sys.path.insert(0, '')
    sys.path.append(tp_)
else:
    # Normal import path: pull in the cce helpers wholesale.
    from .cce_params import *
    from .cce_build import *
    from .cce_conf import *
    from .cce_runtime import *
    from .cce_buffer import *
_init_api("akg.build_module")
#!/usr/bin/env python3
# coding: utf-8
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""aic model simulation util."""
import os
import subprocess
import json
import numpy as np
import akg.tvm
class Spec():
    """File names of the known aic-model chip specifications.

    Values are passed to `launch(..., spec=...)` and resolved relative to
    AIC_MODEL_PATH when the simulator config is written.
    """
    MINI = "davinci_mini.spec"
    LITE = "davinci_lite.spec"
    LITE2 = "davinci_lite2.spec"
    CLOUD = "davinci_cloud.spec"
    ORLANDO_CS = "orlando_cs.spec"
    PHEONIX_CS = "pheonix_cs.spec"
def launch(kernel, args, output=(-1,), kernel_meta_path='./kernel_meta', spec=Spec.MINI):
    """
    simulated run CCE kernel by aic model.

    Writes the kernel binary, the input buffers and a TOML config into
    ./aic_out, invokes the simulator via a generated run.sh, then reads the
    output buffers back as numpy arrays.

    Args:
        kernel (str): str of kernel name, or CCE Module.
        args (Union[list, tuple]): list or tuple of numpy array.
        output (Union[list, tuple]): list or tuple of output argument index.
        kernel_meta_path : kernel meta directory path of the kernel.
        spec : target chip specification.

    Returns:
        output numpy array, or tuple of numpy array if multi-output.

    Raises:
        RuntimeError: when AIC_MODEL_PATH is unset or a required path is missing.
    """
    # Derive the kernel name from the generated CCE source when a Module is given.
    if isinstance(kernel, akg.tvm.module.Module):
        code = kernel.imported_modules[0].get_source()
        kernel_name = code.split("_kernel")[0].split(" ")[-1]
    else:
        kernel_name = kernel
    # Simulated HBM layout: buffers are placed from hbm_addr upward in
    # hbm_unit-aligned chunks.
    hbm_addr = 0x4000000
    hbm_unit = 0x1000000
    aic_model_path = os.getenv('AIC_MODEL_PATH')
    if not aic_model_path:
        msg = "AIC_MODEL_PATH environment variable is not set. Please set it to the dir of model_exe"
        raise RuntimeError(msg)
    aic_model_path = os.path.realpath(aic_model_path)
    if not os.path.exists(aic_model_path):
        msg = "The parameter aic_model_path can not be found, please check"
        raise RuntimeError(msg)
    # Prepare the working tree: aic_out/, its calog/ and a symlink to the model.
    aic_out_path = os.path.realpath("aic_out")
    if not os.path.exists(aic_out_path):
        os.mkdir(aic_out_path)
    calog_path = aic_out_path + "/calog"
    if not os.path.exists(calog_path):
        os.mkdir(calog_path)
    model_path = aic_out_path + "/model"
    if not os.path.exists(model_path):
        subprocess.call(["ln", "-s", aic_model_path + "/model", model_path])
    kernel_meta_realpath = os.path.realpath(kernel_meta_path)
    if not os.path.exists(kernel_meta_realpath):
        msg = "The parameter kernel_meta_realpath can not be found, please check"
        raise RuntimeError(msg)
    # Extract the raw .text section of the kernel object into kernel.bin.
    o_name = kernel_meta_realpath + "/" + kernel_name + ".o"
    bin_name = aic_out_path + "/kernel.bin"
    subprocess.call(["aicore-elf-objcopy", "-O", "binary", "-j", ".text", o_name, bin_name])
    # blockDim and friends come from the kernel's build-time JSON metadata.
    load_dict = {}
    with open("%s/%s.json" % (kernel_meta_realpath, kernel_name), "r") as f:
        load_dict = json.load(f)
    arg_info = []  # [{"bin": "xx.bin", "out" : False, "size":100, "addr": 200},]
    desc = {"args": arg_info,
            "para_addr": hbm_addr,
            "bin_addr": hbm_addr + 0x100000,
            "bin": "kernel.bin",
            "block": load_dict["blockDim"],
            "spec": aic_model_path + '/' + spec,
            "path": aic_out_path}
    hbm_addr += hbm_unit
    # Dump each argument to a_<i>.bin and assign it an aligned HBM address.
    for i, arg in enumerate(args):
        bin_name = "a_%d.bin" % (i)
        arg.tofile(os.path.join(aic_out_path, bin_name))
        info = {"bin": bin_name,
                "size": arg.size * arg.dtype.itemsize,
                "addr": hbm_addr,
                "out": False}
        arg_info.append(info)
        need_size = arg.size
        # Round the footprint up to the next hbm_unit boundary.
        if need_size % hbm_unit:
            need_size += hbm_unit - (need_size % hbm_unit)
        hbm_addr += need_size
    # Mark the requested indices (negative indices count from the end) as outputs.
    for i in output:
        arg_info[len(arg_info) + i if i < 0 else i]['out'] = True
    # Write the simulator configuration; 0o400 keeps it read-only afterwards.
    config_path = aic_out_path + "/config.toml"
    if os.path.exists(config_path):
        os.remove(config_path)
    with os.fdopen(os.open(config_path, os.O_WRONLY | os.O_CREAT, 0o400), 'w') as f:
        f.write('title="Sim Config"\n')
        f.write('log_open_value=0xffffffff\n')
        f.write('chip_version=1\n')
        f.write('block_dim=%d\n' % (desc['block']))
        f.write('specPathName="%s"\n' % (desc["spec"]))
        f.write('path="%s/"\n' % (desc["path"]))
        f.write('hbm_para_addr=0x%x\n' % (desc["para_addr"]))
        f.write('[BIN]\n')
        f.write('name="%s"\n' % (desc['bin']))
        f.write('addr=0x%x\n' % (desc['bin_addr']))
        for arg in arg_info:
            f.write('[[output_para_array]]\n' if arg['out'] else '[[input_para_array]]\n')
            f.write('name="%s"\n' % (arg['bin']))
            f.write('addr=0x%x\n' % (arg['addr']))
            f.write('valid=1\n')
            if arg['out']:
                f.write('size=0x%x\n' % (arg['size']))
    # Generate and run the launcher script (0o500: executable, read-only).
    run_path = aic_out_path + "/run.sh"
    if os.path.exists(run_path):
        os.remove(run_path)
    with os.fdopen(os.open(run_path, os.O_WRONLY | os.O_CREAT, 0o500), 'w') as f:
        f.write("cd " + aic_out_path + "\n")
        f.write("export DVCSPEC_DIR=" + aic_model_path + "\n")
        f.write(aic_model_path + "/v100_ca_tag_master --gtest_filter=test_st_case.test_st_ca\n")
    subprocess.call(["sh", aic_out_path + "/run.sh"])
    # Read back every output buffer, trimming simulator over-copy if any.
    out_list = []
    for i, arg_ in enumerate(args):
        if arg_info[i]['out']:
            out_data = np.fromfile(os.path.join(aic_out_path, arg_info[i]['bin']), arg_.dtype)
            if out_data.size > args[i].size:  # strip unneeded data copied back by aic model
                out_data = out_data[0:arg_.size]
            out_arg = out_data.reshape(arg_.shape)
            out_list.append(out_arg)
    return out_list[0] if len(out_list) == 1 else tuple(out_list)
#!/usr/bin/env python3
# coding: utf-8
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""register the cce buffer info"""
from __future__ import absolute_import as _abs
import akg.tvm
from akg.utils import kernel_exec
from .cce_conf import cce_product_
# Module import side effect: select the CCE product parameters and target from
# the runtime mode, then enter the target context for the whole process.
# get the CceProductParams instance
mode = kernel_exec.get_runtime_mode()
# currently we have 5 kinds of runtime modes:ca/aic/rpc/aic_cloud/rpc_cloud
# aic means aic_mini;rpc means rpc_mini
# the default target is mini
if mode in ('aic', 'rpc', 'compile_mini', 'air'):
    # Mini chip: product version prefix "1.1".
    cur_cce_product_params = cce_product_("1.1.xxx.xxx")
    target = akg.tvm.target.cce("mini")
elif mode in ('aic_cloud', 'rpc_cloud', 'compile_cloud', 'air_cloud'):
    # Cloud chip: product version prefix "1.6".
    cur_cce_product_params = cce_product_("1.6.xxx.xxx")
    target = akg.tvm.target.cce("cloud")
elif mode == 'ca':
    cur_cce_product_params = cce_product_("1.1.xxx.xxx")
    target = akg.tvm.target.cce("mini")
else:
    # Unknown modes fall back to mini as well.
    cur_cce_product_params = cce_product_("1.1.xxx.xxx")
    target = akg.tvm.target.cce("mini")
# Intentionally never exited: all subsequent builds default to this target.
target.__enter__()
#!/usr/bin/env python3
# coding: utf-8
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Runtime function related hooks."""
from __future__ import absolute_import as _abs
from akg import build_module
def debug_mode(debug_flag):
    """
    Pass to enable tpu debug mode.

    Args:
        debug_flag (int): The debug flag to be passed. When 1, the DMA
            intrinsic injection pass is added to the lowering pipeline.

    Returns:
        list of (int, function): passes to set to
        build_config(add_lower_pass=tpu.debug_mode(mode)); the int is the
        phase number at which the pass runs.
    """
    # the number in pass_list such as 0,1,2,3 represents the order of the pass called
    pass_list = []
    if debug_flag == 1:
        # Fix: the original referenced `ir_pass` without any import, raising
        # NameError whenever debug_flag == 1. Import lazily so the common
        # debug_flag == 0 path does not need it.
        from akg.tvm import ir_pass  # NOTE(review): confirm ir_pass module path
        pass_list.append((0, ir_pass.inject_dma_intrin))
    return pass_list
# Add a lower pass to sync uop
# debug_mode(0) yields an empty pass list, so this config only enables
# dump_pass_ir without injecting any extra lowering pass.
build_config = build_module.build_config(add_lower_pass=debug_mode(0), dump_pass_ir=True)
#!/usr/bin/env python3
# coding: utf-8
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""parser the config params"""
from __future__ import absolute_import as _abs
import akg.tvm
def cce_product_(product):
    """
    Dynamically load the product params for a product version string.

    Args:
        product (str): product version such as "1.1.xxx.xxx"; must have 4 or
            5 dot-separated fields and a supported "major.minor." prefix.

    Returns:
        CceProductParams: the singleton instance with cce_product set.

    Raises:
        RuntimeError: if product is not a string or is an unsupported version.
    """
    # Fix: validate the type before calling str methods. The original called
    # product.lower() first, so non-string input raised AttributeError
    # instead of the intended RuntimeError.
    if not isinstance(product, str):
        raise RuntimeError("The Supported product type error")
    product = product.lower()
    versions = product.split(".")
    if len(versions) not in (4, 5):
        raise RuntimeError("Do not support specify the product %s" % product)
    # Reduce the full version string to its canonical "major.minor" form.
    for prefix in ("3.5", "3.3", "1.1", "1.2", "1.6"):
        if product.startswith(prefix + "."):
            product = prefix
            break
    else:
        raise RuntimeError("Do not support specify the product %s" % product)
    cur_cce_product_params = CceProductParams()
    cur_cce_product_params.cce_product = product
    # set section to conf
    f = akg.tvm.get_global_func("cce.set_product_section")
    f(product)
    return cur_cce_product_params
def get_value(product, key):
    """
    Query a product configuration value via the registered global func.

    Args:
        product (str): product name.
        key (str): config key; a substring of the key selects the category.

    Returns:
        The configured value, or None when key matches no known category.

    Raises:
        RuntimeError: when the lookup returns the category's failure value.
    """
    # (category marker, registered global func, failure value, error message)
    dispatch = (
        ("Buffer", "cce.product_conf_buffer", 0, "Get the cce product value is 0"),
        ("Compiler", "cce.product_conf_compiler", "", "Get the cce product value is None"),
        ("Intrinsic", "cce.product_conf_intrinsic", "", "Get the cce product value is None"),
        ("Core", "cce.product_conf_core", 0, "Get the cce product value is None"),
    )
    for marker, func_name, invalid, message in dispatch:
        if marker in key:
            value = akg.tvm.get_global_func(func_name)(product, key)
            if value == invalid:
                raise RuntimeError(message)
            return value
    return None
class CceProductParams():
    """Singleton holding the currently configured CCE product parameters."""
    _instance = None
    # canonical product short name, e.g. "1.1"; set by cce_product_()
    cce_product = None
    # set false to switch off aicpuos feature
    enable_aicpuos = True

    def __init__(self):
        pass

    # singleton pattern
    def __new__(cls, *args, **kwargs):
        if cls._instance is None:
            # Fix: object.__new__ accepts no extra arguments in Python 3; the
            # original forwarded *args/**kwargs, which raised TypeError for
            # any constructor call that passed arguments.
            cls._instance = object.__new__(cls)
        return cls._instance

    def get_params_(self, key):
        """Look up key for the configured product; raise if product unset."""
        if self.cce_product is None:
            raise RuntimeError("not set product info")
        value = get_value(self.cce_product, key)
        # if product supports os
        if key == "Compiler_aicpu_support_os":
            # string to bool
            value = bool(value == "true")
        return value
def set_status_check(bl):
    """
    Toggle the debug mode that adds status-register checks for compute overflow.

    Args:
        bl (bool): when True, the generated code prints the check code.

    Raises:
        TypeError: if bl is not a boolean.
    """
    if not isinstance(bl, bool):
        raise TypeError("The input value type must be boolean")
    akg.tvm.get_global_func("cce.status_check")(bl)
#!/usr/bin/env python3
# coding: utf-8
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""CCE configuration constants"""
from __future__ import absolute_import as _abs
import akg.tvm
def cc_device_exp(input_data, p_scale, p_shift, p_base, p_shape):
    """
    Take exp of input_data through the cc_device_exp device API intrinsic.

    Args:
        input_data (tvm.tensor.Tensor): Input argument.
        p_scale: default [1].
        p_shift: default [0].
        p_base: default [-1].
        p_shape: default [1].

    Returns:
        tvm.expr.Expr. The result.
    """
    intrin_args = (input_data, p_scale, p_shift, p_base, p_shape)
    return akg.tvm.call_pure_intrin(input_data.dtype, "cc_device_exp", *intrin_args)
def cc_device_log(input_data, p_scale, p_shift, p_base, p_shape):
    """
    Take log of input_data through the cc_device_log device API intrinsic.

    Args:
        input_data (tvm.tensor.Tensor): Input argument.
        p_scale: default [1].
        p_shift: default [0].
        p_base: default [-1].
        p_shape: default [1].

    Returns:
        tvm.expr.Expr. The result.
    """
    intrin_args = (input_data, p_scale, p_shift, p_base, p_shape)
    return akg.tvm.call_pure_intrin(input_data.dtype, "cc_device_log", *intrin_args)
#!/usr/bin/env python3
# coding: utf-8
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""CCE configuration constants"""
from __future__ import absolute_import as _abs
import akg.tvm
# def the buffer var
# memory scope names for the on-chip buffers
scope_cbuf = "local.L1"
scope_ubuf = "local.UB"
scope_ca = "local.L0A"
scope_cb = "local.L0B"
scope_cc = "local.L0C"
scope_reg = "local.REG"
scope_aicpu = "local_aicpu"
# pragma/scope names used for DMA copies
dma_copy = "dma_copy"
dma_copy_global = "global"
f = akg.tvm.get_global_func("cce_util.GetCceAxis")
# def the cce thread axis for sync
CCE_AXIS = f()
# def the gemm const
# element widths are in bits (note the //8 below converts to bytes);
# BLOCK_* are the gemm tile sizes in elements
WGT_WIDTH = 16
INP_WIDTH = 16
OUT_WIDTH = 16
BLOCK_IN = 16
BLOCK_OUT = 16
BLOCK_REDUCE = 16
# per-tile element sizes in bytes derived from the constants above
INP_ELEM_BYTES = (BLOCK_IN * BLOCK_REDUCE * INP_WIDTH // 8)
WGT_ELEM_BYTES = (BLOCK_OUT * BLOCK_REDUCE * WGT_WIDTH // 8)
OUT_ELEM_BYTES = (BLOCK_IN * BLOCK_OUT * OUT_WIDTH // 8)
GLB_ELEM_BYTES = (16 * OUT_WIDTH // 8)
#!/usr/bin/env python3
# coding: utf-8
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Runtime function related hooks"""
from __future__ import absolute_import as _abs
import os
import json
import hashlib
import akg.tvm
from akg.backend import cce_conf as cceconf
def write_code(js_dict, fname):
    """Serialize js_dict as pretty-printed JSON into fname.

    Any existing file is removed first; the new file is created read-only
    (mode 0o400) but remains writable through the already-open descriptor.
    """
    if os.path.exists(fname):
        os.remove(fname)
    fd = os.open(fname, os.O_WRONLY | os.O_CREAT, 0o400)
    with os.fdopen(fd, 'w') as out:
        json.dump(js_dict, out, sort_keys=True, indent=4, separators=(',', ':'))
# block_dim: cpu num,default value is 1.
@akg.tvm.register_func
def tvm_callback_cce_postproc(code, block_dim=1):
    """
    Function for dumping json datas from cce code.

    Writes kernel_meta/<kernel_name>.json describing the compiled kernel
    (magic, blockDim, bin file name/suffix, sha256 of the kernel binary)
    and returns code unchanged.

    Args:
        code: cce code.
        block_dim: Default: 1.

    Returns:
        code.
    """
    # kernel name is the token immediately before "_kernel" in the code text
    kernel_name = code.split("_kernel")[0].split(" ")[-1]
    is_aicpu = False
    if "__aicore__" in code:
        title_dict = {"magic": "RT_DEV_BINARY_MAGIC_ELF"}
    elif "__aicpu__" in code:
        title_dict = {"magic": "RT_DEV_BINARY_MAGIC_ELF_AICPU"}
        is_aicpu = True
    elif "aarch64-hisilicon-cce" in code:
        title_dict = {"magic": "RT_DEV_BINARY_MAGIC_ELF_AICPU"}
        is_aicpu = True
        # NOTE(review): kernel_name[1:] drops the first character — presumably
        # the aarch64 kernel name carries a leading marker; confirm.
        file_name = "kernel_meta/" + kernel_name[1:] + ".json"
    else:
        title_dict = dict()
    title_dict["blockDim"] = block_dim
    # bin file without suffix
    bin_file_name = ""
    bin_file_suffix = ".o"
    # for aicpu support os only
    cce_product_params = cceconf.CceProductParams()
    aicpu_support_os = cce_product_params.get_params_("Compiler_aicpu_support_os")
    bin_file_name = kernel_name
    if is_aicpu and aicpu_support_os:
        # aicpu kernels with OS support are shipped as shared objects
        bin_file_name = "lib" + bin_file_name
        bin_file_suffix = ".so"
    if cce_product_params.enable_aicpuos:
        # new parameters in aicpuos feature
        title_dict["kernelName"] = kernel_name + "_kernel0"
        title_dict["binFileSuffix"] = bin_file_suffix
        title_dict["binFileName"] = bin_file_name
        # the op json file used by domi
        file_name = "kernel_meta/" + kernel_name + ".json"
        kernel_file_name = "kernel_meta/" + bin_file_name + bin_file_suffix
        # NOTE(review): if enable_aicpuos were False and the aarch64 branch
        # not taken, file_name would be unbound at write_code() below —
        # currently unreachable because enable_aicpuos defaults to True.
        buf_size = 64 * 1024  # once read 64kb
        sha256 = hashlib.sha256()
        with open(kernel_file_name, 'rb') as kf:
            while True:
                data = kf.read(buf_size)
                if not data:
                    break
                sha256.update(data)
        title_dict["sha256"] = sha256.hexdigest()
    load_dict = {}
    if not os.path.exists("kernel_meta"):
        try:
            os.mkdir("kernel_meta")
        except OSError as err:
            # 17, OSError: [Errno 17] File exists
            if err.errno == 17:
                pass
            else:
                raise err
    else:
        # merge any pending "<kernel>wk.json" sidecar left by an earlier step
        fname = "kernel_meta/" + kernel_name + "wk.json"
        if os.path.exists(fname):
            with open(fname, "r") as f:
                load_dict = json.load(f)
            os.remove(fname)
    final_dict = title_dict.copy()
    final_dict.update(load_dict)
    write_code(final_dict, file_name)
    return code
#!/usr/bin/env python3
# coding: utf-8
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""parsing_profiling_data"""
import os
import subprocess
import struct
import re
from tabulate import tabulate
# default output file for the formatted profiling report
OUTPUT_FORMAT_DATA = "./jobs/output_format_data_hwts.txt"
# record block length in bytes (not referenced in the visible code)
BLOCK_LEN = 32
# sentinel returned when no valid task time can be computed
max_time_consume = 9999999999
def get_log_slice_id(file_name):
    """Return the integer slice id that follows 'slice_' in file_name."""
    digits = re.compile(r'(?<=slice_)\d+').findall(file_name)
    # the lookbehind match is all digits, so it converts directly
    return int(digits[0])
def get_file_join_name(input_path=None, file_name=None):
    """Function for getting join name from input path.

    Collects files under input_path whose names contain file_name (skipping
    '.done' and '.join' files). One match is returned as-is; several slices
    are concatenated, in slice-id order, into '<file_name>.join' and that
    path is returned. Returns '' when nothing matches or the path is missing.
    """
    name_list = []
    file_join_name = ''
    if os.path.exists(input_path):
        files = os.listdir(input_path)
        for f in files:
            if file_name in f and not f.endswith('.done') and not f.endswith('.join'):
                name_list.append(f)
        # resort name_list
        name_list.sort(key=get_log_slice_id)
        if len(name_list) == 1:
            file_join_name = input_path + os.sep + name_list[0]
        elif len(name_list) > 1:
            # concatenate all slices into a single .join file, replacing any
            # stale one from a previous run
            file_join_name = input_path + os.sep + '%s.join' % file_name
            if os.path.exists(file_join_name):
                os.remove(file_join_name)
            with open(file_join_name, 'ab') as bin_data:
                for i in name_list:
                    file = input_path + os.sep + i
                    with open(file, 'rb') as txt:
                        bin_data.write(txt.read())
    return file_join_name
def get_first_runtime_task_trace(input_file=None):
    """Function for getting first task trace from runtime.

    Reads 96-byte records: a 32-byte packed header, a 63-byte NUL-padded
    kernel-name string, then one flags byte split into persistant_1bit and
    reserved_7bit.
    """
    result_data = []
    format_ = "BBHIQHHHHII"
    format_last = "B"
    with open(input_file, 'rb') as bin_data:
        while True:
            line_ = bin_data.read(96)
            if line_:
                # skip records that are pure ASCII whitespace
                if not line_.strip():
                    continue
            else:
                break
            if len(line_) == 96:
                unpack_tuple = struct.unpack(format_, line_[0:32])
                # bytes 32..94 hold the kernel name, NUL padded
                char_string = line_[32:95].decode().strip(b'\x00'.decode())
                result_last = [hex(i) for i in struct.unpack(format_last, line_[95:96])]
                # final byte: lowest bit is the persistant flag, the rest reserved
                byte01 = bin(int(result_last[0].replace('0x', ''), 16)).replace('0b', '').zfill(8)
                persistant_1bit = byte01[-1]
                reserved_7bit = byte01[0:7]
                kernelname = char_string
                result_data.append((unpack_tuple[0], unpack_tuple[1], unpack_tuple[2], unpack_tuple[3],
                                    unpack_tuple[4], unpack_tuple[5], unpack_tuple[6], unpack_tuple[7],
                                    unpack_tuple[8], unpack_tuple[9], unpack_tuple[10],
                                    kernelname, persistant_1bit, reserved_7bit))
    return result_data
def get_44_tsch_fw_timeline(input_file=None):
    """Parse 32-byte tsch fw timeline records from input_file.

    Returns:
        list of 11-tuples: (mode, rptType, bufSize, reserved, task_type,
        task_state, stream_id, task_id, timestamp, thread, device_id).
    """
    records = []
    layout = "BBHIHHHHQII"
    with open(input_file, 'rb') as bin_data:
        while True:
            chunk = bin_data.read(32)
            if not chunk:
                break
            # skip chunks that are pure ASCII whitespace
            if not chunk.strip():
                continue
            if len(chunk) == 32:
                records.append(tuple(struct.unpack(layout, chunk)))
    return records
def get_43_ai_core_data(input_file=None):
    """Function for getting datas from aicore: ov/cnt/total_cyc/ov_cyc/pmu_cnt/stream_id."""
    result_data = []
    with open(input_file, 'rb') as ai_core_file:
        while True:
            line_ = ai_core_file.read(128)
            if line_:
                # skip records that are pure ASCII whitespace
                if not line_.strip():
                    continue
            else:
                break
            format_ = "BBHHHIIqqqqqqqqqqIIIIIIII"
            result_ = [hex(i) for i in struct.unpack(format_, line_)]
            # first byte: overflow flag and count packed into its two nibbles
            byte01 = bin(int(result_[0].replace('0x', ''), 16)).replace('0b', '').zfill(8)
            ov = byte01[-4]
            cnt = byte01[0:4]
            total_cyc = int(result_[7].replace('0x', ''), 16)
            ov_cyc = int(result_[8].replace('0x', ''), 16)
            # eight PMU event counters follow the cycle counters
            pmu_cnt = tuple(int(i.replace('0x', ''), 16) for i in result_[9:17])
            stream_id = int(result_[17].replace('0x', ''), 16)
            result_data.append((ov, cnt, total_cyc, ov_cyc, stream_id, pmu_cnt))
    return result_data
def get_last_tsch_training_trace(input_file=None):
    """Parse 20-byte tsch training-trace records from input_file.

    Each record holds two 32-bit ids, 16-bit task/stream ids (swapped into
    stream-before-task order on output) and two 32-bit syscnt halves.

    Returns:
        list of tuples: (id_lo, id_hi, stream_id, task_id, syscnt_lo, syscnt_hi).
    """
    result_data = []
    # Fix: '=' forces standard sizes (L == 4 bytes, no padding) so the layout
    # is exactly 20 bytes. The original native "LLHHLL" is 40 bytes on LP64
    # platforms, making struct.unpack fail on every 20-byte record.
    format_ = "=LLHHLL"
    with open(input_file, 'rb') as bin_data:
        while True:
            line_ = bin_data.read(20)
            if line_:
                # skip records that are pure ASCII whitespace
                if not line_.strip():
                    continue
            else:
                break
            if len(line_) == 20:
                result_ = struct.unpack(format_, line_)
                # output swaps fields 2 and 3 (task/stream order)
                result_data.append((result_[0], result_[1], result_[3], result_[2], result_[4], result_[5]))
    return result_data
def get_45_hwts_log(input_file=None):
    """Function for getting hwts log from input file.

    Reads 64-byte HWTS records and splits them by log type: type1 collects
    task/block start-end entries, type2 the warning entries (log type 3),
    type3 the block PMU entries (log type 4).
    """
    format_ = ['QIIIIIIIIIIII', 'QIIQIIIIIIII', 'IIIIQIIIIIIII']
    log_type = ['Start of task', 'End of task', 'Start of block', 'End of block', 'Block PMU']
    type1, type2, type3 = [], [], []
    with open(input_file, 'rb') as hwts_data:
        while True:
            line_ = hwts_data.read(64)
            if line_:
                # skip records that are pure ASCII whitespace
                if not line_.strip():
                    continue
            else:
                break
            # first 8 bytes: flags byte, core id, then blk/task ids
            byte_first_four = struct.unpack('BBHHH', line_[0:8])
            byte_first = bin(byte_first_four[0]).replace('0b', '').zfill(8)
            # low 3 bits select the log type; bit 4 is the warn/res0/ov flag;
            # high nibble is a counter
            type_ = byte_first[-3:]
            is_warn_res0_ov = byte_first[4]
            cnt = int(byte_first[0:4], 2)
            core_id = byte_first_four[1]
            blk_id, task_id = byte_first_four[3], byte_first_four[4]
            if type_ in ['000', '001', '010']:  # log type 0,1,2
                result_ = struct.unpack(format_[0], line_[8:])
                syscnt = result_[0]
                stream_id = result_[1]
                type1.append((log_type[int(type_, 2)], cnt, core_id, blk_id, task_id, syscnt, stream_id))
            elif type_ == '011':  # log type 3
                result_ = struct.unpack(format_[1], line_[8:])
                syscnt = result_[0]
                stream_id = result_[1]
                if is_warn_res0_ov == '1':
                    warn_status = result_[3]
                else:
                    warn_status = None
                type2.append(
                    (log_type[int(type_, 2)], cnt, is_warn_res0_ov, core_id, blk_id, task_id, syscnt, stream_id,
                     warn_status))
                type1.append((log_type[int(type_, 2)], cnt, core_id, blk_id, task_id, syscnt, stream_id))
            elif type_ == '100':  # log type 4
                result_ = struct.unpack(format_[2], line_[8:])
                stream_id = result_[2]
                if is_warn_res0_ov == '0':
                    total_cyc = result_[4]
                    ov_cyc = None
                else:
                    total_cyc = None
                    ov_cyc = result_[4]
                pmu_events = result_[-8:]
                type3.append((log_type[int(type_, 2)], cnt, is_warn_res0_ov, core_id, blk_id, task_id, stream_id,
                              total_cyc, ov_cyc, pmu_events))
                # NOTE(review): this type1 entry stores total_cyc (possibly
                # None) where the other branches store syscnt — confirm intent.
                type1.append((log_type[int(type_, 2)], cnt, core_id, blk_id, task_id, total_cyc, stream_id))
    return type1, type2, type3
def fwrite_format(output_data_path=OUTPUT_FORMAT_DATA, data_source=None, is_start=False):
    """
    Append data_source plus a newline to output_data_path.

    Args:
        output_data_path (str): target report file. Default: OUTPUT_FORMAT_DATA.
        data_source (str): text to append.
        is_start (bool): when True, remove any existing target file first so a
            new parse run starts from an empty report.
    """
    # Fix: the original removed the global OUTPUT_FORMAT_DATA here even when a
    # custom output_data_path was passed; truncate the actual target instead.
    if is_start and os.path.exists(output_data_path):
        os.remove(output_data_path)
    with open(output_data_path, 'a+') as f:
        f.write(data_source)
        f.write("\n")
def parsing(source_path):
    """Function for parsing aicore data/tsch fw timeline data/HWTS data/last tsch training trace data.

    Copies the profiling job directory under ./jobs/, parses each known
    profiling stream when present, appends formatted tables to the report
    file, and returns the time between the first HWTS start/end task pair
    (max_time_consume when unavailable).
    """
    # stage the job locally so join files land next to the report
    subprocess.run("cp -r %s ./jobs/" % source_path, shell=True)
    job_name = source_path.split('/')[-1]
    job_path = "./jobs/" + job_name
    fwrite_format(data_source='====================starting parse task ==================', is_start=True)
    result = get_file_join_name(input_path=job_path, file_name='runtime.host.runtime')
    if result:
        runtime_task_trace_data = get_first_runtime_task_trace(input_file=result)
        fwrite_format(data_source='====================first runtime task trace data==================')
        fwrite_format(data_source=tabulate(runtime_task_trace_data,
                                           ['mode', 'rpttype', 'bufsize', 'reserved', 'timestamp', 'eventname',
                                            'tasktype', 'streamid',
                                            'task_id', 'thread', 'device_id', 'kernelname', 'persistant_1bit',
                                            'reserved_7bit'],
                                           tablefmt='simple'))
    result = get_file_join_name(input_path=job_path, file_name='aicore.data.43.dev.profiler_default_tag')
    if result:
        ai_core_data = get_43_ai_core_data(input_file=result)
        fwrite_format(data_source='============================43 AI core data =========================')
        fwrite_format(data_source=tabulate(ai_core_data,
                                           ['Overflow', 'cnt', 'Total cycles', 'overflowed cycles', 'Stream ID',
                                            'PMU events'],
                                           tablefmt='simple'))
    result = get_file_join_name(input_path=job_path, file_name='ts_track.data.44.dev.profiler_default_tag')
    if result:
        tsch_fw_timeline_data = get_44_tsch_fw_timeline(input_file=result)
        fwrite_format(data_source='============================44 tsch fw timeline data =========================')
        fwrite_format(data_source=tabulate(tsch_fw_timeline_data,
                                           ['mode', 'rptType', 'bufSize', 'reserved', 'task_type', 'task_state',
                                            'stream_id',
                                            'task_id', 'timestamp', 'thread', 'device_id'], tablefmt='simple'))
    result = get_file_join_name(input_path=job_path, file_name='hwts.log.data.45.dev.profiler_default_tag')
    start_time = 0
    end_time = 0
    if result:
        data_1, data_2, data_3 = get_45_hwts_log(input_file=result)
        fwrite_format(data_source='============================45 HWTS data ============================')
        # keep the first start/end pair of task id 60000
        # NOTE(review): 60000 looks like a fixed profiled-task sentinel — confirm
        for i in data_1:
            if i[0] == 'Start of task' and i[4] == 60000 and start_time == 0:
                start_time = i[5]
            if i[0] == 'End of task' and i[4] == 60000 and end_time == 0:
                end_time = i[5]
        fwrite_format(data_source=tabulate(data_1,
                                           ['Type', 'cnt', 'Core ID', 'Block ID', 'Task ID', 'Cycle counter',
                                            'Stream ID'],
                                           tablefmt='simple'))
        fwrite_format(data_source=tabulate(data_2,
                                           ['Type', 'cnt', 'WARN', 'Core ID', 'Block ID', 'Task ID', 'Cycle counter',
                                            'Stream ID', 'WARN Status'],
                                           tablefmt='simple'))
        fwrite_format(data_source=tabulate(data_3,
                                           ['Type', 'cnt', 'Overflow', 'Core ID', 'Block ID', 'Task ID', 'Stream ID',
                                            'Total cycles',
                                            'Overflowed cycles',
                                            'PMU events'], tablefmt='simple'))
    result = get_file_join_name(input_path=job_path, file_name='training_trace.dev.profiler_default_tag')
    if result:
        tsch_training_trace_data = get_last_tsch_training_trace(input_file=result)
        fwrite_format(data_source='============================last tsch training_trace data=========================')
        fwrite_format(data_source=tabulate(tsch_training_trace_data,
                                           ['id_lo', 'id_hi', 'stream_id', 'task_id', 'syscnt_lo', 'syscnt_hi'],
                                           tablefmt='simple'))
    try:
        time_consume = abs(int(start_time) - int(end_time))
        return time_consume if time_consume != 0 else max_time_consume
    # NOTE(review): int() raises ValueError/TypeError, not SyntaxError — this
    # handler looks unreachable; confirm before relying on the fallback.
    except SyntaxError:
        return max_time_consume
#!/usr/bin/env python3
# coding: utf-8
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
The build utils in python.
This module provides the functions to transform schedule to
LoweredFunc and compiled Module.
"""
from __future__ import absolute_import as _abs
import sys
import logging
from akg.utils import validation_check as vc_util
import akg.tvm
from akg.tvm import _api_internal
from akg.tvm import schedule
# tiling spaces captured by the most recent lower() call with tiling help on
tuning_spaces = None
# verbosity levels accepted by the "help_tiling" build attribute
help_tiling_level = {
    "None": 0, "General": 1, "Candidates": 2, "Tuning": 3
}
# index-table value meaning the tiling space is empty
EMPTY_CODE = 0
# L0 tile factor printed next to each L1 candidate in the help output
L0_DEFAULT_TILING = 1
def dump_tiling_info(level):
    """Dump tiling info.

    Logs the tiling spaces captured into the module-level tuning_spaces and
    then terminates the process with sys.exit() so the user can set the
    tiling attributes and re-run.
    """
    if tuning_spaces is None:
        return
    logging.getLogger().setLevel(logging.INFO)
    if level >= help_tiling_level["General"]:
        logging.info("==========General tiling help info=============")
        indice = tuning_spaces["index"]
        if isinstance(indice, list):
            for i in range(len(indice)):
                info = "index %d, axis %d, l1_tile_ranges [%d, %d](jump by %d),l0_tile_ranges [%d, %d](jump by %d)"
                logging.info(info, tuning_spaces["index"][i][0], tuning_spaces["index"][i][1],
                             tuning_spaces["l1_range"][i][0], tuning_spaces["l1_range"][i][1],
                             tuning_spaces["l1_mod"][i][0], tuning_spaces["l0_range"][i][0],
                             tuning_spaces["l0_range"][i][1], tuning_spaces["l0_mod"][i][0])
            logging.info("===============================================")
        elif isinstance(indice, int) and indice == EMPTY_CODE:
            logging.info("Empty tiling space.")
    if level >= help_tiling_level["Candidates"]:
        logging.info("")
        logging.info("==========Detailed tiling help info(Only L1)=============")
        logging.info("index 0 has %d candidate(s) tiling factors", len(tuning_spaces["tuning_space"]))
        tuning_spaces_len = len(tuning_spaces["tuning_space"])
        for i in range(tuning_spaces_len):
            # ('candicate' typo retained: it is a runtime log string)
            info = "candicate %d:("
            for l1_candidate in tuning_spaces["tuning_space"][i]:
                info += ("(" + str(l1_candidate) + ", " + str(L0_DEFAULT_TILING) + "),")
            info += ")"
            logging.info(info, i)
        logging.info("=============================================================")
    logging.info("")
    logging.info("Please read this tiling help info and set tiling factor.")
    logging.info("And then set attr \"help_tiling\" value to 0 and re-run.")
    logging.info("Exit.")
    # intentional: this helper never returns
    sys.exit()
def build_config(**kwargs):
    """Thin wrapper forwarding keyword options to akg.tvm.build_config."""
    config = akg.tvm.build_config(**kwargs)
    return config
@vc_util.check_input_type(schedule.Schedule, (list, tuple), (list, tuple, type(None)), str,
                          (dict, type(None)), (dict, type(None)), bool, bool, bool, bool)
def lower(sch, args, shape_params=None, name="default_function", binds=None, attrs=None,
          simple_mode=False, polyhedral=False, tuning=False, aicpu=False):
    """
    Lowering function.

    Args:
        sch (schedule.Schedule): schedule to lower.
        args (list or tuple): tensor arguments.
        shape_params (list or tuple, optional): extra shape parameters.
        name (str): kernel name.
        binds (dict, optional): tensor-to-buffer bindings; empty means None.
        attrs (dict, optional): build attributes; empty means None.
        simple_mode/polyhedral/tuning/aicpu (bool): lowering switches.

    Returns:
        The lowering result; when tiling help is requested the tiling-space
        tables are also captured into the module-level tuning_spaces.
    """
    # normalize empty containers to None ("not provided")
    tmp_binds = binds if binds else None
    tmp_attrs = attrs if attrs else None
    if shape_params is None:
        shape_params = []
    cfg = _api_internal._GetCurrentBuildConfig()
    ret = _api_internal._Lower(sch, args, shape_params, name,
                               tmp_binds, tmp_attrs, simple_mode,
                               polyhedral, tuning, aicpu, cfg)
    # Fix: the original called tmp_attrs.get(...) unconditionally, raising
    # AttributeError whenever attrs was None or empty.
    level = tmp_attrs.get("help_tiling") if tmp_attrs is not None else None
    if tuning or (level is not None and level > help_tiling_level['None']):
        level = help_tiling_level['Tuning'] if tuning else level
        global tuning_spaces
        tuning_spaces = {}
        tuning_spaces["index"] = ret.index_table.asnumpy().tolist()
        tuning_spaces["l1_range"] = ret.l1_tile_range_table.asnumpy().tolist()
        tuning_spaces["l0_range"] = ret.l0_tile_range_table.asnumpy().tolist()
        tuning_spaces["l1_mod"] = ret.l1_tile_mod_table.asnumpy().tolist()
        tuning_spaces["l0_mod"] = ret.l0_tile_mod_table.asnumpy().tolist()
        if level >= help_tiling_level["Candidates"]:
            tuning_spaces["tuning_space"] = ret.tiling_candidate.asnumpy().tolist()
        if not tuning:
            dump_tiling_info(level)
    return ret
@vc_util.check_input_type(schedule.Schedule, (list, tuple), (list, tuple, type(None)), str,
                          (dict, type(None)), (dict, type(None)), bool, bool)
def build_to_func(inputs, args, shape_params=None, name="default_function",
                  binds=None, attrs=None, polyhedral=False, aicpu=False):
    """Build the schedule into a lowered function (not yet a module)."""
    # empty dicts behave the same as "not provided"
    tmp_binds = binds if binds else None
    tmp_attrs = attrs if attrs else None
    for arg in args:
        vc_util.tensor_max_size_check(arg)
    if shape_params is None:
        shape_params = []
    cfg = _api_internal._GetCurrentBuildConfig()
    return _api_internal._BuildToFunc(inputs, args, shape_params, name, tmp_binds, tmp_attrs,
                                      polyhedral, aicpu, cfg)
@vc_util.check_input_type(schedule.Schedule, (list, tuple), (str, type(None)), (list, tuple), str,
                          (dict, type(None)), (dict, type(None)), bool, bool)
def build(inputs, args, target=None, shape_params=None, name="default_function",
          binds=None, attrs=None, polyhedral=False, aicpu=False):
    """Build the schedule and package the lowered result into a module."""
    func_result = build_to_func(inputs, args, shape_params=shape_params, name=name, binds=binds,
                                attrs=attrs, polyhedral=polyhedral, aicpu=aicpu)
    module_target = 'cce' if target is None else target
    return _api_internal._BuildToModule(func_result, module_target)
#!/usr/bin/env python3
# coding: utf-8
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .build_module import build, _build, _build_to_func, generate_trait, get_tiling_space
#!/usr/bin/env python3
# coding: utf-8
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""build module"""
import json
from akg import tvm
from akg.tvm import _api_internal
from .repository import __all__ as repository
def generate_trait(desc):
    """Generate the (compute, shape, dtype) trait strings of a kernel description."""
    def merge_repeat(parts, item):
        # consecutive identical entries collapse into a trailing '-' marker
        if parts and parts[-1].rstrip('-') == item:
            parts[-1] += '-'
        else:
            parts.append(item)

    def compute_trait():
        idx_of = {}
        counter = 0
        for in_desc in desc['input_desc']:
            idx_of[in_desc[0]['tensor_name']] = counter
            counter += 1
        parts = [str(len(desc['input_desc']))]
        for op in desc['op_desc']:
            rel_idx = []
            for op_in in op['input_desc']:
                # constant inputs (with an inline 'value') carry no index
                if op_in[0].get('value', None) is None:
                    rel_idx.append(counter - idx_of[op_in[0]['tensor_name']])
            rel_idx.sort()
            parts.append(op['name'] + ''.join(str(i) for i in rel_idx))
            idx_of[op['output_desc'][0]['tensor_name']] = counter
            counter += 1
        out_idx = sorted(idx_of[o['tensor_name']] for o in desc['output_desc'])
        parts.append(''.join(str(i) for i in out_idx))
        return '.'.join(parts)

    def shape_trait():
        parts = []
        for in_desc in desc['input_desc']:
            merge_repeat(parts, '_'.join(str(i) for i in in_desc[0]['shape']))
        for out_desc in desc['output_desc']:
            merge_repeat(parts, '_'.join(str(i) for i in out_desc['shape']))
        return '.'.join(parts)

    def dtype_trait():
        parts = []
        for in_desc in desc['input_desc']:
            merge_repeat(parts, in_desc[0]['data_type'])
        for out_desc in desc['output_desc']:
            merge_repeat(parts, out_desc['data_type'])
        return '.'.join(parts)

    return compute_trait(), shape_trait(), dtype_trait()
def _build_to_func(desc_s, desc_d, attr=None):
    """
    build kernel with compute description in json format

    Args:
        desc_s : str of compute description
        desc_d : dict of compute description
        attr : dict of build attributes; repository defaults fill in any
            attributes/tiling the caller did not supply

    Returns:
        Module.
    """
    def lookup(keys, default=None):
        # walk the nested repository dict; a missing level yields default
        node = repository
        for key in keys:
            node = node.get(key)
            if not node:
                return default
        return node

    if attr is None:
        attr = {'dim': ''}
    # turn 'enable_auto_inline' off for composite op by default.
    if 'enable_auto_inline' not in attr:
        attr['enable_auto_inline'] = False
    compute, shape, dtype = generate_trait(desc_d)
    repo_attr = lookup([compute, shape, dtype, 'metadata', 'attrs'], {})
    if not repo_attr:
        # fall back to the compute-only entry when the exact trait is absent
        repo_attr = lookup([compute, 'metadata', 'attrs'], {})
    for key in repo_attr:
        if not attr.get(key):
            attr[key] = repo_attr[key]
    if attr.get('dim') in (None, ''):
        tiling = lookup([compute, shape, dtype, 'dim'])
        if tiling:
            attr['dim'] = tiling
    return tvm.get_global_func("composite_with_json_to_func")(desc_s, attr)
def _build(desc_s, desc_d, attr=None):
    """Build the composite kernel and wrap the lowered result into a module."""
    return _api_internal._BuildToModule(_build_to_func(desc_s, desc_d, attr))
def build(kernel_desc, attr=None):
    """
    build kernel with compute description in json format

    Args:
        kernel_desc : str or dict of compute description
        attr : dict of build attributes

    Returns:
        Module.
    """
    if isinstance(kernel_desc, str):
        json_str, json_dict = kernel_desc, json.loads(kernel_desc)
    else:
        assert isinstance(kernel_desc, dict)
        json_str, json_dict = json.dumps(kernel_desc), kernel_desc
    return _build(json_str, json_dict, attr)
def get_tiling_space(kernel_desc, level=1, attr=None):
    """
    get tiling space of composite kernel

    Args:
        kernel_desc : str of compute description
        level : info level
        attr : dict of build attributes (mutated: 'help_tiling' is set)

    Returns:
        dict of tiling-space tables.
    """
    if attr is None:
        attr = {}
    attr['help_tiling'] = level
    ret = tvm.get_global_func('composite_lower')(kernel_desc, attr)
    # map output keys to the corresponding result tables, in display order
    table_map = (
        ('index', 'index_table'),
        ('l1_range', 'l1_tile_range_table'),
        ('l0_range', 'l0_tile_range_table'),
        ('l1_mod', 'l1_tile_mod_table'),
        ('l0_mod', 'l0_tile_mod_table'),
    )
    spaces = {key: getattr(ret, field).asnumpy().tolist() for key, field in table_map}
    if level >= 2:
        spaces['tuning_space'] = ret.tiling_candidate.asnumpy().tolist()
    return spaces
(This diff was collapsed in the original review UI; its content is omitted.)
#!/usr/bin/env python3
# coding: utf-8
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""dim"""
DIM = 'dim'
# valid setdim keys, in the order their values are emitted
checkname = ["index", "axis", "tilel1", "tilel0"]


class Dim():
    """Accumulates a tiling 'dim' string of index/axis/tilel1/tilel0 records."""

    def __init__(self):
        # space-separated accumulated dim string
        self.dim = ""

    def setdim(self, **kwargs):
        """
        Append one dim record; every key must be one of checkname.

        Raises:
            ValueError: if any key is not a recognized dim field.
        """
        # Fix: validate before sorting. The original sorted first with
        # key=checkname.index, so an unknown key raised ValueError from
        # list.index and the explicit "Set dim error!" check was unreachable.
        for key in kwargs:
            if key not in checkname:
                raise ValueError("Set dim error!")
        for key in sorted(kwargs, key=checkname.index):
            self.dim += ' ' + str(kwargs[key])

    def __str__(self):
        return self.dim
#!/usr/bin/env python3
# coding: utf-8
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""__init__"""
from . import cce
#!/usr/bin/env python3
# coding: utf-8
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""comput schedule init"""
from .te_compute import *
#!/usr/bin/env python3
# coding: utf-8
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""__init__"""
from .broadcast_compute import *
from .concat_compute import *
from .cast_compute import *
from .elewise_compute import *
from .reduction_compute import *
from .segment_compute import *
from .dim_conv import *
from .common import *
#!/usr/bin/env python3
# coding: utf-8
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""broadcat compute"""
import akg.tvm
from .util import dtype_check_decorator, shape_to_list, judge_var
_name_index = [0]
@dtype_check_decorator
def broadcast(var, shape, output_dtype=None):
    """
    Broadcast a scalar or a tensor to the given shape.

    Args:
        var (Union[int, float, tvm.const, tvm.tensor.Tensor]): input value.
            Python floats become float16 consts, other Python numbers int32.
        shape (Union[list, tvm.container.Array]): target output shape.
        output_dtype (str): result dtype for the scalar case; defaults to
            var's dtype. Ignored when var is a Tensor.

    Returns:
        tvm.tensor.Tensor, broadcast tensor
    """
    if isinstance(shape, akg.tvm.container.Array):
        shape = shape_to_list(shape)
    if isinstance(var, akg.tvm.tensor.Tensor):
        tensor = var
        orig_shape = shape_to_list(tensor.shape)
        if len(orig_shape) > len(shape):
            raise RuntimeError(
                "Length of shape of input must be less than or equal to output for Tensor Broadcasting, while " +
                "input shape is %s, and output shape is %s" % (str(orig_shape), str(shape)))
        # Leading dims absent from the source are implicitly expanded
        # (numpy-style right alignment).
        expand_shape_len = len(shape) - len(orig_shape)
        check_equal = 0
        # Each trailing dim must either match exactly or be 1 in the source.
        for so, sd in zip(orig_shape, shape[expand_shape_len:]):
            if so == sd:
                check_equal += 1
                continue
            elif so == 1:
                continue
            raise RuntimeError(
                "For tensor broadcasting, shape must be the same or corresponding shape of src tensor is 1"
                "while src shape is %s, and dst shape is %s" % (str(orig_shape), str(shape)))
        # All dims matched and no leading expansion: input already has the
        # requested shape, return it unchanged.
        if check_equal == len(shape):
            return tensor
        name = "broadcast_tensor_" + str(_name_index[0])
        _name_index[0] += 1
        op = 'broadcast_for_tensor'
        # Index size-1 source dims at 0; shift remaining indices past the
        # implicitly-expanded leading dims.
        lambda_func = lambda *indice: tensor(*([0 if orig_shape[i] == 1
                                                else indice[i + expand_shape_len] for i in range(len(orig_shape))]))
        with akg.tvm.tag_scope(op):
            out = akg.tvm.compute(shape, lambda_func, name=name)
        return out
    # Scalar path: normalize to a tvm const, then fill the whole shape.
    var_type = judge_var(var)
    tmp_args = var
    if var_type == "python_const":
        if isinstance(tmp_args, float):
            tmp_args = akg.tvm.const(tmp_args, dtype="float16")
        else:
            tmp_args = akg.tvm.const(tmp_args, dtype="int32")
    if not output_dtype:
        output_dtype = tmp_args.dtype
    tmp_args = tmp_args.astype(output_dtype)
    lambda_func = lambda *indice: tmp_args
    name = "broadcast_" + str(_name_index[0])
    _name_index[0] += 1
    op = 'broadcast'
    with akg.tvm.tag_scope(op):
        out = akg.tvm.compute(shape, lambda_func, name=name)
    return out
#!/usr/bin/env python3
# coding: utf-8
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""cast compute"""
from decorator import decorator
import akg.tvm
from .util import save_op_output_dtype, get_intr_types, is_cast_support, shape_to_list
name_index = [0]
@decorator
def auto_cast_of_cast(func, *args, **kwargs):
    """
    Auto-cast decorator for cast-family ops.

    Note:
        Before calling the wrapped elewise api, check whether the input
        tensor's dtype is supported by the intrinsic. If not, cast the
        input to a supported dtype first (float32 preferred, else float16).

    Raises:
        TypeError: if the fallback cast itself is unsupported.
    """
    intr = func.__name__
    # Presumably records the pre-cast output dtype so downstream stages can
    # restore it — confirm in util.save_op_output_dtype.
    save_op_output_dtype(func, *args)
    supported_types = get_intr_types("Intrinsic_" + intr)
    # Only the single-positional-argument call shape is auto-cast.
    if len(args) == 1:
        raw_tensor = args[0]
        src_dtype = raw_tensor.dtype
        temp_tensor = raw_tensor
        if src_dtype not in supported_types:
            if "float32" in supported_types and is_cast_support(src_dtype, "float32"):
                temp_tensor = cast(raw_tensor, "float32")
            else:
                temp_tensor = cast(raw_tensor, "float16")
        return func(temp_tensor)
    return func(*args, **kwargs)
def cast(raw_tensor, dst_dtype):
    """
    Cast a tensor from its source dtype to dst_dtype; supports f32<->f16,
    f16<->s8, f16<->u8 style conversions.

    Args:
        raw_tensor (tvm.tensor.Tensor): input tensor.
        dst_dtype (str): destination dtype.

    Returns:
        tvm.tensor.Tensor, cast tensor.

    Raises:
        TypeError: if no direct or bridged cast path exists.
    """
    target = dst_dtype.lower()
    tensor = raw_tensor
    if target == tensor.dtype:
        return tensor
    if not is_cast_support(tensor.dtype, target):
        # No direct cast: hop through an intermediate float precision,
        # preferring float32 over float16.
        for bridge in ("float32", "float16"):
            if is_cast_support(tensor.dtype, bridge) and is_cast_support(bridge, target):
                tensor = cast_op(tensor, bridge, 'elewise_single_cast')
                break
        else:
            raise TypeError("Unsupported cast type!")
    return cast_op(tensor, target, 'elewise_single_cast')
@auto_cast_of_cast
def ceil(raw_tensor):
    """
    Cast a tensor to int32, rounding every element up (ceiling).

    Args:
        raw_tensor (tvm.tensor.Tensor): input tensor.

    Returns:
        tvm.tensor.Tensor, the int32 result.
    """
    return cast_op(raw_tensor, "int32", "elewise_single_ceil")
@auto_cast_of_cast
def floor(raw_tensor):
    """
    Cast a tensor to int32, rounding every element down (flooring).

    Args:
        raw_tensor (tvm.tensor.Tensor): input tensor.

    Returns:
        tvm.tensor.Tensor, the int32 result.
    """
    return cast_op(raw_tensor, "int32", "elewise_single_floor")
@auto_cast_of_cast
def round(raw_tensor):
    """
    Cast a tensor to int32, rounding every element to the nearest integer.

    Note: intentionally shadows the builtin `round` within this module.

    Args:
        raw_tensor (tvm.tensor.Tensor): input tensor.

    Returns:
        tvm.tensor.Tensor, the int32 result.
    """
    return cast_op(raw_tensor, "int32", "elewise_single_round")
@auto_cast_of_cast
def trunc(raw_tensor):
    """
    Cast a tensor to int32, rounding every element toward zero (truncation).

    Args:
        raw_tensor (tvm.tensor.Tensor): input tensor.

    Returns:
        tvm.tensor.Tensor, the int32 result.
    """
    return cast_op(raw_tensor, "int32", "elewise_single_trunc")
def cast_op(input_tensor, output_dtype, op):
    """
    Factory for single-input cast / rounding elementwise operations.

    Args:
        input_tensor (tvm.tensor.Tensor): input tensor.
        output_dtype (str): dtype of the produced tensor.
        op (str): one of elewise_single_{cast,round,ceil,floor,trunc}.

    Returns:
        tvm.tensor.Tensor, the computed tensor.

    Raises:
        ValueError: for an unrecognized op name.
    """
    # Rounding intrinsics keyed by op tag; plain cast is handled separately
    # because it applies no rounding function before astype.
    rounders = {
        "elewise_single_round": akg.tvm.round,
        "elewise_single_ceil": akg.tvm.ceil,
        "elewise_single_floor": akg.tvm.floor,
        "elewise_single_trunc": akg.tvm.trunc,
    }
    tensor = input_tensor
    shape = shape_to_list(tensor.shape)
    if op == "elewise_single_cast":
        lambda_func = lambda *indice: tensor(*indice).astype(output_dtype)
    elif op in rounders:
        rounder = rounders[op]
        lambda_func = lambda *indice: rounder(tensor(*indice)).astype(output_dtype)
    else:
        raise ValueError("operation %s not support yet" % op)
    # Name pattern: "<last op word>_<running counter>", e.g. "cast_0".
    name = op.split("_")[-1] + "_" + str(name_index[0])
    name_index[0] += 1
    with akg.tvm.tag_scope(op):
        tmp = akg.tvm.compute(shape, lambda_func, name=name)
    return tmp
#!/usr/bin/env python3
# coding: utf-8
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""common"""
import akg.tvm
from .elewise_compute import vmuls, vadds, vmax, vmin, vabs, vrec, vmul, set_is_need_save_dtype
from .cast_compute import floor, round, cast
def fargmax(x, y):
    """
    Build expression for the index of the maximum value among input expressions x and y.

    Intended as the combiner of an argmax comm_reducer.

    Args:
        x (tvm.expr.Expr): Input expression.
        y (tvm.expr.Expr): Input expression.

    Returns:
        tvm.expr.Expr. The call expression.

    Examples:
        >>> n = akg.tvm.var('n')
        >>> m = akg.tvm.var('m')
        >>> data = akg.tvm.placeholder((n, m), name='data')
        >>> k = akg.tvm.reduce_axis((0, m), "k")
        >>> reducer = akg.tvm.comm_reducer(lambda x,y: akg.fargmax(x, y), lambda t: akg.tvm.min_value(t), name="argmax")
        >>> res = akg.tvm.compute((n,), lambda *indice: reducer(data(*indice, k), axis=k), name="res")
    """
    return akg.tvm.call_pure_intrin(x.dtype, "fargmax", x, y)
def fargmin(x, y):
    """
    Build expression for the index of the minimum value among input expressions x and y.

    Intended as the combiner of an argmin comm_reducer (see fargmax).

    Args:
        x (tvm.expr.Expr): Input expression.
        y (tvm.expr.Expr): Input expression.

    Returns:
        tvm.expr.Expr. The call expression.
    """
    return akg.tvm.call_pure_intrin(x.dtype, "fargmin", x, y)
def mad(x, y):
    """
    Build expression for matrix multiplication and add (multiply-accumulate).

    Args:
        x (tvm.expr.Expr): Input expression.
        y (tvm.expr.Expr): Input expression.

    Returns:
        tvm.expr.Expr. The call expression.

    Examples:
        >>> n = akg.tvm.var('n')
        >>> m = akg.tvm.var('m')
        >>> k = akg.tvm.var('k')
        >>> A = akg.tvm.placeholder((m, k), name='A')
        >>> B = akg.tvm.placeholder((k, n), name='B')
        >>> kk = akg.tvm.reduce_axis((0, k), name='kk')
        >>> mmad = akg.tvm.comm_reducer(lambda x, y: akg.mad(x, y), lambda t: akg.tvm.const(0, dtype=t), name="mmad")
        >>> C = akg.tvm.compute((m, n), lambda i, j: mmad(A[i, kk] * B[kk, j], axis=kk), name="C")
    """
    return akg.tvm.call_pure_intrin(x.dtype, "mad", x, y)
# Matrix multiply-accumulate reducer built on the "mad" intrinsic; its
# identity element is the zero constant of the reduced dtype.
mmad = akg.tvm.comm_reducer(lambda x, y: mad(x, y), lambda t: akg.tvm.const(0, dtype=t), name="mmad")
def dropout(x, y):
    """
    Build expression with the "dropout" intrinsic.

    Args:
        x (tvm.expr.Expr): Input expression.
        y (tvm.expr.Expr): Input expression.

    Returns:
        tvm.expr.Expr. The call expression (note: takes y's dtype, not x's).
    """
    return akg.tvm.call_pure_intrin(y.dtype, "dropout", x, y)
def iou(x, y):
    """
    Return the intersection over union of the x and y boxes.

    Args:
        x (tvm.expr.Expr): Input expression.
        y (tvm.expr.Expr): Input expression.

    Returns:
        tvm.expr.Expr. The call expression.
    """
    return akg.tvm.call_pure_intrin(x.dtype, "iou", x, y)
def nms(x, y, scalar):
    """
    Return the non-maximum-suppression result of the x and y boxes.

    Args:
        x (tvm.expr.Expr): Input argument of reduced tensor.
        y (tvm.expr.Expr): Input argument.
        scalar (Union[tvm.expr.Expr, float]): Score threshold of nms.

    Returns:
        z : tvm.expr.Expr. The result is stored in fp16; each fp16 value is a
        hex number indicating suppression.
    """
    return akg.tvm.call_pure_intrin(x.dtype, "nms", x, y, scalar)
def topk_sort(dst, src, topk):
    """
    Sort the proposal boxes and return the topk result; used when the sort
    process needs to partition the sorting loop.

    Args:
        dst (tvm.expr.Expr): Input argument. The destination of sort generated by a common reducer.
        src (tvm.expr.Expr): Input argument.
            Strictly required that the box number is divisible by 16 and the item number is 8.
        topk (tvm.expr.Expr): Input argument. Constant tvm.expr.Expr indicating the required topk number.

    Returns:
        z : tvm.expr.Expr. The result.
    """
    return akg.tvm.call_pure_intrin(src.dtype, "topk_sort", dst, src, topk)
def proposal_sort(dst, src, topk):
    """
    Sort the proposal boxes and return the topk result.

    Args:
        dst (tvm.expr.Expr): Input argument. The destination of sort generated by a common reducer.
        src (tvm.expr.Expr): Input argument.
            Strictly required that the box number is divisible by 16 and the item number is 8.
        topk (tvm.expr.Expr): Input argument. Constant tvm.expr.Expr indicating the required topk number.

    Returns:
        z : tvm.expr.Expr. The result.
    """
    return akg.tvm.call_pure_intrin(src.dtype, "proposal_sort", dst, src, topk)
def fnot(x):
    """Build an expression applying the "not" intrinsic to x."""
    return akg.tvm.call_pure_intrin(x.dtype, "not", x)
def f_all(x, y):
    """Combine x and y with the "vand" intrinsic; combiner for the all_op reducer."""
    return akg.tvm.call_pure_intrin(x.dtype, "vand", x, y)
# Reducer that "and"s all elements together; identity element is 1 (true).
all_op = akg.tvm.comm_reducer(lambda x, y: f_all(x, y), lambda t: akg.tvm.const(1, dtype=t), name='all_op')
def round_to(data, max_, min_):
    """
    Clamp every element of `data` into the range [min_, max_].

    Args:
        data (Tensor): input tensor.
        max_ (float): upper bound of the result.
        min_ (float): lower bound of the result.

    Returns:
        tensor : akg.tvm.tensor with every element in [min_, max_].
    """
    # Build constant tensors of min_/max_ with data's shape (data * 0 + c).
    zeros = vmuls(data, 0)
    lower = vadds(zeros, min_)
    upper = vadds(zeros, max_)
    clamped = vmin(vmax(data, lower), upper)
    return clamped
def cast_to(data, dtype, f1628_int_flag=False):
    """
    A wrapped cast operation: cast `data` to `dtype`.

    Args:
        data (Tensor): akg.tvm.tensor whose dtype is to be changed.
        dtype (String): destination dtype to cast to.
        f1628_int_flag (bool): before fp16->int8/uint8, whether the data is
            already all integers. Default value is False.

    Returns:
        tensor : akg.tvm.tensor.

    Raises:
        RuntimeError: if data is not an akg.tvm.tensor.Tensor.
    """
    if isinstance(data, akg.tvm.tensor.Tensor):
        data_dtype = getattr(data, 'dtype')
    else:
        raise RuntimeError("The cast input type must be akg.tvm.tensor")
    if (data_dtype == "float16") and (dtype == "int32"):
        # fp16 -> int32 path: result is floor(|data|) * sign, i.e. a
        # truncation toward zero. The sign (+/-1) is presumably recovered by
        # scaling by 2^15 and normalizing with x/(|x| + 2^-15) before
        # rounding — confirm the fp16 rounding semantics before changing.
        fp16_max = akg.tvm.const(32768, dtype="float16")
        fp16_min = akg.tvm.const(2 ** (-15), dtype="float16")
        data1 = round_to(data, 0.5, -0.5)
        new_data = vmuls(data1, fp16_max)
        tmp2 = vabs(new_data)
        tmp3 = vadds(tmp2, fp16_min)
        fp16_res = vmul(new_data, vrec(tmp3))
        sign_res = round(fp16_res)
        floor_data = floor(vabs(data))
        res = vmul(floor_data, sign_res)
        return res
    if data_dtype == "float16" and dtype in ("int8", "uint8") and not f1628_int_flag:
        # Bias by -0.5 so the subsequent cast behaves like rounding rather
        # than truncation for non-integer fp16 values.
        fp16_half = akg.tvm.const(-0.5, dtype="float16")
        set_is_need_save_dtype()
        data = vadds(data, fp16_half)
    if data_dtype == dtype:
        return data
    # Generic path: hop through float16 when the source is not fp16.
    if data_dtype == "float16":
        tmp = data
    else:
        tmp = cast(data, dst_dtype="float16")
    return cast(tmp, dst_dtype=dtype)
def four2five_nchw(data):
    """Apply the "four2five_nchw" intrinsic to data (NCHW layout transform — confirm exact target layout)."""
    return akg.tvm.call_pure_intrin(data.dtype, "four2five_nchw", data)
def load3d_l1_ub(data, pad_h, pad_t, pad_l, pad_r,
                 fm_h, fm_w, stride_h, stride_w,
                 filter_h, filter_w, dilation_h, dilation_w, repeat_mode, jmp_offset):
    """
    Apply the "load3d_l1_ub" intrinsic with padding, feature-map size, stride,
    filter size, dilation, repeat mode and jump-offset parameters.

    NOTE(review): presumably an img2col-style load from L1 to UB buffers —
    confirm the parameter semantics against the intrinsic's lowering.
    """
    return akg.tvm.call_pure_intrin(data.dtype, "load3d_l1_ub", data, pad_h, pad_t, pad_l, pad_r,
                                    fm_h, fm_w, stride_h, stride_w,
                                    filter_h, filter_w, dilation_h, dilation_w, repeat_mode, jmp_offset)
def sin(data):
    """Elementwise sine via the "sin" intrinsic."""
    return akg.tvm.call_pure_intrin(data.dtype, "sin", data)
def cos(data):
    """Elementwise cosine via the "cos" intrinsic."""
    return akg.tvm.call_pure_intrin(data.dtype, "cos", data)
def sinh(data):
    """Elementwise hyperbolic sine via the "sinh" intrinsic."""
    return akg.tvm.call_pure_intrin(data.dtype, "sinh", data)
def cosh(data):
    """Elementwise hyperbolic cosine via the "cosh" intrinsic."""
    return akg.tvm.call_pure_intrin(data.dtype, "cosh", data)
def divide_var(data, divisor):
    """Divide data by divisor via the "divide_var" intrinsic."""
    return akg.tvm.call_pure_intrin(data.dtype, "divide_var", data, divisor)
def vmadd(x, y, z):
    """
    Call the vmadd instruction to calculate :math:`x * y + z`.

    Args:
        x (tvm.tensor.Tensor): input x.
        y (tvm.tensor.Tensor): input y.
        z (tvm.tensor.Tensor): input z.

    Returns:
        tensor : akg.tvm.tensor.
    """
    # NOTE(review): the intrinsic receives arguments reordered as (y, z, x);
    # confirm against the instruction's lowering before changing.
    return akg.tvm.call_pure_intrin(x.dtype, "vmadd", y, z, x)
def vmla(x, y, z):
    """
    Call the vmla instruction to calculate :math:`x + y * z`.

    Args:
        x (tvm.tensor.Tensor): input x.
        y (tvm.tensor.Tensor): input y.
        z (tvm.tensor.Tensor): input z.

    Returns:
        tensor : akg.tvm.tensor.
    """
    # NOTE(review): the intrinsic receives arguments reordered as (y, z, x);
    # confirm against the instruction's lowering before changing.
    return akg.tvm.call_pure_intrin(x.dtype, "vmla", y, z, x)
#!/usr/bin/env python3
# coding: utf-8
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""concat compute"""
import akg.tvm
from .util import dtype_check_decorator
@dtype_check_decorator
def concat(raw_tensors, axis):
    """
    Concatenate tensors along `axis`; supports int8, uint8, int16, int32, float16, float32.

    Args:
        raw_tensors (list[tvm.tensor.Tensor]): list of tensors, all with the
            same dtype and matching shapes except along `axis`.
        axis (int): concat axis.

    Returns:
        tvm.tensor.Tensor, the concatenated tensor.
    """
    concat_para_check(raw_tensors, axis)
    def _get_input_tensors():
        # Collect every input's static shape as plain Python ints.
        shapes = []
        for in_tensor in list(raw_tensors):
            shape = [int(in_tensor.shape[i].value) for i in range(len(in_tensor.shape))]
            shapes.append(shape)
        shapes_list = list(shapes)
        return shapes_list
    shapes = _get_input_tensors()
    # Output shape: sum the extents along the concat axis.
    res_shape = shapes[0][:]
    for i in range(1, len(shapes)):
        res_shape[axis] += shapes[i][axis]
    sel = []
    n_tensor = len(raw_tensors)
    def compute_func(*indice):
        # Build a chain of Select expressions, folding one tensor in per
        # step; c_shape[axis] tracks the running offset along the axis.
        if n_tensor > 1:
            for nn in range(n_tensor - 1):
                if nn == 0:
                    tensor_a = raw_tensors[0]
                    tensor_b = raw_tensors[1]
                    c_shape = shapes[0][:]
                    indice2 = list(indice[:])
                    indice2[axis] = indice[axis] - tensor_a.shape[axis]
                    sel.append(akg.tvm.expr.Select(indice[axis] < c_shape[axis],
                                                   tensor_a[indice], tensor_b[tuple(indice2)]))
                    c_shape[axis] += shapes[1][axis]
                else:
                    # tensor_a is the Select expression built so far, not a
                    # tensor, so it is used directly without indexing.
                    tensor_a = sel[nn - 1]
                    tensor_b = raw_tensors[nn + 1]
                    indice2 = list(indice[:])
                    indice2[axis] = indice[axis] - c_shape[axis]
                    sel.append(akg.tvm.expr.Select(indice[axis] < c_shape[axis], tensor_a, tensor_b[tuple(indice2)]))
                    c_shape[axis] += shapes[nn + 1][axis]
        else:
            # Single input: concat degenerates to identity.
            return raw_tensors[0][indice]
        return sel[-1]
    res = akg.tvm.compute(res_shape, compute_func, name="concat", tag="concat")
    return res
def concat_para_check(raw_tensors, axis):
    """
    Validate the parameters of concat.

    Args:
        raw_tensors (list[tvm.tensor.Tensor]): list of tensors.
        axis (int): concat axis.

    Raises:
        RuntimeError: if raw_tensors is empty, axis is out of range, dtypes
            differ, or a non-concat dimension mismatches.
    """
    # Guard the empty list explicitly instead of failing with IndexError below.
    if not raw_tensors:
        raise RuntimeError("concat requires at least one input tensor")
    rank = len(raw_tensors[0].shape)
    # Valid axes are 0 .. rank-1 inclusive (message previously printed the
    # exclusive bound).
    if axis < 0 or axis >= rank:
        raise RuntimeError("concat axis must be in 0-%d, actual is %d" % (rank - 1, axis))
    first = raw_tensors[0]
    for i in range(1, len(raw_tensors)):
        if raw_tensors[i].dtype != first.dtype:
            raise RuntimeError("dtype must be the same to each other")
        for j in range(rank):
            if (j != axis) and (raw_tensors[i].shape[j].value != first.shape[j].value):
                raise RuntimeError("concat input shape len must be the same to each other except concat axis")
此差异已折叠。
此差异已折叠。
#!/usr/bin/env python3
# coding: utf-8
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""reduction compute"""
from decorator import decorator
import akg.tvm
from .cast_compute import cast
from .util import save_op_output_dtype, shape_to_list, refine_axis
reduce_supported_types = {
"sum": ["float16", "float32"],
"reduce_min": ["float16"],
"reduce_max": ["float16"],
}
@decorator
def auto_cast_of_reduce(func, *args, **kwargs):
    """
    Auto-cast decorator for reduce ops.

    Note:
        Before calling the wrapped reduce api, check whether the input
        tensor's dtype is listed in `reduce_supported_types` for this
        intrinsic; if not, cast the input to float16 first.
    """
    intr = func.__name__
    # Presumably records the pre-cast output dtype for later restoration —
    # confirm in util.save_op_output_dtype.
    save_op_output_dtype(func, *args)
    supported_types = reduce_supported_types[intr]
    # Only the (raw_tensor, axis, keepdims) positional call shape is auto-cast.
    if len(args) == 3:
        raw_tensor = args[0]
        axis = args[1]
        keepdims = args[2]
        dtype = raw_tensor.dtype
        temp_tensor = raw_tensor
        if dtype not in supported_types:
            temp_tensor = cast(raw_tensor, "float16")
        return func(temp_tensor, axis, keepdims)
    return func(*args, **kwargs)
name_index = [0]
@auto_cast_of_reduce
def sum(raw_tensor, axis, keepdims=False):
    """
    Calculate the sum of raw_tensor along axis; supports float16/float32.

    Note: intentionally shadows the builtin `sum` within this module.

    Args:
        raw_tensor (tvm.tensor.Tensor): input tensor.
        axis (Union[int, list]): reduce axis (range : [-len(raw_tensor.shape), len(raw_tensor.shape) - 1]).
        keepdims (bool): if true, retains reduced dimensions with length 1. Default False.

    Returns:
        tvm.tensor.Tensor, res.
    """
    return single_reduce_op(raw_tensor, axis, "reduce_sum", keepdims)
@auto_cast_of_reduce
def reduce_min(raw_tensor, axis, keepdims=False):
    """
    Calculate the minimum of raw_tensor along axis; only supports float16.

    Args:
        raw_tensor (tvm.tensor.Tensor): input tensor.
        axis (Union[int, list]): reduce axis (range : [-len(raw_tensor.shape), len(raw_tensor.shape) - 1]).
        keepdims (bool): if true, retains reduced dimensions with length 1. Default False.

    Returns:
        tvm.tensor.Tensor, res.
    """
    return single_reduce_op(raw_tensor, axis, "reduce_min", keepdims)
@auto_cast_of_reduce
def reduce_max(raw_tensor, axis, keepdims=False):
    """
    Calculate the maximum of raw_tensor along axis; only supports float16.

    Args:
        raw_tensor (tvm.tensor.Tensor): input tensor.
        axis (Union[int, list]): reduce axis (range : [-len(raw_tensor.shape), len(raw_tensor.shape) - 1]).
        keepdims (bool): if true, retains reduced dimensions with length 1. Default False.

    Returns:
        tvm.tensor.Tensor, res.
    """
    return single_reduce_op(raw_tensor, axis, "reduce_max", keepdims)
def single_reduce_op(input_tensor, axis, op, keepdims=False):
    """Factory method of single reduce operations (reduce_min/max/sum)."""
    def reduce_compute(data_shape, axis, tensor, func):
        def compute_func(*indice):
            count_indice = 0
            count_reduce = 0
            res_indice = []
            # Map output indices back to input indices, substituting a
            # reduction axis variable for each reduced dimension.
            for index in range(len(data_shape)):
                if index not in axis:
                    res_indice.append(indice[count_indice])
                    count_indice += 1
                else:
                    res_indice.append(reduce_axises[count_reduce])
                    count_reduce += 1
                    if keepdims:
                        # keepdims leaves a size-1 output dim at this
                        # position; skip its (always-0) output index.
                        count_indice += 1
            return func(tensor(*res_indice), axis=reduce_axises)
        reduce_axises = []
        for index, axis_num in enumerate(axis):
            reduce_axises.append(akg.tvm.reduce_axis((0, data_shape[axis_num]), name='k' + str(index + 1)))
        res_reshape = []
        for index, shape_l in enumerate(data_shape):
            if index not in axis:
                res_reshape.append(shape_l)
            else:
                if keepdims:
                    res_reshape.append(1)
        # NOTE(review): is_last_axis is captured from the enclosing scope —
        # it is set by the loop below before reduce_compute is called. When
        # the reduction includes the last axis and keepdims is False, a
        # trailing 1 dim is still appended to the output shape.
        if is_last_axis and not keepdims:
            res_reshape.append(1)
        name = "reduce_" + str(name_index[0])
        name_index[0] += 1
        reduce_res = akg.tvm.compute(res_reshape, compute_func, name=name)
        return reduce_res
    if op.lower() == "reduce_min":
        reduce_func = akg.tvm.min
    elif op.lower() == "reduce_max":
        reduce_func = akg.tvm.max
    elif op.lower() == "reduce_sum":
        reduce_func = akg.tvm.sum
    else:
        raise RuntimeError("Not Support yet for op %s." % op)
    op_tensor = input_tensor
    shape = shape_to_list(op_tensor.shape)
    res_axis = refine_axis(axis, shape)
    # Nothing left to reduce after axis refinement: return input unchanged.
    if not res_axis:
        return input_tensor
    # Determine whether the reduction covers the last axis (used above).
    for i in res_axis:
        is_last_axis = (i == len(shape) - 1)
        if is_last_axis:
            break
    with akg.tvm.tag_scope(op.lower()):
        res = reduce_compute(shape, res_axis, op_tensor, reduce_func)
    return res
此差异已折叠。
此差异已折叠。
#!/usr/bin/env python3
# coding: utf-8
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""__init__"""
from .op_build import op_build, op_build_to_func
from .message import compilewithjson
from .cce import *
此差异已折叠。
#!/usr/bin/env python3
# coding: utf-8
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""add"""
from akg.ops.math import add
def TensorAdd(x, y):
    """Elementwise add of x and y, delegating to akg.ops.math.add with scale=1.0 and polyhedral lowering."""
    return add.add(x, y, scale=1.0, polyhedral=True, attrs=None)
#!/usr/bin/env python3
# coding: utf-8
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""addn"""
from akg.ops.math import addn
# Note input num equal to 0 is means the inputs are flexible
def AddN(inputs):
    """Elementwise sum of a list of tensors, delegating to akg.ops.math.addn."""
    return addn.addn(inputs)
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册