From 4f3c466b92151253ff707c6c97661aae6baf2001 Mon Sep 17 00:00:00 2001 From: Curio Yang Date: Fri, 28 Jul 2023 09:26:40 +0800 Subject: [PATCH] GNNE-1886 210_docs and script (#1026) * 1. add k210 pipeline docs in USAGE.md 2. add simulate.ipynb for compile and inference * Indicate available nncase versions * format * fix ci --------- Co-authored-by: yanghaoqi --- docs/USAGE_EN.md | 209 +++++------------- docs/USAGE_ZH.md | 256 +++++++---------------- examples/user_guide/README.md | 15 ++ examples/user_guide/nncase_base_func.py | 95 +++++++++ examples/user_guide/simulate.ipynb | 198 ++++++++++++++++++ tests/importer/onnx_/basic/test_slice.py | 1 - 6 files changed, 436 insertions(+), 338 deletions(-) create mode 100644 examples/user_guide/README.md create mode 100644 examples/user_guide/nncase_base_func.py create mode 100644 examples/user_guide/simulate.ipynb diff --git a/docs/USAGE_EN.md b/docs/USAGE_EN.md index 3e634b7bbc..b8af24687a 100644 --- a/docs/USAGE_EN.md +++ b/docs/USAGE_EN.md @@ -2,14 +2,17 @@ # Overview -nncase provides both python wheel package and ncc client to compile your neural models. +nncase provides python wheel package to compile your neural models. The current documentation only works for nncase-v1. The available version are shown below. -- nncase wheel package can be downloaded at [nncase release](https://github.com/kendryte/nncase/releases) -- For ncc client, you should git clone nncase repository and then build it by yourself. +``` +1.0.0.20211029, 1.1.0.20211203, 1.3.0.20220127, 1.4.0.20220303, 1.5.0.20220331, 1.6.0.20220505, 1.7.0.20220530, 1.7.1.20220701, 1.8.0.20220929, 1.9.0.20230322 +``` + +- nncase wheel package can be downloaded at [nncase release](https://github.com/kendryte/nncase/releases). # nncase python APIs -nncase provides Python APIs to compile neural network model and inference on your PC. +nncase provides Python APIs to compile neural network model and inference on x86_64 and amd64 platforms. ## Installation @@ -27,8 +30,6 @@ $ docker pull registry.cn-hangzhou.aliyuncs.com/kendryte/nncase:latest $ docker run -it --rm -v `pwd`:/mnt -w /mnt registry.cn-hangzhou.aliyuncs.com/kendryte/nncase:latest /bin/bash -c "/bin/bash" ``` - - ### cpu/K210 - Download nncase wheel package and then install it. @@ -39,8 +40,6 @@ root@2b11cc15c7f8:/mnt# wget -P x86_64 https://github.com/kendryte/nncase/releas root@2b11cc15c7f8:/mnt# pip3 install x86_64/*.whl ``` - - ### K510 - Download both nncase and nncase_k510 wheel packages and then install them. @@ -53,8 +52,6 @@ root@2b11cc15c7f8:/mnt# wget -P x86_64 https://github.com/kendryte/nncase/releas root@2b11cc15c7f8:/mnt# pip3 install x86_64/*.whl ``` - - ### Check nncase version ```python @@ -67,8 +64,6 @@ Type "help", "copyright", "credits" or "license" for more information. 1.8.0-55be52f ``` - - ## nncase compile model APIs ### CompileOptions @@ -116,13 +111,13 @@ The details of all attributes are following. 
 | quant_type | string | N | Specify the quantization type for input data , such as 'uint8', 'int8', 'int16' |
 | w_quant_type | string | N | Specify the quantization type for weight , such as 'uint8'(by default), 'int8', 'int16' |
 | use_mse_quant_w | bool | N | Specify whether use mean-square error when quantizing weight |
-| split_w_to_act | bool | N | Specify whether split weight into activation |
+| split_w_to_act | bool | N | Specify whether split weight into activation |
 | preprocess | bool | N | Whether enable preprocess, False by default |
 | swapRB | bool | N | Whether swap red and blue channel for RGB data(from RGB to BGR or from BGR to RGB), False by default |
 | mean | list | N | Normalize mean value for preprocess, [0, 0, 0] by default |
 | std | list | N | Normalize std value for preprocess, [1, 1, 1] by default |
 | input_range | list | N | The float range for dequantized input data, [0,1] by default |
-| output_range | list | N | The float range for quantized output data, [ ] by default |
+| output_range | list | N | The float range for quantized output data, [ ] by default |
 | input_shape | list | N | Specify the shape of input data. input_shape should be consistent with input _layout. There will be letterbox operations(Such as resize/pad) if input_shape is not the same as input shape of model. |
 | letterbox_value | float | N | Specify the pad value of letterbox during preprocess. |
 | input_type | string | N | Specify the data type of input data, 'float32' by default. |
@@ -767,10 +762,50 @@ if __name__ == '__main__':
 
 ## Deploy nncase runtime
 
-### K210
+### Inference on the K210 development board
+
+1. Download the [SDK](https://github.com/kendryte/kendryte-standalone-sdk)
+
+   ```shell
+   $ git clone https://github.com/kendryte/kendryte-standalone-sdk.git
+   $ cd kendryte-standalone-sdk
+   $ export KENDRYTE_WORKSPACE=`pwd`
+   ```
+2. Download the cross-compile toolchain and extract it
 
-1. Download `k210-runtime.zip` from [Release](https://github.com/kendryte/nncase/releases) page.
-2. Unzip to your [kendryte-standalone-sdk](https://github.com/kendryte/kendryte-standalone-sdk) 's `lib/nncase/v1` directory.
+   ```shell
+   $ wget https://github.com/kendryte/kendryte-gnu-toolchain/releases/download/v8.2.0-20190409/kendryte-toolchain-ubuntu-amd64-8.2.0-20190409.tar.xz -O $KENDRYTE_WORKSPACE/kendryte-toolchain.tar.xz
+   $ cd $KENDRYTE_WORKSPACE
+   $ mkdir toolchain
+   $ tar -xf kendryte-toolchain.tar.xz -C ./toolchain
+   ```
+3. Update the nncase runtime
+
+   Download `k210-runtime.zip` from the [Release](https://github.com/kendryte/nncase/releases) page and extract it into the [kendryte-standalone-sdk](https://github.com/kendryte/kendryte-standalone-sdk) `lib/nncase/v1` directory.
+4. Compile the App
+
+   ```shell
+   # 1. copy your program into `$KENDRYTE_WORKSPACE/src`
+   # e.g. copy ($NNCASE_WORK_DIR/examples/facedetect_landmark/k210/facedetect_landmark_example) into PATH_TO_SDK/src.
+   $ cp -r $NNCASE_WORK_DIR/examples/facedetect_landmark/k210/facedetect_landmark_example $KENDRYTE_WORKSPACE/src/
+
+   # 2. compile
+   $ cd $KENDRYTE_WORKSPACE
+   $ mkdir build && cd build
+   $ cmake .. -DPROJ=facedetect_landmark_example -DTOOLCHAIN=$KENDRYTE_WORKSPACE/toolchain/kendryte-toolchain/bin && make
+   ```
+
+   `facedetect_landmark_example` and `facedetect_landmark_example.bin` will be generated in the `build` directory.
+5. Write the program to the K210 development board
+
+   ```shell
+   # 1. Check available USB ports
+   $ ls /dev/ttyUSB*
+   # /dev/ttyUSB0 /dev/ttyUSB1
+
+   # 2. 
Write your App by kflash + $ kflash -p /dev/ttyUSB0 -t facedetect_landmark_example.bin + ``` ## nncase inference APIs @@ -1201,143 +1236,3 @@ N/A ```python sim.run() ``` - -# ncc - -## Comannd line - -```shell -DESCRIPTION -NNCASE model compiler and inference tool. - -SYNOPSIS - ncc compile -i -t - [--input-prototxt ] [--output-arrays ] - [--quant-type ] [--w-quant-type ] [--use-mse-quant-w] - [--dataset ] [--dataset-format ] [--calibrate-method ] - [--preprocess] [--swapRB] [--mean ] [--std ] - [--input-range ] [--input-shape ] [--letterbox-value ] - [--input-type ] [--output-type ] - [--input-layout ] [--output-layout ] [--tcu-num ] - [--is-fpga] [--dump-ir] [--dump-asm] [--dump-quant-error] [--dump-import-op-range] [--dump-dir ] - [--dump-range-dataset ] [--dump-range-dataset-format ] [--benchmark-only] - - ncc infer - --dataset [--dataset-format ] - [--input-layout ] - - ncc [-v] - -OPTIONS - compile - - -i, --input-format - input format, e.g. tflite|onnx|caffe - -t, --target target architecture, e.g. cpu|k210|k510 - input file - --input-prototxt - input prototxt - output file - --output-arrays - output arrays - --quant-type - post trainning quantize type, e.g uint8|int8|int16, default is uint8 - --w-quant-type - post trainning weights quantize type, e.g uint8|int8|int16, default is uint8 - --use-mse-quant-w use min mse algorithm to refine weights quantilization or not, default is 0 - --dataset - calibration dataset, used in post quantization - --dataset-format - datset format: e.g. image|raw, default is image - --dump-range-dataset - dump import op range dataset - --dump-range-dataset-format - datset format: e.g. image|raw, default is image - --calibrate-method - calibrate method: e.g. no_clip|l2|kld_m0|kld_m1|kld_m2|cdf, default is no_clip - --preprocess enable preprocess, default is 0 - --swapRB swap red and blue channel, default is 0 - --mean normalize mean, default is 0. 0. 0. - --std normalize std, default is 1. 1. 1. - --input-range - float range after preprocess - --input-shape - shape for input data - --letterbox-value - letter box pad value, default is 0.000000 - --input-type - input type, e.g float32|uint8|default, default is default - --output-type - output type, e.g float32|uint8, default is float32 - --input-layout - input layout, e.g NCHW|NHWC, default is NCHW - --output-layout - output layout, e.g NCHW|NHWC, default is NCHW - --tcu-num tcu number, e.g 1|2|3|4, default is 0 - --is-fpga use fpga parameters, default is 0 - --dump-ir dump ir to .dot, default is 0 - --dump-asm dump assembly, default is 0 - --dump-quant-error dump quant error, default is 0 - --dump-import-op-range dump import op range, default is 0 - --dump-dir - dump to directory - --benchmark-only compile kmodel only for benchmark use, default is 0 - - infer - - kmodel filename - output path - --dataset - dataset path - --dataset-format - dataset format, e.g. image|raw, default is image - --input-layout - input layout, e.g NCHW|NHWC, default is NCHW -``` - -## Description - -`ncc` is the nncase command line tool. It has two commands: `compile` and `infer`. - -`compile` command compile your trained models (`.tflite`, `.caffemodel`, `.onnx`) to `.kmodel`. - -- `-i, --input-format` option is used to specify the input model format. nncase supports `tflite`, `caffe` and `onnx` input model currently. -- `-t, --target` option is used to set your desired target device to run the model. `cpu` is the most general target that almost every platform should support. `k210` is the Kendryte K210 SoC platform. 
If you set this option to `k210`, this model can only run on K210 or be emulated on your PC. -- `` is your input model path. -- `--input-prototxt` is the prototxt file for caffe model. -- `` is the output model path. -- `--output-arrays` is the names of nodes to output. -- `--quant-type` is used to specify quantize type, such as `uint8` by default and `int8` and `int16`. -- `--w-quant-type` is used to specify quantize type for weight, such as `uint8` by default and `int8 `and `int16`. -- `--use-mse-quant-w ` is used to specify whether use minimize mse(mean-square error, mse) algorithm to quantize weight or not. -- `--dataset` is to provide your quantization calibration dataset to quantize your models. You should put hundreds or thousands of data in training set to this directory. -- `--dataset-format` is to set the format of the calibration dataset. Default is `image`, nncase will use `opencv` to read your images and autoscale to the desired input size of your model. If the input has 3 channels, ncc will convert images to RGB float tensors [0,1] in `NCHW` layout. If the input has only 1 channel, ncc will grayscale your images. Set to `raw` if your dataset is not image dataset for example, audio or matrices. In this scenario you should convert your dataset to raw binaries which contains float tensors. -- `--dump-range-dataset` is to provide your dump range dataset to dump each op data range of your models. You should put hundreds or thousands of data in training set to this directory. -- `--dump-range-dataset-format` is to set the format of the dump range dataset. Default is `image`, nncase will use `opencv` to read your images and autoscale to the desired input size of your model. If the input has 3 channels, ncc will convert images to RGB float tensors [0,1] in `NCHW` layout. If the input has only 1 channel, ncc will grayscale your images. Set to `raw` if your dataset is not image dataset for example, audio or matrices. In this scenario you should convert your dataset to raw binaries which contains float tensors. -- `--calibrate-method` is to set your desired calibration method, which is used to select the optimal activation ranges. The default is `no_clip` in that ncc will use the full range of activations. If you want a better quantization result, you can use `l2` but it will take a longer time to find the optimal ranges. -- `--preprocess ` is used specify whether enable preprocessing or not. -- `--swapRB ` is used specify whether swap red and blue channel or not. You can use this flag to implement RGB2BGR or BGR2RGB feature. -- `--mean` is the mean values to be subtracted during preprocessing. -- `--std` is the std values to be divided during preprocessing. -- `--input-range` is the input range in float after dequantization. -- `--input-shape` is used to specify the shape of input data. If the input shape is different from the input shape of your model, the preprocess will add resize/pad ops automatically for the transformation. -- `--letterbox-value` is used to specify the pad values when pad is added during preprocessing. -- `--input-type` is to set your desired input data type when do inference. If `--input-type` is `uint8`, for example you should provide RGB888 uint8 tensors when you do inference. If `--input-type` is `float`, you should provide RGB float tensors instead. -- `--output-type` is the type of output data. -- `--input-layout` is the layout of input data. -- `--output-layout` is the layout of output data. -- `--tcu-num` is used to configure the number of TCU. 
0 means do not configure the number of TCU. -- `--is-fpga` is a debug option. It is used to specify whether the kmodel run on fpga or not. -- `--dump-ir` is a debug option. It is used to specify whether dump IR or not. -- `--dump-asm` is a debug option. It is used to specify whether dump asm file or not. -- `--dump-quant-error` is a debug option. It is used to specify whether dump quantization error information or not. -- `--dump-import-op-range` is a debug option. It is used to specify whether dump imported op data range or not, need to also specify dump-range-dataset if enabled. -- `--dump-dir` is used to specify dump directory. -- `--benchmark-only` is used to specify whether the kmodel is used for benchmark or not. - -`infer` command can run your kmodel, and it's often used as debug purpose. ncc will save the model's output tensors to `.bin` files in `NCHW` layout. - -- `` is your kmodel path. -- `` is the output directory ncc will produce to. -- `--dataset` is the test set directory. -- `--dataset-format` and `--input-layout` have the same meaning as in `compile` command. diff --git a/docs/USAGE_ZH.md b/docs/USAGE_ZH.md index 40cefe67b2..0293f3eb5c 100644 --- a/docs/USAGE_ZH.md +++ b/docs/USAGE_ZH.md @@ -1,13 +1,16 @@ # 概述 -nncase目前提供了python wheel包和ncc客户端两种方法编译模型. +nncase目前提供了python wheel包编译模型。当前文档仅适用于nncase-v1,适用于以下版本号: + +``` +1.0.0.20211029, 1.1.0.20211203, 1.3.0.20220127, 1.4.0.20220303, 1.5.0.20220331, 1.6.0.20220505, 1.7.0.20220530, 1.7.1.20220701, 1.8.0.20220929, 1.9.0.20230322 +``` - nncase wheel包需要去[nncase release](https://github.com/kendryte/nncase/releases)获取 -- ncc客户端需要用户下载并编译nncase # nncase python APIs -nncase提供了Python APIs, 用于在PC上编译/推理深度学习模型. +nncase提供了Python APIs, 用于在x86_64和amd64平台上编译/推理深度学习模型. ## 安装 @@ -25,8 +28,6 @@ $ docker pull registry.cn-hangzhou.aliyuncs.com/kendryte/nncase:latest $ docker run -it --rm -v `pwd`:/mnt -w /mnt registry.cn-hangzhou.aliyuncs.com/kendryte/nncase:latest /bin/bash -c "/bin/bash" ``` - - ### cpu/K210 - 下载nncase wheel包, 直接安装即可. @@ -37,8 +38,6 @@ root@2b11cc15c7f8:/mnt# wget -P x86_64 https://github.com/kendryte/nncase/releas root@2b11cc15c7f8:/mnt# pip3 install x86_64/*.whl ``` - - ### K510 - 分别下载nncase和nncase_k510插件包,再一起安装 @@ -51,8 +50,6 @@ root@2b11cc15c7f8:/mnt# wget -P x86_64 https://github.com/kendryte/nncase/releas root@2b11cc15c7f8:/mnt# pip3 install x86_64/*.whl ``` - - ### 查看版本信息 ```python @@ -65,8 +62,6 @@ Type "help", "copyright", "credits" or "license" for more information. 
1.8.0-55be52f ``` - - ## nncase 编译模型APIs ### CompileOptions @@ -108,32 +103,32 @@ py::class_(m, "CompileOptions") 各属性说明如下 -| 属性名称 | 类型 | 是否必须 | 描述 | -| ---------------- | ------ | -------- | ------------------------------------------------------------ | -| target | string | 是 | 指定编译目标, 如'k210', 'k510' | -| quant_type | string | 否 | 指定数据量化类型, 如'uint8', 'int8', 'int16' | -| w_quant_type | string | 否 | 指定权重量化类型, 如'uint8', 'int8', 'int16', 默认为'uint8' | -| use_mse_quant_w | bool | 否 | 指定权重量化时是否使用最小化均方误差(mean-square error, MSE)算法优化量化参数 | -| split_w_to_act | bool | 否 | 指定是否将权重数据平衡到激活数据中 | -| preprocess | bool | 否 | 是否开启前处理,默认为False | -| swapRB | bool | 否 | 是否交换RGB输入数据的红和蓝两个通道(RGB-->BGR或者BGR-->RGB),默认为False | -| mean | list | 否 | 前处理标准化参数均值,默认为[0, 0, 0] | -| std | list | 否 | 前处理标准化参数方差,默认为[1, 1, 1] | -| input_range | list | 否 | 输入数据反量化后对应浮点数的范围,默认为[0,1] | -| output_range | list | 否 | 输出定点数据前对应浮点数的范围,默认为空,使用模型实际浮点输出范围 | +| 属性名称 | 类型 | 是否必须 | 描述 | +| ---------------- | ------ | -------- | ----------------------------------------------------------------------------------------------------------------------------------------------------- | +| target | string | 是 | 指定编译目标, 如'k210', 'k510' | +| quant_type | string | 否 | 指定数据量化类型, 如'uint8', 'int8', 'int16' | +| w_quant_type | string | 否 | 指定权重量化类型, 如'uint8', 'int8', 'int16', 默认为'uint8' | +| use_mse_quant_w | bool | 否 | 指定权重量化时是否使用最小化均方误差(mean-square error, MSE)算法优化量化参数 | +| split_w_to_act | bool | 否 | 指定是否将权重数据平衡到激活数据中 | +| preprocess | bool | 否 | 是否开启前处理,默认为False | +| swapRB | bool | 否 | 是否交换RGB输入数据的红和蓝两个通道(RGB-->BGR或者BGR-->RGB),默认为False | +| mean | list | 否 | 前处理标准化参数均值,默认为[0, 0, 0] | +| std | list | 否 | 前处理标准化参数方差,默认为[1, 1, 1] | +| input_range | list | 否 | 输入数据反量化后对应浮点数的范围,默认为[0,1] | +| output_range | list | 否 | 输出定点数据前对应浮点数的范围,默认为空,使用模型实际浮点输出范围 | | input_shape | list | 否 | 指定输入数据的shape,input_shape的layout需要与input layout保持一致,输入数据的input_shape与模型的input shape不一致时会进行letterbox操作(resize/pad等) | -| letterbox_value | float | 否 | 指定前处理letterbox的填充值 | -| input_type | string | 否 | 指定输入数据的类型, 默认为'float32' | -| output_type | string | 否 | 指定输出数据的类型, 如'float32', 'uint8'(仅用于指定量化情况下), 默认为'float32' | -| input_layout | string | 否 | 指定输入数据的layout, 如'NCHW', 'NHWC'. 若输入数据layout与模型本身layout不同, nncase会插入transpose进行转换 | -| output_layout | string | 否 | 指定输出数据的layout, 如'NCHW', 'NHWC'. 若输出数据layout与模型本身layout不同, nncase会插入transpose进行转换 | -| model_layout | string | 否 | 指定模型的layout,默认为空,当tflite模型layout为‘NCHW’,Onnx和Caffe模型layout为‘NHWC’时需指定 | -| is_fpga | bool | 否 | 指定kmodel是否用于fpga, 默认为False | -| dump_ir | bool | 否 | 指定是否dump IR, 默认为False | -| dump_asm | bool | 否 | 指定是否dump asm汇编文件, 默认为False | -| dump_quant_error | bool | 否 | 指定是否dump量化前后的模型误差 | -| dump_dir | string | 否 | 前面指定dump_ir等开关后, 这里指定dump的目录, 默认为空字符串 | -| benchmark_only | bool | 否 | 指定kmodel是否只用于benchmark, 默认为False | +| letterbox_value | float | 否 | 指定前处理letterbox的填充值 | +| input_type | string | 否 | 指定输入数据的类型, 默认为'float32' | +| output_type | string | 否 | 指定输出数据的类型, 如'float32', 'uint8'(仅用于指定量化情况下), 默认为'float32' | +| input_layout | string | 否 | 指定输入数据的layout, 如'NCHW', 'NHWC'. 若输入数据layout与模型本身layout不同, nncase会插入transpose进行转换 | +| output_layout | string | 否 | 指定输出数据的layout, 如'NCHW', 'NHWC'. 
若输出数据layout与模型本身layout不同, nncase会插入transpose进行转换 |
+| model_layout     | string | 否       | 指定模型的layout,默认为空,当tflite模型layout为‘NCHW’,Onnx和Caffe模型layout为‘NHWC’时需指定 |
+| is_fpga          | bool   | 否       | 指定kmodel是否用于fpga, 默认为False |
+| dump_ir          | bool   | 否       | 指定是否dump IR, 默认为False |
+| dump_asm         | bool   | 否       | 指定是否dump asm汇编文件, 默认为False |
+| dump_quant_error | bool   | 否       | 指定是否dump量化前后的模型误差 |
+| dump_dir         | string | 否       | 前面指定dump_ir等开关后, 这里指定dump的目录, 默认为空字符串 |
+| benchmark_only   | bool   | 否       | 指定kmodel是否只用于benchmark, 默认为False |
 
 > 1. mean和std为浮点数进行normalize的参数,用户可以自由指定.
 > 2. input range为浮点数的范围,即如果输入数据类型为uint8,则input range为反量化到浮点之后的范围(可以不为0~1),可以自由指定.
@@ -769,10 +764,51 @@ if __name__ == '__main__':
 
 ## 部署 nncase runtime
 
-### K210
+### K210上板推理流程
+
+1. 下载官方[SDK](https://github.com/kendryte/kendryte-standalone-sdk)
+
+   ```shell
+   git clone https://github.com/kendryte/kendryte-standalone-sdk.git
+   cd kendryte-standalone-sdk
+   export KENDRYTE_WORKSPACE=`pwd`
+   ```
+2. 下载交叉编译工具链,并解压
 
-1. 从 [Release](https://github.com/kendryte/nncase/releases) 页面下载 `k210-runtime.zip`。
-2. 解压到 [kendryte-standalone-sdk](https://github.com/kendryte/kendryte-standalone-sdk) 's `lib/nncase/v1` 目录。
+   ```shell
+   wget https://github.com/kendryte/kendryte-gnu-toolchain/releases/download/v8.2.0-20190409/kendryte-toolchain-ubuntu-amd64-8.2.0-20190409.tar.xz -O $KENDRYTE_WORKSPACE/kendryte-toolchain.tar.xz
+   cd $KENDRYTE_WORKSPACE
+   mkdir toolchain
+   tar -xf kendryte-toolchain.tar.xz -C ./toolchain
+   ```
+3. 更新runtime
+
+   从 [Release](https://github.com/kendryte/nncase/releases) 页面下载 `k210-runtime.zip`,解压到 [kendryte-standalone-sdk](https://github.com/kendryte/kendryte-standalone-sdk) 的 `lib/nncase/v1` 目录。
+4. 编译App
+
+   ````shell
+   # 1.将自己的App工程放在`$KENDRYTE_WORKSPACE/src`目录下
+   # 例如,将[example的示例程序]($NNCASE_WORK_DIR/examples/facedetect_landmark/k210/facedetect_landmark_example)目录,拷贝到SDK的src目录下。
+   cp -r $NNCASE_WORK_DIR/examples/facedetect_landmark/k210/facedetect_landmark_example $KENDRYTE_WORKSPACE/src/
+
+   # 2.cmake 编译App
+   cd $KENDRYTE_WORKSPACE
+   mkdir build && cd build
+   cmake .. -DPROJ=facedetect_landmark_example -DTOOLCHAIN=$KENDRYTE_WORKSPACE/toolchain/kendryte-toolchain/bin && make
+   ````
+
+   之后会在 `build` 目录下生成 `facedetect_landmark_example`和 `facedetect_landmark_example.bin`。
+5. 烧写App
+
+   ```shell
+   # 1. 检查可用的USB端口
+   ls /dev/ttyUSB*
+   # > /dev/ttyUSB0 /dev/ttyUSB1
+   # 2. 使用kflash进行烧录
+   kflash -p /dev/ttyUSB0 -t facedetect_landmark_example.bin
+   ```
+
+   烧写过程缓慢,需要耐心等待。
 
 ## nncase 推理模型APIs
 
@@ -1206,144 +1242,4 @@ N/A
 
 ```python
 sim.run()
-```
-
-# ncc
-
-## 命令行
-
-```shell
-DESCRIPTION
-NNCASE model compiler and inference tool.
-
-SYNOPSIS
-    ncc compile -i -t
-        [--input-prototxt ] [--output-arrays ]
-        [--quant-type ] [--w-quant-type ] [--use-mse-quant-w]
-        [--dataset ] [--dataset-format ] [--calibrate-method ]
-        [--preprocess] [--swapRB] [--mean ] [--std ]
-        [--input-range ] [--input-shape ] [--letterbox-value ]
-        [--input-type ] [--output-type ]
-        [--input-layout ] [--output-layout ] [--tcu-num ]
-        [--is-fpga] [--dump-ir] [--dump-asm] [--dump-quant-error] [--dump-import-op-range] [--dump-dir ]
-        [--dump-range-dataset ] [--dump-range-dataset-format ] [--benchmark-only]
-
-    ncc infer
-        --dataset [--dataset-format ]
-        [--input-layout ]
-
-    ncc [-v]
-
-OPTIONS
-  compile
-
-    -i, --input-format
-            input format, e.g. tflite|onnx|caffe
-    -t, --target target architecture, e.g. 
cpu|k210|k510 - input file - --input-prototxt - input prototxt - output file - --output-arrays - output arrays - --quant-type - post trainning quantize type, e.g uint8|int8|int16, default is uint8 - --w-quant-type - post trainning weights quantize type, e.g uint8|int8|int16, default is uint8 - --use-mse-quant-w use min mse algorithm to refine weights quantilization or not, default is 0 - --dataset - calibration dataset, used in post quantization - --dataset-format - datset format: e.g. image|raw, default is image - --dump-range-dataset - dump import op range dataset - --dump-range-dataset-format - datset format: e.g. image|raw, default is image - --calibrate-method - calibrate method: e.g. no_clip|l2|kld_m0|kld_m1|kld_m2|cdf, default is no_clip - --preprocess enable preprocess, default is 0 - --swapRB swap red and blue channel, default is 0 - --mean normalize mean, default is 0. 0. 0. - --std normalize std, default is 1. 1. 1. - --input-range - float range after preprocess - --input-shape - shape for input data - --letterbox-value - letter box pad value, default is 0.000000 - --input-type - input type, e.g float32|uint8|default, default is default - --output-type - output type, e.g float32|uint8, default is float32 - --input-layout - input layout, e.g NCHW|NHWC, default is NCHW - --output-layout - output layout, e.g NCHW|NHWC, default is NCHW - --tcu-num tcu number, e.g 1|2|3|4, default is 0 - --is-fpga use fpga parameters, default is 0 - --dump-ir dump ir to .dot, default is 0 - --dump-asm dump assembly, default is 0 - --dump-quant-error dump quant error, default is 0 - --dump-import-op-range dump import op range, default is 0 - --dump-dir - dump to directory - --benchmark-only compile kmodel only for benchmark use, default is 0 - - infer - - kmodel filename - output path - --dataset - dataset path - --dataset-format - dataset format, e.g. image|raw, default is image - --input-layout - input layout, e.g NCHW|NHWC, default is NCHW -``` - -## 描述 - -`ncc` 是 nncase 的命令行工具。它有两个命令: `compile` 和 `infer`。 - -`compile` 命令将你训练好的模型 (`.tflite`, `.caffemodel`, `.onnx`) 编译到 `.kmodel`。 - -- `-i, --input-format` 用来指定输入模型的格式。nncase 现在支持 `tflite`、`caffe` 和 `onnx` 输入格式。 -- `-t, --target` 用来指定你想要你的模型在哪种目标设备上运行。`cpu` 几乎所有平台都支持的通用目标。`k210` 是 Kendryte K210 SoC 平台。如果你指定了 `k210`,这个模型就只能在 K210 运行或在你的 PC 上模拟运行。 -- `` 用于指定输入模型文件 -- `--input-prototxt`用于指定caffe模型的prototxt文件 -- `` 用于指定输出模型文件 -- `--output-arrays `用于指定输出结点的名称 -- `--quant-type` 用于指定数据的量化类型, 如 `uint8`/`int8`/`int16, 默认是`uint8 -- `--w-quant-type` 用于指定权重的量化类型, 如 `uint8`/`int8`/`int16, 默认是`uint8 -- `--use-mse-quant-w`指定是否使用最小化mse(mean-square error, 均方误差)算法来量化权重. 
-- `--dataset` 用于提供量化校准集来量化你的模型。你需要从训练集中选择几百到上千个数据放到这个目录里。 -- `--dataset-format` 用于指定量化校准集的格式。默认是 `image`,nncase 将使用 `opencv` 读取你的图片,并自动缩放到你的模型输入需要的尺寸。如果你的输入有 3 个通道,ncc 会将你的图片转换为值域是 [0,1] 布局是 `NCHW` 的张量。如果你的输入只有 1 个通道,ncc 会灰度化你的图片。如果你的数据集不是图片(例如音频或者矩阵),把它设置为 `raw`。这种场景下你需要把你的数据集转换为 float 张量的二进制文件。 -- `--dump-range-dataset` 用于提供统计范围数据集来统计原始模型每个节点输出数据范围。你需要从训练集中选择几百到上千个数据放到这个目录里。 -- `--dump-range-dataset-format` 用于指定统计范围数据集的格式。默认是 `image`,nncase 将使用 `opencv` 读取你的图片,并自动缩放到你的模型输入需要的尺寸。如果你的输入有 3 个通道,ncc 会将你的图片转换为值域是 [0,1] 布局是 `NCHW` 的张量。如果你的输入只有 1 个通道,ncc 会灰度化你的图片。如果你的数据集不是图片(例如音频或者矩阵),把它设置为 `raw`。这种场景下你需要把你的数据集转换为 float 张量的二进制文件。 -- `--calibrate-method` 用于设置量化校准方法,它被用来选择最优的激活函数值域。默认值是 `no_clip`,ncc 会使用整个激活函数值域。如果你需要更好的量化结果,你可以使用 `l2`,但它需要花更长的时间寻找最优值域。 -- `--preprocess`指定是否预处理, 添加后表示开启预处理 -- `--swapRB`指定**预处理时**是否交换红和蓝两个通道数据, 用于实现RGB2BGR或BGR2RGB功能 -- `--mean`指定**预处理时**标准化参数均值,例如添加 `--mean "0.1 2.3 33.1f"`用于设置三个通道的均值. -- `--std`指定**预处理时**标准化参数方差,例如添加 `--std "1. 2. 3."`用于设置三个通道的方差. -- `--input-range`指定输入数据反量化后的数据范围,例如添加 `--input-range "0.1 2."`设置反量化的范围为 `[0.1~2]`. -- `--input-shape`指定输入数据的形状. 若与模型的输入形状不同, 则预处理时会做resize/pad等处理, 例如添加 `--input-shape "1 1 28 28"`指明当前输入图像尺寸. -- `--letterbox-value`用于指定预处理时pad填充的值. -- `--input-type` 用于指定推理时输入的数据类型。如果 `--input-type` 是 `uint8`,推理时你需要提供 RGB888 uint8 张量。如果 `--input-type` 是 `float`,你则需要提供 RGB float 张量. -- `--output-type` 用于指定推理时输出的数据类型。如 `float`/`uint8`, `uint8`仅在量化模型时才有效. 默认是 `float` -- `--input-layout`用于指定输入数据的layout. 若输入数据的layout与模型的layout不同, 预处理会添加transpose进行转换. -- `--output-layout`用于指定输出数据的layout -- `--tcu-num`用于指定tcu个数, 默认值为0, 表示不配置tcu个数. -- `--is-fpga`指定编译后的kmodel是否运行在fpga上 -- `--dump-ir` 是一个调试选项。当它打开时 ncc 会在工作目录产生一些 `.dot` 文件。你可以使用 `Graphviz` 或 [Graphviz Online](https://dreampuf.github.io/GraphvizOnline) 来查看这些文件。 -- `--dump-asm` 是一个调试选项。当它打开时 ncc 会生成硬件指令文件compile.text.asm -- `--dump-quant-error`是一个调试选项, 用于dump量化错误信息 -- `--dump-import-op-range`是一个调试选项, 用于dump import之后节点的数据范围,需要同时指定dump-range-dataset -- `--dump-dir`是一个调试选项, 用于指定dump目录. -- `--benchmark-only`是一个调试选项, 用于指定编译后的kmodel用于benchmark. - -`infer` 命令可以运行你的 kmodel,通常它被用来调试。ncc 会将你模型的输出张量按 `NCHW` 布局保存到 `.bin` 文件。 - -- `` kmodel 的路径。 -- `` ncc 输出目录。 -- `--dataset` 测试集路径。 -- `--dataset-format`和 `--input-layout`同 `compile` 命令中的含义。 +``` \ No newline at end of file diff --git a/examples/user_guide/README.md b/examples/user_guide/README.md new file mode 100644 index 0000000000..34aaf7744a --- /dev/null +++ b/examples/user_guide/README.md @@ -0,0 +1,15 @@ +模型编译推理参考Jupyter脚本:[User_guide](./simulate.ipynb),脚本中包含了单输入和多输入的示例。也可以使用单独的编译脚本 [Single build](../../docs/USAGE_ZH.md#编译模型示例)完成kmodel的编译。 + +如果在Docker中运行Jupyter脚本,可以参考[配置Jupyter lab](https://github.com/kunjing96/docker-jupyterlab#32-%E9%85%8D%E7%BD%AEjupyter-lab)进行配置。 + +在执行脚本之前需要根据自身需求修改以下内容: + +1. `compile_kmodel`函数中 `compile_options`,`ptq_options`相关信息 + `compile_options`详细信息见[CompileOptions](../../docs/USAGE_ZH.md#CompileOptions) + `ptq_options`详细信息见[PTQTensorOptions](../../docs/USAGE_ZH.md#PTQTensorOptions) +2. `compile kmodel single input(multiple inputs)`部分 + 修改 `model_path`和 `dump_path`,用于指定模型路径和编译期间文件生成路径。 + 修改 `calib_data`的实现,数据格式见注释。 +3. 
`run kmodel(simulate)`部分,修改 `input_data`的实现,数据格式见注释。 + +推理结束后,会在 `dump_path`路径下生成 `kmodel`、输出结果和编译期间的文件。 \ No newline at end of file diff --git a/examples/user_guide/nncase_base_func.py b/examples/user_guide/nncase_base_func.py new file mode 100644 index 0000000000..a624e9c0a7 --- /dev/null +++ b/examples/user_guide/nncase_base_func.py @@ -0,0 +1,95 @@ +import os + +import numpy as np +import onnx +import onnxsim +from sklearn.metrics.pairwise import cosine_similarity + +import nncase + + +def get_cosine(vec1, vec2): + """ + result compare + """ + return cosine_similarity(vec1.reshape(1, -1), vec2.reshape(1, -1)) + + + +def read_model_file(model_file): + """ + read model + """ + with open(model_file, 'rb') as f: + model_content = f.read() + return model_content + + +def parse_model_input_output(model_file): + """ + parse onnx model + """ + onnx_model = onnx.load(model_file) + input_all = [node.name for node in onnx_model.graph.input] + input_initializer = [node.name for node in onnx_model.graph.initializer] + input_names = list(set(input_all) - set(input_initializer)) + input_tensors = [ + node for node in onnx_model.graph.input if node.name in input_names] + + # input + inputs = [] + for _, e in enumerate(input_tensors): + onnx_type = e.type.tensor_type + input_dict = {} + input_dict['name'] = e.name + input_dict['dtype'] = onnx.mapping.TENSOR_TYPE_TO_NP_TYPE[onnx_type.elem_type] + input_dict['shape'] = [i.dim_value for i in onnx_type.shape.dim] + inputs.append(input_dict) + + return onnx_model, inputs + +def model_simplify(model_file): + """ + simplify onnx model + """ + if model_file.split('.')[-1] == "onnx": + onnx_model, inputs = parse_model_input_output(model_file) + onnx_model = onnx.shape_inference.infer_shapes(onnx_model) + input_shapes = {} + for input in inputs: + input_shapes[input['name']] = input['shape'] + + onnx_model, check = onnxsim.simplify(onnx_model, overwrite_input_shapes=input_shapes) + assert check, "Simplified ONNX model could not be validated" + + model_file = os.path.join(os.path.dirname(model_file), 'simplified.onnx') + onnx.save_model(onnx_model, model_file) + print("[ onnx done ]") + elif model_file.split('.')[-1] == "tflite": + print("[ tflite pass ]") + else: + raise Exception(f"Unsupport type {model_file.split('.')[-1]}") + + return model_file + +def run_kmodel(kmodel_path, input_data): + print("\n---------start run kmodel---------") + print("Load kmodel...") + model_sim = nncase.Simulator() + with open(kmodel_path, 'rb') as f: + model_sim.load_model(f.read()) + + print("Set input data...") + for i, p_d in enumerate(input_data): + model_sim.set_input_tensor(i, nncase.RuntimeTensor.from_numpy(p_d)) + + print("Run...") + model_sim.run() + + print("Get output result...") + all_result = [] + for i in range(model_sim.outputs_size): + result = model_sim.get_output_tensor(i).to_numpy() + all_result.append(result) + print("----------------end-----------------") + return all_result \ No newline at end of file diff --git a/examples/user_guide/simulate.ipynb b/examples/user_guide/simulate.ipynb new file mode 100644 index 0000000000..6a9a041eaa --- /dev/null +++ b/examples/user_guide/simulate.ipynb @@ -0,0 +1,198 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "82a8f9c1-c2bf-4270-9f1f-ac25c9fdd898", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install --upgrade pip\n", + "#!pip uninstall -y nncase\n", + "!pip install nncase==1.9.0.20230322 --timeout=1000\n", + "#from versions: 1.0.0.20211029, 1.1.0.20211203, 1.3.0.20220127, 
1.4.0.20220303, 1.5.0.20220331, \n", + "# 1.6.0.20220505, 1.7.0.20220530, 1.7.1.20220701, 1.8.0.20220929, 1.9.0.20230322, 2.0.0.20230602, 2.1.0.20230703)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a7eff82e-295c-4cce-afbc-ce64c84dc40a", + "metadata": {}, + "outputs": [], + "source": [ + "import nncase\n", + "from nncase_base_func import *\n", + "# from parse_model import *\n", + "\n", + "\n", + "def compile_kmodel(model_path, dump_path, calib_data):\n", + " \"\"\"\n", + " Set compile options and ptq options.\n", + " Compile kmodel.\n", + " Dump the compile-time result to 'compile_options.dump_dir'\n", + " \"\"\"\n", + " print(\"----------model simplify----------\")\n", + " model_file = model_simplify(model_path)\n", + "\n", + " print(\"---------- set options ----------\")\n", + " # import_options\n", + " import_options = nncase.ImportOptions()\n", + " \n", + " # compile_options\n", + " compile_options = nncase.CompileOptions()\n", + " compile_options.target = \"k210\" # \"cpu\" \"k510\"\n", + " compile_options.dump_ir = True # if False, will not dump the compile-time result.\n", + " compile_options.dump_asm = True\n", + " compile_options.dump_dir = dump_path\n", + "\n", + " # preprocess args\n", + " compile_options.preprocess = True\n", + " if compile_options.preprocess:\n", + " compile_options.input_type = \"uint8\" # \"uint8\"\n", + " compile_options.swapRB = False\n", + " compile_options.input_shape = [1,224,320,3]\n", + " compile_options.input_range = [0,1]\n", + " compile_options.mean = [0,0,0]\n", + " compile_options.std = [1,1,1]\n", + " compile_options.input_layout = \"NHWC\" # \"NHWC\"\n", + " compile_options.output_layout = \"NHWC\" # \"NHWC\"\n", + " compile_options.letterbox_value = 0\n", + " \n", + " # quant args\n", + " compile_options.quant_type = \"uint8\" \n", + " compile_options.w_quant_type = \"uint8\"\n", + " compile_options.use_mse_quant_w = True\n", + " compile_options.split_w_to_act = False\n", + "\n", + " # quant options\n", + " ptq_options = nncase.PTQTensorOptions()\n", + " ptq_options.calibrate_method = \"no_clip\" # \"kld_m2\" \"l2\" \"cdf\"\n", + " ptq_options.samples_count = len(calib_data[0])\n", + " ptq_options.set_tensor_data(np.array(calib_data).tobytes())\n", + "\n", + " \n", + " # set options\n", + " compiler = nncase.Compiler(compile_options)\n", + " compiler.use_ptq(ptq_options)\n", + " \n", + " print(\"---------- compile ----------\")\n", + " # import\n", + " model_content = read_model_file(model_file)\n", + " if model_path.split(\".\")[-1] == \"onnx\":\n", + " compiler.import_onnx(model_content, import_options)\n", + " elif model_path.split(\".\")[-1] == \"tflite\":\n", + " compiler.import_tflite(model_content, import_options)\n", + "\n", + " # compile\n", + " compiler.compile()\n", + " kmodel = compiler.gencode_tobytes()\n", + " \n", + " kmodel_path = os.path.join(dump_path, \"test.kmodel\")\n", + " with open(kmodel_path, 'wb') as f:\n", + " f.write(kmodel)\n", + " print(\"---------- compile end ----------\")\n", + " return kmodel_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c957fe20-99c9-4a54-bae8-38361a8f8830", + "metadata": {}, + "outputs": [], + "source": [ + "# compile kmodel single input\n", + "model_path = \"./model_f32.tflite\"\n", + "dump_path = \"./tmp\"\n", + "\n", + "# If model has multi inputs, calib_data format is \"[[x1, x2,...], [y1, y2,...], ...]\"\n", + "# e.g. 
Model has three inputs (x, y, z), the calib_data is '[[x1, x2, x3],[y1, y2, y3],[z1, z2, z3]]'\n", + "\n", + "calib_data = [[np.random.rand(1,224,320,3).astype(np.float32), np.random.rand(1,224,320,3).astype(np.float32)]]\n", + "kmodel_path = compile_kmodel(model_path, dump_path, calib_data)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7f617edc-781c-4b8b-b45d-fef2f0b36a46", + "metadata": {}, + "outputs": [], + "source": [ + "# run kmodel(simulate)\n", + "kmodel_path = \"./tmp/test.kmodel\"\n", + "input_data = [np.random.rand(1,224,320,3).astype(np.float32)]\n", + "\n", + "result = run_kmodel(kmodel_path, input_data)\n", + "for idx, i in enumerate(result):\n", + " print(i.shape)\n", + " i.tofile(os.path.join(dump_path, \"nncase_result_{}.bin\".format(idx)))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "89280d3a", + "metadata": {}, + "outputs": [], + "source": [ + "# compile kmodel multiple inputs\n", + "model_path = \"./decoder_100.onnx\"\n", + "dump_path = \"./tmp_dec\"\n", + "\n", + "# If model has multiple inputs, calib_data format is \"[[x1, x2,...], [y1, y2,...], ...]\"\n", + "# e.g. Model has three inputs (x, y, z), the calib_data is '[[x1, x2, x3],[y1, y2, y3],[z1, z2, z3]]'\n", + "\n", + "calib_data = [[np.random.randint(1, 5, size=[3, 100], dtype='int64'), np.random.randint(1, 5, size=[3, 100], dtype='int64')],\n", + " [np.random.rand(100, 3, 192).astype(np.float32), np.random.rand(100, 3, 192).astype(np.float32)],\n", + " [np.random.rand(3, 100).astype(np.float32) > 0.5, np.random.rand(3, 100).astype(np.float32) > 0.5], ] # bool\n", + "\n", + "kmodel_path = compile_kmodel(model_path, dump_path, calib_data)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "22a25a7f", + "metadata": {}, + "outputs": [], + "source": [ + "# run kmodel(simulate)\n", + "import os\n", + "\n", + "kmodel_path = \"./tmp_dec/test.kmodel\"\n", + "input_data = [np.random.randint(1, 5, size=[3, 100], dtype='int64'),\n", + " np.random.rand(100, 3, 192).astype(np.float32),\n", + " np.random.rand(3, 100).astype(np.float32) > 0.5, ]\n", + "\n", + "result = run_kmodel(kmodel_path, input_data)\n", + "\n", + "for idx, i in enumerate(result):\n", + " print(i.shape)\n", + " i.tofile(os.path.join(dump_path, \"nncase_result_{}.bin\".format(idx)))\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tests/importer/onnx_/basic/test_slice.py b/tests/importer/onnx_/basic/test_slice.py index dc52e4fc6c..45c86684c3 100644 --- a/tests/importer/onnx_/basic/test_slice.py +++ b/tests/importer/onnx_/basic/test_slice.py @@ -13,7 +13,6 @@ # limitations under the License. # pylint: disable=invalid-name, unused-argument, import-outside-toplevel -from attr import attributes import pytest import onnx from onnx import helper