diff --git a/docs/USAGE_EN.md b/docs/USAGE_EN.md
index dadbb0360d..b8af24687a 100644
--- a/docs/USAGE_EN.md
+++ b/docs/USAGE_EN.md
@@ -2,18 +2,17 @@
# Overview
-nncase provides both python wheel package and ncc client to compile your neural models. The current documentation only works for nncase-v1. The available version are shown below.
+nncase provides a python wheel package to compile your neural models. The current documentation only works for nncase-v1. The available versions are shown below.
```
1.0.0.20211029, 1.1.0.20211203, 1.3.0.20220127, 1.4.0.20220303, 1.5.0.20220331, 1.6.0.20220505, 1.7.0.20220530, 1.7.1.20220701, 1.8.0.20220929, 1.9.0.20230322
```
-- nncase wheel package can be downloaded at [nncase release](https://github.com/kendryte/nncase/releases).
-- For ncc client, you should git clone nncase repository and then build it by yourself.
+- nncase wheel package can be downloaded at [nncase release](https://github.com/kendryte/nncase/releases).
# nncase python APIs
-nncase provides Python APIs to compile neural network model and inference on your PC.
+nncase provides Python APIs to compile neural network models and run inference on x86_64 and amd64 platforms.
## Installation
@@ -31,8 +30,6 @@ $ docker pull registry.cn-hangzhou.aliyuncs.com/kendryte/nncase:latest
$ docker run -it --rm -v `pwd`:/mnt -w /mnt registry.cn-hangzhou.aliyuncs.com/kendryte/nncase:latest /bin/bash -c "/bin/bash"
```
-
-
### cpu/K210
- Download nncase wheel package and then install it.
@@ -43,8 +40,6 @@ root@2b11cc15c7f8:/mnt# wget -P x86_64 https://github.com/kendryte/nncase/releas
root@2b11cc15c7f8:/mnt# pip3 install x86_64/*.whl
```
-
-
### K510
- Download both nncase and nncase_k510 wheel packages and then install them.
@@ -57,8 +52,6 @@ root@2b11cc15c7f8:/mnt# wget -P x86_64 https://github.com/kendryte/nncase/releas
root@2b11cc15c7f8:/mnt# pip3 install x86_64/*.whl
```
-
-
### Check nncase version
```python
@@ -71,8 +64,6 @@ Type "help", "copyright", "credits" or "license" for more information.
1.8.0-55be52f
```
-
-
## nncase compile model APIs
### CompileOptions
@@ -120,13 +111,13 @@ The details of all attributes are following.
| quant_type | string | N | Specify the quantization type for input data, such as 'uint8', 'int8', 'int16' |
| w_quant_type | string | N | Specify the quantization type for weights, such as 'uint8'(by default), 'int8', 'int16' |
| use_mse_quant_w | bool | N | Specify whether to use mean-square error when quantizing weights |
-| split_w_to_act | bool | N | Specify whether split weight into activation |
+| split_w_to_act | bool | N | Specify whether split weight into activation |
| preprocess | bool | N | Whether to enable preprocessing, False by default |
| swapRB | bool | N | Whether to swap the red and blue channels of RGB data (RGB to BGR or BGR to RGB), False by default |
| mean | list | N | Normalize mean value for preprocess, [0, 0, 0] by default |
| std | list | N | Normalize std value for preprocess, [1, 1, 1] by default |
| input_range | list | N | The float range for dequantized input data, [0,1] by default |
-| output_range | list | N | The float range for quantized output data, [ ] by default |
+| output_range | list | N | The float range for quantized output data, [ ] by default |
| input_shape | list | N | Specify the shape of input data. input_shape should be consistent with input_layout. There will be letterbox operations (such as resize/pad) if input_shape is not the same as the input shape of the model. |
| letterbox_value | float | N | Specify the pad value of letterbox during preprocess. |
| input_type | string | N | Specify the data type of input data, 'float32' by default. |
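As a rough illustration of how `input_type`, `input_range`, `mean` and `std` interact during preprocessing, the sketch below (plain Python, not nncase code; the pixel values are illustrative assumptions) dequantizes a uint8 input into the float range given by `input_range`, then applies the mean/std normalization:

```python
def dequantize(x_u8, input_range):
    """Map a uint8 value into the float range given by input_range."""
    low, high = input_range
    return x_u8 / 255.0 * (high - low) + low

def normalize(x, mean, std):
    """Per-channel normalization applied after dequantization."""
    return [(v - m) / s for v, m, s in zip(x, mean, std)]

# A single RGB pixel quantized as uint8, with the default input_range [0, 1]:
pixel = [0, 128, 255]
floats = [dequantize(v, (0.0, 1.0)) for v in pixel]
normed = normalize(floats, mean=[0, 0, 0], std=[1, 1, 1])
print(floats, normed)
```

With the default `mean`/`std`, normalization is the identity; a non-trivial `input_range` such as `[-1, 1]` simply rescales the same uint8 values into that range.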
@@ -775,12 +766,11 @@ if __name__ == '__main__':
1. Download [SDK](https://github.com/kendryte/kendryte-standalone-sdk)
- ```shell
- $ git clone https://github.com/kendryte/kendryte-standalone-sdk.git
- $ cd kendryte-standalone-sdk
- $ export KENDRYTE_WORKSPACE=`pwd`
- ```
-
+ ```shell
+ $ git clone https://github.com/kendryte/kendryte-standalone-sdk.git
+ $ cd kendryte-standalone-sdk
+ $ export KENDRYTE_WORKSPACE=`pwd`
+ ```
2. Download the cross-compile toolchain and extract it
```shell
@@ -789,33 +779,30 @@ if __name__ == '__main__':
$ mkdir toolchain
$ tar -xf kendryte-toolchain.tar.xz -C ./toolchain
```
-
3. Update nncase runtime
Download `k210-runtime.zip` from [Release](https://github.com/kendryte/nncase/releases) and extract it into the [kendryte-standalone-sdk](https://github.com/kendryte/kendryte-standalone-sdk) `lib/nncase/v1` directory.
-
4. Compile App
```shell
# 1. Copy your program into `$KENDRYTE_WORKSPACE/src`
# e.g. copy ($NNCASE_WORK_DIR/examples/facedetect_landmark/k210/facedetect_landmark_example) into PATH_TO_SDK/src.
$ cp -r $NNCASE_WORK_DIR/examples/facedetect_landmark/k210/facedetect_landmark_example $KENDRYTE_WORKSPACE/src/
-
+
# 2. compile
$ cd $KENDRYTE_WORKSPACE
$ mkdir build && cd build
$ cmake .. -DPROJ=facedetect_landmark_example -DTOOLCHAIN=$KENDRYTE_WORKSPACE/toolchain/kendryte-toolchain/bin && make
```
- `facedetect_landmark_example` and`FaceDETECt_landmark_example.bin` will be generated.
-
+ `facedetect_landmark_example` and `facedetect_landmark_example.bin` will be generated.
5. Write the program to the K210 development board
```shell
# 1. Check available USB ports
$ ls /dev/ttyUSB*
# /dev/ttyUSB0 /dev/ttyUSB1
-
+
# 2. Write your App by kflash
$ kflash -p /dev/ttyUSB0 -t facedetect_landmark_example.bin
```
@@ -1249,143 +1236,3 @@ N/A
```python
sim.run()
```
-
-# ncc
-
-## Comannd line
-
-```shell
-DESCRIPTION
-NNCASE model compiler and inference tool.
-
-SYNOPSIS
- ncc compile -i -t
- [--input-prototxt ] [--output-arrays ]
- [--quant-type ] [--w-quant-type ] [--use-mse-quant-w]
- [--dataset ] [--dataset-format ] [--calibrate-method ]
- [--preprocess] [--swapRB] [--mean ] [--std ]
- [--input-range ] [--input-shape ] [--letterbox-value ]
- [--input-type ] [--output-type ]
- [--input-layout ] [--output-layout ] [--tcu-num ]
- [--is-fpga] [--dump-ir] [--dump-asm] [--dump-quant-error] [--dump-import-op-range] [--dump-dir ]
- [--dump-range-dataset ] [--dump-range-dataset-format ] [--benchmark-only]
-
- ncc infer
- --dataset [--dataset-format ]
- [--input-layout ]
-
- ncc [-v]
-
-OPTIONS
- compile
-
- -i, --input-format
- input format, e.g. tflite|onnx|caffe
- -t, --target target architecture, e.g. cpu|k210|k510
- input file
- --input-prototxt
- input prototxt
- output file
- --output-arrays
- output arrays
- --quant-type
- post trainning quantize type, e.g uint8|int8|int16, default is uint8
- --w-quant-type
- post trainning weights quantize type, e.g uint8|int8|int16, default is uint8
- --use-mse-quant-w use min mse algorithm to refine weights quantilization or not, default is 0
- --dataset
- calibration dataset, used in post quantization
- --dataset-format
- datset format: e.g. image|raw, default is image
- --dump-range-dataset
- dump import op range dataset
- --dump-range-dataset-format
- datset format: e.g. image|raw, default is image
- --calibrate-method
- calibrate method: e.g. no_clip|l2|kld_m0|kld_m1|kld_m2|cdf, default is no_clip
- --preprocess enable preprocess, default is 0
- --swapRB swap red and blue channel, default is 0
- --mean normalize mean, default is 0. 0. 0.
- --std normalize std, default is 1. 1. 1.
- --input-range
- float range after preprocess
- --input-shape
- shape for input data
- --letterbox-value
- letter box pad value, default is 0.000000
- --input-type
- input type, e.g float32|uint8|default, default is default
- --output-type
- output type, e.g float32|uint8, default is float32
- --input-layout
- input layout, e.g NCHW|NHWC, default is NCHW
- --output-layout
- output layout, e.g NCHW|NHWC, default is NCHW
- --tcu-num tcu number, e.g 1|2|3|4, default is 0
- --is-fpga use fpga parameters, default is 0
- --dump-ir dump ir to .dot, default is 0
- --dump-asm dump assembly, default is 0
- --dump-quant-error dump quant error, default is 0
- --dump-import-op-range dump import op range, default is 0
- --dump-dir
- dump to directory
- --benchmark-only compile kmodel only for benchmark use, default is 0
-
- infer
-
- kmodel filename
- output path
- --dataset
- dataset path
- --dataset-format
- dataset format, e.g. image|raw, default is image
- --input-layout
- input layout, e.g NCHW|NHWC, default is NCHW
-```
-
-## Description
-
-`ncc` is the nncase command line tool. It has two commands: `compile` and `infer`.
-
-`compile` command compile your trained models (`.tflite`, `.caffemodel`, `.onnx`) to `.kmodel`.
-
-- `-i, --input-format` option is used to specify the input model format. nncase supports `tflite`, `caffe` and `onnx` input model currently.
-- `-t, --target` option is used to set your desired target device to run the model. `cpu` is the most general target that almost every platform should support. `k210` is the Kendryte K210 SoC platform. If you set this option to `k210`, this model can only run on K210 or be emulated on your PC.
-- ` ` is your input model path.
-- `--input-prototxt` is the prototxt file for caffe model.
-- `` is the output model path.
-- `--output-arrays` is the names of nodes to output.
-- `--quant-type` is used to specify quantize type, such as `uint8` by default and `int8` and `int16`.
-- `--w-quant-type` is used to specify quantize type for weight, such as `uint8` by default and `int8 `and `int16`.
-- `--use-mse-quant-w ` is used to specify whether use minimize mse(mean-square error, mse) algorithm to quantize weight or not.
-- `--dataset` is to provide your quantization calibration dataset to quantize your models. You should put hundreds or thousands of data in training set to this directory.
-- `--dataset-format` is to set the format of the calibration dataset. Default is `image`, nncase will use `opencv` to read your images and autoscale to the desired input size of your model. If the input has 3 channels, ncc will convert images to RGB float tensors [0,1] in `NCHW` layout. If the input has only 1 channel, ncc will grayscale your images. Set to `raw` if your dataset is not image dataset for example, audio or matrices. In this scenario you should convert your dataset to raw binaries which contains float tensors.
-- `--dump-range-dataset` is to provide your dump range dataset to dump each op data range of your models. You should put hundreds or thousands of data in training set to this directory.
-- `--dump-range-dataset-format` is to set the format of the dump range dataset. Default is `image`, nncase will use `opencv` to read your images and autoscale to the desired input size of your model. If the input has 3 channels, ncc will convert images to RGB float tensors [0,1] in `NCHW` layout. If the input has only 1 channel, ncc will grayscale your images. Set to `raw` if your dataset is not image dataset for example, audio or matrices. In this scenario you should convert your dataset to raw binaries which contains float tensors.
-- `--calibrate-method` is to set your desired calibration method, which is used to select the optimal activation ranges. The default is `no_clip` in that ncc will use the full range of activations. If you want a better quantization result, you can use `l2` but it will take a longer time to find the optimal ranges.
-- `--preprocess ` is used specify whether enable preprocessing or not.
-- `--swapRB ` is used specify whether swap red and blue channel or not. You can use this flag to implement RGB2BGR or BGR2RGB feature.
-- `--mean` is the mean values to be subtracted during preprocessing.
-- `--std` is the std values to be divided during preprocessing.
-- `--input-range` is the input range in float after dequantization.
-- `--input-shape` is used to specify the shape of input data. If the input shape is different from the input shape of your model, the preprocess will add resize/pad ops automatically for the transformation.
-- `--letterbox-value` is used to specify the pad values when pad is added during preprocessing.
-- `--input-type` is to set your desired input data type when do inference. If `--input-type` is `uint8`, for example you should provide RGB888 uint8 tensors when you do inference. If `--input-type` is `float`, you should provide RGB float tensors instead.
-- `--output-type` is the type of output data.
-- `--input-layout` is the layout of input data.
-- `--output-layout` is the layout of output data.
-- `--tcu-num` is used to configure the number of TCU. 0 means do not configure the number of TCU.
-- `--is-fpga` is a debug option. It is used to specify whether the kmodel run on fpga or not.
-- `--dump-ir` is a debug option. It is used to specify whether dump IR or not.
-- `--dump-asm` is a debug option. It is used to specify whether dump asm file or not.
-- `--dump-quant-error` is a debug option. It is used to specify whether dump quantization error information or not.
-- `--dump-import-op-range` is a debug option. It is used to specify whether dump imported op data range or not, need to also specify dump-range-dataset if enabled.
-- `--dump-dir` is used to specify dump directory.
-- `--benchmark-only` is used to specify whether the kmodel is used for benchmark or not.
-
-`infer` command can run your kmodel, and it's often used as debug purpose. ncc will save the model's output tensors to `.bin` files in `NCHW` layout.
-
-- ` ` is your kmodel path.
-- `` is the output directory ncc will produce to.
-- `--dataset` is the test set directory.
-- `--dataset-format` and `--input-layout` have the same meaning as in `compile` command.
diff --git a/docs/USAGE_ZH.md b/docs/USAGE_ZH.md
index 6923b6e292..0293f3eb5c 100644
--- a/docs/USAGE_ZH.md
+++ b/docs/USAGE_ZH.md
@@ -1,17 +1,16 @@
# 概述
-nncase目前提供了python wheel包和ncc客户端两种方法编译模型。当前文档仅适用于nncase-v1,可用版本号如下:
+nncase目前提供了python wheel包编译模型。当前文档仅适用于nncase-v1,适用于以下版本号:
```
1.0.0.20211029, 1.1.0.20211203, 1.3.0.20220127, 1.4.0.20220303, 1.5.0.20220331, 1.6.0.20220505, 1.7.0.20220530, 1.7.1.20220701, 1.8.0.20220929, 1.9.0.20230322
```
- nncase wheel包需要去[nncase release](https://github.com/kendryte/nncase/releases)获取
-- ncc客户端需要用户下载并编译nncase
# nncase python APIs
-nncase提供了Python APIs, 用于在PC上编译/推理深度学习模型.
+nncase提供了Python APIs, 用于在x86_64和amd64平台上编译/推理深度学习模型.
## 安装
@@ -29,8 +28,6 @@ $ docker pull registry.cn-hangzhou.aliyuncs.com/kendryte/nncase:latest
$ docker run -it --rm -v `pwd`:/mnt -w /mnt registry.cn-hangzhou.aliyuncs.com/kendryte/nncase:latest /bin/bash -c "/bin/bash"
```
-
-
### cpu/K210
- 下载nncase wheel包, 直接安装即可.
@@ -41,8 +38,6 @@ root@2b11cc15c7f8:/mnt# wget -P x86_64 https://github.com/kendryte/nncase/releas
root@2b11cc15c7f8:/mnt# pip3 install x86_64/*.whl
```
-
-
### K510
- 分别下载nncase和nncase_k510插件包,再一起安装
@@ -55,8 +50,6 @@ root@2b11cc15c7f8:/mnt# wget -P x86_64 https://github.com/kendryte/nncase/releas
root@2b11cc15c7f8:/mnt# pip3 install x86_64/*.whl
```
-
-
### 查看版本信息
```python
@@ -69,8 +62,6 @@ Type "help", "copyright", "credits" or "license" for more information.
1.8.0-55be52f
```
-
-
## nncase 编译模型APIs
### CompileOptions
@@ -112,32 +103,32 @@ py::class_(m, "CompileOptions")
各属性说明如下
-| 属性名称 | 类型 | 是否必须 | 描述 |
-| ---------------- | ------ | -------- | ------------------------------------------------------------ |
-| target | string | 是 | 指定编译目标, 如'k210', 'k510' |
-| quant_type | string | 否 | 指定数据量化类型, 如'uint8', 'int8', 'int16' |
-| w_quant_type | string | 否 | 指定权重量化类型, 如'uint8', 'int8', 'int16', 默认为'uint8' |
-| use_mse_quant_w | bool | 否 | 指定权重量化时是否使用最小化均方误差(mean-square error, MSE)算法优化量化参数 |
-| split_w_to_act | bool | 否 | 指定是否将权重数据平衡到激活数据中 |
-| preprocess | bool | 否 | 是否开启前处理,默认为False |
-| swapRB | bool | 否 | 是否交换RGB输入数据的红和蓝两个通道(RGB-->BGR或者BGR-->RGB),默认为False |
-| mean | list | 否 | 前处理标准化参数均值,默认为[0, 0, 0] |
-| std | list | 否 | 前处理标准化参数方差,默认为[1, 1, 1] |
-| input_range | list | 否 | 输入数据反量化后对应浮点数的范围,默认为[0,1] |
-| output_range | list | 否 | 输出定点数据前对应浮点数的范围,默认为空,使用模型实际浮点输出范围 |
+| 属性名称 | 类型 | 是否必须 | 描述 |
+| ---------------- | ------ | -------- | ----------------------------------------------------------------------------------------------------------------------------------------------------- |
+| target | string | 是 | 指定编译目标, 如'k210', 'k510' |
+| quant_type | string | 否 | 指定数据量化类型, 如'uint8', 'int8', 'int16' |
+| w_quant_type | string | 否 | 指定权重量化类型, 如'uint8', 'int8', 'int16', 默认为'uint8' |
+| use_mse_quant_w | bool | 否 | 指定权重量化时是否使用最小化均方误差(mean-square error, MSE)算法优化量化参数 |
+| split_w_to_act | bool | 否 | 指定是否将权重数据平衡到激活数据中 |
+| preprocess | bool | 否 | 是否开启前处理,默认为False |
+| swapRB | bool | 否 | 是否交换RGB输入数据的红和蓝两个通道(RGB-->BGR或者BGR-->RGB),默认为False |
+| mean | list | 否 | 前处理标准化参数均值,默认为[0, 0, 0] |
+| std | list | 否 | 前处理标准化参数方差,默认为[1, 1, 1] |
+| input_range | list | 否 | 输入数据反量化后对应浮点数的范围,默认为[0,1] |
+| output_range | list | 否 | 输出定点数据前对应浮点数的范围,默认为空,使用模型实际浮点输出范围 |
| input_shape | list | 否 | 指定输入数据的shape,input_shape的layout需要与input layout保持一致,输入数据的input_shape与模型的input shape不一致时会进行letterbox操作(resize/pad等) |
-| letterbox_value | float | 否 | 指定前处理letterbox的填充值 |
-| input_type | string | 否 | 指定输入数据的类型, 默认为'float32' |
-| output_type | string | 否 | 指定输出数据的类型, 如'float32', 'uint8'(仅用于指定量化情况下), 默认为'float32' |
-| input_layout | string | 否 | 指定输入数据的layout, 如'NCHW', 'NHWC'. 若输入数据layout与模型本身layout不同, nncase会插入transpose进行转换 |
-| output_layout | string | 否 | 指定输出数据的layout, 如'NCHW', 'NHWC'. 若输出数据layout与模型本身layout不同, nncase会插入transpose进行转换 |
-| model_layout | string | 否 | 指定模型的layout,默认为空,当tflite模型layout为‘NCHW’,Onnx和Caffe模型layout为‘NHWC’时需指定 |
-| is_fpga | bool | 否 | 指定kmodel是否用于fpga, 默认为False |
-| dump_ir | bool | 否 | 指定是否dump IR, 默认为False |
-| dump_asm | bool | 否 | 指定是否dump asm汇编文件, 默认为False |
-| dump_quant_error | bool | 否 | 指定是否dump量化前后的模型误差 |
-| dump_dir | string | 否 | 前面指定dump_ir等开关后, 这里指定dump的目录, 默认为空字符串 |
-| benchmark_only | bool | 否 | 指定kmodel是否只用于benchmark, 默认为False |
+| letterbox_value | float | 否 | 指定前处理letterbox的填充值 |
+| input_type | string | 否 | 指定输入数据的类型, 默认为'float32' |
+| output_type | string | 否 | 指定输出数据的类型, 如'float32', 'uint8'(仅用于指定量化情况下), 默认为'float32' |
+| input_layout | string | 否 | 指定输入数据的layout, 如'NCHW', 'NHWC'. 若输入数据layout与模型本身layout不同, nncase会插入transpose进行转换 |
+| output_layout | string | 否 | 指定输出数据的layout, 如'NCHW', 'NHWC'. 若输出数据layout与模型本身layout不同, nncase会插入transpose进行转换 |
+| model_layout | string | 否 | 指定模型的layout,默认为空,当tflite模型layout为‘NCHW’,Onnx和Caffe模型layout为‘NHWC’时需指定 |
+| is_fpga | bool | 否 | 指定kmodel是否用于fpga, 默认为False |
+| dump_ir | bool | 否 | 指定是否dump IR, 默认为False |
+| dump_asm | bool | 否 | 指定是否dump asm汇编文件, 默认为False |
+| dump_quant_error | bool | 否 | 指定是否dump量化前后的模型误差 |
+| dump_dir | string | 否 | 前面指定dump_ir等开关后, 这里指定dump的目录, 默认为空字符串 |
+| benchmark_only | bool | 否 | 指定kmodel是否只用于benchmark, 默认为False |
> 1. mean和std为浮点数进行normalize的参数,用户可以自由指定.
> 2. input range为浮点数的范围,即如果输入数据类型为uint8,则input range为反量化到浮点之后的范围(可以不为0~1),可以自由指定.
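下面用纯Python粗略演示当input_shape与模型输入尺寸不一致时, letterbox(等比缩放+填充)的计算方式(仅为示意, 并非nncase的实现, 尺寸数值为假设):

```python
def letterbox_params(src_h, src_w, dst_h, dst_w):
    """计算letterbox的缩放比例与上/下/左/右填充量(示意实现)。"""
    scale = min(dst_h / src_h, dst_w / src_w)        # 等比缩放, 取较小比例
    new_h = int(round(src_h * scale))
    new_w = int(round(src_w * scale))
    pad_h, pad_w = dst_h - new_h, dst_w - new_w      # 剩余空间用letterbox_value填充
    top, left = pad_h // 2, pad_w // 2
    return scale, (top, pad_h - top, left, pad_w - left)

# 假设输入图像为240x320, 模型输入为224x224:
scale, (top, bottom, left, right) = letterbox_params(240, 320, 224, 224)
print(scale, top, bottom, left, right)
```

填充值即为letterbox_value指定的值; 若input_shape与模型输入一致, 则不会插入resize/pad操作。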
@@ -782,7 +773,6 @@ if __name__ == '__main__':
cd kendryte-standalone-sdk
export KENDRYTE_WORKSPACE=`pwd`
```
-
2. 下载交叉编译工具链,并解压
```shell
@@ -791,26 +781,23 @@ if __name__ == '__main__':
mkdir toolchain
tar -xf kendryte-toolchain.tar.xz -C ./toolchain
```
-
3. 更新runtime
从 [Release](https://github.com/kendryte/nncase/releases) 页面下载 `k210-runtime.zip`,解压到 [kendryte-standalone-sdk](https://github.com/kendryte/kendryte-standalone-sdk) 的 `lib/nncase/v1` 目录。
-
4. 编译App
````shell
# 1.将自己的App工程放在`$KENDRYTE_WORKSPACE/src`目录下
# 例如,将[example的示例程序]($NNCASE_WORK_DIR/examples/facedetect_landmark/k210/facedetect_landmark_example)目录,拷贝到SDK的src目录下。
cp -r $NNCASE_WORK_DIR/examples/facedetect_landmark/k210/facedetect_landmark_example $KENDRYTE_WORKSPACE/src/
-
+
# 2.cmake 编译App
cd $KENDRYTE_WORKSPACE
mkdir build && cd build
cmake .. -DPROJ=facedetect_landmark_example -DTOOLCHAIN=$KENDRYTE_WORKSPACE/toolchain/kendryte-toolchain/bin && make
````
- 之后会在当前目录下生成`facedetect_landmark_example`和`facedetect_landmark_example.bin`
-
+ 之后会在当前目录下生成 `facedetect_landmark_example` 和 `facedetect_landmark_example.bin`
5. 烧写App
```shell
@@ -823,8 +810,6 @@ if __name__ == '__main__':
烧写过程缓慢,需要耐心等待。
-
-
## nncase 推理模型APIs
除了编译模型APIs, nncase还提供了推理模型的APIs, 在PC上可推理前面编译生成的kmodel, 用来验证nncase推理结果和相应深度学习框架的runtime的结果是否一致等.
@@ -1257,144 +1242,4 @@ N/A
```python
sim.run()
-```
-
-# ncc
-
-## 命令行
-
-```shell
-DESCRIPTION
-NNCASE model compiler and inference tool.
-
-SYNOPSIS
- ncc compile -i -t
- [--input-prototxt ] [--output-arrays ]
- [--quant-type ] [--w-quant-type ] [--use-mse-quant-w]
- [--dataset ] [--dataset-format ] [--calibrate-method ]
- [--preprocess] [--swapRB] [--mean ] [--std ]
- [--input-range ] [--input-shape ] [--letterbox-value ]
- [--input-type ] [--output-type ]
- [--input-layout ] [--output-layout ] [--tcu-num ]
- [--is-fpga] [--dump-ir] [--dump-asm] [--dump-quant-error] [--dump-import-op-range] [--dump-dir ]
- [--dump-range-dataset ] [--dump-range-dataset-format ] [--benchmark-only]
-
- ncc infer
- --dataset [--dataset-format ]
- [--input-layout ]
-
- ncc [-v]
-
-OPTIONS
- compile
-
- -i, --input-format
- input format, e.g. tflite|onnx|caffe
- -t, --target target architecture, e.g. cpu|k210|k510
- input file
- --input-prototxt
- input prototxt
- output file
- --output-arrays
- output arrays
- --quant-type
- post trainning quantize type, e.g uint8|int8|int16, default is uint8
- --w-quant-type
- post trainning weights quantize type, e.g uint8|int8|int16, default is uint8
- --use-mse-quant-w use min mse algorithm to refine weights quantilization or not, default is 0
- --dataset
- calibration dataset, used in post quantization
- --dataset-format
- datset format: e.g. image|raw, default is image
- --dump-range-dataset
- dump import op range dataset
- --dump-range-dataset-format
- datset format: e.g. image|raw, default is image
- --calibrate-method
- calibrate method: e.g. no_clip|l2|kld_m0|kld_m1|kld_m2|cdf, default is no_clip
- --preprocess enable preprocess, default is 0
- --swapRB swap red and blue channel, default is 0
- --mean normalize mean, default is 0. 0. 0.
- --std normalize std, default is 1. 1. 1.
- --input-range
- float range after preprocess
- --input-shape
- shape for input data
- --letterbox-value
- letter box pad value, default is 0.000000
- --input-type
- input type, e.g float32|uint8|default, default is default
- --output-type
- output type, e.g float32|uint8, default is float32
- --input-layout
- input layout, e.g NCHW|NHWC, default is NCHW
- --output-layout
- output layout, e.g NCHW|NHWC, default is NCHW
- --tcu-num tcu number, e.g 1|2|3|4, default is 0
- --is-fpga use fpga parameters, default is 0
- --dump-ir dump ir to .dot, default is 0
- --dump-asm dump assembly, default is 0
- --dump-quant-error dump quant error, default is 0
- --dump-import-op-range dump import op range, default is 0
- --dump-dir
- dump to directory
- --benchmark-only compile kmodel only for benchmark use, default is 0
-
- infer
-
- kmodel filename
- output path
- --dataset
- dataset path
- --dataset-format
- dataset format, e.g. image|raw, default is image
- --input-layout
- input layout, e.g NCHW|NHWC, default is NCHW
-```
-
-## 描述
-
-`ncc` 是 nncase 的命令行工具。它有两个命令: `compile` 和 `infer`。
-
-`compile` 命令将你训练好的模型 (`.tflite`, `.caffemodel`, `.onnx`) 编译到 `.kmodel`。
-
-- `-i, --input-format` 用来指定输入模型的格式。nncase 现在支持 `tflite`、`caffe` 和 `onnx` 输入格式。
-- `-t, --target` 用来指定你想要你的模型在哪种目标设备上运行。`cpu` 几乎所有平台都支持的通用目标。`k210` 是 Kendryte K210 SoC 平台。如果你指定了 `k210`,这个模型就只能在 K210 运行或在你的 PC 上模拟运行。
-- ` ` 用于指定输入模型文件
-- `--input-prototxt`用于指定caffe模型的prototxt文件
-- `` 用于指定输出模型文件
-- `--output-arrays `用于指定输出结点的名称
-- `--quant-type` 用于指定数据的量化类型, 如 `uint8`/`int8`/`int16, 默认是`uint8
-- `--w-quant-type` 用于指定权重的量化类型, 如 `uint8`/`int8`/`int16, 默认是`uint8
-- `--use-mse-quant-w`指定是否使用最小化mse(mean-square error, 均方误差)算法来量化权重.
-- `--dataset` 用于提供量化校准集来量化你的模型。你需要从训练集中选择几百到上千个数据放到这个目录里。
-- `--dataset-format` 用于指定量化校准集的格式。默认是 `image`,nncase 将使用 `opencv` 读取你的图片,并自动缩放到你的模型输入需要的尺寸。如果你的输入有 3 个通道,ncc 会将你的图片转换为值域是 [0,1] 布局是 `NCHW` 的张量。如果你的输入只有 1 个通道,ncc 会灰度化你的图片。如果你的数据集不是图片(例如音频或者矩阵),把它设置为 `raw`。这种场景下你需要把你的数据集转换为 float 张量的二进制文件。
-- `--dump-range-dataset` 用于提供统计范围数据集来统计原始模型每个节点输出数据范围。你需要从训练集中选择几百到上千个数据放到这个目录里。
-- `--dump-range-dataset-format` 用于指定统计范围数据集的格式。默认是 `image`,nncase 将使用 `opencv` 读取你的图片,并自动缩放到你的模型输入需要的尺寸。如果你的输入有 3 个通道,ncc 会将你的图片转换为值域是 [0,1] 布局是 `NCHW` 的张量。如果你的输入只有 1 个通道,ncc 会灰度化你的图片。如果你的数据集不是图片(例如音频或者矩阵),把它设置为 `raw`。这种场景下你需要把你的数据集转换为 float 张量的二进制文件。
-- `--calibrate-method` 用于设置量化校准方法,它被用来选择最优的激活函数值域。默认值是 `no_clip`,ncc 会使用整个激活函数值域。如果你需要更好的量化结果,你可以使用 `l2`,但它需要花更长的时间寻找最优值域。
-- `--preprocess`指定是否预处理, 添加后表示开启预处理
-- `--swapRB`指定**预处理时**是否交换红和蓝两个通道数据, 用于实现RGB2BGR或BGR2RGB功能
-- `--mean`指定**预处理时**标准化参数均值,例如添加 `--mean "0.1 2.3 33.1f"`用于设置三个通道的均值.
-- `--std`指定**预处理时**标准化参数方差,例如添加 `--std "1. 2. 3."`用于设置三个通道的方差.
-- `--input-range`指定输入数据反量化后的数据范围,例如添加 `--input-range "0.1 2."`设置反量化的范围为 `[0.1~2]`.
-- `--input-shape`指定输入数据的形状. 若与模型的输入形状不同, 则预处理时会做resize/pad等处理, 例如添加 `--input-shape "1 1 28 28"`指明当前输入图像尺寸.
-- `--letterbox-value`用于指定预处理时pad填充的值.
-- `--input-type` 用于指定推理时输入的数据类型。如果 `--input-type` 是 `uint8`,推理时你需要提供 RGB888 uint8 张量。如果 `--input-type` 是 `float`,你则需要提供 RGB float 张量.
-- `--output-type` 用于指定推理时输出的数据类型。如 `float`/`uint8`, `uint8`仅在量化模型时才有效. 默认是 `float`
-- `--input-layout`用于指定输入数据的layout. 若输入数据的layout与模型的layout不同, 预处理会添加transpose进行转换.
-- `--output-layout`用于指定输出数据的layout
-- `--tcu-num`用于指定tcu个数, 默认值为0, 表示不配置tcu个数.
-- `--is-fpga`指定编译后的kmodel是否运行在fpga上
-- `--dump-ir` 是一个调试选项。当它打开时 ncc 会在工作目录产生一些 `.dot` 文件。你可以使用 `Graphviz` 或 [Graphviz Online](https://dreampuf.github.io/GraphvizOnline) 来查看这些文件。
-- `--dump-asm` 是一个调试选项。当它打开时 ncc 会生成硬件指令文件compile.text.asm
-- `--dump-quant-error`是一个调试选项, 用于dump量化错误信息
-- `--dump-import-op-range`是一个调试选项, 用于dump import之后节点的数据范围,需要同时指定dump-range-dataset
-- `--dump-dir`是一个调试选项, 用于指定dump目录.
-- `--benchmark-only`是一个调试选项, 用于指定编译后的kmodel用于benchmark.
-
-`infer` 命令可以运行你的 kmodel,通常它被用来调试。ncc 会将你模型的输出张量按 `NCHW` 布局保存到 `.bin` 文件。
-
-- ` ` kmodel 的路径。
-- `` ncc 输出目录。
-- `--dataset` 测试集路径。
-- `--dataset-format`和 `--input-layout`同 `compile` 命令中的含义。
+```
\ No newline at end of file
diff --git a/examples/user_guide/README.md b/examples/user_guide/README.md
new file mode 100644
index 0000000000..34aaf7744a
--- /dev/null
+++ b/examples/user_guide/README.md
@@ -0,0 +1,15 @@
+For model compilation and inference, refer to the Jupyter notebook [User_guide](./simulate.ipynb), which contains both single-input and multiple-input examples. A kmodel can also be compiled with the standalone build script [Single build](../../docs/USAGE_ZH.md#编译模型示例).
+
+If you run the Jupyter notebook inside Docker, see [配置Jupyter lab](https://github.com/kunjing96/docker-jupyterlab#32-%E9%85%8D%E7%BD%AEjupyter-lab) for setup instructions.
+
+Before running the notebook, modify the following according to your needs:
+
+1. The `compile_options` and `ptq_options` in the `compile_kmodel` function.
+   See [CompileOptions](../../docs/USAGE_ZH.md#CompileOptions) for details of `compile_options`.
+   See [PTQTensorOptions](../../docs/USAGE_ZH.md#PTQTensorOptions) for details of `ptq_options`.
+2. In the `compile kmodel single input(multiple inputs)` sections,
+   modify `model_path` and `dump_path` to set the model path and the directory for files generated during compilation.
+   Modify the implementation of `calib_data`; the expected data format is described in the comments.
+3. In the `run kmodel(simulate)` sections, modify the implementation of `input_data`; the expected data format is described in the comments.
+
+After inference finishes, the `kmodel`, the inference outputs, and the files generated during compilation are placed under `dump_path`.
\ No newline at end of file
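As a sanity check on the `calib_data` layout described in the README and notebook comments (`[[x1, x2, ...], [y1, y2, ...], ...]`), the sketch below uses plain Python lists as stand-ins for numpy arrays; the shapes are illustrative assumptions. The outer list is indexed by model input and each inner list by calibration sample, so the sample count is `len(calib_data[0])`:

```python
# calib_data layout for a model with three inputs (x, y, z) and two
# calibration samples per input: [[x1, x2], [y1, y2], [z1, z2]]
x1, x2 = [0.0] * 4, [1.0] * 4     # stand-ins for np.ndarray samples of input x
y1, y2 = [0.5] * 8, [0.5] * 8     # samples of input y
z1, z2 = [1], [0]                 # samples of input z
calib_data = [[x1, x2], [y1, y2], [z1, z2]]

num_inputs = len(calib_data)        # one inner list per model input
samples_count = len(calib_data[0])  # samples per input
print(num_inputs, samples_count)
```

For a single-input model the same layout degenerates to `[[x1, x2, ...]]`, matching the single-input cell in the notebook.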
diff --git a/examples/user_guide/k210_simulate.ipynb b/examples/user_guide/simulate.ipynb
similarity index 61%
rename from examples/user_guide/k210_simulate.ipynb
rename to examples/user_guide/simulate.ipynb
index 8269bd2080..6a9a041eaa 100644
--- a/examples/user_guide/k210_simulate.ipynb
+++ b/examples/user_guide/simulate.ipynb
@@ -2,37 +2,13 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"id": "82a8f9c1-c2bf-4270-9f1f-ac25c9fdd898",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Requirement already satisfied: pip in /usr/local/lib/python3.8/site-packages (23.2.1)\n",
- "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
- "\u001b[0mCollecting nncase==1.9.0.20230322\n",
- " Downloading nncase-1.9.0.20230322-cp38-cp38-manylinux_2_24_x86_64.whl (10.0 MB)\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m10.0/10.0 MB\u001b[0m \u001b[31m1.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m0:01\u001b[0m0m\n",
- "\u001b[?25hRequirement already satisfied: numpy in /usr/local/lib/python3.8/site-packages (from nncase==1.9.0.20230322) (1.23.5)\n",
- "Installing collected packages: nncase\n",
- " Attempting uninstall: nncase\n",
- " Found existing installation: nncase 2.1.0.20230703\n",
- " Uninstalling nncase-2.1.0.20230703:\n",
- " Successfully uninstalled nncase-2.1.0.20230703\n",
- "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
- "nncase-kpu 2.1.0.20230703 requires nncase>=2.1, but you have nncase 1.9.0.20230322 which is incompatible.\u001b[0m\u001b[31m\n",
- "\u001b[0mSuccessfully installed nncase-1.9.0.20230322\n",
- "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
- "\u001b[0m"
- ]
- }
- ],
+ "outputs": [],
"source": [
"!pip install --upgrade pip\n",
- "# !pip uninstall -y nncase\n",
- "# !pip install nncase==1.8.0.20220929 --timeout=1000\n",
+ "#!pip uninstall -y nncase\n",
"!pip install nncase==1.9.0.20230322 --timeout=1000\n",
"#from versions: 1.0.0.20211029, 1.1.0.20211203, 1.3.0.20220127, 1.4.0.20220303, 1.5.0.20220331, \n",
"# 1.6.0.20220505, 1.7.0.20220530, 1.7.1.20220701, 1.8.0.20220929, 1.9.0.20230322, 2.0.0.20230602, 2.1.0.20230703)"
@@ -40,26 +16,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
- "id": "44f95a15-936d-46f7-8740-9f7432a3231c",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "env: PATH=$PATH:/usr/local/lib/python3.8/site-packages\n"
- ]
- }
- ],
- "source": [
- "# export nncase lib path into PATH\n",
- "%env PATH=$PATH:/usr/local/lib/python3.8/site-packages"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
"id": "a7eff82e-295c-4cce-afbc-ce64c84dc40a",
"metadata": {},
"outputs": [],
@@ -84,7 +41,7 @@
" \n",
" # compile_options\n",
" compile_options = nncase.CompileOptions()\n",
- " compile_options.target = \"k210\" # \"cpu\"\n",
+ " compile_options.target = \"k210\" # \"cpu\" \"k510\"\n",
" compile_options.dump_ir = True # if False, will not dump the compile-time result.\n",
" compile_options.dump_asm = True\n",
" compile_options.dump_dir = dump_path\n",
@@ -100,6 +57,7 @@
" compile_options.std = [1,1,1]\n",
" compile_options.input_layout = \"NHWC\" # \"NHWC\"\n",
" compile_options.output_layout = \"NHWC\" # \"NHWC\"\n",
+ " compile_options.letterbox_value = 0\n",
" \n",
" # quant args\n",
" compile_options.quant_type = \"uint8\" \n",
@@ -110,10 +68,8 @@
" # quant options\n",
" ptq_options = nncase.PTQTensorOptions()\n",
" ptq_options.calibrate_method = \"no_clip\" # \"kld_m2\" \"l2\" \"cdf\"\n",
- " ptq_options.samples_count = 80\n",
- " # print(len(calib_data[0]))\n",
- " # ptq_options.set_tensor_data(np.array(calib_data).tobytes())\n",
- " ptq_options.set_tensor_data(calib_data[:].tobytes())\n",
+ " ptq_options.samples_count = len(calib_data[0])\n",
+ " ptq_options.set_tensor_data(np.array(calib_data).tobytes())\n",
"\n",
" \n",
" # set options\n",
@@ -146,16 +102,14 @@
"metadata": {},
"outputs": [],
"source": [
- "# compile kmodel\n",
+ "# compile kmodel single input\n",
"model_path = \"./model_f32.tflite\"\n",
- "dump_path = \"./tmp_tflite\"\n",
+ "dump_path = \"./tmp\"\n",
"\n",
"# If model has multi inputs, calib_data format is \"[[x1, x2,...], [y1, y2,...], ...]\"\n",
"# e.g. Model has three inputs (x, y, z), the calib_data is '[[x1, x2, x3],[y1, y2, y3],[z1, z2, z3]]'\n",
"\n",
"calib_data = [[np.random.rand(1,224,320,3).astype(np.float32), np.random.rand(1,224,320,3).astype(np.float32)]]\n",
- "# calib_data = np.load(\"./qual_data.npy\")\n",
- "# print(calib_data.shape)\n",
"kmodel_path = compile_kmodel(model_path, dump_path, calib_data)\n"
]
},
@@ -167,13 +121,56 @@
"outputs": [],
"source": [
"# run kmodel(simulate)\n",
- "kmodel_path = \"./tmp_tflite/test.kmodel\"\n",
- "# input_data = [np.random.rand(1,224,320,3).astype(np.float32)]\n",
- "input_data = [np.load(\"./qual_data.npy\")[:1]]\n",
+ "kmodel_path = \"./tmp/test.kmodel\"\n",
+ "input_data = [np.random.rand(1,224,320,3).astype(np.float32)]\n",
+ "\n",
+ "result = run_kmodel(kmodel_path, input_data)\n",
+ "for idx, i in enumerate(result):\n",
+ " print(i.shape)\n",
+ " i.tofile(os.path.join(dump_path, \"nncase_result_{}.bin\".format(idx)))\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "89280d3a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# compile kmodel multiple inputs\n",
+ "model_path = \"./decoder_100.onnx\"\n",
+ "dump_path = \"./tmp_dec\"\n",
+ "\n",
+ "# If model has multiple inputs, calib_data format is \"[[x1, x2,...], [y1, y2,...], ...]\"\n",
+ "# e.g. Model has three inputs (x, y, z), the calib_data is '[[x1, x2, x3],[y1, y2, y3],[z1, z2, z3]]'\n",
+ "\n",
+ "calib_data = [[np.random.randint(1, 5, size=[3, 100], dtype='int64'), np.random.randint(1, 5, size=[3, 100], dtype='int64')],\n",
+ " [np.random.rand(100, 3, 192).astype(np.float32), np.random.rand(100, 3, 192).astype(np.float32)],\n",
+ " [np.random.rand(3, 100).astype(np.float32) > 0.5, np.random.rand(3, 100).astype(np.float32) > 0.5], ] # bool\n",
+ "\n",
+ "kmodel_path = compile_kmodel(model_path, dump_path, calib_data)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "22a25a7f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# run kmodel(simulate)\n",
+ "import os\n",
+ "\n",
+ "kmodel_path = \"./tmp_dec/test.kmodel\"\n",
+ "input_data = [np.random.randint(1, 5, size=[3, 100], dtype='int64'),\n",
+ " np.random.rand(100, 3, 192).astype(np.float32),\n",
+ " np.random.rand(3, 100).astype(np.float32) > 0.5, ]\n",
"\n",
"result = run_kmodel(kmodel_path, input_data)\n",
+ "\n",
"for idx, i in enumerate(result):\n",
- " print(i.shape)"
+ " print(i.shape)\n",
+ " i.tofile(os.path.join(dump_path, \"nncase_result_{}.bin\".format(idx)))\n"
]
}
],