From 4f3c466b92151253ff707c6c97661aae6baf2001 Mon Sep 17 00:00:00 2001 From: Curio Yang Date: Fri, 28 Jul 2023 09:26:40 +0800 Subject: [PATCH] GNNE-1886 210_docs and script (#1026) * 1. add k210 pipeline docs in USAGE.md 2. add simulate.ipynb for compile and inference * Indicate available nncase versions * format * fix ci --------- Co-authored-by: yanghaoqi --- docs/USAGE_EN.md | 209 +++++------------- docs/USAGE_ZH.md | 256 +++++++---------------- examples/user_guide/README.md | 15 ++ examples/user_guide/nncase_base_func.py | 95 +++++++++ examples/user_guide/simulate.ipynb | 198 ++++++++++++++++++ tests/importer/onnx_/basic/test_slice.py | 1 - 6 files changed, 436 insertions(+), 338 deletions(-) create mode 100644 examples/user_guide/README.md create mode 100644 examples/user_guide/nncase_base_func.py create mode 100644 examples/user_guide/simulate.ipynb diff --git a/docs/USAGE_EN.md b/docs/USAGE_EN.md index 3e634b7bbc..b8af24687a 100644 --- a/docs/USAGE_EN.md +++ b/docs/USAGE_EN.md @@ -2,14 +2,17 @@ # Overview -nncase provides both python wheel package and ncc client to compile your neural models. +nncase provides python wheel package to compile your neural models. The current documentation only works for nncase-v1. The available version are shown below. -- nncase wheel package can be downloaded at [nncase release](https://github.com/kendryte/nncase/releases) -- For ncc client, you should git clone nncase repository and then build it by yourself. +``` +1.0.0.20211029, 1.1.0.20211203, 1.3.0.20220127, 1.4.0.20220303, 1.5.0.20220331, 1.6.0.20220505, 1.7.0.20220530, 1.7.1.20220701, 1.8.0.20220929, 1.9.0.20230322 +``` + +- nncase wheel package can be downloaded at [nncase release](https://github.com/kendryte/nncase/releases). # nncase python APIs -nncase provides Python APIs to compile neural network model and inference on your PC. +nncase provides Python APIs to compile neural network model and inference on x86_64 and amd64 platforms. ## Installation @@ -27,8 +30,6 @@ $ docker pull registry.cn-hangzhou.aliyuncs.com/kendryte/nncase:latest $ docker run -it --rm -v `pwd`:/mnt -w /mnt registry.cn-hangzhou.aliyuncs.com/kendryte/nncase:latest /bin/bash -c "/bin/bash" ``` - - ### cpu/K210 - Download nncase wheel package and then install it. @@ -39,8 +40,6 @@ root@2b11cc15c7f8:/mnt# wget -P x86_64 https://github.com/kendryte/nncase/releas root@2b11cc15c7f8:/mnt# pip3 install x86_64/*.whl ``` - - ### K510 - Download both nncase and nncase_k510 wheel packages and then install them. @@ -53,8 +52,6 @@ root@2b11cc15c7f8:/mnt# wget -P x86_64 https://github.com/kendryte/nncase/releas root@2b11cc15c7f8:/mnt# pip3 install x86_64/*.whl ``` - - ### Check nncase version ```python @@ -67,8 +64,6 @@ Type "help", "copyright", "credits" or "license" for more information. 1.8.0-55be52f ``` - - ## nncase compile model APIs ### CompileOptions @@ -116,13 +111,13 @@ The details of all attributes are following. 
 | quant_type | string | N | Specify the quantization type for input data , such as 'uint8', 'int8', 'int16' |
 | w_quant_type | string | N | Specify the quantization type for weight , such as 'uint8'(by default), 'int8', 'int16' |
 | use_mse_quant_w | bool | N | Specify whether use mean-square error when quantizing weight |
-| split_w_to_act | bool | N | Specify whether split weight into activation |
+| split_w_to_act | bool | N | Specify whether split weight into activation |
 | preprocess | bool | N | Whether enable preprocess, False by default |
 | swapRB | bool | N | Whether swap red and blue channel for RGB data(from RGB to BGR or from BGR to RGB), False by default |
 | mean | list | N | Normalize mean value for preprocess, [0, 0, 0] by default |
 | std | list | N | Normalize std value for preprocess, [1, 1, 1] by default |
 | input_range | list | N | The float range for dequantized input data, [0,1] by default |
-| output_range | list | N | The float range for quantized output data, [ ] by default |
+| output_range | list | N | The float range for quantized output data, [ ] by default |
 | input_shape | list | N | Specify the shape of input data. input_shape should be consistent with input _layout. There will be letterbox operations(Such as resize/pad) if input_shape is not the same as input shape of model. |
 | letterbox_value | float | N | Specify the pad value of letterbox during preprocess. |
 | input_type | string | N | Specify the data type of input data, 'float32' by default. |
@@ -767,10 +762,50 @@ if __name__ == '__main__':
 
 ## Deploy nncase runtime
 
-### K210
+### Inference on the K210 development board
+
+1. Download the [SDK](https://github.com/kendryte/kendryte-standalone-sdk)
+
+   ```shell
+   $ git clone https://github.com/kendryte/kendryte-standalone-sdk.git
+   $ cd kendryte-standalone-sdk
+   $ export KENDRYTE_WORKSPACE=`pwd`
+   ```
+2. Download the cross-compile toolchain and extract it
 
-1. Download `k210-runtime.zip` from [Release](https://github.com/kendryte/nncase/releases) page.
-2. Unzip to your [kendryte-standalone-sdk](https://github.com/kendryte/kendryte-standalone-sdk) 's `lib/nncase/v1` directory.
+   ```shell
+   $ wget https://github.com/kendryte/kendryte-gnu-toolchain/releases/download/v8.2.0-20190409/kendryte-toolchain-ubuntu-amd64-8.2.0-20190409.tar.xz -O $KENDRYTE_WORKSPACE/kendryte-toolchain.tar.xz
+   $ cd $KENDRYTE_WORKSPACE
+   $ mkdir toolchain
+   $ tar -xf kendryte-toolchain.tar.xz -C ./toolchain
+   ```
+3. Update the nncase runtime
+
+   Download `k210-runtime.zip` from the [Release](https://github.com/kendryte/nncase/releases) page and extract it into the [kendryte-standalone-sdk](https://github.com/kendryte/kendryte-standalone-sdk) `lib/nncase/v1` directory.
+4. Compile the App
+
+   ```shell
+   # 1. copy your program into `$KENDRYTE_WORKSPACE/src`
+   # e.g. copy ($NNCASE_WORK_DIR/examples/facedetect_landmark/k210/facedetect_landmark_example) into PATH_TO_SDK/src.
+   $ cp -r $NNCASE_WORK_DIR/examples/facedetect_landmark/k210/facedetect_landmark_example $KENDRYTE_WORKSPACE/src/
+
+   # 2. compile
+   $ cd $KENDRYTE_WORKSPACE
+   $ mkdir build && cd build
+   $ cmake .. -DPROJ=facedetect_landmark_example -DTOOLCHAIN=$KENDRYTE_WORKSPACE/toolchain/kendryte-toolchain/bin && make
+   ```
+
+   `facedetect_landmark_example` and `facedetect_landmark_example.bin` will be generated in the `build` directory.
+5. Write the program to the K210 development board
+
+   ```shell
+   # 1. Check available USB ports
+   $ ls /dev/ttyUSB*
+   # /dev/ttyUSB0 /dev/ttyUSB1
+
+   # 2. 
Write your App by kflash + $ kflash -p /dev/ttyUSB0 -t facedetect_landmark_example.bin + ``` ## nncase inference APIs @@ -1201,143 +1236,3 @@ N/A ```python sim.run() ``` - -# ncc - -## Comannd line - -```shell -DESCRIPTION -NNCASE model compiler and inference tool. - -SYNOPSIS - ncc compile -i -t - [--input-prototxt ] [--output-arrays ] - [--quant-type ] [--w-quant-type ] [--use-mse-quant-w] - [--dataset ] [--dataset-format ] [--calibrate-method ] - [--preprocess] [--swapRB] [--mean ] [--std ] - [--input-range ] [--input-shape ] [--letterbox-value ] - [--input-type ] [--output-type ] - [--input-layout ] [--output-layout ] [--tcu-num ] - [--is-fpga] [--dump-ir] [--dump-asm] [--dump-quant-error] [--dump-import-op-range] [--dump-dir ] - [--dump-range-dataset ] [--dump-range-dataset-format ] [--benchmark-only] - - ncc infer - --dataset [--dataset-format ] - [--input-layout ] - - ncc [-v] - -OPTIONS - compile - - -i, --input-format - input format, e.g. tflite|onnx|caffe - -t, --target target architecture, e.g. cpu|k210|k510 - input file - --input-prototxt - input prototxt - output file - --output-arrays - output arrays - --quant-type - post trainning quantize type, e.g uint8|int8|int16, default is uint8 - --w-quant-type - post trainning weights quantize type, e.g uint8|int8|int16, default is uint8 - --use-mse-quant-w use min mse algorithm to refine weights quantilization or not, default is 0 - --dataset - calibration dataset, used in post quantization - --dataset-format - datset format: e.g. image|raw, default is image - --dump-range-dataset - dump import op range dataset - --dump-range-dataset-format - datset format: e.g. image|raw, default is image - --calibrate-method - calibrate method: e.g. no_clip|l2|kld_m0|kld_m1|kld_m2|cdf, default is no_clip - --preprocess enable preprocess, default is 0 - --swapRB swap red and blue channel, default is 0 - --mean normalize mean, default is 0. 0. 0. - --std normalize std, default is 1. 1. 1. - --input-range - float range after preprocess - --input-shape - shape for input data - --letterbox-value - letter box pad value, default is 0.000000 - --input-type - input type, e.g float32|uint8|default, default is default - --output-type - output type, e.g float32|uint8, default is float32 - --input-layout - input layout, e.g NCHW|NHWC, default is NCHW - --output-layout - output layout, e.g NCHW|NHWC, default is NCHW - --tcu-num tcu number, e.g 1|2|3|4, default is 0 - --is-fpga use fpga parameters, default is 0 - --dump-ir dump ir to .dot, default is 0 - --dump-asm dump assembly, default is 0 - --dump-quant-error dump quant error, default is 0 - --dump-import-op-range dump import op range, default is 0 - --dump-dir - dump to directory - --benchmark-only compile kmodel only for benchmark use, default is 0 - - infer - - kmodel filename - output path - --dataset - dataset path - --dataset-format - dataset format, e.g. image|raw, default is image - --input-layout - input layout, e.g NCHW|NHWC, default is NCHW -``` - -## Description - -`ncc` is the nncase command line tool. It has two commands: `compile` and `infer`. - -`compile` command compile your trained models (`.tflite`, `.caffemodel`, `.onnx`) to `.kmodel`. - -- `-i, --input-format` option is used to specify the input model format. nncase supports `tflite`, `caffe` and `onnx` input model currently. -- `-t, --target` option is used to set your desired target device to run the model. `cpu` is the most general target that almost every platform should support. `k210` is the Kendryte K210 SoC platform. 
If you set this option to `k210`, this model can only run on K210 or be emulated on your PC. -- `` is your input model path. -- `--input-prototxt` is the prototxt file for caffe model. -- `` is the output model path. -- `--output-arrays` is the names of nodes to output. -- `--quant-type` is used to specify quantize type, such as `uint8` by default and `int8` and `int16`. -- `--w-quant-type` is used to specify quantize type for weight, such as `uint8` by default and `int8 `and `int16`. -- `--use-mse-quant-w ` is used to specify whether use minimize mse(mean-square error, mse) algorithm to quantize weight or not. -- `--dataset` is to provide your quantization calibration dataset to quantize your models. You should put hundreds or thousands of data in training set to this directory. -- `--dataset-format` is to set the format of the calibration dataset. Default is `image`, nncase will use `opencv` to read your images and autoscale to the desired input size of your model. If the input has 3 channels, ncc will convert images to RGB float tensors [0,1] in `NCHW` layout. If the input has only 1 channel, ncc will grayscale your images. Set to `raw` if your dataset is not image dataset for example, audio or matrices. In this scenario you should convert your dataset to raw binaries which contains float tensors. -- `--dump-range-dataset` is to provide your dump range dataset to dump each op data range of your models. You should put hundreds or thousands of data in training set to this directory. -- `--dump-range-dataset-format` is to set the format of the dump range dataset. Default is `image`, nncase will use `opencv` to read your images and autoscale to the desired input size of your model. If the input has 3 channels, ncc will convert images to RGB float tensors [0,1] in `NCHW` layout. If the input has only 1 channel, ncc will grayscale your images. Set to `raw` if your dataset is not image dataset for example, audio or matrices. In this scenario you should convert your dataset to raw binaries which contains float tensors. -- `--calibrate-method` is to set your desired calibration method, which is used to select the optimal activation ranges. The default is `no_clip` in that ncc will use the full range of activations. If you want a better quantization result, you can use `l2` but it will take a longer time to find the optimal ranges. -- `--preprocess ` is used specify whether enable preprocessing or not. -- `--swapRB ` is used specify whether swap red and blue channel or not. You can use this flag to implement RGB2BGR or BGR2RGB feature. -- `--mean` is the mean values to be subtracted during preprocessing. -- `--std` is the std values to be divided during preprocessing. -- `--input-range` is the input range in float after dequantization. -- `--input-shape` is used to specify the shape of input data. If the input shape is different from the input shape of your model, the preprocess will add resize/pad ops automatically for the transformation. -- `--letterbox-value` is used to specify the pad values when pad is added during preprocessing. -- `--input-type` is to set your desired input data type when do inference. If `--input-type` is `uint8`, for example you should provide RGB888 uint8 tensors when you do inference. If `--input-type` is `float`, you should provide RGB float tensors instead. -- `--output-type` is the type of output data. -- `--input-layout` is the layout of input data. -- `--output-layout` is the layout of output data. -- `--tcu-num` is used to configure the number of TCU. 
0 means do not configure the number of TCU. -- `--is-fpga` is a debug option. It is used to specify whether the kmodel run on fpga or not. -- `--dump-ir` is a debug option. It is used to specify whether dump IR or not. -- `--dump-asm` is a debug option. It is used to specify whether dump asm file or not. -- `--dump-quant-error` is a debug option. It is used to specify whether dump quantization error information or not. -- `--dump-import-op-range` is a debug option. It is used to specify whether dump imported op data range or not, need to also specify dump-range-dataset if enabled. -- `--dump-dir` is used to specify dump directory. -- `--benchmark-only` is used to specify whether the kmodel is used for benchmark or not. - -`infer` command can run your kmodel, and it's often used as debug purpose. ncc will save the model's output tensors to `.bin` files in `NCHW` layout. - -- `` is your kmodel path. -- `` is the output directory ncc will produce to. -- `--dataset` is the test set directory. -- `--dataset-format` and `--input-layout` have the same meaning as in `compile` command. diff --git a/docs/USAGE_ZH.md b/docs/USAGE_ZH.md index 40cefe67b2..0293f3eb5c 100644 --- a/docs/USAGE_ZH.md +++ b/docs/USAGE_ZH.md @@ -1,13 +1,16 @@ # 概述 -nncase目前提供了python wheel包和ncc客户端两种方法编译模型. +nncase目前提供了python wheel包编译模型。当前文档仅适用于nncase-v1,适用于以下版本号: + +``` +1.0.0.20211029, 1.1.0.20211203, 1.3.0.20220127, 1.4.0.20220303, 1.5.0.20220331, 1.6.0.20220505, 1.7.0.20220530, 1.7.1.20220701, 1.8.0.20220929, 1.9.0.20230322 +``` - nncase wheel包需要去[nncase release](https://github.com/kendryte/nncase/releases)获取 -- ncc客户端需要用户下载并编译nncase # nncase python APIs -nncase提供了Python APIs, 用于在PC上编译/推理深度学习模型. +nncase提供了Python APIs, 用于在x86_64和amd64平台上编译/推理深度学习模型. ## 安装 @@ -25,8 +28,6 @@ $ docker pull registry.cn-hangzhou.aliyuncs.com/kendryte/nncase:latest $ docker run -it --rm -v `pwd`:/mnt -w /mnt registry.cn-hangzhou.aliyuncs.com/kendryte/nncase:latest /bin/bash -c "/bin/bash" ``` - - ### cpu/K210 - 下载nncase wheel包, 直接安装即可. @@ -37,8 +38,6 @@ root@2b11cc15c7f8:/mnt# wget -P x86_64 https://github.com/kendryte/nncase/releas root@2b11cc15c7f8:/mnt# pip3 install x86_64/*.whl ``` - - ### K510 - 分别下载nncase和nncase_k510插件包,再一起安装 @@ -51,8 +50,6 @@ root@2b11cc15c7f8:/mnt# wget -P x86_64 https://github.com/kendryte/nncase/releas root@2b11cc15c7f8:/mnt# pip3 install x86_64/*.whl ``` - - ### 查看版本信息 ```python @@ -65,8 +62,6 @@ Type "help", "copyright", "credits" or "license" for more information. 
1.8.0-55be52f ``` - - ## nncase 编译模型APIs ### CompileOptions @@ -108,32 +103,32 @@ py::class_(m, "CompileOptions") 各属性说明如下 -| 属性名称 | 类型 | 是否必须 | 描述 | -| ---------------- | ------ | -------- | ------------------------------------------------------------ | -| target | string | 是 | 指定编译目标, 如'k210', 'k510' | -| quant_type | string | 否 | 指定数据量化类型, 如'uint8', 'int8', 'int16' | -| w_quant_type | string | 否 | 指定权重量化类型, 如'uint8', 'int8', 'int16', 默认为'uint8' | -| use_mse_quant_w | bool | 否 | 指定权重量化时是否使用最小化均方误差(mean-square error, MSE)算法优化量化参数 | -| split_w_to_act | bool | 否 | 指定是否将权重数据平衡到激活数据中 | -| preprocess | bool | 否 | 是否开启前处理,默认为False | -| swapRB | bool | 否 | 是否交换RGB输入数据的红和蓝两个通道(RGB-->BGR或者BGR-->RGB),默认为False | -| mean | list | 否 | 前处理标准化参数均值,默认为[0, 0, 0] | -| std | list | 否 | 前处理标准化参数方差,默认为[1, 1, 1] | -| input_range | list | 否 | 输入数据反量化后对应浮点数的范围,默认为[0,1] | -| output_range | list | 否 | 输出定点数据前对应浮点数的范围,默认为空,使用模型实际浮点输出范围 | +| 属性名称 | 类型 | 是否必须 | 描述 | +| ---------------- | ------ | -------- | ----------------------------------------------------------------------------------------------------------------------------------------------------- | +| target | string | 是 | 指定编译目标, 如'k210', 'k510' | +| quant_type | string | 否 | 指定数据量化类型, 如'uint8', 'int8', 'int16' | +| w_quant_type | string | 否 | 指定权重量化类型, 如'uint8', 'int8', 'int16', 默认为'uint8' | +| use_mse_quant_w | bool | 否 | 指定权重量化时是否使用最小化均方误差(mean-square error, MSE)算法优化量化参数 | +| split_w_to_act | bool | 否 | 指定是否将权重数据平衡到激活数据中 | +| preprocess | bool | 否 | 是否开启前处理,默认为False | +| swapRB | bool | 否 | 是否交换RGB输入数据的红和蓝两个通道(RGB-->BGR或者BGR-->RGB),默认为False | +| mean | list | 否 | 前处理标准化参数均值,默认为[0, 0, 0] | +| std | list | 否 | 前处理标准化参数方差,默认为[1, 1, 1] | +| input_range | list | 否 | 输入数据反量化后对应浮点数的范围,默认为[0,1] | +| output_range | list | 否 | 输出定点数据前对应浮点数的范围,默认为空,使用模型实际浮点输出范围 | | input_shape | list | 否 | 指定输入数据的shape,input_shape的layout需要与input layout保持一致,输入数据的input_shape与模型的input shape不一致时会进行letterbox操作(resize/pad等) | -| letterbox_value | float | 否 | 指定前处理letterbox的填充值 | -| input_type | string | 否 | 指定输入数据的类型, 默认为'float32' | -| output_type | string | 否 | 指定输出数据的类型, 如'float32', 'uint8'(仅用于指定量化情况下), 默认为'float32' | -| input_layout | string | 否 | 指定输入数据的layout, 如'NCHW', 'NHWC'. 若输入数据layout与模型本身layout不同, nncase会插入transpose进行转换 | -| output_layout | string | 否 | 指定输出数据的layout, 如'NCHW', 'NHWC'. 若输出数据layout与模型本身layout不同, nncase会插入transpose进行转换 | -| model_layout | string | 否 | 指定模型的layout,默认为空,当tflite模型layout为‘NCHW’,Onnx和Caffe模型layout为‘NHWC’时需指定 | -| is_fpga | bool | 否 | 指定kmodel是否用于fpga, 默认为False | -| dump_ir | bool | 否 | 指定是否dump IR, 默认为False | -| dump_asm | bool | 否 | 指定是否dump asm汇编文件, 默认为False | -| dump_quant_error | bool | 否 | 指定是否dump量化前后的模型误差 | -| dump_dir | string | 否 | 前面指定dump_ir等开关后, 这里指定dump的目录, 默认为空字符串 | -| benchmark_only | bool | 否 | 指定kmodel是否只用于benchmark, 默认为False | +| letterbox_value | float | 否 | 指定前处理letterbox的填充值 | +| input_type | string | 否 | 指定输入数据的类型, 默认为'float32' | +| output_type | string | 否 | 指定输出数据的类型, 如'float32', 'uint8'(仅用于指定量化情况下), 默认为'float32' | +| input_layout | string | 否 | 指定输入数据的layout, 如'NCHW', 'NHWC'. 若输入数据layout与模型本身layout不同, nncase会插入transpose进行转换 | +| output_layout | string | 否 | 指定输出数据的layout, 如'NCHW', 'NHWC'. 
若输出数据layout与模型本身layout不同, nncase会插入transpose进行转换 |
+| model_layout     | string | 否       | 指定模型的layout,默认为空,当tflite模型layout为‘NCHW’,Onnx和Caffe模型layout为‘NHWC’时需指定 |
+| is_fpga          | bool   | 否       | 指定kmodel是否用于fpga, 默认为False |
+| dump_ir          | bool   | 否       | 指定是否dump IR, 默认为False |
+| dump_asm         | bool   | 否       | 指定是否dump asm汇编文件, 默认为False |
+| dump_quant_error | bool   | 否       | 指定是否dump量化前后的模型误差 |
+| dump_dir         | string | 否       | 前面指定dump_ir等开关后, 这里指定dump的目录, 默认为空字符串 |
+| benchmark_only   | bool   | 否       | 指定kmodel是否只用于benchmark, 默认为False |
 
 > 1. mean和std为浮点数进行normalize的参数,用户可以自由指定.
 > 2. input range为浮点数的范围,即如果输入数据类型为uint8,则input range为反量化到浮点之后的范围(可以不为0~1),可以自由指定.
@@ -769,10 +764,51 @@ if __name__ == '__main__':
 
 ## 部署 nncase runtime
 
-### K210
+### K210上板推理流程
+
+1. 下载官方[SDK](https://github.com/kendryte/kendryte-standalone-sdk)
+
+   ```shell
+   git clone https://github.com/kendryte/kendryte-standalone-sdk.git
+   cd kendryte-standalone-sdk
+   export KENDRYTE_WORKSPACE=`pwd`
+   ```
+2. 下载交叉编译工具链,并解压
 
-1. 从 [Release](https://github.com/kendryte/nncase/releases) 页面下载 `k210-runtime.zip`。
-2. 解压到 [kendryte-standalone-sdk](https://github.com/kendryte/kendryte-standalone-sdk) 's `lib/nncase/v1` 目录。
+   ```shell
+   wget https://github.com/kendryte/kendryte-gnu-toolchain/releases/download/v8.2.0-20190409/kendryte-toolchain-ubuntu-amd64-8.2.0-20190409.tar.xz -O $KENDRYTE_WORKSPACE/kendryte-toolchain.tar.xz
+   cd $KENDRYTE_WORKSPACE
+   mkdir toolchain
+   tar -xf kendryte-toolchain.tar.xz -C ./toolchain
+   ```
+3. 更新runtime
+
+   从 [Release](https://github.com/kendryte/nncase/releases) 页面下载 `k210-runtime.zip`,解压到 [kendryte-standalone-sdk](https://github.com/kendryte/kendryte-standalone-sdk) 的 `lib/nncase/v1` 目录。
+4. 编译App
+
+   ````shell
+   # 1.将自己的App工程放在`$KENDRYTE_WORKSPACE/src`目录下
+   # 例如,将[example的示例程序]($NNCASE_WORK_DIR/examples/facedetect_landmark/k210/facedetect_landmark_example)目录,拷贝到SDK的src目录下。
+   cp -r $NNCASE_WORK_DIR/examples/facedetect_landmark/k210/facedetect_landmark_example $KENDRYTE_WORKSPACE/src/
+
+   # 2.cmake 编译App
+   cd $KENDRYTE_WORKSPACE
+   mkdir build && cd build
+   cmake .. -DPROJ=facedetect_landmark_example -DTOOLCHAIN=$KENDRYTE_WORKSPACE/toolchain/kendryte-toolchain/bin && make
+   ````
+
+   之后会在 `build` 目录下生成 `facedetect_landmark_example`和 `facedetect_landmark_example.bin`。
+5. 烧写App
+
+   ```shell
+   # 1. 检查可用的USB端口
+   ls /dev/ttyUSB*
+   # > /dev/ttyUSB0 /dev/ttyUSB1
+   # 2. 使用kflash进行烧录
+   kflash -p /dev/ttyUSB0 -t facedetect_landmark_example.bin
+   ```
+
+   烧写过程缓慢,需要耐心等待。
 
 ## nncase 推理模型APIs
 
@@ -1206,144 +1242,4 @@ N/A
 
 ```python
 sim.run()
-```
-
-# ncc
-
-## 命令行
-
-```shell
-DESCRIPTION
-NNCASE model compiler and inference tool.
-
-SYNOPSIS
-    ncc compile -i -t
-        [--input-prototxt ] [--output-arrays ]
-        [--quant-type ] [--w-quant-type ] [--use-mse-quant-w]
-        [--dataset ] [--dataset-format ] [--calibrate-method ]
-        [--preprocess] [--swapRB] [--mean ] [--std ]
-        [--input-range ] [--input-shape ] [--letterbox-value ]
-        [--input-type ] [--output-type ]
-        [--input-layout ] [--output-layout ] [--tcu-num ]
-        [--is-fpga] [--dump-ir] [--dump-asm] [--dump-quant-error] [--dump-import-op-range] [--dump-dir ]
-        [--dump-range-dataset ] [--dump-range-dataset-format ] [--benchmark-only]
-
-    ncc infer
-        --dataset [--dataset-format ]
-        [--input-layout ]
-
-    ncc [-v]
-
-OPTIONS
-  compile
-
-    -i, --input-format
-            input format, e.g. tflite|onnx|caffe
-    -t, --target target architecture, e.g. 
cpu|k210|k510 - input file - --input-prototxt - input prototxt - output file - --output-arrays - output arrays - --quant-type - post trainning quantize type, e.g uint8|int8|int16, default is uint8 - --w-quant-type - post trainning weights quantize type, e.g uint8|int8|int16, default is uint8 - --use-mse-quant-w use min mse algorithm to refine weights quantilization or not, default is 0 - --dataset - calibration dataset, used in post quantization - --dataset-format - datset format: e.g. image|raw, default is image - --dump-range-dataset - dump import op range dataset - --dump-range-dataset-format - datset format: e.g. image|raw, default is image - --calibrate-method - calibrate method: e.g. no_clip|l2|kld_m0|kld_m1|kld_m2|cdf, default is no_clip - --preprocess enable preprocess, default is 0 - --swapRB swap red and blue channel, default is 0 - --mean normalize mean, default is 0. 0. 0. - --std normalize std, default is 1. 1. 1. - --input-range - float range after preprocess - --input-shape - shape for input data - --letterbox-value - letter box pad value, default is 0.000000 - --input-type - input type, e.g float32|uint8|default, default is default - --output-type - output type, e.g float32|uint8, default is float32 - --input-layout - input layout, e.g NCHW|NHWC, default is NCHW - --output-layout - output layout, e.g NCHW|NHWC, default is NCHW - --tcu-num tcu number, e.g 1|2|3|4, default is 0 - --is-fpga use fpga parameters, default is 0 - --dump-ir dump ir to .dot, default is 0 - --dump-asm dump assembly, default is 0 - --dump-quant-error dump quant error, default is 0 - --dump-import-op-range dump import op range, default is 0 - --dump-dir - dump to directory - --benchmark-only compile kmodel only for benchmark use, default is 0 - - infer - - kmodel filename - output path - --dataset - dataset path - --dataset-format - dataset format, e.g. image|raw, default is image - --input-layout - input layout, e.g NCHW|NHWC, default is NCHW -``` - -## 描述 - -`ncc` 是 nncase 的命令行工具。它有两个命令: `compile` 和 `infer`。 - -`compile` 命令将你训练好的模型 (`.tflite`, `.caffemodel`, `.onnx`) 编译到 `.kmodel`。 - -- `-i, --input-format` 用来指定输入模型的格式。nncase 现在支持 `tflite`、`caffe` 和 `onnx` 输入格式。 -- `-t, --target` 用来指定你想要你的模型在哪种目标设备上运行。`cpu` 几乎所有平台都支持的通用目标。`k210` 是 Kendryte K210 SoC 平台。如果你指定了 `k210`,这个模型就只能在 K210 运行或在你的 PC 上模拟运行。 -- `` 用于指定输入模型文件 -- `--input-prototxt`用于指定caffe模型的prototxt文件 -- `` 用于指定输出模型文件 -- `--output-arrays `用于指定输出结点的名称 -- `--quant-type` 用于指定数据的量化类型, 如 `uint8`/`int8`/`int16, 默认是`uint8 -- `--w-quant-type` 用于指定权重的量化类型, 如 `uint8`/`int8`/`int16, 默认是`uint8 -- `--use-mse-quant-w`指定是否使用最小化mse(mean-square error, 均方误差)算法来量化权重. 
-- `--dataset` 用于提供量化校准集来量化你的模型。你需要从训练集中选择几百到上千个数据放到这个目录里。 -- `--dataset-format` 用于指定量化校准集的格式。默认是 `image`,nncase 将使用 `opencv` 读取你的图片,并自动缩放到你的模型输入需要的尺寸。如果你的输入有 3 个通道,ncc 会将你的图片转换为值域是 [0,1] 布局是 `NCHW` 的张量。如果你的输入只有 1 个通道,ncc 会灰度化你的图片。如果你的数据集不是图片(例如音频或者矩阵),把它设置为 `raw`。这种场景下你需要把你的数据集转换为 float 张量的二进制文件。 -- `--dump-range-dataset` 用于提供统计范围数据集来统计原始模型每个节点输出数据范围。你需要从训练集中选择几百到上千个数据放到这个目录里。 -- `--dump-range-dataset-format` 用于指定统计范围数据集的格式。默认是 `image`,nncase 将使用 `opencv` 读取你的图片,并自动缩放到你的模型输入需要的尺寸。如果你的输入有 3 个通道,ncc 会将你的图片转换为值域是 [0,1] 布局是 `NCHW` 的张量。如果你的输入只有 1 个通道,ncc 会灰度化你的图片。如果你的数据集不是图片(例如音频或者矩阵),把它设置为 `raw`。这种场景下你需要把你的数据集转换为 float 张量的二进制文件。 -- `--calibrate-method` 用于设置量化校准方法,它被用来选择最优的激活函数值域。默认值是 `no_clip`,ncc 会使用整个激活函数值域。如果你需要更好的量化结果,你可以使用 `l2`,但它需要花更长的时间寻找最优值域。 -- `--preprocess`指定是否预处理, 添加后表示开启预处理 -- `--swapRB`指定**预处理时**是否交换红和蓝两个通道数据, 用于实现RGB2BGR或BGR2RGB功能 -- `--mean`指定**预处理时**标准化参数均值,例如添加 `--mean "0.1 2.3 33.1f"`用于设置三个通道的均值. -- `--std`指定**预处理时**标准化参数方差,例如添加 `--std "1. 2. 3."`用于设置三个通道的方差. -- `--input-range`指定输入数据反量化后的数据范围,例如添加 `--input-range "0.1 2."`设置反量化的范围为 `[0.1~2]`. -- `--input-shape`指定输入数据的形状. 若与模型的输入形状不同, 则预处理时会做resize/pad等处理, 例如添加 `--input-shape "1 1 28 28"`指明当前输入图像尺寸. -- `--letterbox-value`用于指定预处理时pad填充的值. -- `--input-type` 用于指定推理时输入的数据类型。如果 `--input-type` 是 `uint8`,推理时你需要提供 RGB888 uint8 张量。如果 `--input-type` 是 `float`,你则需要提供 RGB float 张量. -- `--output-type` 用于指定推理时输出的数据类型。如 `float`/`uint8`, `uint8`仅在量化模型时才有效. 默认是 `float` -- `--input-layout`用于指定输入数据的layout. 若输入数据的layout与模型的layout不同, 预处理会添加transpose进行转换. -- `--output-layout`用于指定输出数据的layout -- `--tcu-num`用于指定tcu个数, 默认值为0, 表示不配置tcu个数. -- `--is-fpga`指定编译后的kmodel是否运行在fpga上 -- `--dump-ir` 是一个调试选项。当它打开时 ncc 会在工作目录产生一些 `.dot` 文件。你可以使用 `Graphviz` 或 [Graphviz Online](https://dreampuf.github.io/GraphvizOnline) 来查看这些文件。 -- `--dump-asm` 是一个调试选项。当它打开时 ncc 会生成硬件指令文件compile.text.asm -- `--dump-quant-error`是一个调试选项, 用于dump量化错误信息 -- `--dump-import-op-range`是一个调试选项, 用于dump import之后节点的数据范围,需要同时指定dump-range-dataset -- `--dump-dir`是一个调试选项, 用于指定dump目录. -- `--benchmark-only`是一个调试选项, 用于指定编译后的kmodel用于benchmark. - -`infer` 命令可以运行你的 kmodel,通常它被用来调试。ncc 会将你模型的输出张量按 `NCHW` 布局保存到 `.bin` 文件。 - -- `` kmodel 的路径。 -- `` ncc 输出目录。 -- `--dataset` 测试集路径。 -- `--dataset-format`和 `--input-layout`同 `compile` 命令中的含义。 +``` \ No newline at end of file diff --git a/examples/user_guide/README.md b/examples/user_guide/README.md new file mode 100644 index 0000000000..34aaf7744a --- /dev/null +++ b/examples/user_guide/README.md @@ -0,0 +1,15 @@ +模型编译推理参考Jupyter脚本:[User_guide](./simulate.ipynb),脚本中包含了单输入和多输入的示例。也可以使用单独的编译脚本 [Single build](../../docs/USAGE_ZH.md#编译模型示例)完成kmodel的编译。 + +如果在Docker中运行Jupyter脚本,可以参考[配置Jupyter lab](https://github.com/kunjing96/docker-jupyterlab#32-%E9%85%8D%E7%BD%AEjupyter-lab)进行配置。 + +在执行脚本之前需要根据自身需求修改以下内容: + +1. `compile_kmodel`函数中 `compile_options`,`ptq_options`相关信息 + `compile_options`详细信息见[CompileOptions](../../docs/USAGE_ZH.md#CompileOptions) + `ptq_options`详细信息见[PTQTensorOptions](../../docs/USAGE_ZH.md#PTQTensorOptions) +2. `compile kmodel single input(multiple inputs)`部分 + 修改 `model_path`和 `dump_path`,用于指定模型路径和编译期间文件生成路径。 + 修改 `calib_data`的实现,数据格式见注释。 +3. 
`run kmodel(simulate)`部分,修改 `input_data`的实现,数据格式见注释。 + +推理结束后,会在 `dump_path`路径下生成 `kmodel`、输出结果和编译期间的文件。 \ No newline at end of file diff --git a/examples/user_guide/nncase_base_func.py b/examples/user_guide/nncase_base_func.py new file mode 100644 index 0000000000..a624e9c0a7 --- /dev/null +++ b/examples/user_guide/nncase_base_func.py @@ -0,0 +1,95 @@ +import os + +import numpy as np +import onnx +import onnxsim +from sklearn.metrics.pairwise import cosine_similarity + +import nncase + + +def get_cosine(vec1, vec2): + """ + result compare + """ + return cosine_similarity(vec1.reshape(1, -1), vec2.reshape(1, -1)) + + + +def read_model_file(model_file): + """ + read model + """ + with open(model_file, 'rb') as f: + model_content = f.read() + return model_content + + +def parse_model_input_output(model_file): + """ + parse onnx model + """ + onnx_model = onnx.load(model_file) + input_all = [node.name for node in onnx_model.graph.input] + input_initializer = [node.name for node in onnx_model.graph.initializer] + input_names = list(set(input_all) - set(input_initializer)) + input_tensors = [ + node for node in onnx_model.graph.input if node.name in input_names] + + # input + inputs = [] + for _, e in enumerate(input_tensors): + onnx_type = e.type.tensor_type + input_dict = {} + input_dict['name'] = e.name + input_dict['dtype'] = onnx.mapping.TENSOR_TYPE_TO_NP_TYPE[onnx_type.elem_type] + input_dict['shape'] = [i.dim_value for i in onnx_type.shape.dim] + inputs.append(input_dict) + + return onnx_model, inputs + +def model_simplify(model_file): + """ + simplify onnx model + """ + if model_file.split('.')[-1] == "onnx": + onnx_model, inputs = parse_model_input_output(model_file) + onnx_model = onnx.shape_inference.infer_shapes(onnx_model) + input_shapes = {} + for input in inputs: + input_shapes[input['name']] = input['shape'] + + onnx_model, check = onnxsim.simplify(onnx_model, overwrite_input_shapes=input_shapes) + assert check, "Simplified ONNX model could not be validated" + + model_file = os.path.join(os.path.dirname(model_file), 'simplified.onnx') + onnx.save_model(onnx_model, model_file) + print("[ onnx done ]") + elif model_file.split('.')[-1] == "tflite": + print("[ tflite pass ]") + else: + raise Exception(f"Unsupport type {model_file.split('.')[-1]}") + + return model_file + +def run_kmodel(kmodel_path, input_data): + print("\n---------start run kmodel---------") + print("Load kmodel...") + model_sim = nncase.Simulator() + with open(kmodel_path, 'rb') as f: + model_sim.load_model(f.read()) + + print("Set input data...") + for i, p_d in enumerate(input_data): + model_sim.set_input_tensor(i, nncase.RuntimeTensor.from_numpy(p_d)) + + print("Run...") + model_sim.run() + + print("Get output result...") + all_result = [] + for i in range(model_sim.outputs_size): + result = model_sim.get_output_tensor(i).to_numpy() + all_result.append(result) + print("----------------end-----------------") + return all_result \ No newline at end of file diff --git a/examples/user_guide/simulate.ipynb b/examples/user_guide/simulate.ipynb new file mode 100644 index 0000000000..6a9a041eaa --- /dev/null +++ b/examples/user_guide/simulate.ipynb @@ -0,0 +1,198 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "82a8f9c1-c2bf-4270-9f1f-ac25c9fdd898", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install --upgrade pip\n", + "#!pip uninstall -y nncase\n", + "!pip install nncase==1.9.0.20230322 --timeout=1000\n", + "#from versions: 1.0.0.20211029, 1.1.0.20211203, 1.3.0.20220127, 
1.4.0.20220303, 1.5.0.20220331, \n", + "# 1.6.0.20220505, 1.7.0.20220530, 1.7.1.20220701, 1.8.0.20220929, 1.9.0.20230322, 2.0.0.20230602, 2.1.0.20230703)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a7eff82e-295c-4cce-afbc-ce64c84dc40a", + "metadata": {}, + "outputs": [], + "source": [ + "import nncase\n", + "from nncase_base_func import *\n", + "# from parse_model import *\n", + "\n", + "\n", + "def compile_kmodel(model_path, dump_path, calib_data):\n", + " \"\"\"\n", + " Set compile options and ptq options.\n", + " Compile kmodel.\n", + " Dump the compile-time result to 'compile_options.dump_dir'\n", + " \"\"\"\n", + " print(\"----------model simplify----------\")\n", + " model_file = model_simplify(model_path)\n", + "\n", + " print(\"---------- set options ----------\")\n", + " # import_options\n", + " import_options = nncase.ImportOptions()\n", + " \n", + " # compile_options\n", + " compile_options = nncase.CompileOptions()\n", + " compile_options.target = \"k210\" # \"cpu\" \"k510\"\n", + " compile_options.dump_ir = True # if False, will not dump the compile-time result.\n", + " compile_options.dump_asm = True\n", + " compile_options.dump_dir = dump_path\n", + "\n", + " # preprocess args\n", + " compile_options.preprocess = True\n", + " if compile_options.preprocess:\n", + " compile_options.input_type = \"uint8\" # \"uint8\"\n", + " compile_options.swapRB = False\n", + " compile_options.input_shape = [1,224,320,3]\n", + " compile_options.input_range = [0,1]\n", + " compile_options.mean = [0,0,0]\n", + " compile_options.std = [1,1,1]\n", + " compile_options.input_layout = \"NHWC\" # \"NHWC\"\n", + " compile_options.output_layout = \"NHWC\" # \"NHWC\"\n", + " compile_options.letterbox_value = 0\n", + " \n", + " # quant args\n", + " compile_options.quant_type = \"uint8\" \n", + " compile_options.w_quant_type = \"uint8\"\n", + " compile_options.use_mse_quant_w = True\n", + " compile_options.split_w_to_act = False\n", + "\n", + " # quant options\n", + " ptq_options = nncase.PTQTensorOptions()\n", + " ptq_options.calibrate_method = \"no_clip\" # \"kld_m2\" \"l2\" \"cdf\"\n", + " ptq_options.samples_count = len(calib_data[0])\n", + " ptq_options.set_tensor_data(np.array(calib_data).tobytes())\n", + "\n", + " \n", + " # set options\n", + " compiler = nncase.Compiler(compile_options)\n", + " compiler.use_ptq(ptq_options)\n", + " \n", + " print(\"---------- compile ----------\")\n", + " # import\n", + " model_content = read_model_file(model_file)\n", + " if model_path.split(\".\")[-1] == \"onnx\":\n", + " compiler.import_onnx(model_content, import_options)\n", + " elif model_path.split(\".\")[-1] == \"tflite\":\n", + " compiler.import_tflite(model_content, import_options)\n", + "\n", + " # compile\n", + " compiler.compile()\n", + " kmodel = compiler.gencode_tobytes()\n", + " \n", + " kmodel_path = os.path.join(dump_path, \"test.kmodel\")\n", + " with open(kmodel_path, 'wb') as f:\n", + " f.write(kmodel)\n", + " print(\"---------- compile end ----------\")\n", + " return kmodel_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c957fe20-99c9-4a54-bae8-38361a8f8830", + "metadata": {}, + "outputs": [], + "source": [ + "# compile kmodel single input\n", + "model_path = \"./model_f32.tflite\"\n", + "dump_path = \"./tmp\"\n", + "\n", + "# If model has multi inputs, calib_data format is \"[[x1, x2,...], [y1, y2,...], ...]\"\n", + "# e.g. 
Model has three inputs (x, y, z), the calib_data is '[[x1, x2, x3],[y1, y2, y3],[z1, z2, z3]]'\n", + "\n", + "calib_data = [[np.random.rand(1,224,320,3).astype(np.float32), np.random.rand(1,224,320,3).astype(np.float32)]]\n", + "kmodel_path = compile_kmodel(model_path, dump_path, calib_data)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7f617edc-781c-4b8b-b45d-fef2f0b36a46", + "metadata": {}, + "outputs": [], + "source": [ + "# run kmodel(simulate)\n", + "kmodel_path = \"./tmp/test.kmodel\"\n", + "input_data = [np.random.rand(1,224,320,3).astype(np.float32)]\n", + "\n", + "result = run_kmodel(kmodel_path, input_data)\n", + "for idx, i in enumerate(result):\n", + " print(i.shape)\n", + " i.tofile(os.path.join(dump_path, \"nncase_result_{}.bin\".format(idx)))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "89280d3a", + "metadata": {}, + "outputs": [], + "source": [ + "# compile kmodel multiple inputs\n", + "model_path = \"./decoder_100.onnx\"\n", + "dump_path = \"./tmp_dec\"\n", + "\n", + "# If model has multiple inputs, calib_data format is \"[[x1, x2,...], [y1, y2,...], ...]\"\n", + "# e.g. Model has three inputs (x, y, z), the calib_data is '[[x1, x2, x3],[y1, y2, y3],[z1, z2, z3]]'\n", + "\n", + "calib_data = [[np.random.randint(1, 5, size=[3, 100], dtype='int64'), np.random.randint(1, 5, size=[3, 100], dtype='int64')],\n", + " [np.random.rand(100, 3, 192).astype(np.float32), np.random.rand(100, 3, 192).astype(np.float32)],\n", + " [np.random.rand(3, 100).astype(np.float32) > 0.5, np.random.rand(3, 100).astype(np.float32) > 0.5], ] # bool\n", + "\n", + "kmodel_path = compile_kmodel(model_path, dump_path, calib_data)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "22a25a7f", + "metadata": {}, + "outputs": [], + "source": [ + "# run kmodel(simulate)\n", + "import os\n", + "\n", + "kmodel_path = \"./tmp_dec/test.kmodel\"\n", + "input_data = [np.random.randint(1, 5, size=[3, 100], dtype='int64'),\n", + " np.random.rand(100, 3, 192).astype(np.float32),\n", + " np.random.rand(3, 100).astype(np.float32) > 0.5, ]\n", + "\n", + "result = run_kmodel(kmodel_path, input_data)\n", + "\n", + "for idx, i in enumerate(result):\n", + " print(i.shape)\n", + " i.tofile(os.path.join(dump_path, \"nncase_result_{}.bin\".format(idx)))\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tests/importer/onnx_/basic/test_slice.py b/tests/importer/onnx_/basic/test_slice.py index dc52e4fc6c..45c86684c3 100644 --- a/tests/importer/onnx_/basic/test_slice.py +++ b/tests/importer/onnx_/basic/test_slice.py @@ -13,7 +13,6 @@ # limitations under the License. # pylint: disable=invalid-name, unused-argument, import-outside-toplevel -from attr import attributes import pytest import onnx from onnx import helper