【爱芯派 Pro 开发板试用体验】人体姿态估计模型部署后期尝试




git clone https://github.com/fire717/movenet.pytorch.git


后面发现movenet有谷歌官方开源的模型,在 movenet | Kaggle 页面上可以下载tflite格式的模型,并且已经量化为8bit。笔者兴冲冲地准备直接转为onnx格式,再把onnx模型转为axmodel模型,结果发现一个问题:tflite模型无法直接转化为onnx模型,存在一定麻烦。最明显的是数据布局问题——TFLite 模型是 NHWC 格式,而 ONNX 是 NCHW 格式,因此内部算子和张量的布局都不一样。目前最常用的方式是用tflite2onnx这个库进行转换,但是模型中有这个库目前还不支持的算子:
NotImplementedError: Unsupported TFLite OP: 53 CAST!


2、openpose/lightweight openpose


git clone https://github.com/Hzzone/pytorch-openpose.git

后面笔者又尝试了lightweight openpose的模型,发现同样有很好用的开源模型,并在此基础上进行尝试:

git clone https://github.com/Daniil-Osokin/lightweight-human-pose-estimation.pytorch.git

简单介绍一下openpose和lightweight openpose模型的差别:

lightweight openpose模型主要是把openpose的backbone网络由VGG换成了轻量级CNN网络mobilenet,此外还将openpose的两个branch合并成一个branch,并且用带空洞卷积的block结构代替7*7卷积。lightweight openpose能大幅度减少模型参数,并且达到几乎一样的精度。

对于lightweight openpose模型,在生成onnx时遇到的第一个问题就是模型输入的尺寸。这个模型可以接受不同的输入大小,也就是支持动态输入:若输入1_3_H_W的图片,会用到两个输出,一个是1_38_H/8_W/8的pafs图,一个是1_19_H/8_W/8的heatmaps图(例如输入为256*456时,输出的空间尺寸为32*57)。我们知道onnx既支持动态输入也支持静态输入,笔者最开始也是使用如下代码导出动态输入的模型,然后试着转化为axmodel模型:

from models.with_mobilenet import PoseEstimationWithMobileNet
from modules.load_state import load_state
import torch
import argparse

def convert_to_onnx(net, output_name):
    """Export ``net`` to an ONNX file whose spatial dimensions are dynamic.

    Args:
        net: The network to export.
        output_name: Path of the ONNX file to write.
    """
    # Dummy input used for tracing; its height/width are declared dynamic below.
    dummy_input = torch.randn(1, 3, 256, 456)

    input_names = ['data']
    output_names = ['stage_0_output_1_heatmaps', 'stage_0_output_0_pafs',
                    'stage_1_output_1_heatmaps', 'stage_1_output_0_pafs']

    # Axes 2 and 3 (height and width) are dynamic on the input and on every output.
    dynamic_axes = {'data': {2: 'height', 3: 'width'}}
    for name in output_names:
        dynamic_axes[name] = {2: 'height_out', 3: 'width_out'}

    torch.onnx.export(net, dummy_input, output_name, verbose=True,
                      input_names=input_names, output_names=output_names,
                      dynamic_axes=dynamic_axes)

if __name__ == '__main__':
    # Command-line entry point: load a checkpoint and export the network to ONNX.
    parser = argparse.ArgumentParser()
    parser.add_argument('--checkpoint-path', type=str, required=True, help='path to the checkpoint')
    parser.add_argument('--output-name', type=str, default='human-pose-estimation.onnx',
                        help='name of output model in ONNX format')
    args = parser.parse_args()

    net = PoseEstimationWithMobileNet()
    # map_location='cpu' lets a checkpoint that was saved on a GPU be loaded
    # on a machine without CUDA; the export itself runs on a CPU tensor.
    checkpoint = torch.load(args.checkpoint_path, map_location='cpu')
    load_state(net, checkpoint)

    convert_to_onnx(net, args.output_name)


2024-01-20 21:52:00.043 | WARNING  | yamain.command.build:fill_default:320 - ignore data csc config because of src_format is AutoColorSpace or src_format and tensor_format are the same
Traceback (most recent call last):
  File "<frozen yamain.common.error>", line 11, in wrapper
  File "<frozen yamain.command.build>", line 631, in optimize_onnx
  File "<frozen yamain.command.load_model>", line 633, in optimize_onnx_model
  File "<frozen frontend.parsers.onnx_parser>", line 71, in parse_onnx_model
  File "<frozen frontend.parsers.onnx_parser>", line 122, in parse_onnx_model_proto
  File "<frozen frontend.parser_utils>", line 34, in parse_value_info
  File "<frozen frontend.parser_utils>", line 28, in check_value_info
AssertionError: illegal value_info data: [1, 3, 'height', 'width']



def infer_fast(net, img, net_input_size, stride, upsample_ratio, cpu,
               pad_value=(0, 0, 0), img_mean=np.array([128, 128, 128], np.float32), img_scale=np.float32(1/256)):
    """Preprocess ``img`` into a fixed-size network input tensor.

    Only the preprocessing part of the function is visible in this excerpt;
    ``net``, ``stride``, ``upsample_ratio`` and ``cpu`` are not used in the
    lines shown and no return statement is visible.

    Args:
        img: Image array whose shape unpacks as (height, width, channels).
        net_input_size: Indexable pair; element 0 is compared against the
            image height and element 1 against the image width.
        pad_value: Border value forwarded to ``pad_width``.
        img_mean: Forwarded to ``normalize``.
        img_scale: Forwarded to ``normalize``.
    """
    height, width, _ = img.shape
    # One scale factor for both axes: the smaller of the two ratios, so the
    # resized image does not exceed net_input_size in either dimension.
    scale = min(net_input_size[0]/height,net_input_size[1]/width)

    scaled_img = cv2.resize(img, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_LINEAR)
    # NOTE(review): presumably (img - img_mean) * img_scale -- confirm against normalize().
    scaled_img = normalize(scaled_img, img_mean, img_scale)
    min_dims = [net_input_size[0], net_input_size[1]]
    # pad_width receives net_input_size as the target dimensions.
    padded_img, pad = pad_width(scaled_img, pad_value, min_dims)

    # HWC -> CHW, add a leading batch axis, convert to float.
    tensor_img = torch.from_numpy(padded_img).permute(2, 0, 1).unsqueeze(0).float()


def pad_width(img, pad_value, min_dims):
    """Pad ``img`` with a constant border so that it reaches ``min_dims``.

    The missing rows/columns are split between the two sides; when the
    difference is odd the extra pixel goes to the bottom/right.

    Args:
        img: Image array whose shape unpacks as (height, width, channels).
        pad_value: Constant border value handed to ``cv2.copyMakeBorder``.
        min_dims: Indexable pair (target height, target width).

    Returns:
        Tuple ``(padded_img, pad)`` where ``pad`` is
        ``[top, left, bottom, right]``.
    """
    rows, cols, _channels = img.shape

    top = int(math.floor((min_dims[0] - rows) / 2.0))
    left = int(math.floor((min_dims[1] - cols) / 2.0))
    # Whatever is left after the top/left share goes to the opposite side.
    bottom = int(min_dims[0] - rows - top)
    right = int(min_dims[1] - cols - left)

    pad = [top, left, bottom, right]
    padded_img = cv2.copyMakeBorder(
        img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=pad_value
    )
    return padded_img, pad



import argparse

import torch

from models.with_mobilenet import PoseEstimationWithMobileNet
from modules.load_state import load_state

def convert_to_onnx(net, output_name):
    """Trace ``net`` with a fixed 1x3x256x456 input and save it as ONNX.

    Args:
        net: The network to export.
        output_name: Path of the ONNX file to write.
    """
    # Static-shape dummy input: no dynamic axes are declared for this export.
    dummy = torch.randn(1, 3, 256, 456)
    export_options = {
        'verbose': True,
        'input_names': ['data'],
        'output_names': [
            'stage_0_output_1_heatmaps',
            'stage_0_output_0_pafs',
            'stage_1_output_1_heatmaps',
            'stage_1_output_0_pafs',
        ],
    }
    torch.onnx.export(net, dummy, output_name, **export_options)

if __name__ == '__main__':
    # Command-line entry point: load a checkpoint and export the network to ONNX.
    parser = argparse.ArgumentParser()
    parser.add_argument('--checkpoint-path', type=str, required=True, help='path to the checkpoint')
    parser.add_argument('--output-name', type=str, default='human-pose-estimation.onnx',
                        help='name of output model in ONNX format')
    args = parser.parse_args()

    net = PoseEstimationWithMobileNet()
    # map_location='cpu' lets a checkpoint that was saved on a GPU be loaded
    # on a machine without CUDA; the export itself runs on a CPU tensor.
    checkpoint = torch.load(args.checkpoint_path, map_location='cpu')
    load_state(net, checkpoint)

    convert_to_onnx(net, args.output_name)

导出来的模型后面就需要用pulsar2的工具进行转换了,但是没有现成的校准数据集,因此只能从openpose训练时使用的coco数据集中选择图片作为校准数据集。先从COCO - Common Objects in Context (cocodataset.org)下载coco数据集,然后根据annotation选择类别为person的照片,我这里选择了30张身躯比较完整的人物图片,有单人的也有多人的。为了与推理时的输入保持一致,先把这些图片按照上述预处理方式进行了处理,然后打包成了calibration_data.tar:



  "model_type": "ONNX",
  "npu_mode": "NPU1",
  "quant": {
    "input_configs": [
        "tensor_name": "data",
        "calibration_dataset": "./dataset/calibration_data.tar",
        "calibration_size": 30,
        "calibration_mean": [128,128,128],
        "calibration_std": [256,256,256]
    "calibration_method": "MinMax",
    "precision_analysis": true,
  "input_processors": [
      "tensor_name": "data",
      "tensor_format": "BGR",
      "src_format": "BGR",
      "src_dtype": "U8",
  "output_processors": [
      "tensor_name": "stage_0_output_1_heatmaps",
      "tensor_name": "stage_0_output_0_pafs",
      "tensor_name": "stage_1_output_1_heatmaps",
      "tensor_name": "stage_1_output_0_pafs",
  "compiler": {
    "check": 0


pulsar2 build --input model/human-pose-estimation.onnx --output_dir output --config config/lightweight_openpose_config.json


2024-01-21 00:23:17.469 | WARNING  | yamain.command.build:fill_default:320 - ignore data csc config because of src_format is AutoColorSpace or src_format and tensor_format are the same
Building onnx ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% 0:00:00
2024-01-21 00:23:19.247 | INFO     | yamain.command.build:build:444 - save optimized onnx to [output/frontend/optimized.onnx]
2024-01-21 00:23:19.254 | INFO     | yamain.common.util:extract_archive:21 - extract [dataset/calibration_data.tar] to [output/quant/dataset/data]...
                                                   Quant Config Table
┃ Input ┃ Shape            ┃ Dataset Directory ┃ Data Format ┃ Tensor Format ┃ Mean               ┃ Std                ┃
│ data  │ [1, 3, 256, 456] │ data              │ Image       │ BGR           │ [128.0, 128.0,     │ [128.0, 128.0,     │
│       │                  │                   │             │               │ 128.0]             │ 128.0]             │
Transformer optimize level: 0
30 File(s) Loaded.
[00:23:24] AX LSTM Operation Format Pass Running ...      Finished.
[00:23:24] AX Set MixPrecision Pass Running ...           Finished.
[00:23:24] AX Refine Operation Config Pass Running ...    Finished.
[00:23:24] AX Reset Mul Config Pass Running ...           Finished.
[00:23:24] AX Tanh Operation Format Pass Running ...      Finished.
[00:23:24] AX Confused Op Refine Pass Running ...         Finished.
[00:23:24] AX Quantization Fusion Pass Running ...        Finished.
[00:23:24] AX Quantization Simplify Pass Running ...      Finished.
[00:23:24] AX Parameter Quantization Pass Running ...     Finished.
Calibration Progress(Phase 1): 100%|████████████████████████████████████████████████████| 30/30 [00:05<00:00,  5.03it/s]
[00:23:31] AX Passive Parameter Quantization Running ...  Finished.
[00:23:31] AX Parameter Baking Pass Running ...           Finished.
[00:23:31] AX Refine Int Parameter Pass Running ...       Finished.
[00:23:31] AX Refine Weight Parameter Pass Running ...    Finished.
--------- Network Snapshot ---------
Num of Op:                    [117]
Num of Quantized Op:          [117]
Num of Variable:              [226]
Num of Quantized Var:         [226]
------- Quantization Snapshot ------
Num of Quant Config:          [350]
BAKED:                        [54]
OVERLAPPED:                   [171]
ACTIVATED:                    [74]
PASSIVE_BAKED:                [51]
Network Quantization Finished.
quant.axmodel export success: output/quant/quant_axmodel.onnx
===>export per layer debug_data(float data) to folder: output/quant/debug/float
Writing npy... ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% 0:00:00
===>export input/output data to folder: output/quant/debug/test_data_set_0
Building native ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% 0:00:00
/usr/local/lib/python3.9/site-packages/scipy/spatial/distance.py:620: RuntimeWarning: invalid value encountered in float_scalars
  dist = 1.0 - uv / np.sqrt(uu * vv)
                                      Quant Precision Table【EndToEnd Reference】
┃ Operator             ┃ Type              ┃ Output Tensor        ┃ Data TypeShape             ┃ Cosin Distance     ┃
│ /model/model.0/mode… │ AxQuantizedConv   │ /model/model.0/mode… │ FP32      │ (1, 32, 128, 228) │ 0.9999498724937439 │
│ /model/model.1/mode… │ AxQuantizedConv   │ /model/model.1/mode… │ FP32      │ (1, 32, 128, 228) │ 0.9999966621398926 │
│ /model/model.1/mode… │ AxQuantizedConv   │ /model/model.1/mode… │ FP32      │ (1, 64, 128, 228) │ 0.9569284915924072 │
│ /model/model.2/mode… │ AxQuantizedConv   │ /model/model.2/mode… │ FP32      │ (1, 64, 64, 114)  │ 0.9795299172401428 │
│ /model/model.2/mode… │ AxQuantizedConv   │ /model/model.2/mode… │ FP32      │ (1, 128, 64, 114) │ 0.9590328931808472 │
│ /model/model.3/mode… │ AxQuantizedConv   │ /model/model.3/mode… │ FP32      │ (1, 128, 64, 114) │ 0.9650511741638184 │
│ /model/model.3/mode… │ AxQuantizedConv   │ /model/model.3/mode… │ FP32      │ (1, 128, 64, 114) │ 0.9283918142318726 │
│ /model/model.4/mode… │ AxQuantizedConv   │ /model/model.4/mode… │ FP32      │ (1, 128, 32, 57)  │ 0.9993457794189453 │
│ /model/model.4/mode… │ AxQuantizedConv   │ /model/model.4/mode… │ FP32      │ (1, 256, 32, 57)  │ 0.9516056776046753 │
│ /model/model.5/mode… │ AxQuantizedConv   │ /model/model.5/mode… │ FP32      │ (1, 256, 32, 57)  │ 0.9471707940101624 │
│ /model/model.5/mode… │ AxQuantizedConv   │ /model/model.5/mode… │ FP32      │ (1, 256, 32, 57)  │ 0.9344280958175659 │
│ /model/model.6/mode… │ AxQuantizedConv   │ /model/model.6/mode… │ FP32      │ (1, 256, 32, 57)  │ 0.9990376830101013 │
│ /model/model.6/mode… │ AxQuantizedConv   │ /model/model.6/mode… │ FP32      │ (1, 512, 32, 57)  │ 0.9555284976959229 │
│ /model/model.7/mode… │ AxQuantizedConv   │ /model/model.7/mode… │ FP32      │ (1, 512, 32, 57)  │ 0.9700952768325806 │
│ /model/model.7/mode… │ AxQuantizedConv   │ /model/model.7/mode… │ FP32      │ (1, 512, 32, 57)  │ 0.9461223483085632 │
│ /model/model.8/mode… │ AxQuantizedConv   │ /model/model.8/mode… │ FP32      │ (1, 512, 32, 57)  │ 0.9950165748596191 │
│ /model/model.8/mode… │ AxQuantizedConv   │ /model/model.8/mode… │ FP32      │ (1, 512, 32, 57)  │ 0.9367055892944336 │
│ /model/model.9/mode… │ AxQuantizedConv   │ /model/model.9/mode… │ FP32      │ (1, 512, 32, 57)  │ 0.980312168598175  │
│ /model/model.9/mode… │ AxQuantizedConv   │ /model/model.9/mode… │ FP32      │ (1, 512, 32, 57)  │ 0.9332307577133179 │
│ /model/model.10/mod… │ AxQuantizedConv   │ /model/model.10/mod… │ FP32      │ (1, 512, 32, 57)  │ 0.9839740991592407 │
│ /model/model.10/mod… │ AxQuantizedConv   │ /model/model.10/mod… │ FP32      │ (1, 512, 32, 57)  │ 0.9234165549278259 │
│ /model/model.11/mod… │ AxQuantizedConv   │ /model/model.11/mod… │ FP32      │ (1, 512, 32, 57)  │ 0.9968637824058533 │
│ /model/model.11/mod… │ AxQuantizedConv   │ /model/model.11/mod… │ FP32      │ (1, 512, 32, 57)  │ 0.8376449346542358 │
│ /cpm/align/align.0/… │ AxQuantizedConv   │ /cpm/align/align.0/… │ FP32      │ (1, 128, 32, 57)  │ 0.8724657297134399 │
│ /cpm/trunk/trunk.0/… │ AxQuantizedConv   │ /cpm/trunk/trunk.0/… │ FP32      │ (1, 128, 32, 57)  │ 0.8473747968673706 │
│ /cpm/trunk/trunk.0/… │ AxQuantizedElu    │ /cpm/trunk/trunk.0/… │ FP32      │ (1, 128, 32, 57)  │ 0.8474124670028687 │
│ /cpm/trunk/trunk.0/… │ AxQuantizedConv   │ /cpm/trunk/trunk.0/… │ FP32      │ (1, 128, 32, 57)  │ 1                  │
│ /cpm/trunk/trunk.0/… │ AxQuantizedElu    │ /cpm/trunk/trunk.0/… │ FP32      │ (1, 128, 32, 57)  │ 1                  │
│ /cpm/trunk/trunk.1/… │ AxQuantizedConv   │ /cpm/trunk/trunk.1/… │ FP32      │ (1, 128, 32, 57)  │ 1                  │
│ /cpm/trunk/trunk.1/… │ AxQuantizedElu    │ /cpm/trunk/trunk.1/… │ FP32      │ (1, 128, 32, 57)  │ 1                  │
│ /cpm/trunk/trunk.1/… │ AxQuantizedConv   │ /cpm/trunk/trunk.1/… │ FP32      │ (1, 128, 32, 57)  │ 1                  │
│ /cpm/trunk/trunk.1/… │ AxQuantizedElu    │ /cpm/trunk/trunk.1/… │ FP32      │ (1, 128, 32, 57)  │ 1                  │
│ /cpm/trunk/trunk.2/… │ AxQuantizedConv   │ /cpm/trunk/trunk.2/… │ FP32      │ (1, 128, 32, 57)  │ 1                  │
│ /cpm/trunk/trunk.2/… │ AxQuantizedElu    │ /cpm/trunk/trunk.2/… │ FP32      │ (1, 128, 32, 57)  │ 1                  │
│ /cpm/trunk/trunk.2/… │ AxQuantizedConv   │ /cpm/trunk/trunk.2/… │ FP32      │ (1, 128, 32, 57)  │ 1                  │
│ /cpm/trunk/trunk.2/… │ AxQuantizedElu    │ /cpm/trunk/trunk.2/… │ FP32      │ (1, 128, 32, 57)  │ 1                  │
│ /cpm/Add             │ AxQuantizedAdd    │ /cpm/Add_output_0    │ FP32      │ (1, 128, 32, 57)  │ 0.8724657297134399 │
│ /cpm/conv/conv.0/Co… │ AxQuantizedConv   │ /cpm/conv/conv.0/Co… │ FP32      │ (1, 128, 32, 57)  │ 0.8918052315711975 │
│ /initial_stage/trun… │ AxQuantizedConv   │ /initial_stage/trun… │ FP32      │ (1, 128, 32, 57)  │ 0.8402097225189209 │
│ /initial_stage/trun… │ AxQuantizedConv   │ /initial_stage/trun… │ FP32      │ (1, 128, 32, 57)  │ 0.8613110780715942 │
│ /initial_stage/trun… │ AxQuantizedConv   │ /initial_stage/trun… │ FP32      │ (1, 128, 32, 57)  │ 0.9579821228981018 │
│ /initial_stage/heat… │ AxQuantizedConv   │ /initial_stage/heat… │ FP32      │ (1, 512, 32, 57)  │ 0.9848107099533081 │
│ /initial_stage/heat… │ AxQuantizedConv   │ stage_0_output_1_he… │ FP32      │ (1, 19, 32, 57)   │ 0.9975641965866089 │
│ /initial_stage/pafs… │ AxQuantizedConv   │ /initial_stage/pafs… │ FP32      │ (1, 512, 32, 57)  │ 0.9518132209777832 │
│ /initial_stage/pafs… │ AxQuantizedConv   │ stage_0_output_0_pa… │ FP32      │ (1, 38, 32, 57)   │ 0.8149770498275757 │
│ /Concat              │ AxQuantizedConcat │ /Concat_output_0     │ FP32      │ (1, 185, 32, 57)  │ 0.9521203637123108 │
│ /refinement_stages.… │ AxQuantizedConv   │ /refinement_stages.… │ FP32      │ (1, 128, 32, 57)  │ 0.9810469150543213 │
│ /refinement_stages.… │ AxQuantizedConv   │ /refinement_stages.… │ FP32      │ (1, 128, 32, 57)  │ 0.8794618248939514 │
│ /refinement_stages.… │ AxQuantizedConv   │ /refinement_stages.… │ FP32      │ (1, 128, 32, 57)  │ 0.8622568845748901 │
│ /refinement_stages.… │ AxQuantizedAdd    │ /refinement_stages.… │ FP32      │ (1, 128, 32, 57)  │ 0.9406654238700867 │
│ /refinement_stages.… │ AxQuantizedConv   │ /refinement_stages.… │ FP32      │ (1, 128, 32, 57)  │ 0.9486998915672302 │
│ /refinement_stages.… │ AxQuantizedConv   │ /refinement_stages.… │ FP32      │ (1, 128, 32, 57)  │ 0.8384159207344055 │
│ /refinement_stages.… │ AxQuantizedConv   │ /refinement_stages.… │ FP32      │ (1, 128, 32, 57)  │ 0.8343141674995422 │
│ /refinement_stages.… │ AxQuantizedAdd    │ /refinement_stages.… │ FP32      │ (1, 128, 32, 57)  │ 0.9094435572624207 │
│ /refinement_stages.… │ AxQuantizedConv   │ /refinement_stages.… │ FP32      │ (1, 128, 32, 57)  │ 0.9495641589164734 │
│ /refinement_stages.… │ AxQuantizedConv   │ /refinement_stages.… │ FP32      │ (1, 128, 32, 57)  │ 0.8413822054862976 │
│ /refinement_stages.… │ AxQuantizedConv   │ /refinement_stages.… │ FP32      │ (1, 128, 32, 57)  │ 0.8205063939094543 │
│ /refinement_stages.… │ AxQuantizedAdd    │ /refinement_stages.… │ FP32      │ (1, 128, 32, 57)  │ 0.8983384370803833 │
│ /refinement_stages.… │ AxQuantizedConv   │ /refinement_stages.… │ FP32      │ (1, 128, 32, 57)  │ 0.9512494206428528 │
│ /refinement_stages.… │ AxQuantizedConv   │ /refinement_stages.… │ FP32      │ (1, 128, 32, 57)  │ 0.8841379880905151 │
│ /refinement_stages.… │ AxQuantizedConv   │ /refinement_stages.… │ FP32      │ (1, 128, 32, 57)  │ 0.8782262802124023 │
│ /refinement_stages.… │ AxQuantizedAdd    │ /refinement_stages.… │ FP32      │ (1, 128, 32, 57)  │ 0.9195815920829773 │
│ /refinement_stages.… │ AxQuantizedConv   │ /refinement_stages.… │ FP32      │ (1, 128, 32, 57)  │ 0.9604532122612    │
│ /refinement_stages.… │ AxQuantizedConv   │ /refinement_stages.… │ FP32      │ (1, 128, 32, 57)  │ 0.9433491230010986 │
│ /refinement_stages.… │ AxQuantizedConv   │ /refinement_stages.… │ FP32      │ (1, 128, 32, 57)  │ 0.8501253128051758 │
│ /refinement_stages.… │ AxQuantizedAdd    │ /refinement_stages.… │ FP32      │ (1, 128, 32, 57)  │ 0.891287624835968  │
│ /refinement_stages.… │ AxQuantizedConv   │ /refinement_stages.… │ FP32      │ (1, 128, 32, 57)  │ 0.9831163287162781 │
│ /refinement_stages.… │ AxQuantizedConv   │ /refinement_stages.… │ FP32      │ (1, 128, 32, 57)  │ 0.9556347131729126 │
│ /refinement_stages.… │ AxQuantizedConv   │ stage_1_output_1_he… │ FP32      │ (1, 19, 32, 57)   │ 0.9990192651748657 │
│ /refinement_stages.… │ AxQuantizedConv   │ stage_1_output_0_pa… │ FP32      │ (1, 38, 32, 57)   │ 0.9560971260070801 │
Building native ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% 0:00:00
2024-01-21 00:23:37.654 | WARNING  | yamain.command.load_model:pre_process:454 - preprocess tensor [data]
2024-01-21 00:23:37.654 | INFO     | yamain.command.load_model:pre_process:456 - tensor: data, (1, 3, 256, 456), U8
2024-01-21 00:23:37.654 | INFO     | yamain.command.load_model:pre_process:456 - op: op:pre_dequant_1, AxDequantizeLinear, {'const_inputs': {'x_zeropoint': array(0, dtype=int32), 'x_scale': array(1., dtype=float32)}, 'output_dtype': <class 'numpy.float32'>, 'quant_method': 0}
2024-01-21 00:23:37.654 | INFO     | yamain.command.load_model:pre_process:456 - tensor: tensor:pre_norm_1, (1, 3, 256, 456), FP32
2024-01-21 00:23:37.654 | INFO     | yamain.command.load_model:pre_process:456 - op: op:pre_norm_1, AxNormalize, {'dim': 1, 'mean': [128.0, 128.0, 128.0], 'std': [128.0, 128.0, 128.0]}
2024-01-21 00:23:37.654 | WARNING  | yamain.command.load_model:post_process:475 - postprocess tensor [stage_0_output_1_heatmaps]
2024-01-21 00:23:37.654 | WARNING  | yamain.command.load_model:post_process:475 - postprocess tensor [stage_0_output_0_pafs]
2024-01-21 00:23:37.654 | WARNING  | yamain.command.load_model:post_process:475 - postprocess tensor [stage_1_output_1_heatmaps]
2024-01-21 00:23:37.654 | WARNING  | yamain.command.load_model:post_process:475 - postprocess tensor [stage_1_output_0_pafs]
tiling op...   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 92/92 0:00:00
new_ddr_tensor = []
build op...   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 389/389 0:00:00
add ddr swap...   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 738/738 0:00:00
calc input dependencies...   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1039/1039 0:00:00
calc output dependencies...   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1039/1039 0:00:00
assign eu heuristic   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1039/1039 0:00:00
assign eu onepass   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1039/1039 0:00:00
assign eu greedy   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1039/1039 0:00:00
2024-01-21 00:23:39.899 | INFO     | yasched.test_onepass:results2model:2004 - max_cycle = 4,752,230
2024-01-21 00:23:40.217 | INFO     | yamain.command.build:compile_npu_subgraph:1076 - QuantAxModel macs: 7,656,013,824
2024-01-21 00:23:40.218 | INFO     | yamain.command.build:compile_npu_subgraph:1084 - use random data as gt input: data, uint8, (1, 3, 256, 456)
2024-01-21 00:23:42.095 | INFO     | yamain.command.build:compile_ptq_model:1003 - fuse 1 subgraph(s)



pose_detection.py --pre_processing --image_path sim_images/cxk.jpg
--axmodel_path models/compiled.axmodel --intermediate_path sim_inputs/0


def pre_processing(args):
    """Resize and pad the input image, then dump it as the model's input file.

    Args:
        args: Parsed command-line arguments providing ``image_path``,
            ``axmodel_path`` and ``intermediate_path``.

    Raises:
        FileNotFoundError: If the image or the compiled axmodel does not exist.
        ValueError: If the image file exists but cannot be decoded.
        NotImplementedError: If the model does not have exactly one input.
    """
    image_path = Path(args.image_path)
    if not image_path.exists():
        raise FileNotFoundError(f"Not found image file at '{image_path}'")

    axmodel_path = Path(args.axmodel_path)
    if not axmodel_path.exists():
        raise FileNotFoundError(f"Not found compiled axmodel at '{axmodel_path}'")

    pad_value = (0, 0, 0)
    img = cv2.imread(str(image_path), cv2.IMREAD_COLOR)
    if img is None:
        # cv2.imread signals a decode failure by returning None, not by raising.
        raise ValueError(f"Failed to decode image file at '{image_path}'")
    height, width, _ = img.shape
    net_input_size = np.array([256, 456])
    # One scale factor for both axes so the resized image fits in 256x456.
    scale = min(net_input_size[0] / height, net_input_size[1] / width)

    scaled_img = cv2.resize(img, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_LINEAR)
    min_dims = [net_input_size[0], net_input_size[1]]
    padded_img, _ = pad_width(scaled_img, pad_value, min_dims)

    input_names = get_input_info(axmodel_path)
    if len(input_names) != 1:
        raise NotImplementedError(f"Currently only supports length 1, but got {input_names}")

    intermediate_path = Path(args.intermediate_path)
    intermediate_path.mkdir(exist_ok=True, parents=True)

    output_path = intermediate_path / f"{sanitize(input_names[0])}.bin"
    # NOTE(review): writes the padded image as raw bytes in cv2's HWC/BGR
    # order, matching src_format BGR / src_dtype U8 in the build config --
    # confirm the layout expected by the compiled model.
    output_path.write_bytes(padded_img.tobytes())
    LOGGER.info(f"Write [{input_names[0]}] to '{output_path}' successfully.")


[I]Write [data] to 'sim_inputs/0/data.bin' successfully.


pulsar2 run --model models/compiled.axmodel --input_dir sim_inputs --output_dir sim_outputs --list list.txt



from pathlib import Path
from typing import Dict, List, Tuple
import cv2
import numpy as np
import onnx

from pulsar2_run_helper.utils import get_tensor_value_info, sanitize

from torch import Tensor

def get_output_info(model_path: str):
    """Return the shape and tensor type of all outputs.

    Args:
        model_path: Path of a model file that ``onnx.load`` can read.

    Returns:
        Dict mapping each graph output name to the value that
        ``get_tensor_value_info`` returns for that output.
    """
    model_obj = onnx.load(model_path)
    return {
        tensor_info.name: get_tensor_value_info(tensor_info)
        for tensor_info in model_obj.graph.output
    }

# Read the output tensors written by `pulsar2 run` and upsample the
# final-stage heatmaps and PAFs.
# Forward slashes work on every platform, unlike the backslash-separated path.
output_info = get_output_info('pulsar2-run-helper/models/compiled.axmodel')

output_data: Dict[str, np.ndarray] = {}
for k, v in output_info.items():
    data_path = Path(f"pulsar2-run-helper/sim_outputs/0/{sanitize(k)}.bin")
    if not data_path.exists():
        raise FileNotFoundError(
            f"Could not find the expected key '{k}', please double check your pulsar run output directory.",
        )
    data = data_path.read_bytes()
    # np.frombuffer yields a read-only view of the bytes; copy() makes it writable.
    output_data[k] = np.frombuffer(data, dtype=v["tensor_type"]).reshape(v["shape"]).copy()

stage2_heatmaps = np.array(output_data['stage_1_output_1_heatmaps'])
stage2_pafs = np.array(output_data['stage_1_output_0_pafs'])

upsample_ratio = 4
# The outputs are NumPy arrays already, so the torch-style
# `.cpu().data.numpy()` chain does not apply here.
# Drop the batch axis and move channels last (CHW -> HWC) for cv2.resize.
heatmaps = np.transpose(stage2_heatmaps.squeeze(), (1, 2, 0))
heatmaps = cv2.resize(heatmaps, (0, 0), fx=upsample_ratio, fy=upsample_ratio, interpolation=cv2.INTER_CUBIC)

pafs = np.transpose(stage2_pafs.squeeze(), (1, 2, 0))
pafs = cv2.resize(pafs, (0, 0), fx=upsample_ratio, fy=upsample_ratio, interpolation=cv2.INTER_CUBIC)



2、官方文档里说明了可以导入 Quantized ONNX 模型,但是这需要先进行QAT(量化感知训练),笔者对这一块还不太熟悉,也就放弃了。



