【米尔-TIAM62开发板-接替335x-试用评测】YOLACT实力分割模型部署

1. 前言

在TIAM62开发板上使用OpenCV部署YOLACT实例分割模型

2. YOLACT的C++实现

#define _CRT_SECURE_NO_WARNINGS
#include <fstream>
#include <iostream>
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>

using namespace cv;
using namespace dnn;
using namespace std;
extern const char* class_names[] = { "background",
                                        "person", "bicycle", "car", "motorcycle", "airplane", "bus",
                                        "train", "truck", "boat", "traffic light", "fire hydrant",
                                        "stop sign", "parking meter", "bench", "bird", "cat", "dog",
                                        "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
                                        "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
                                        "skis", "snowboard", "sports ball", "kite", "baseball bat",
                                        "baseball glove", "skateboard", "surfboard", "tennis racket",
                                        "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl",
                                        "banana", "apple", "sandwich", "orange", "broccoli", "carrot",
                                        "hot dog", "pizza", "donut", "cake", "chair", "couch",
                                        "potted plant", "bed", "dining table", "toilet", "tv", "laptop",
                                        "mouse", "remote", "keyboard", "cell phone", "microwave", "oven",
                                        "toaster", "sink", "refrigerator", "book", "clock", "vase",
                                        "scissors", "teddy bear", "hair drier", "toothbrush"
};

extern const unsigned char colors[81][3] = {

    {56, 0, 255},

    {226, 255, 0},

    {0, 94, 255},

    {0, 37, 255},

    {0, 255, 94},

    {255, 226, 0},

    {0, 18, 255},

    {255, 151, 0},

    {170, 0, 255},

    {0, 255, 56},

    {255, 0, 75},

    {0, 75, 255},

    {0, 255, 169},

    {255, 0, 207},

    {75, 255, 0},

    {207, 0, 255},

    {37, 0, 255},

    {0, 207, 255},

    {94, 0, 255},

    {0, 255, 113},

    {255, 18, 0},

    {255, 0, 56},

    {18, 0, 255},

    {0, 255, 226},

    {170, 255, 0},

    {255, 0, 245},

    {151, 255, 0},

    {132, 255, 0},

    {75, 0, 255},

    {151, 0, 255},

    {0, 151, 255},

    {132, 0, 255},

    {0, 255, 245},

    {255, 132, 0},

    {226, 0, 255},

    {255, 37, 0},

    {207, 255, 0},

    {0, 255, 207},

    {94, 255, 0},

    {0, 226, 255},

    {56, 255, 0},

    {255, 94, 0},

    {255, 113, 0},

    {0, 132, 255},

    {255, 0, 132},

    {255, 170, 0},

    {255, 0, 188},

    {113, 255, 0},

    {245, 0, 255},

    {113, 0, 255},

    {255, 188, 0},

    {0, 113, 255},

    {255, 0, 0},

    {0, 56, 255},

    {255, 0, 113},

    {0, 255, 188},

    {255, 0, 94},

    {255, 0, 18},

    {18, 255, 0},

    {0, 255, 132},

    {0, 188, 255},

    {0, 245, 255},

    {0, 169, 255},

    {37, 255, 0},

    {255, 0, 151},

    {188, 0, 255},

    {0, 255, 37},

    {0, 255, 0},

    {255, 0, 170},

    {255, 0, 37},

    {255, 75, 0},

    {0, 0, 255},

    {255, 207, 0},

    {255, 0, 226},

    {255, 245, 0},

    {188, 255, 0},

    {0, 255, 18},

    {0, 255, 75},

    {0, 255, 151},

    {255, 56, 0},

    {245, 255, 0}

};



class yolact

{

    public:

        yolact(float confThreshold, float nmsThreshold, const int keep_top_k = 200);

        void detect(Mat& srcimg);

    private:

        const int target_size = 550;

        const float MEANS[3] = { 123.68, 116.78, 103.94 };

        const float STD[3] = { 58.40, 57.12, 57.38 };

        float confidence_threshold;

        float nms_threshold;

        int keep_top_k;

        const int conv_ws[5] = { 69, 35, 18, 9, 5 };

        const int conv_hs[5] = { 69, 35, 18, 9, 5 };

        const float aspect_ratios[3] = { 1.f, 0.5f, 2.f };

        const float scales[5] = { 24.f, 48.f, 96.f, 192.f, 384.f };

        const float var[4] = { 0.1f, 0.1f, 0.2f, 0.2f };

        const int mask_h = 138;

        const int mask_w = 138;

        int num_priors;

        float* priorbox;

        Net net;

        void normalize(Mat& img);

        void sigmoid(Mat& out, int length);

};



yolact::yolact(float confThreshold, float nmsThreshold, const int keep_top_k)

{

    this->confidence_threshold = confThreshold;

    this->nms_threshold = nmsThreshold;

    this->keep_top_k = keep_top_k;

    this->net = readNet("yolact_base_54_800000.onnx");

    this->num_priors = 0;

    int p = 0;

    for (p = 0; p < 5; p++)

    {

        this->num_priors += this->conv_ws[p] * this->conv_hs[p] * 3;

    }

    this->priorbox = new float[4 * this->num_priors];

    ////generate priorbox

    float* pb = priorbox;

    for (p = 0; p < 5; p++)

    {

        int conv_w = this->conv_ws[p];

        int conv_h = this->conv_hs[p];



        float scale = this->scales[p];



        for (int i = 0; i < conv_h; i++)

        {

            for (int j = 0; j < conv_w; j++)

            {

                // +0.5 because priors are in center-size notation

                float cx = (j + 0.5f) / conv_w;

                float cy = (i + 0.5f) / conv_h;



                for (int k = 0; k < 3; k++)

                {

                    float ar = aspect_ratios[k];



                    ar = sqrt(ar);



                    float w = scale * ar / this->target_size;

                    float h = scale / ar / this->target_size;



                    // This is for backward compatability with a bug where I made everything square by accident

                    // cfg.backbone.use_square_anchors:

                    h = w;

                    pb[0] = cx;

                    pb[1] = cy;

                    pb[2] = w;

                    pb[3] = h;

                    pb += 4;

                }

            }

        }

    }

}



void yolact::normalize(Mat& img)

{

    img.convertTo(img, CV_32F);

    int i = 0, j = 0;

    for (i = 0; i < img.rows; i++)

    {

        float* pdata = (float*)(img.data + i * img.step);

        for (j = 0; j < img.cols; j++)

        {

            pdata[0] = (pdata[0] - this->MEANS[0]) / this->STD[0];

            pdata[1] = (pdata[1] - this->MEANS[1]) / this->STD[1];

            pdata[2] = (pdata[2] - this->MEANS[2]) / this->STD[2];

            pdata += 3;

        }

    }

}



void yolact::sigmoid(Mat& out, int length)

{

    float* pdata = (float*)(out.data);

    int i = 0;

    for (i = 0; i < length; i++)

    {

        pdata[i] = 1.0 / (1 + expf(-pdata[i]));

    }

}



void yolact::detect(Mat& srcimg)

{

    int img_w = srcimg.cols;

    int img_h = srcimg.rows;

    Mat img;

    resize(srcimg, img, Size(this->target_size, this->target_size), INTER_LINEAR);

    cvtColor(img, img, COLOR_BGR2RGB);

    this->normalize(img);

    Mat blob = blobFromImage(img);

    this->net.setInput(blob);

    vector<Mat> outs;

    this->net.forward(outs, this->net.getUnconnectedOutLayersNames());

    

    /////generate proposals

    vector<int> classIds;

    vector<float> confidences;

    vector<Rect> boxes;

    vector<int> maskIds;

    const int num_class = outs[1].cols;

    for (int i = 0; i < this->num_priors; i++)

    {

        Mat scores = outs[1].row(i).colRange(1, num_class);

        Point classIdPoint;

        double score;

        // Get the value and location of the maximum score

        minMaxLoc(scores, 0, &score, 0, &classIdPoint);

        if (score > this->confidence_threshold)

        {

            const float* loc = (float*)outs[0].data + i * 4;

            const float* pb = this->priorbox + i * 4;

            float pb_cx = pb[0];

            float pb_cy = pb[1];

            float pb_w = pb[2];

            float pb_h = pb[3];



            float bbox_cx = var[0] * loc[0] * pb_w + pb_cx;

            float bbox_cy = var[1] * loc[1] * pb_h + pb_cy;

            float bbox_w = (float)(exp(var[2] * loc[2]) * pb_w);

            float bbox_h = (float)(exp(var[3] * loc[3]) * pb_h);

            float obj_x1 = bbox_cx - bbox_w * 0.5f;

            float obj_y1 = bbox_cy - bbox_h * 0.5f;

            float obj_x2 = bbox_cx + bbox_w * 0.5f;

            float obj_y2 = bbox_cy + bbox_h * 0.5f;



            // clip

            obj_x1 = max(min(obj_x1 * img_w, (float)(img_w - 1)), 0.f);

            obj_y1 = max(min(obj_y1 * img_h, (float)(img_h - 1)), 0.f);

            obj_x2 = max(min(obj_x2 * img_w, (float)(img_w - 1)), 0.f);

            obj_y2 = max(min(obj_y2 * img_h, (float)(img_h - 1)), 0.f);

            classIds.push_back(classIdPoint.x);

            confidences.push_back(score);

            boxes.push_back(Rect((int)obj_x1, (int)obj_y1, (int)(obj_x2 - obj_x1 + 1), (int)(obj_y2 - obj_y1 + 1)));

            maskIds.push_back(i);

        }

    }



    // Perform non maximum suppression to eliminate redundant overlapping boxes with

    // lower confidences

    vector<int> indices;

    NMSBoxes(boxes, confidences, this->confidence_threshold, this->nms_threshold, indices, 1.f, this->keep_top_k);

    for (size_t i = 0; i < indices.size(); ++i)

    {

        int idx = indices[i];

        Rect box = boxes[idx];

        int xmax = box.x + box.width;

        int ymax = box.y + box.height;

        rectangle(srcimg, Point(box.x, box.y), Point(xmax, ymax), Scalar(0, 0, 255), 3);

        //Get the label for the class name and its confidence

        char text[256];

        sprintf(text, "%s: %.2f", class_names[classIds[idx] + 1], confidences[idx]);





        //Display the label at the top of the bounding box

        int baseLine;

        Size labelSize = getTextSize(text, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        int ymin = max(box.y, labelSize.height);

        //rectangle(frame, Point(left, top - int(1.5 * labelSize.height)), Point(left + int(1.5 * labelSize.width), top + baseLine), Scalar(0, 255, 0), FILLED);

        putText(srcimg, text, Point(box.x, ymin), FONT_HERSHEY_SIMPLEX, 0.75, Scalar(0, 255, 0), 1);



        Mat mask(this->mask_h, this->mask_w, CV_32FC1);

        mask = cv::Scalar(0.f);

        int channel = outs[2].cols;

        int area = this->mask_h * this->mask_w;

        float* coeff = (float*)outs[2].data + maskIds[idx] * channel;

        float* pm = (float*)mask.data;

        const float* pmaskmap = (float*)outs[3].data;

        for (int j = 0; j < area; j++)

        {

            for (int p = 0; p < channel; p++)

            {

                pm[j] += pmaskmap[p] * coeff[p];

            }

            pmaskmap += channel;

        }



        this->sigmoid(mask, area);

        Mat mask2;

        resize(mask, mask2, Size(img_w, img_h));

        // draw mask

        for (int y = 0; y < img_h; y++)

        {

            const float* pmask = (float*)mask2.data + y * img_w;

            uchar* p = srcimg.data + y * img_w * 3;

            for (int x = 0; x < img_w; x++)

            {

                if (pmask[x] > 0.5)

                {

                    p[0] = (uchar)(p[0] * 0.5 + colors[classIds[idx] + 1][0] * 0.5);

                    p[1] = (uchar)(p[1] * 0.5 + colors[classIds[idx] + 1][1] * 0.5);

                    p[2] = (uchar)(p[2] * 0.5 + colors[classIds[idx] + 1][2] * 0.5);

                }

                p += 3;

            }

        }

    }

}



int main()

{

    yolact yolactnet(0.5, 0.5);



    string imgpath = "000000046804.jpg";

    Mat srcimg = imread(imgpath);

    yolactnet.detect(srcimg);



    static const string kWinName = "Deep learning object detection in OpenCV";

    namedWindow(kWinName, WINDOW_NORMAL);

    imshow(kWinName, srcimg);

    waitKey(0);

    destroyAllWindows();

}

CMakeLists编写：

cmake_minimum_required(VERSION 3.12)

project(opencv_yolact)



add_executable(opencv_yolact main.cpp)



target_include_directories(opencv_yolact PUBLIC /workspace/opencv-4.x/build/install/include/opencv4/)



target_link_libraries(

     opencv_yolact

    /workspace/opencv-4.x/build/install/lib/libopencv_core.so.4.8.0

    /workspace/opencv-4.x/build/install/lib/libopencv_dnn.so.4.8.0

    /workspace/opencv-4.x/build/install/lib/libopencv_highgui.so.4.8.0

    /workspace/opencv-4.x/build/install/lib/libopencv_imgcodecs.so.4.8.0

    /workspace/opencv-4.x/build/install/lib/libopencv_imgproc.so.4.8.0

)

更多回帖

Zerolinr

【米尔-TIAM62开发板-接替335x-试用评测】YOLACT实力分割模型部署

目录

1. 前言

2. YOLACT的C++实现

相关帖子

【米尔-TIAM62开发板-接替335x-试用评测】5、QT应用开发体验

【米尔-TIAM62开发板-接替335x-试用评测】1、TIAM62的SoC架构

【米尔-TIAM62开发板-接替335x-试用评测】（一）基于TIAM62开发板的ROS2交叉编译

【米尔-TIAM62开发板-接替335x-试用评测】OPENCV和NCNN交叉编译

【米尔-TIAM62开发板-接替335x-试用评测】2、remote processor (RPROC) frameworkk架构介绍

【米尔-TIAM62开发板-接替335x-试用评测】3、MCU开发实战

【米尔-TIAM62开发板-接替335x-试用评测】+(二)配置U-Boot步骤实战

【米尔-TIAM62开发板-接替335x-试用评测】4、异构通信初体验

【米尔-TIAM62开发板-接替335x-试用评测】+(一)手把手配置Yocto

【米尔-TIAM62开发板-接替335x-试用评测】+(三)手把手创建Uboot设备树与内核设备树实战

20万+工程师都在用，免费PCB检查工具