YOLOv8 Model Conversion and Inference

1. Convert the .pt model to ONNX format

Upgrade pip:

python3.10 -m pip install --upgrade pip
pip -V

Install ultralytics (YOLOv8) and onnx:

pip install ultralytics onnx

Set the relevant environment variables

Temporarily add the environment variable (takes effect immediately):

export PATH="$PATH:$HOME/.local/bin"

• Note: this only affects the current terminal session and is lost once the terminal is closed.

• Verify: run yolo --version; if a version number is printed, the command is available.
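
If the yolo command is still not found after adjusting PATH, you can also confirm that the package itself installed correctly (a quick optional check, not part of the original steps):

python3 -c "import ultralytics; print(ultralytics.__version__)"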

Permanently add the environment variable (long-term)

Write the path into the shell configuration file so it persists:

Edit the user environment file:

echo 'export PATH="$PATH:$HOME/.local/bin"' >> ~/.bashrc
source ~/.bashrc  # apply the change immediately

Create export.py to convert and export the model:


from ultralytics import YOLO

# Load the .pt model from the current directory
model = YOLO('./yolov8x.pt')  # replace with your actual model file name

# ONNX export parameters
export_params = {
    'format': 'onnx',
    'opset': 12,          # recommended opset version
    'simplify': True,     # enable graph simplification
    'dynamic': False,     # fixed input size
    'imgsz': 640,         # standard input size
    'half': False         # keep FP32 precision
}

# Run the export; the .onnx file is saved next to the .pt file
model.export(**export_params)

Run this script to export the .pt model as an ONNX model.
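
Optionally, before uploading, the exported file can be sanity-checked with the onnx checker (a minimal sketch; the file name yolov8x.onnx is an assumption based on the model used above):

# check_onnx.py - quick structural validation of the exported model
import onnx

onnx_model = onnx.load('./yolov8x.onnx')   # assumed output of the export step
onnx.checker.check_model(onnx_model)       # raises an exception if the graph is malformed
print('inputs :', [i.name for i in onnx_model.graph.input])
print('outputs:', [o.name for o in onnx_model.graph.output])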

2. Use AIMO to truncate and quantize the ONNX model into a QNN Int8 model

In AIMO, choose Model Optimization, select ONNX as the model format, and upload the model.

Select the chip and target framework; here we choose QCS8550 + Qnn2.31.

Use Netron to inspect the model structure and fill in the input and output nodes.

As shown in the figure above, the output node is formed by a Concat of a Mul node and a Sigmoid node, so in AIMO we enter these two nodes as the truncation points, enable quantization, and select int8 as the data precision.
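
If Netron is not at hand, the same two nodes can also be located programmatically with the onnx package (a minimal sketch, assuming the exported file is ./yolov8x.onnx and that the detection head is the first graph output):

# find_cut_nodes.py - print the nodes feeding the final output Concat
import onnx

onnx_model = onnx.load('./yolov8x.onnx')
graph = onnx_model.graph

# map every tensor name to the node that produces it
producer = {out: node for node in graph.node for out in node.output}

out_name = graph.output[0].name            # e.g. 'output0'
concat = producer[out_name]
print('output node:', concat.op_type, concat.name)

# the producers of the Concat inputs are the candidate truncation points for AIMO
for tensor in concat.input:
    node = producer.get(tensor)
    if node is not None:
        print('truncation candidate:', node.op_type, node.name, '->', tensor)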

Then submit the job. After the conversion completes, download the target model package and extract it; the .bin.aidem file inside is the model file.

3. Run the YOLOv8 QNN Int8 model with Aidlite

Check that the aidlite version in the AidLux environment matches the Qnn version selected when converting the model. In a terminal, run:

sudo aid-pkg installed 

Confirm that Qnn231 is installed.

The inference code is as follows:

import time
import numpy as np
import cv2
import os
import aidlite
import argparse

coco_class = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
              'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
              'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
              'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
              'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
              'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
              'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
              'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
              'hair drier', 'toothbrush']

colors = {name: [np.random.randint(0, 255) for _ in range(3)] for i, name in enumerate(coco_class)}


def xywh2xyxy(x):
    '''
    Box (center x, center y, width, height) to (x1, y1, x2, y2)
    '''
    y = np.copy(x)
    y[:, 0] = x[:, 0] - x[:, 2] / 2  # top left x
    y[:, 1] = x[:, 1] - x[:, 3] / 2  # top left y
    y[:, 2] = x[:, 0] + x[:, 2] / 2  # bottom right x
    y[:, 3] = x[:, 1] + x[:, 3] / 2  # bottom right y
    return y


def xyxy2xywh(box):
    '''
    Box (left_top x, left_top y, right_bottom x, right_bottom y) to (left_top x, left_top y, width, height)
    '''
    box[:, 2:] = box[:, 2:] - box[:, :2]
    return box


def NMS(dets, thresh):
    '''
    Single-class NMS
    dets.shape = (N, 5), (left_top x, left_top y, right_bottom x, right_bottom y, Scores)
    '''
    dets = np.array(dets)
    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]
    areas = (y2 - y1 + 1) * (x2 - x1 + 1)
    scores = dets[:, 4]
    keep = []
    index = scores.argsort()[::-1]
    while index.size > 0:
        i = index[0]  # every time the first is the biggest, and add it directly
        keep.append(i)
        x11 = np.maximum(x1[i], x1[index[1:]])  # calculate the points of overlap
        y11 = np.maximum(y1[i], y1[index[1:]])
        x22 = np.minimum(x2[i], x2[index[1:]])
        y22 = np.minimum(y2[i], y2[index[1:]])
        w = np.maximum(0, x22 - x11 + 1)  # the width of overlap
        h = np.maximum(0, y22 - y11 + 1)  # the height of overlap
        overlaps = w * h
        ious = overlaps / (areas[i] + areas[index[1:]] - overlaps)
        idx = np.where(ious <= thresh)[0]  # keep boxes whose IoU with the current box is below the threshold
        index = index[idx + 1]  # +1 because ious was computed against index[1:]
    return dets[keep]


def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
    # Resize and pad image while meeting stride-multiple constraints
    shape = img.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    if not scaleup:  # only scale down, do not scale up (for better test mAP)
        r = min(r, 1.0)

    # Compute padding
    ratio = r, r  # width, height ratios
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
    if auto:  # minimum rectangle
        dw, dh = np.mod(dw, stride), np.mod(dh, stride)  # wh padding
    elif scaleFill:  # stretch
        dw, dh = 0.0, 0.0
        new_unpad = (new_shape[1], new_shape[0])
        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios

    dw /= 2  # divide padding into 2 sides
    dh /= 2

    if shape[::-1] != new_unpad:  # resize
        img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return img, ratio, (dw, dh)


def preprocess_img(img, target_shape, means=[0, 0, 0], stds=[255, 255, 255]):
    '''
    Image preprocessing:
    target_shape: target input size
    means: per-channel means, len(means) == number of channels; None to skip z-score
    stds: per-channel stds, len(stds) == number of channels; None to skip z-score
    '''
    img_processed = np.copy(img)
    # pad to a square canvas, then resize to the target shape
    [height, width, _] = img_processed.shape
    length = max((height, width))
    scale = length / target_shape
    ratio = [scale, scale]
    image = np.zeros((length, length, 3), np.uint8)
    image[0:height, 0:width] = img_processed
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    img_input = cv2.resize(image, (target_shape, target_shape))

    # z-score normalization on the network input
    img_input = (img_input - means) / stds
    img_input = img_input.astype(np.float32)

    return img_input, ratio


def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
    # Rescale coords (xyxy) from img1_shape to img0_shape
    if ratio_pad is None:  # calculate from img0_shape
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain  = old / new
        pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding
    else:
        gain = ratio_pad[0][0]
        pad = ratio_pad[1]

    coords[:, [0, 2]] -= pad[0]  # x padding
    coords[:, [1, 3]] -= pad[1]  # y padding
    coords[:, :4] /= gain
    clip_coords(coords, img0_shape)
    return coords


def clip_coords(boxes, img_shape):
    # Clip bounding xyxy bounding boxes to image shape (height, width)
    boxes[:, 0].clip(0, img_shape[1], out=boxes[:, 0])  # x1
    boxes[:, 1].clip(0, img_shape[0], out=boxes[:, 1])  # y1
    boxes[:, 2].clip(0, img_shape[1], out=boxes[:, 2])  # x2
    boxes[:, 3].clip(0, img_shape[0], out=boxes[:, 3])  # y2


def postprocess(outputs, ratio, conf_threshold=0.5, nms_threshold=0.45):
    rows = outputs.shape[0]
    boxes = []
    scores = []
    class_ids = []
    for i in range(rows):
        classes_scores = outputs[i][4:]
        (minScore, maxScore, minClassLoc, (x, maxClassIndex)) = cv2.minMaxLoc(classes_scores)
        if maxScore >= conf_threshold:
            box = [
                outputs[i][0] - (0.5 * outputs[i][2]), outputs[i][1] - (0.5 * outputs[i][3]),
                outputs[i][2], outputs[i][3]]
            boxes.append(box)
            scores.append(maxScore)
            class_ids.append(maxClassIndex)

    result_boxes = cv2.dnn.NMSBoxes(boxes, scores, score_threshold=conf_threshold, nms_threshold=nms_threshold, eta=0.5)
    result_boxes = result_boxes.reshape(-1)
    new_bboxes = []
    new_scores = []
    new_class_ids = []
    for i in range(len(result_boxes)):
        index = result_boxes[i]
        bbox = boxes[index]
        x, y, w, h = float(bbox[0]), float(bbox[1]), float(bbox[2]), float(bbox[3])
        new_bboxes.append([round(x * ratio[0]), round(y * ratio[1]), round(w * ratio[0]), round(h * ratio[1])])
        new_scores.append(scores[index])
        new_class_ids.append(class_ids[index])

    new_scores = np.expand_dims(new_scores, 1)
    new_class_ids = np.expand_dims(new_class_ids, 1)

    boxes = np.concatenate((new_bboxes, new_scores), axis=1)
    boxes = np.concatenate((boxes, new_class_ids), axis=1)

    return boxes


def draw_res(img, boxes):
    '''
    Draw detection results
    :param img: image array
    :param boxes: all detection boxes, each as (x, y, w, h, score, class_id)
    '''
    img = img.astype(np.uint8)
    for i, [x, y, w, h, scores, class_ids] in enumerate(boxes):
        x = int(x)
        y = int(y)
        w = int(w)
        h = int(h)
        name = coco_class[int(class_ids)]
        print(i + 1, [x, y, w, h], round(scores, 4), name)
        label = f'{name} ({scores:.2f})'
        W, H = cv2.getTextSize(label, 0, fontScale=1, thickness=2)[0]  # text width and height
        color = colors[name]
        cv2.rectangle(img, (x, y), (int(x + w), int(y + h)), color, thickness=2)
        cv2.rectangle(img, (x, int(y - H)), (int(x + W / 2), y), (0, 255, 0), -1, cv2.LINE_AA)  # label background box
        cv2.putText(img, label, (x, int(y) - 6), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 1)
    return img


def main(args):
    print("Start image inference ... ...")

    # Initialize the model
    size = 640
    config = aidlite.Config.create_instance()
    if config is None:
        print("Create config failed !")
        return False

    config.implement_type = aidlite.ImplementType.TYPE_LOCAL
    if args.model_type.lower() == "qnn":
        config.framework_type = aidlite.FrameworkType.TYPE_QNN231  # specify the QNN version; must match the version used for conversion
    elif args.model_type.lower() == "snpe2" or args.model_type.lower() == "snpe":
        config.framework_type = aidlite.FrameworkType.TYPE_SNPE2

    config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
    config.is_quantify_model = 1

    model = aidlite.Model.create_instance(args.target_model)
    if model is None:
        print("Create model failed !")
        return False
    input_shapes = [[1, size, size, 3]]               # NHWC input: 1 x 640 x 640 x 3
    output_shapes = [[1, 4, 8400], [1, 80, 8400]]     # box regression and class scores
    model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32,
                               output_shapes, aidlite.DataType.TYPE_FLOAT32)

    interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(model, config)
    if interpreter is None:
        print("build_interpretper_from_model_and_config failed !")
        return None
    result = interpreter.init()
    if result != 0:
        print(f"interpreter init failed !")
        return False
    result = interpreter.load_model()
    if result != 0:
        print("interpreter load model failed !")
        return False
    print("detect model load success!")

    # Read the input image
    img = cv2.imread(args.image_path)
    if img is None:
        print("Error: Could not open image file")
        return False

    # Preprocess: pad to a square, convert BGR to RGB, resize to 640x640
    img_processed = np.copy(img)
    [h, w, _] = img_processed.shape
    length = max((h, w))
    scale = length / size
    ratio = [scale, scale]
    image = np.zeros((length, length, 3), np.uint8)
    image[0:h, 0:w] = img_processed
    img_input = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    img_input = cv2.resize(img_input, (size, size))

    mean_data = [0, 0, 0]
    std_data = [255, 255, 255]
    img_input = (img_input - mean_data) / std_data  # HWC
    img_input = img_input.astype(np.float32)

    # Set the input tensor
    result = interpreter.set_input_tensor(0, img_input.data)
    if result != 0:
        print("interpreter set_input_tensor() failed")
        return False

    # Time only the model inference
    t1 = time.time()
    result = interpreter.invoke()
    t2 = time.time()
    
    if result != 0:
        print("interpreter invoke() failed")
        return False

    # Fetch the output tensors
    qnn_local = interpreter.get_output_tensor(0).reshape(*output_shapes[0])
    qnn_conf = interpreter.get_output_tensor(1).reshape(*output_shapes[1])

    # Compute the inference time
    inference_time = (t2 - t1) * 1000  # milliseconds
    print(f"Model inference time: {inference_time:.2f} ms")

    # Post-process: merge boxes [1, 4, 8400] and class scores [1, 80, 8400], then transpose to [8400, 84]
    qnn_result = np.concatenate((qnn_local, qnn_conf), axis=1)
    qnn_result = qnn_result.transpose(0, 2, 1)
    qnn_result = qnn_result[0]

    detect = postprocess(qnn_result, ratio, conf_threshold=0.5, nms_threshold=0.45)
    print(f"Detected {len(detect)} targets in the image")

    # Draw the detections
    res_img = draw_res(img, list(detect))

    # Overlay the inference time on the image
    cv2.putText(res_img, f"Inference Time: {inference_time:.2f} ms", (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    # Save the result image
    cv2.imwrite('output.jpg', res_img)
    print("Output image saved as 'output.jpg'")

    # Release resources
    result = interpreter.destory()


def parser_args():
    parser = argparse.ArgumentParser(description="Run image inference benchmarks")
    parser.add_argument('--target_model', type=str,
                        default='/home/aidlux/yolov8/8550_models/cutoff_yolov8l_qcs8550_w8a8.qnn231.ctx.bin',
                        help="inference model path")
    parser.add_argument('--image_path', type=str, default='bus.jpg', help="Input image path")
    parser.add_argument('--model_type', type=str, default='QNN', help="run backend")
    args = parser.parse_args()
    return args


if __name__ == "__main__":
    args = parser_args()
    main(args)
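
Assuming the script above is saved as yolov8_qnn_infer.py (the file name here is just an example), it can be run with the same paths used as defaults in parser_args():

python3 yolov8_qnn_infer.py --target_model /home/aidlux/yolov8/8550_models/cutoff_yolov8l_qcs8550_w8a8.qnn231.ctx.bin --image_path bus.jpg --model_type QNN

The annotated result is written to output.jpg in the current directory.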

YOLOv8 object-detection inference speed on QCS8550 and QCS6490 (averaged over 100 inference runs):

| Model | Size (pixels) | QCS8550 NPU QNN (ms) | QCS6490 NPU QNN (ms) |
|---------|---------------|----------------------|----------------------|
| YOLOv8n | 640 | 1.66 | 4.66 |
| YOLOv8s | 640 | 2.38 | 7.59 |
| YOLOv8m | 640 | 4.65 | 17.10 |
| YOLOv8l | 640 | 8.42 | 29.55 |
| YOLOv8x | 640 | 13.13 | 46.45 |