风的彷徨 · 2025年11月09日 · 山东

【“星睿O6”AI PC开发套件评测】MI-GAN图片重绘O6-NPU部署全过程与CPU性能对比

部署NPU需要申请CIX早鸟计划获取SDK

非常感谢Molly提供的编译好的新版本Debian(Orz),Radxa目前提供的镜像无法使用25Q3的Cix SDK,需要手动编译新版本系统。

早鸟计划

MI-GAN简介

MI-GAN (全称 “A Simple Baseline for Image Inpainting on Mobile Devices”)是一个针对图像修复/缺失区域填补(image inpainting)任务的生成对抗网络(GAN)模型,由 Picsart‑AI‑Research 团队于 ICCV 2023 提出。其目标是:在 移动设备或者资源受限的环境下,仍能实现较高质量的图像修复,同时保持模型较少的参数量和较快的推理速度。

teaser.png

Torch ➡️ onnx

MI-GAN提供了两种分辨率的模型,考虑到MI-GAN本身比较轻量,且O6具有澎湃的算力,我们直接选择512分辨率的模型。

Picsart-AI-Research/MI-GAN: [ICCV 2023] MI-GAN: A Simple Baseline for Image Inpainting on Mobile Devices

git clone https://github.com/Picsart-AI-Research/MI-GAN.git
# requirements.txt lives inside the cloned repository, so enter it first
cd MI-GAN
pip install -r requirements.txt

然后从migan - Google 云端硬盘下载模型,放在项目根目录下的models文件夹里。

权重下载好后,执行

python -m scripts.demo \
    --model-name migan-512 \
    --model-path ./models/migan_512_places2.pt \
    --images-dir ./examples/places2_512_object/images \
    --masks-dir ./examples/places2_512_object/masks \
    --output-dir ./examples/places2_512_object/results/migan \
    --device cuda \
    --invert-mask

看看效果

image-20251106211318778.png

image-20251106211326912.png

效果看上去还不错?

在项目根目录下建一个export.py导出脚本(请使用torch2.8及以前的版本)

import argparse
import os
import warnings
from glob import glob
from pathlib import Path

import cv2
import numpy as np
import pickle
import PIL.Image
import pnnx
import torch
from PIL import Image
from tqdm import tqdm

from lib.model_zoo.migan_inference import Generator as MIGAN

# Model variants this script can export. The name selects both the checkpoint
# file under ./models and the square input resolution.
model_list = [
    "migan_512",
    "migan_256",
]

target_model = "migan_512"
if target_model not in model_list:
    raise ValueError(
        f"Unknown target_model {target_model!r}; expected one of {model_list}"
    )

input_size = 256 if target_model == "migan_256" else 512
model_path = f"./models/{target_model}_places2.pt"

model = MIGAN(resolution=input_size)
# map_location="cpu" lets a checkpoint that was saved from a CUDA device be
# loaded on a machine without a GPU; the export itself runs on the CPU.
model.load_state_dict(torch.load(model_path, map_location="cpu"))
model.eval()

# Random tensor used only to trace the graph: batch of 1, 4 channels,
# input_size x input_size pixels.
dummy_input = torch.rand(1, 4, input_size, input_size)

torch.onnx.export(
    model,
    dummy_input,
    f"{target_model}_inference.onnx",
    opset_version=11,
    input_names=["in0"],
    output_names=["out0"],
)

产生以下这些文件:

  • migan_512_inference.onnx 导出的onnx模型(重要)

onnx ➡️ cix 🚀

准备校准集

import os
import cv2
import numpy as np
from tqdm import tqdm

# Side length in pixels of the square network input; preprocess_input()
# warps both the image and the mask to resolution x resolution.
resolution = 512

def clamp01(v):
    """Return *v* with every element limited to the closed interval [0, 1]."""
    lower, upper = 0.0, 1.0
    return np.clip(v, lower, upper)

def preprocess_input(bgrImage, rawMask):
    """Build the 4-channel float32 network input from an image and a hole mask.

    The image and mask are first resized so the longer image side equals the
    module-level ``resolution`` (aspect ratio kept), then both are stretched
    to a ``resolution`` x ``resolution`` square.

    Args:
        bgrImage: 3-D array with 3 or 4 channels, in OpenCV BGR(A) order.
        rawMask: 2-D or 3-D array; pixels brighter than 127 mark the hole
            to be filled.

    Returns:
        ``np.float32`` array of shape ``(4, resolution, resolution)``.
        Channel 0 is +0.5 on known pixels and -0.5 inside the hole.
        Channels 1-3 are the RGB planes scaled to [-1, 1] and zeroed inside
        the hole.

    Raises:
        ValueError: if either argument is ``None`` or the image is not a
            3-D array with 3 or 4 channels.
    """
    if bgrImage is None or rawMask is None:
        raise ValueError("Empty image or mask")
    if not (bgrImage.ndim == 3 and bgrImage.shape[2] in (3, 4)):
        raise ValueError("Unsupported image channels")

    src_h, src_w = bgrImage.shape[:2]

    # Aspect-preserving size: the longer side becomes `resolution`, the other
    # side is scaled by the same factor (never below one pixel).
    if src_w >= src_h:
        fit_w = resolution
        fit_h = max(1, round(src_h * (resolution / src_w)))
    else:
        fit_h = resolution
        fit_w = max(1, round(src_w * (resolution / src_h)))
    fit_size = (fit_w, fit_h)

    image_fit = cv2.resize(bgrImage, fit_size, interpolation=cv2.INTER_CUBIC)

    # Nearest-neighbour keeps the mask free of interpolated in-between values.
    mask_fit = cv2.resize(rawMask, fit_size, interpolation=cv2.INTER_NEAREST)
    if rawMask.ndim != 2:
        if mask_fit.shape[2] == 3:
            gray_code = cv2.COLOR_BGR2GRAY
        else:
            gray_code = cv2.COLOR_BGRA2GRAY
        mask_fit = cv2.cvtColor(mask_fit, gray_code)

    # White (> 127) means hole; the inverse marks the pixels that are known.
    _, hole_255 = cv2.threshold(mask_fit, 127, 255, cv2.THRESH_BINARY)
    known_255 = cv2.bitwise_not(hole_255)

    # Stretch to the square the network expects.
    square = (resolution, resolution)
    rgb_fit = cv2.cvtColor(image_fit, cv2.COLOR_BGR2RGB)
    rgb_square = cv2.resize(rgb_fit, square, interpolation=cv2.INTER_CUBIC)
    known_square = cv2.resize(known_255, square, interpolation=cv2.INTER_NEAREST)

    known = (known_square > 0).astype(np.float32)
    rgb_scaled = rgb_square.astype(np.float32) / 255.0 * 2.0 - 1.0

    # Every plane is assigned below, so the buffer needs no zero fill.
    input_tensor = np.empty((4, resolution, resolution), dtype=np.float32)
    input_tensor[0] = known - 0.5
    input_tensor[1:] = np.transpose(rgb_scaled, (2, 0, 1)) * known

    return input_tensor


def generate_calibration_npy(image_dir, mask_dir, output_path="calibration.npy"):
    """Preprocess every PNG image/mask pair and save them as one .npy file.

    Images are taken from *image_dir* (files ending in ".png", case
    insensitive, in sorted order); the mask for each image is the file with
    the same name in *mask_dir*. Pairs with a missing mask or an unreadable
    file are reported on stdout and skipped.

    Args:
        image_dir: directory containing the PNG images.
        mask_dir: directory containing the same-named mask files.
        output_path: destination passed to ``np.save``.

    Raises:
        RuntimeError: if no pair could be processed.
    """
    png_names = sorted(
        entry for entry in os.listdir(image_dir) if entry.lower().endswith(".png")
    )

    samples = []
    for png_name in tqdm(png_names, desc="Processing"):
        mask_file = os.path.join(mask_dir, png_name)
        if not os.path.exists(mask_file):
            print(f"Warning: mask not found for {png_name}, skipped")
            continue

        image_file = os.path.join(image_dir, png_name)
        picture = cv2.imread(image_file, cv2.IMREAD_COLOR)
        hole_mask = cv2.imread(mask_file, cv2.IMREAD_UNCHANGED)
        if picture is None or hole_mask is None:
            print(f"Skip invalid {png_name}")
            continue

        samples.append(preprocess_input(picture, hole_mask))

    if len(samples) == 0:
        raise RuntimeError("No valid image-mask pairs found.")

    # One leading sample axis in front of each preprocessed input.
    stacked = np.stack(samples, axis=0).astype(np.float32)
    np.save(output_path, stacked)
    sample_count = stacked.shape[0]
    print(f"✅ Saved {sample_count} samples to {output_path}")
    print(f"Shape: {stacked.shape}  dtype: {stacked.dtype}")


if __name__ == "__main__":
    # Build the calibration set from the example image/mask folders.
    generate_calibration_npy(
        image_dir="examples/places2_512_object/images",
        mask_dir="examples/places2_512_object/masks",
        output_path="calibration.npy",
    )

获得calibration.npy

编译cix

首先我们需要用pip装一个CixBuilder(需要Python3.10 && Linux && x86_64)

pip install CixBuilder-6.1.3407.2-cp310-none-linux_x86_64.whl

然后准备一个cfg文件

[Common]
mode = build

[Parser]
model_type = onnx
model_name = migan_512
detection_postprocess =
model_domain = image_segmentation
input_model = ./migan_512_inference.onnx
output_dir = ./
input_shape = [1, 4, 512, 512]
input = in0
output = out0

[Optimizer]
calibration_data = calibration.npy
calibration_batch_size = 1
metric_batch_size = 1
output_dir = ./
dataset = NumpyDataset
save_statistic_info = True
cast_dtypes_for_lib = True
quantize_method_for_activation = per_tensor_asymmetric
quantize_method_for_weight = per_channel_symmetric_restricted_range

[GBuilder]
target = X2_1204MP3
outputs = migan_512_inference.cix
profile = True
tiling = fps

执行

cixbuild migan.cfg

libaipu_simulator_x2.so 错误解决

感谢@nihui 提供解决方法

【“星睿O6”评测】RVM人像分割torch➡️ncnn-CPU/GPU和o6-NPU部署全过程 - 极术社区 - 连接开发者与智能计算生态

参考

# Prepend the simulator libraries shipped with CixBuilder (user-level pip
# install under Python 3.10) while keeping any existing search path.
export LD_LIBRARY_PATH="$HOME/.local/lib/python3.10/site-packages/AIPUBuilder/simulator-lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
cixbuild migan.cfg 

正常情况下会输出

build success.......
Total errors: 0,  warnings: 373

部署到Radxa O6

安装/更新驱动

拷贝cix提供的deb文件到O6并安装(Radxa 4月份发布的Debian镜像千万不要用这些deb升级,请先自行编译新版系统再安装)

sudo apt install ./cix-npu-driver_2.0.1_arm64.deb
sudo apt install ./cix-noe-umd_2.0.2_arm64.deb

安装依赖包

pip install opencv-python matplotlib --break-system-packages

测试代码

from libnoe import *
import numpy as np
import struct
import time
from typing import Union
import cv2
import matplotlib.pyplot as plt
import NOE_Engine.NOE_Engine as NOE_Engine

# Side length in pixels of the square network input; preprocess_input()
# warps both the image and the mask to resolution x resolution.
resolution = 512

def clamp01(v):
    """Return *v* with every element limited to the closed interval [0, 1]."""
    lower, upper = 0.0, 1.0
    return np.clip(v, lower, upper)

def preprocess_input(bgrImage, rawMask):
    """Build the 4-channel float32 network input from an image and a hole mask.

    The image and mask are first resized so the longer image side equals the
    module-level ``resolution`` (aspect ratio kept), then both are stretched
    to a ``resolution`` x ``resolution`` square.

    Args:
        bgrImage: 3-D array with 3 or 4 channels, in OpenCV BGR(A) order.
        rawMask: 2-D or 3-D array; pixels brighter than 127 mark the hole
            to be filled.

    Returns:
        ``np.float32`` array of shape ``(4, resolution, resolution)``.
        Channel 0 is +0.5 on known pixels and -0.5 inside the hole.
        Channels 1-3 are the RGB planes scaled to [-1, 1] and zeroed inside
        the hole.

    Raises:
        ValueError: if either argument is ``None`` or the image is not a
            3-D array with 3 or 4 channels.
    """
    if bgrImage is None or rawMask is None:
        raise ValueError("Empty image or mask")
    if not (bgrImage.ndim == 3 and bgrImage.shape[2] in (3, 4)):
        raise ValueError("Unsupported image channels")

    src_h, src_w = bgrImage.shape[:2]

    # Aspect-preserving size: the longer side becomes `resolution`, the other
    # side is scaled by the same factor (never below one pixel).
    if src_w >= src_h:
        fit_w = resolution
        fit_h = max(1, round(src_h * (resolution / src_w)))
    else:
        fit_h = resolution
        fit_w = max(1, round(src_w * (resolution / src_h)))
    fit_size = (fit_w, fit_h)

    image_fit = cv2.resize(bgrImage, fit_size, interpolation=cv2.INTER_CUBIC)

    # Nearest-neighbour keeps the mask free of interpolated in-between values.
    mask_fit = cv2.resize(rawMask, fit_size, interpolation=cv2.INTER_NEAREST)
    if rawMask.ndim != 2:
        if mask_fit.shape[2] == 3:
            gray_code = cv2.COLOR_BGR2GRAY
        else:
            gray_code = cv2.COLOR_BGRA2GRAY
        mask_fit = cv2.cvtColor(mask_fit, gray_code)

    # White (> 127) means hole; the inverse marks the pixels that are known.
    _, hole_255 = cv2.threshold(mask_fit, 127, 255, cv2.THRESH_BINARY)
    known_255 = cv2.bitwise_not(hole_255)

    # Stretch to the square the network expects.
    square = (resolution, resolution)
    rgb_fit = cv2.cvtColor(image_fit, cv2.COLOR_BGR2RGB)
    rgb_square = cv2.resize(rgb_fit, square, interpolation=cv2.INTER_CUBIC)
    known_square = cv2.resize(known_255, square, interpolation=cv2.INTER_NEAREST)

    known = (known_square > 0).astype(np.float32)
    rgb_scaled = rgb_square.astype(np.float32) / 255.0 * 2.0 - 1.0

    # Every plane is assigned below, so the buffer needs no zero fill.
    input_tensor = np.empty((4, resolution, resolution), dtype=np.float32)
    input_tensor[0] = known - 0.5
    input_tensor[1:] = np.transpose(rgb_scaled, (2, 0, 1)) * known

    return input_tensor

if __name__ == "__main__":
    # Demo: inpaint the masked region of 1.png on the NPU, paste the result
    # back into the original image and display the three stages side by side.
    image = cv2.imread("1.png", cv2.IMREAD_COLOR)
    mask = cv2.imread("1_mask.png", cv2.IMREAD_UNCHANGED)

    input_tensor = preprocess_input(image, mask)
    engine = NOE_Engine.EngineInfer("migan_512_inference.cix")
    try:
        output = engine.forward(input_tensor)
        inpaint = output[0].reshape((3, resolution, resolution))

        # Map the output back to 8-bit pixels (assumes it is in [-1, 1], the
        # same range preprocess_input uses for the image planes).
        inpaint = (clamp01((inpaint + 1) / 2) * 255).astype(np.uint8)
        inpaint = inpaint.transpose(1, 2, 0)  # CHW -> HWC
        height, width = image.shape[:2]
        inpaint = cv2.resize(inpaint, (width, height))
        # The network was fed RGB planes; swap to BGR so it matches `image`.
        inpaint = cv2.cvtColor(inpaint, cv2.COLOR_RGB2BGR)

        # IMREAD_UNCHANGED may yield a 2-D (grayscale), 3-channel or
        # 4-channel mask. Reduce it to one channel at the image size so the
        # blend weights broadcast over the colour channels in every case.
        if mask.ndim == 3:
            gray_code = (
                cv2.COLOR_BGRA2GRAY if mask.shape[2] == 4 else cv2.COLOR_BGR2GRAY
            )
            mask_gray = cv2.cvtColor(mask, gray_code)
        else:
            mask_gray = mask
        if mask_gray.shape[:2] != (height, width):
            mask_gray = cv2.resize(
                mask_gray, (width, height), interpolation=cv2.INTER_NEAREST
            )

        # Blend weight per pixel: 1 inside the hole (white), 0 where known.
        blend = (mask_gray.astype(np.float32) / 255.0)[:, :, np.newaxis]
        result = image * (1 - blend) + inpaint * blend
        result = result.astype(np.uint8)

        plt.figure(figsize=(12, 4))
        plt.subplot(1, 3, 1)
        plt.title("Original Image")
        plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        plt.axis('off')
        plt.subplot(1, 3, 2)
        plt.title("Inpainted Image")
        plt.imshow(cv2.cvtColor(inpaint, cv2.COLOR_BGR2RGB))
        plt.axis('off')
        plt.subplot(1, 3, 3)
        plt.title("Final Result")
        plt.imshow(cv2.cvtColor(result, cv2.COLOR_BGR2RGB))
        plt.axis('off')
        plt.tight_layout()
        plt.show()
    finally:
        # Release the engine even when inference or plotting fails.
        engine.clean()

测试效果

image-20251109180520448.png

擦除效果非常好!

性能&精度对比

| 项目 | CPU(OnnxRuntime) | NPU(Zhouyi X2) |
| 单次推理延迟 | 946.95ms | 249.2ms |

image-20251109210112957.png

精度差距很小

推荐阅读
关注数
2
内容数
4
目录
极术微信服务号
关注极术微信号
实时接收点赞提醒和评论通知
安谋科技学堂公众号
关注安谋科技学堂
实时获取安谋科技及 Arm 教学资源
安谋科技招聘公众号
关注安谋科技招聘
实时获取安谋科技中国职位信息