部署NPU需要申请CIX早鸟计划获取SDK
非常感谢Molly提供的编译好的新版本Debian(Orz),Radxa目前提供的镜像无法使用25Q3的Cix SDK,需要手动编译新版本系统。
MI-GAN简介
MI-GAN (全称 “A Simple Baseline for Image Inpainting on Mobile Devices”)是一个针对图像修复/缺失区域填补(image inpainting)任务的生成对抗网络(GAN)模型,由 Picsart‑AI‑Research 团队于 ICCV 2023 提出。其目标是:在 移动设备或者资源受限的环境下,仍能实现较高质量的图像修复,同时保持模型较少的参数量和较快的推理速度。
Torch ➡️ onnx
MI-GAN提供了两种分辨率的模型,考虑到MI-GAN本身比较轻量,且O6具有的澎湃算力,我们直接选择512分辨率的。
git clone https://github.com/Picsart-AI-Research/MI-GAN.git
pip install -r requirements.txt
然后从migan - Google 云端硬盘下载模型,放在项目根目录下的models文件夹里。
权重下载好后,执行
python -m scripts.demo \
--model-name migan-512 \
--model-path ./models/migan_512_places2.pt \
--images-dir ./examples/places2_512_object/images \
--masks-dir ./examples/places2_512_object/masks \
--output-dir ./examples/places2_512_object/results/migan \
--device cuda \
--invert-mask
看看效果
效果看上去还不错?
在项目根目录下建一个export.py导出脚本(请使用torch2.8及以前的版本)
import argparse
import os
import warnings
from glob import glob
from pathlib import Path
import cv2
import numpy as np
import pickle
import PIL.Image
import pnnx
import torch
from PIL import Image
from tqdm import tqdm
from lib.model_zoo.migan_inference import Generator as MIGAN
model_list = [
"migan_512"
"migan_256",
]
target_model = "migan_512"
model = MIGAN(resolution=256 if target_model == "migan_256" else 512)
model_path = "./models/migan_256_places2.pt" if target_model == "migan_256" else "./models/migan_512_places2.pt"
model.load_state_dict(torch.load(model_path))
model.eval()
torch.onnx.export(model,
(torch.rand(1, 4, 256, 256) if target_model == "migan_256" else torch.rand(1, 4, 512, 512)),
f"{target_model}_inference.onnx",
opset_version=11,
input_names=["in0"],
output_names=["out0"],
)产生以下这些文件:
- migan_512_inference.onnx 导出的onnx模型(重要)
onnx ➡️ cix 🚀
准备校准集
import os
import cv2
import numpy as np
from tqdm import tqdm
# Spatial size of the network input/output (the 512-px MI-GAN variant).
resolution = 512


def clamp01(v):
    """Clamp ``v`` (scalar or ndarray) into the closed interval [0, 1]."""
    return np.minimum(np.maximum(v, 0.0), 1.0)
def preprocess_input(bgrImage, rawMask):
    """Build the 4-channel MI-GAN input from a BGR image and a hole mask.

    Channel 0 holds the "known pixels" mask shifted to [-0.5, 0.5]; channels
    1-3 hold the RGB image scaled to [-1, 1] with hole pixels zeroed.  White
    pixels in ``rawMask`` mark the hole to be inpainted.

    Returns a float32 array of shape (4, resolution, resolution).
    Raises ValueError on missing inputs or an unsupported channel count.
    """
    if bgrImage is None or rawMask is None:
        raise ValueError("Empty image or mask")
    if bgrImage.ndim != 3 or bgrImage.shape[2] not in (3, 4):
        raise ValueError("Unsupported image channels")

    src_h, src_w = bgrImage.shape[:2]

    # Aspect-preserving resize so that the longer side equals `resolution`.
    if src_w >= src_h:
        new_w = resolution
        new_h = max(1, round(src_h * (resolution / src_w)))
    else:
        new_h = resolution
        new_w = max(1, round(src_w * (resolution / src_h)))
    img_aspect = cv2.resize(bgrImage, (new_w, new_h), interpolation=cv2.INTER_CUBIC)

    # Bring the mask to the same size and collapse it to a single channel.
    mask_aspect = cv2.resize(rawMask, (new_w, new_h), interpolation=cv2.INTER_NEAREST)
    if mask_aspect.ndim != 2:
        gray_code = cv2.COLOR_BGR2GRAY if mask_aspect.shape[2] == 3 else cv2.COLOR_BGRA2GRAY
        mask_aspect = cv2.cvtColor(mask_aspect, gray_code)

    # Binarize: white = hole, then invert so 255 marks the known region.
    _, hole255 = cv2.threshold(mask_aspect, 127, 255, cv2.THRESH_BINARY)
    known255 = cv2.bitwise_not(hole255)

    # Stretch both image and mask to the square network resolution.
    rgb_aspect = cv2.cvtColor(img_aspect, cv2.COLOR_BGR2RGB)
    rgb_square = cv2.resize(rgb_aspect, (resolution, resolution), interpolation=cv2.INTER_CUBIC)
    known_square = cv2.resize(known255, (resolution, resolution), interpolation=cv2.INTER_NEAREST)

    # Assemble the (4, H, W) tensor expected by the network.
    known = (known_square > 0).astype(np.float32)
    rgb_norm = rgb_square.astype(np.float32) / 255.0 * 2.0 - 1.0
    input_tensor = np.empty((4, resolution, resolution), dtype=np.float32)
    input_tensor[0] = known - 0.5
    input_tensor[1:] = rgb_norm.transpose(2, 0, 1) * known  # zero out the hole
    return input_tensor
def generate_calibration_npy(image_dir, mask_dir, output_path="calibration.npy"):
    """Preprocess every PNG image/mask pair and save a stacked float32
    calibration tensor of shape (N, 4, resolution, resolution) to ``output_path``.

    Pairs are matched by identical file names; unmatched or unreadable files
    are skipped with a warning.  Raises RuntimeError when nothing was usable.
    """
    png_names = sorted(f for f in os.listdir(image_dir) if f.lower().endswith(".png"))

    samples = []
    for name in tqdm(png_names, desc="Processing"):
        mask_file = os.path.join(mask_dir, name)
        if not os.path.exists(mask_file):
            print(f"Warning: mask not found for {name}, skipped")
            continue
        img = cv2.imread(os.path.join(image_dir, name), cv2.IMREAD_COLOR)
        mask = cv2.imread(mask_file, cv2.IMREAD_UNCHANGED)
        if img is None or mask is None:
            print(f"Skip invalid {name}")
            continue
        samples.append(preprocess_input(img, mask))

    if not samples:
        raise RuntimeError("No valid image-mask pairs found.")

    batch = np.stack(samples, axis=0).astype(np.float32)
    np.save(output_path, batch)
    print(f"✅ Saved {batch.shape[0]} samples to {output_path}")
    print(f"Shape: {batch.shape} dtype: {batch.dtype}")
if __name__ == "__main__":
    # Build the calibration tensor from the 512-px Places2 example pairs.
    generate_calibration_npy(
        image_dir="examples/places2_512_object/images",
        mask_dir="examples/places2_512_object/masks",
        output_path="calibration.npy",
    )
获得calibration.npy
编译cix
首先我们需要用pip装一个CixBuilder(需要Python3.10 && Linux && x86_64)
pip install CixBuilder-6.1.3407.2-cp310-none-linux_x86_64.whl
然后准备一个cfg文件
[Common]
mode = build
[Parser]
model_type = onnx
model_name = migan_512
detection_postprocess =
model_domain = image_segmentation
input_model = ./migan_512_inference.onnx
output_dir = ./
input_shape = [1, 4, 512, 512]
input = in0
output = out0
[Optimizer]
calibration_data = calibration.npy
calibration_batch_size = 1
metric_batch_size = 1
output_dir = ./
dataset = NumpyDataset
save_statistic_info = True
cast_dtypes_for_lib = True
quantize_method_for_activation = per_tensor_asymmetric
quantize_method_for_weight = per_channel_symmetric_restricted_range
[GBuilder]
target = X2_1204MP3
outputs = migan_512_inference.cix
profile = True
tiling = fps
执行
cixbuild migan.cfg
libaipu_simulator_x2.so 错误解决
感谢@nihui 提供解决方法
【“星睿O6”评测】RVM人像分割torch➡️ncnn-CPU/GPU和o6-NPU部署全过程 - 极术社区 - 连接开发者与智能计算生态
参考
export LD_LIBRARY_PATH=/home/ice/.local/lib/python3.10/site-packages/AIPUBuilder/simulator-lib
cixbuild migan.cfg
正常情况下会输出
build success.......
Total errors: 0, warnings: 373
部署到Radxa O6
安装/更新驱动
拷贝cix提供的deb文件到O6,并运行(4月份发布的Radxa发布的Debian千万不要用这个升级,请自行完成系统编译再用)
sudo apt install ./cix-npu-driver_2.0.1_arm64.deb
sudo apt install ./cix-noe-umd_2.0.2_arm64.deb
安装依赖包
pip install opencv-python matplotlib --break-system-packages
测试代码
from libnoe import *
import numpy as np
import struct
import time
from typing import Union
import cv2
import matplotlib.pyplot as plt
import NOE_Engine.NOE_Engine as NOE_Engine
# Model input/output resolution for the deployed migan_512 network.
resolution = 512


def clamp01(v):
    """Limit ``v`` (scalar or ndarray) to the range [0, 1]."""
    return np.maximum(0.0, np.minimum(1.0, v))
def preprocess_input(bgrImage, rawMask):
    """Convert a BGR image and a hole mask into the 4-channel MI-GAN input.

    Channel 0 is the "known pixels" mask shifted to [-0.5, 0.5]; channels
    1-3 are the RGB image scaled to [-1, 1] with the hole region zeroed out.

    Args:
        bgrImage: HxWx3 or HxWx4 uint8 image as returned by cv2.imread.
        rawMask: mask image (grayscale, BGR or BGRA); white pixels mark
            the hole to be inpainted.

    Returns:
        np.float32 array of shape (4, resolution, resolution).

    Raises:
        ValueError: if either input is None or the image channel count
            is unsupported.
    """
    if bgrImage is None or rawMask is None:
        raise ValueError("Empty image or mask")
    if bgrImage.ndim != 3 or bgrImage.shape[2] not in (3, 4):
        raise ValueError("Unsupported image channels")
    H0, W0 = bgrImage.shape[:2]
    # --- aspect-preserving resize so the longer side equals `resolution` ---
    if W0 >= H0:
        W1 = resolution
        H1 = max(1, round(H0 * (resolution / W0)))
    else:
        H1 = resolution
        W1 = max(1, round(W0 * (resolution / H0)))
    imgAspectBGR = cv2.resize(bgrImage, (W1, H1), interpolation=cv2.INTER_CUBIC)
    # --- resize the mask to match, then collapse it to grayscale ---
    if rawMask.ndim == 2:
        maskGrayAspect = cv2.resize(rawMask, (W1, H1), interpolation=cv2.INTER_NEAREST)
    else:
        tmp = cv2.resize(rawMask, (W1, H1), interpolation=cv2.INTER_NEAREST)
        if tmp.shape[2] == 3:
            maskGrayAspect = cv2.cvtColor(tmp, cv2.COLOR_BGR2GRAY)
        else:
            maskGrayAspect = cv2.cvtColor(tmp, cv2.COLOR_BGRA2GRAY)
    # --- binarize: white = hole; invert so 255 marks the known region ---
    _, holeMask255Aspect = cv2.threshold(maskGrayAspect, 127, 255, cv2.THRESH_BINARY)
    knownMask255Aspect = cv2.bitwise_not(holeMask255Aspect)
    # --- stretch image and mask to the square network resolution ---
    imgAspectRGB = cv2.cvtColor(imgAspectBGR, cv2.COLOR_BGR2RGB)
    imgWarpRGB = cv2.resize(imgAspectRGB, (resolution, resolution), interpolation=cv2.INTER_CUBIC)
    knownWarp255 = cv2.resize(knownMask255Aspect, (resolution, resolution), interpolation=cv2.INTER_NEAREST)
    # --- assemble the [4, H, W] tensor expected by the network ---
    H, W = resolution, resolution
    input_tensor = np.zeros((4, H, W), dtype=np.float32)
    known = (knownWarp255 > 0).astype(np.float32)
    input_tensor[0, :, :] = known - 0.5
    img_f = imgWarpRGB.astype(np.float32) / 255.0 * 2.0 - 1.0
    for c in range(3):
        # zero out hole pixels so the network sees only known content
        input_tensor[c + 1, :, :] = img_f[:, :, c] * known
    return input_tensor
if __name__ == "__main__":
    image = cv2.imread("1.png", cv2.IMREAD_COLOR)
    mask = cv2.imread("1_mask.png", cv2.IMREAD_UNCHANGED)

    # Build the (4, H, W) network input and run it on the NPU.
    input_tensor = preprocess_input(image, mask)
    engine = NOE_Engine.EngineInfer("migan_512_inference.cix")
    output = engine.forward(input_tensor)

    # Network output is (3, H, W) RGB in [-1, 1]; map it back to uint8 HWC.
    inpaint = output[0].reshape((3, resolution, resolution))
    inpaint = (clamp01((inpaint + 1) / 2) * 255).astype(np.uint8)
    inpaint = inpaint.transpose(1, 2, 0)
    inpaint = cv2.resize(inpaint, image.shape[:2][::-1])  # back to source size (W, H)
    inpaint = cv2.cvtColor(inpaint, cv2.COLOR_BGR2RGB)  # RGB -> BGR to match `image`

    # Blend the inpainted region back into the original image.
    # BUG FIX: the original used `mask / 255.0` directly as "mask_3ch" without
    # adding a channel axis, so a single-channel mask (the common case with
    # IMREAD_UNCHANGED) could not broadcast against the HxWx3 image, and a
    # BGRA mask broke the blend.  Normalize to one gray channel first, then
    # add an explicit channel axis so broadcasting works for every mask type.
    if mask.ndim == 3:
        gray_code = cv2.COLOR_BGR2GRAY if mask.shape[2] == 3 else cv2.COLOR_BGRA2GRAY
        mask_gray = cv2.cvtColor(mask, gray_code)
    else:
        mask_gray = mask
    mask_3ch = (mask_gray.astype(np.float32) / 255.0)[:, :, None]
    result = image * (1 - mask_3ch) + inpaint * mask_3ch
    result = result.astype(np.uint8)

    # Show original, raw inpainting, and the blended final result side by side.
    plt.figure(figsize=(12, 4))
    plt.subplot(1, 3, 1)
    plt.title("Original Image")
    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    plt.axis('off')
    plt.subplot(1, 3, 2)
    plt.title("Inpainted Image")
    plt.imshow(cv2.cvtColor(inpaint, cv2.COLOR_BGR2RGB))
    plt.axis('off')
    plt.subplot(1, 3, 3)
    plt.title("Final Result")
    plt.imshow(cv2.cvtColor(result, cv2.COLOR_BGR2RGB))
    plt.axis('off')
    plt.tight_layout()
    plt.show()

    engine.clean()  # release the NPU context
测试效果
擦除效果非常好!
性能&精度对比
| | CPU(OnnxRuntime) | NPU(Zhouyi X2) |
|---|---|---|
| 单次推理延迟 | 946.95ms | 249.2ms |
精度差距很低