记录使用 Stable Diffusion WebUI 的 TensorRT 插件过程中遇到的报错:
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument mat1 in method wrapper_CUDA_addmm)
解决方法:用下面的代码覆盖 extensions\stable-diffusion-webui-tensorrt 目录下的 export_onnx.py 文件。该代码会把模型和相关的输入张量移动到同一设备(CUDA 可用时为 GPU,否则为 CPU),即可解决上述报错。
import os
from modules import sd_hijack, sd_unet
from modules import shared, devices
import torch
def export_current_unet_to_onnx(filename, opset_version=17):
    """Export the currently loaded UNet to an ONNX file.

    The UNet is taken from ``shared.sd_model.model.diffusion_model``. The
    model and the dummy inputs used for the export are all placed on the same
    device (CUDA when available, otherwise CPU); mixing devices makes the
    export fail with "Expected all tensors to be on the same device".

    Args:
        filename: Path of the ONNX file to write. The parent directory is
            created if it does not exist yet.
        opset_version: ONNX opset version forwarded to ``torch.onnx.export``.

    Note:
        The ``use_checkpoint`` / ``checkpoint`` flags cleared on the UNet's
        submodules are not re-enabled after the export.
    """
    cuda_available = torch.cuda.is_available()
    print("CUDA is available" if cuda_available else "CUDA is not available")
    device = 'cuda' if cuda_available else 'cpu'

    unet = shared.sd_model.model.diffusion_model
    unet.to(device)

    # Dummy inputs for the export: created directly on the target device
    # with the dtype configured in ``devices.dtype``.
    x = torch.randn(1, 4, 16, 16).to(device, devices.dtype)
    timesteps = torch.zeros((1,)).to(device, devices.dtype) + 500
    context = torch.randn(1, 77, 768).to(device, devices.dtype)
    print(x.device, timesteps.device, context.device)

    def disable_checkpoint(module):
        # Clear both checkpoint flags on any submodule that has them set.
        for flag in ('use_checkpoint', 'checkpoint'):
            if getattr(module, flag, False) == True:  # noqa: E712 - keep original comparison semantics
                setattr(module, flag, False)

    unet.apply(disable_checkpoint)

    # os.path.dirname() returns '' for a bare file name, and
    # os.makedirs('') raises FileNotFoundError, so only create a directory
    # when the path actually has one.
    out_dir = os.path.dirname(filename)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # NOTE(review): presumably these switch off any replacement UNet and the
    # attention optimizations for the duration of the export - confirm
    # against modules.sd_unet / modules.sd_hijack.
    sd_unet.apply_unet("None")
    sd_hijack.model_hijack.apply_optimizations('None')

    try:
        with devices.autocast():
            torch.onnx.export(
                unet,
                (x, timesteps, context),
                filename,
                export_params=True,
                opset_version=opset_version,
                do_constant_folding=True,
                input_names=['x', 'timesteps', 'context'],
                output_names=['output'],
                dynamic_axes={
                    'x': {0: 'batch_size', 2: 'height', 3: 'width'},
                    'timesteps': {0: 'batch_size'},
                    'context': {0: 'batch_size', 1: 'sequence_length'},
                    'output': {0: 'batch_size'},
                },
            )
    finally:
        # Always undo the two overrides above, even when the export raises,
        # so a failed export does not leave them switched to 'None'.
        sd_hijack.model_hijack.apply_optimizations()
        sd_unet.apply_unet()