解决Stable Diffusion TensorRT转换模型报错cpu and cuda:0! (when checking argument for argument mat1 in method wrapper_CUDA_addmm)

记录Stable Diffusion webUI TensorRT插件使用过程的报错：

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument mat1 in method wrapper_CUDA_addmm)

用下面的代码覆盖extensions\stable-diffusion-webui-tensorrt目录下的export_onnx.py文件。修改后的代码会在导出前将模型和相关的张量统一移动到GPU，即可解决该报错。

import os

from modules import sd_hijack, sd_unet
from modules import shared, devices
import torch


def export_current_unet_to_onnx(filename, opset_version=17):
    """Export the currently loaded UNet (diffusion model) to an ONNX file.

    The diffusion model and the dummy inputs used for tracing are placed on
    the same device (CUDA when available, otherwise CPU) so the export does
    not fail with "Expected all tensors to be on the same device".

    Args:
        filename: Destination path of the ONNX file. Its parent directory is
            created if it does not exist; a bare file name (no directory
            component) is written to the current working directory.
        opset_version: ONNX opset version passed to ``torch.onnx.export``.

    Side effects:
        * The diffusion model is moved to the selected device and left there.
        * ``use_checkpoint`` / ``checkpoint`` flags that are ``True`` on any
          sub-module are set to ``False`` and are not restored.
        * The UNet override and the hijack optimizations are disabled for the
          duration of the export and re-applied afterwards, even if the
          export raises.
    """
    cuda_available = torch.cuda.is_available()
    print("CUDA is available" if cuda_available else "CUDA is not available")
    # Pick the device according to whether CUDA is available.
    device = 'cuda' if cuda_available else 'cpu'

    unet = shared.sd_model.model.diffusion_model
    unet.to(device)

    # Dummy inputs for tracing, created directly on the model's device with
    # the webui's working dtype.
    x = torch.randn(1, 4, 16, 16).to(device=device, dtype=devices.dtype)
    timesteps = torch.zeros((1,)).to(device=device, dtype=devices.dtype) + 500
    context = torch.randn(1, 77, 768).to(device=device, dtype=devices.dtype)
    print(x.device, timesteps.device, context.device)

    def disable_checkpoint(module):
        # `== True` (not plain truthiness) is deliberate: only genuine boolean
        # flags are cleared; a non-boolean attribute of the same name (e.g. a
        # callable) is left untouched.
        for flag in ('use_checkpoint', 'checkpoint'):
            if getattr(module, flag, False) == True:  # noqa: E712
                setattr(module, flag, False)

    unet.apply(disable_checkpoint)

    # os.path.dirname() returns '' for a bare file name, and os.makedirs('')
    # raises FileNotFoundError, so only create the directory when there is one.
    output_dir = os.path.dirname(filename)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    try:
        sd_unet.apply_unet("None")
        sd_hijack.model_hijack.apply_optimizations('None')

        with devices.autocast():
            torch.onnx.export(
                unet,
                (x, timesteps, context),
                filename,
                export_params=True,
                opset_version=opset_version,
                do_constant_folding=True,
                input_names=['x', 'timesteps', 'context'],
                output_names=['output'],
                dynamic_axes={
                    'x': {0: 'batch_size', 2: 'height', 3: 'width'},
                    'timesteps': {0: 'batch_size'},
                    'context': {0: 'batch_size', 1: 'sequence_length'},
                    'output': {0: 'batch_size'},
                },
            )
    finally:
        # Always restore the user's optimizations and UNet selection, even
        # when the export fails; the exception (if any) still propagates.
        sd_hijack.model_hijack.apply_optimizations()
        sd_unet.apply_unet()