From 39c12cf789fde4cd49fec7f3f7a4479dcee208a9 Mon Sep 17 00:00:00 2001
From: Peter Willemsen
Date: Mon, 1 Jun 2026 19:46:26 +0200
Subject: [PATCH] Avoid ROCm Conv3d crash in Qwen35 vision patch embedding by using equivalent linear projection

---
 comfy/text_encoders/qwen35.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/comfy/text_encoders/qwen35.py b/comfy/text_encoders/qwen35.py
index 416ce9d18..196a5a3ec 100644
--- a/comfy/text_encoders/qwen35.py
+++ b/comfy/text_encoders/qwen35.py
@@ -452,6 +452,14 @@ class Qwen35VisionPatchEmbed(nn.Module):
 
     def forward(self, x):
         x = x.view(-1, self.in_channels, self.temporal_patch_size, self.patch_size, self.patch_size)
+        if (
+            comfy.model_management.is_amd()
+            and x.is_cuda
+            and x.dtype in (torch.float16, torch.bfloat16)
+        ):
+            # This Conv3d is a full-patch projection, equivalent to Linear.
+            # Avoid the ROCm/MIOpen reduced-precision Conv3d kernel that can segfault.
+            return F.linear(x.flatten(1), self.proj.weight.flatten(1), self.proj.bias)
         return self.proj(x).view(-1, self.embed_dim)
 
 