From 767ee30f217e72797df6b018417234bf8b3f7b69 Mon Sep 17 00:00:00 2001
From: rattus <46076784+rattus128@users.noreply.github.com>
Date: Fri, 19 Dec 2025 21:22:17 -0800
Subject: [PATCH] ZImageFunControlNet: Fix mask concatenation in --gpu-only
 (#11421)

This operation trades in latents, which in --gpu-only mode may be out of the
GPU. The two VAE results will follow the --gpu-only defined behaviour, so
follow the inpaint image device when calculating the mask in this path.
---
 comfy_extras/nodes_model_patch.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/comfy_extras/nodes_model_patch.py b/comfy_extras/nodes_model_patch.py
index 2a0cfcf18..1355b3c93 100644
--- a/comfy_extras/nodes_model_patch.py
+++ b/comfy_extras/nodes_model_patch.py
@@ -348,7 +348,7 @@ class ZImageControlPatch:
             if self.mask is None:
                 mask_ = torch.zeros_like(inpaint_image_latent)[:, :1]
             else:
-                mask_ = comfy.utils.common_upscale(self.mask.view(self.mask.shape[0], -1, self.mask.shape[-2], self.mask.shape[-1]).mean(dim=1, keepdim=True), inpaint_image_latent.shape[-1], inpaint_image_latent.shape[-2], "nearest", "center")
+                mask_ = comfy.utils.common_upscale(self.mask.view(self.mask.shape[0], -1, self.mask.shape[-2], self.mask.shape[-1]).mean(dim=1, keepdim=True).to(device=inpaint_image_latent.device), inpaint_image_latent.shape[-1], inpaint_image_latent.shape[-2], "nearest", "center")
 
             if latent_image is None:
                 latent_image = comfy.latent_formats.Flux().process_in(self.vae.encode(torch.ones_like(inpaint_image) * 0.5))