Reduce tiled decode peak memory (#13050)

2026-05-25 16:37:23 +08:00 · 2026-03-19 19:29:34 +02:00 · 2026-03-19 19:29:34 +02:00 · fd0261d2bc
commit fd0261d2bc
parent ab14541ef7
1 changed files with 4 additions and 4 deletions
--- a/comfy/utils.py
+++ b/comfy/utils.py
@ -1135,8 +1135,8 @@ def tiled_scale_multidim(samples, function, tile=(64, 64), overlap=8, upscale_am
                pbar.update(1)
            continue
-        out = torch.zeros([s.shape[0], out_channels] + mult_list_upscale(s.shape[2:]), device=output_device)
+        out = output[b:b+1].zero_()
-        out_div = torch.zeros([s.shape[0], out_channels] + mult_list_upscale(s.shape[2:]), device=output_device)
+        out_div = torch.zeros([s.shape[0], 1] + mult_list_upscale(s.shape[2:]), device=output_device)
        positions = [range(0, s.shape[d+2] - overlap[d], tile[d] - overlap[d]) if s.shape[d+2] > tile[d] else [0] for d in range(dims)]
@ -1151,7 +1151,7 @@ def tiled_scale_multidim(samples, function, tile=(64, 64), overlap=8, upscale_am
                upscaled.append(round(get_pos(d, pos)))
            ps = function(s_in).to(output_device)
-            mask = torch.ones_like(ps)
+            mask = torch.ones([1, 1] + list(ps.shape[2:]), device=output_device)
            for d in range(2, dims + 2):
                feather = round(get_scale(d - 2, overlap[d - 2]))
@ -1174,7 +1174,7 @@ def tiled_scale_multidim(samples, function, tile=(64, 64), overlap=8, upscale_am
            if pbar is not None:
                pbar.update(1)
-        output[b:b+1] = out/out_div
+        out.div_(out_div)
    return output
 def tiled_scale(samples, function, tile_x=64, tile_y=64, overlap = 8, upscale_amount = 4, out_channels = 3, output_device="cpu", pbar = None):