# Patch summary (commentary only; this text sits before the first "diff --git" header).
#
# comfy/cli_args.py
#   Corrects the help string of --disable-cuda-malloc, which read
#   "Enable cudaMallocAsync." and now reads "Disable cudaMallocAsync.".
#   Only the help text changes; the flag name and action are untouched.
#
# comfy/k_diffusion/external.py
#   DiscreteSchedule.predict_eps_discrete_timestep previously always passed
#   t.round() to self.t_to_sigma. After this patch t.round() is applied only
#   when t.dtype is neither torch.int64 nor torch.int32; otherwise t is passed
#   through unchanged.
#   NOTE(review): presumably rounding is skipped because it is unnecessary (or
#   unsupported) for integer timestep tensors -- confirm against the commit
#   message; the patch itself does not state the reason.
#
# NOTE(review): this patch was stored with its line breaks collapsed. The blank
# context lines and the Python indentation below were reconstructed from the
# hunk line counts (-42,7 +42,7 and -91,7 +91,9) -- verify against the
# repository before applying.
diff --git a/comfy/cli_args.py b/comfy/cli_args.py
index 29e5fb159..dc1597d88 100644
--- a/comfy/cli_args.py
+++ b/comfy/cli_args.py
@@ -42,7 +42,7 @@ parser.add_argument("--auto-launch", action="store_true", help="Automatically la
 parser.add_argument("--cuda-device", type=int, default=None, metavar="DEVICE_ID", help="Set the id of the cuda device this instance will use.")
 cm_group = parser.add_mutually_exclusive_group()
 cm_group.add_argument("--cuda-malloc", action="store_true", help="Enable cudaMallocAsync (enabled by default for torch 2.0 and up).")
-cm_group.add_argument("--disable-cuda-malloc", action="store_true", help="Enable cudaMallocAsync.")
+cm_group.add_argument("--disable-cuda-malloc", action="store_true", help="Disable cudaMallocAsync.")
 
 parser.add_argument("--dont-upcast-attention", action="store_true", help="Disable upcasting of attention. Can boost speed but increase the chances of black images.")
 
diff --git a/comfy/k_diffusion/external.py b/comfy/k_diffusion/external.py
index 7335d56c4..c1a137d9c 100644
--- a/comfy/k_diffusion/external.py
+++ b/comfy/k_diffusion/external.py
@@ -91,7 +91,9 @@ class DiscreteSchedule(nn.Module):
         return log_sigma.exp()
 
     def predict_eps_discrete_timestep(self, input, t, **kwargs):
-        sigma = self.t_to_sigma(t.round())
+        if t.dtype != torch.int64 and t.dtype != torch.int32:
+            t = t.round()
+        sigma = self.t_to_sigma(t)
         input = input * ((utils.append_dims(sigma, input.ndim) ** 2 + 1.0) ** 0.5)
         return (input - self(input, sigma, **kwargs)) / utils.append_dims(sigma, input.ndim)
 