Merge a6ae08b7ee into 6592bffc60

seeds_2: add phi_2 variant and sampler node (#11309)

* Add phi_2 solver type to seeds_2
* Add sampler node of seeds_2
2025-12-15 01:07:03 +08:00 · 2025-12-14 13:06:52 +08:00 · 2025-12-14 00:03:29 -05:00 · 2025-12-13 11:22:25 +08:00 · 2025-12-09 23:06:53 +08:00 · 2025-12-09 22:52:32 +08:00
6 changed files with 800 additions and 235 deletions
--- a/comfy/k_diffusion/sampling.py
+++ b/comfy/k_diffusion/sampling.py
@ -1557,10 +1557,13 @@ def sample_er_sde(model, x, sigmas, extra_args=None, callback=None, disable=None


@torch.no_grad()
-def sample_seeds_2(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, r=0.5):
+def sample_seeds_2(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, r=0.5, solver_type="phi_1"):
    """SEEDS-2 - Stochastic Explicit Exponential Derivative-free Solvers (VP Data Prediction) stage 2.
    arXiv: https://arxiv.org/abs/2305.14267 (NeurIPS 2023)
    """
+    if solver_type not in {"phi_1", "phi_2"}:
+        raise ValueError("solver_type must be 'phi_1' or 'phi_2'")
+
    extra_args = {} if extra_args is None else extra_args
    seed = extra_args.get("seed", None)
    noise_sampler = default_noise_sampler(x, seed=seed) if noise_sampler is None else noise_sampler
@ -1600,8 +1603,14 @@ def sample_seeds_2(model, x, sigmas, extra_args=None, callback=None, disable=Non
        denoised_2 = model(x_2, sigma_s_1 * s_in, **extra_args)

        # Step 2
-        denoised_d = torch.lerp(denoised, denoised_2, fac)
-        x = sigmas[i + 1] / sigmas[i] * (-h * eta).exp() * x - alpha_t * ei_h_phi_1(-h_eta) * denoised_d
+        if solver_type == "phi_1":
+            denoised_d = torch.lerp(denoised, denoised_2, fac)
+            x = sigmas[i + 1] / sigmas[i] * (-h * eta).exp() * x - alpha_t * ei_h_phi_1(-h_eta) * denoised_d
+        elif solver_type == "phi_2":
+            b2 = ei_h_phi_2(-h_eta) / r
+            b1 = ei_h_phi_1(-h_eta) - b2
+            x = sigmas[i + 1] / sigmas[i] * (-h * eta).exp() * x - alpha_t * (b1 * denoised + b2 * denoised_2)
+
        if inject_noise:
            segment_factor = (r - 1) * h * eta
            sde_noise = sde_noise * segment_factor.exp()
--- a/comfy/sampler_helpers.py
+++ b/comfy/sampler_helpers.py
@ -122,20 +122,21 @@ def estimate_memory(model, noise_shape, conds):
    minimum_memory_required = model.model.memory_required([noise_shape[0]] + list(noise_shape[1:]), cond_shapes=cond_shapes_min)
    return memory_required, minimum_memory_required

-def prepare_sampling(model: ModelPatcher, noise_shape, conds, model_options=None):
+def prepare_sampling(model: ModelPatcher, noise_shape, conds, model_options=None, skip_load_model=False):
+    """Run `_prepare_sampling` through the PREPARE_SAMPLING wrapper chain.
+
+    The executor is built from the PREPARE_SAMPLING wrappers looked up in
+    `model_options`; `skip_load_model` is forwarded unchanged as a keyword.
+    Returns whatever the executor returns for the wrapped call.
+    """
    executor = comfy.patcher_extension.WrapperExecutor.new_executor(
        _prepare_sampling,
        comfy.patcher_extension.get_all_wrappers(comfy.patcher_extension.WrappersMP.PREPARE_SAMPLING, model_options, is_model_options=True)
    )
-    return executor.execute(model, noise_shape, conds, model_options=model_options)
+    return executor.execute(model, noise_shape, conds, model_options=model_options, skip_load_model=skip_load_model)

-def _prepare_sampling(model: ModelPatcher, noise_shape, conds, model_options=None):
+def _prepare_sampling(model: ModelPatcher, noise_shape, conds, model_options=None, skip_load_model=False):
+    """Collect the additional models needed for sampling and load them.
+
+    Additional models come from `conds`, from `model_options`, and from the
+    patcher's nested models. When `skip_load_model` is true, `model` itself
+    is left out of the list handed to `load_models_gpu`; the additional
+    models are still passed.
+
+    Returns:
+        tuple: (model.model, conds, additional models list)
+    """
    real_model: BaseModel = None
    models, inference_memory = get_additional_models(conds, model.model_dtype())
    models += get_additional_models_from_model_options(model_options)
    models += model.get_nested_additional_models()  # TODO: does this require inference_memory update?
    memory_required, minimum_memory_required = estimate_memory(model, noise_shape, conds)
-    comfy.model_management.load_models_gpu([model] + models, memory_required=memory_required + inference_memory, minimum_memory_required=minimum_memory_required + inference_memory)
+    # The memory estimate is computed for `model` even when it is skipped here.
+    models_list = [model] if not skip_load_model else []
+    comfy.model_management.load_models_gpu(models_list + models, memory_required=memory_required + inference_memory, minimum_memory_required=minimum_memory_required + inference_memory)
    real_model = model.model

    return real_model, conds, models
--- a/comfy_extras/nodes_custom_sampler.py
+++ b/comfy_extras/nodes_custom_sampler.py
@ -659,6 +659,31 @@ class SamplerSASolver(io.ComfyNode):
    get_sampler = execute


+class SamplerSEEDS2(io.ComfyNode):
+    """Node that builds a "seeds_2" sampler from user-chosen options."""
+
+    @classmethod
+    def define_schema(cls):
+        # Inputs mirror the keyword options forwarded to the sampler in execute().
+        return io.Schema(
+            node_id="SamplerSEEDS2",
+            category="sampling/custom_sampling/samplers",
+            inputs=[
+                io.Combo.Input("solver_type", options=["phi_1", "phi_2"]),
+                io.Float.Input("eta", default=1.0, min=0.0, max=100.0, step=0.01, round=False, tooltip="Stochastic strength"),
+                io.Float.Input("s_noise", default=1.0, min=0.0, max=100.0, step=0.01, round=False, tooltip="SDE noise multiplier"),
+                io.Float.Input("r", default=0.5, min=0.01, max=1.0, step=0.01, round=False, tooltip="Relative step size for the intermediate stage (c2 node)"),
+            ],
+            outputs=[io.Sampler.Output()]
+        )
+
+    @classmethod
+    def execute(cls, solver_type, eta, s_noise, r) -> io.NodeOutput:
+        """Create the "seeds_2" ksampler with the given options and return it as the node output."""
+        sampler_name = "seeds_2"
+        sampler = comfy.samplers.ksampler(
+            sampler_name,
+            {"eta": eta, "s_noise": s_noise, "r": r, "solver_type": solver_type},
+        )
+        return io.NodeOutput(sampler)
+
+
 class Noise_EmptyNoise:
    def __init__(self):
        self.seed = 0
@ -996,6 +1021,7 @@ class CustomSamplersExtension(ComfyExtension):
            SamplerDPMAdaptative,
            SamplerER_SDE,
            SamplerSASolver,
+            SamplerSEEDS2,
            SplitSigmas,
            SplitSigmasDenoise,
            FlipSigmas,
--- a/comfy_extras/nodes_dataset.py
+++ b/comfy_extras/nodes_dataset.py
@ -727,6 +727,29 @@ class RandomCropImagesNode(ImageProcessingNode):
        return pil_to_tensor(img)


+class FlipImagesNode(ImageProcessingNode):
+    # Image-processing node that mirrors an image via PIL's transpose.
+    node_id = "FlipImages"
+    display_name = "Flip Images"
+    description = "Flip all images horizontally or vertically."
+    extra_inputs = [
+        io.Combo.Input(
+            "direction",
+            options=["horizontal", "vertical"],
+            default="horizontal",
+            tooltip="Flip direction.",
+        ),
+    ]
+
+    @classmethod
+    def _process(cls, image, direction):
+        """Flip one image; any direction other than "horizontal" flips top-to-bottom."""
+        img = tensor_to_pil(image)
+        if direction == "horizontal":
+            img = img.transpose(Image.FLIP_LEFT_RIGHT)
+        else:
+            img = img.transpose(Image.FLIP_TOP_BOTTOM)
+        return pil_to_tensor(img)
+
+
 class NormalizeImagesNode(ImageProcessingNode):
    node_id = "NormalizeImages"
    display_name = "Normalize Images"
@ -1125,6 +1148,99 @@ class MergeTextListsNode(TextProcessingNode):
 # ========== Training Dataset Nodes ==========


+class ResolutionBucket(io.ComfyNode):
+    """Bucket latents and conditions by resolution for efficient batch training."""
+
+    @classmethod
+    def define_schema(cls):
+        return io.Schema(
+            node_id="ResolutionBucket",
+            display_name="Resolution Bucket",
+            category="dataset",
+            is_experimental=True,
+            is_input_list=True,
+            inputs=[
+                io.Latent.Input(
+                    "latents",
+                    tooltip="List of latent dicts to bucket by resolution.",
+                ),
+                io.Conditioning.Input(
+                    "conditioning",
+                    tooltip="List of conditioning lists (must match latents length).",
+                ),
+            ],
+            outputs=[
+                io.Latent.Output(
+                    display_name="latents",
+                    is_output_list=True,
+                    tooltip="List of batched latent dicts, one per resolution bucket.",
+                ),
+                io.Conditioning.Output(
+                    display_name="conditioning",
+                    is_output_list=True,
+                    tooltip="List of condition lists, one per resolution bucket.",
+                ),
+            ],
+        )
+
+    @classmethod
+    def execute(cls, latents, conditioning):
+        """Group individual samples by (H, W) and stack each group into one batch.
+
+        Raises:
+            ValueError: If `latents` and `conditioning` have different lengths.
+        """
+        # latents: list[{"samples": tensor}] where tensor is (B, C, H, W), typically B=1
+        # conditioning: list[list[cond]]
+
+        # Validate lengths match
+        if len(latents) != len(conditioning):
+            raise ValueError(
+                f"Number of latents ({len(latents)}) does not match number of conditions ({len(conditioning)})."
+            )
+
+        # Flatten latents and conditions to individual samples
+        flat_latents = []  # list of (C, H, W) tensors
+        flat_conditions = []  # list of condition lists
+
+        for latent_dict, cond in zip(latents, conditioning):
+            samples = latent_dict["samples"]  # (B, C, H, W)
+            batch_size = samples.shape[0]
+
+            # cond is a list of conditions with length == batch_size
+            # NOTE(review): a shorter cond list would raise IndexError here - confirm callers guarantee the length
+            for i in range(batch_size):
+                flat_latents.append(samples[i])  # (C, H, W)
+                flat_conditions.append(cond[i])  # single condition
+
+        # Group by resolution (H, W)
+        buckets = {}  # (H, W) -> {"latents": list, "conditions": list}
+
+        for latent, cond in zip(flat_latents, flat_conditions):
+            # latent is one flattened sample of shape (..., H, W); only the last two dims form the key
+            h, w = latent.shape[-2], latent.shape[-1]
+            key = (h, w)
+
+            if key not in buckets:
+                buckets[key] = {"latents": [], "conditions": []}
+
+            buckets[key]["latents"].append(latent)
+            buckets[key]["conditions"].append(cond)
+
+        # Convert buckets to output format
+        output_latents = []  # list[{"samples": tensor}] where tensor is (Bi, ..., H, W)
+        output_conditions = []  # list[list[cond]] where each inner list has Bi conditions
+
+        for (h, w), bucket_data in buckets.items():
+            # Stack latents into batch: list of (..., H, W) -> (Bi, ..., H, W)
+            # NOTE(review): samples sharing (H, W) but differing in leading dims would fail to stack
+            stacked_latents = torch.stack(bucket_data["latents"], dim=0)
+            output_latents.append({"samples": stacked_latents})
+
+            # Conditions stay as list of condition lists
+            output_conditions.append(bucket_data["conditions"])
+
+            logging.info(
+                f"Resolution bucket ({h}x{w}): {len(bucket_data['latents'])} samples"
+            )
+
+        logging.info(f"Created {len(buckets)} resolution buckets from {len(flat_latents)} samples")
+        return io.NodeOutput(output_latents, output_conditions)
+
+
 class MakeTrainingDataset(io.ComfyNode):
    """Encode images with VAE and texts with CLIP to create a training dataset."""

@ -1373,7 +1489,7 @@ class LoadTrainingDataset(io.ComfyNode):
            shard_path = os.path.join(dataset_dir, shard_file)

            with open(shard_path, "rb") as f:
-                shard_data = torch.load(f, weights_only=True)
+                shard_data = torch.load(f, weights_only=True)  # shard files come from disk: keep unpickling restricted

            all_latents.extend(shard_data["latents"])
            all_conditioning.extend(shard_data["conditioning"])
@ -1403,6 +1519,7 @@ class DatasetExtension(ComfyExtension):
            ResizeImagesByLongerEdgeNode,
            CenterCropImagesNode,
            RandomCropImagesNode,
+            FlipImagesNode,
            NormalizeImagesNode,
            AdjustBrightnessNode,
            AdjustContrastNode,
@ -1425,6 +1542,7 @@ class DatasetExtension(ComfyExtension):
            MakeTrainingDataset,
            SaveTrainingDataset,
            LoadTrainingDataset,
+            ResolutionBucket,
        ]


--- a/comfy_extras/nodes_post_processing.py
+++ b/comfy_extras/nodes_post_processing.py
@ -221,6 +221,7 @@ class ImageScaleToTotalPixels(io.ComfyNode):
                io.Image.Input("image"),
                io.Combo.Input("upscale_method", options=cls.upscale_methods),
                io.Float.Input("megapixels", default=1.0, min=0.01, max=16.0, step=0.01),
+                io.Int.Input("resolution_steps", default=1, min=1, max=256),
            ],
            outputs=[
                io.Image.Output(),
@ -228,15 +229,15 @@ class ImageScaleToTotalPixels(io.ComfyNode):
        )

    @classmethod
-    def execute(cls, image, upscale_method, megapixels) -> io.NodeOutput:
+    def execute(cls, image, upscale_method, megapixels, resolution_steps=1) -> io.NodeOutput:
+        """Rescale `image` so its pixel count is roughly `megapixels` * 1024 * 1024.
+
+        Width and height are each rounded to a multiple of `resolution_steps`
+        and clamped to at least one step, so a small target combined with a
+        large step cannot round a dimension down to 0. `resolution_steps`
+        defaults to 1 so callers using the three-argument form keep working.
+        """
        samples = image.movedim(-1,1)
-        total = int(megapixels * 1024 * 1024)
+        total = megapixels * 1024 * 1024

        scale_by = math.sqrt(total / (samples.shape[3] * samples.shape[2]))
-        width = round(samples.shape[3] * scale_by)
-        height = round(samples.shape[2] * scale_by)
+        width = max(round(samples.shape[3] * scale_by / resolution_steps), 1) * resolution_steps
+        height = max(round(samples.shape[2] * scale_by / resolution_steps), 1) * resolution_steps

-        s = comfy.utils.common_upscale(samples, width, height, upscale_method, "disabled")
+        s = comfy.utils.common_upscale(samples, int(width), int(height), upscale_method, "disabled")
        s = s.movedim(1,-1)
        return io.NodeOutput(s)

--- a/comfy_extras/nodes_train.py
+++ b/comfy_extras/nodes_train.py
@ -10,6 +10,7 @@ from PIL import Image, ImageDraw, ImageFont
 from typing_extensions import override

 import comfy.samplers
+import comfy.sampler_helpers
 import comfy.sd
 import comfy.utils
 import comfy.model_management
@ -21,6 +22,68 @@ from comfy_api.latest import ComfyExtension, io, ui
 from comfy.utils import ProgressBar


+class TrainGuider(comfy_extras.nodes_custom_sampler.Guider_Basic):
+    """
+    Guider_Basic subclass with training-specific sampling setup.
+
+    outer_sample is overridden so that prepare_sampling is called with
+    skip_load_model=True.
+    """
+    def outer_sample(
+        self,
+        noise,
+        latent_image,
+        sampler,
+        sigmas,
+        denoise_mask=None,
+        callback=None,
+        disable_pbar=False,
+        seed=None,
+        latent_shapes=None,
+    ):
+        """Prepare sampling, move the inputs to the load device and run inner_sample.
+
+        Returns the value produced by inner_sample.
+        """
+        self.inner_model, self.conds, self.loaded_models = (
+            comfy.sampler_helpers.prepare_sampling(
+                self.model_patcher,
+                noise.shape,
+                self.conds,
+                self.model_options,
+                skip_load_model=True, # skip load model as we manage it in TrainLoraNode.execute()
+            )
+        )
+        device = self.model_patcher.load_device
+
+        if denoise_mask is not None:
+            denoise_mask = comfy.sampler_helpers.prepare_mask(
+                denoise_mask, noise.shape, device
+            )
+
+        noise = noise.to(device)
+        latent_image = latent_image.to(device)
+        sigmas = sigmas.to(device)
+        comfy.samplers.cast_to_load_options(
+            self.model_options, device=device, dtype=self.model_patcher.model_dtype()
+        )
+
+        try:
+            self.model_patcher.pre_run()
+            output = self.inner_sample(
+                noise,
+                latent_image,
+                device,
+                sampler,
+                sigmas,
+                denoise_mask,
+                callback,
+                disable_pbar,
+                seed,
+                latent_shapes=latent_shapes,
+            )
+        finally:
+            # Patcher cleanup runs even when sampling raises.
+            self.model_patcher.cleanup()
+
+        # Reached only when inner_sample returned normally.
+        comfy.sampler_helpers.cleanup_models(self.conds, self.loaded_models)
+        del self.inner_model
+        del self.loaded_models
+        return output
+
+
 def make_batch_extra_option_dict(d, indicies, full_size=None):
    new_dict = {}
    for k, v in d.items():
@ -65,6 +128,7 @@ class TrainSampler(comfy.samplers.Sampler):
        seed=0,
        training_dtype=torch.bfloat16,
        real_dataset=None,
+        bucket_latents=None,
    ):
        self.loss_fn = loss_fn
        self.optimizer = optimizer
@ -75,6 +139,28 @@ class TrainSampler(comfy.samplers.Sampler):
        self.seed = seed
        self.training_dtype = training_dtype
        self.real_dataset: list[torch.Tensor] | None = real_dataset
+        # Bucket mode data
+        self.bucket_latents: list[torch.Tensor] | None = (
+            bucket_latents  # list of (Bi, C, Hi, Wi)
+        )
+        # Precompute bucket offsets and weights for sampling
+        if bucket_latents is not None:
+            self._init_bucket_data(bucket_latents)
+        else:
+            self.bucket_offsets = None
+            self.bucket_weights = None
+            self.num_images = None
+
+    def _init_bucket_data(self, bucket_latents):
+        """Initialize bucket offsets and weights for sampling.
+
+        Sets `bucket_offsets` (running start index of each bucket, plus the
+        total as the last entry), `num_images` and `bucket_weights`.
+        """
+        self.bucket_offsets = [0]
+        bucket_sizes = []
+        for lat in bucket_latents:
+            bucket_sizes.append(lat.shape[0])
+            self.bucket_offsets.append(self.bucket_offsets[-1] + lat.shape[0])
+        # Last offset equals the sum of all bucket sizes.
+        self.num_images = self.bucket_offsets[-1]
+        # Weights for sampling buckets proportional to their size
+        self.bucket_weights = torch.tensor(bucket_sizes, dtype=torch.float32)

    def fwd_bwd(
        self,
@ -115,6 +201,108 @@ class TrainSampler(comfy.samplers.Sampler):
            bwd_loss.backward()
        return loss

+    def _generate_batch_sigmas(self, model_wrap, batch_size, device):
+        """Generate random sigma values for a batch.
+
+        Each of the `batch_size` entries is drawn independently: a value from
+        torch.rand is mapped through model_sampling.percent_to_sigma. The
+        result is returned as a tensor on `device`.
+        """
+        batch_sigmas = [
+            model_wrap.inner_model.model_sampling.percent_to_sigma(
+                torch.rand((1,)).item()
+            )
+            for _ in range(batch_size)
+        ]
+        return torch.tensor(batch_sigmas).to(device)
+
+    def _train_step_bucket_mode(self, model_wrap, cond, extra_args, noisegen, latent_image, pbar):
+        """Execute one training step in bucket mode.
+
+        Picks one bucket at random (weighted by `bucket_weights`), draws up to
+        `batch_size` samples from it without replacement, runs fwd_bwd with
+        bwd=True and reports the loss to the callback and progress bar.
+        """
+        # Sample bucket (weighted by size), then sample batch from bucket
+        bucket_idx = torch.multinomial(self.bucket_weights, 1).item()
+        bucket_latent = self.bucket_latents[bucket_idx]  # (Bi, C, Hi, Wi)
+        bucket_size = bucket_latent.shape[0]
+        bucket_offset = self.bucket_offsets[bucket_idx]
+
+        # Sample indices from this bucket (use all if bucket_size < batch_size)
+        actual_batch_size = min(self.batch_size, bucket_size)
+        relative_indices = torch.randperm(bucket_size)[:actual_batch_size].tolist()
+        # Convert to absolute indices for fwd_bwd (cond is flattened, use absolute index)
+        absolute_indices = [bucket_offset + idx for idx in relative_indices]
+
+        # .to(latent_image) matches latent_image's dtype and device
+        batch_latent = bucket_latent[relative_indices].to(latent_image)  # (actual_batch_size, C, H, W)
+        batch_noise = noisegen.generate_noise({"samples": batch_latent}).to(
+            batch_latent.device
+        )
+        batch_sigmas = self._generate_batch_sigmas(model_wrap, actual_batch_size, batch_latent.device)
+
+        loss = self.fwd_bwd(
+            model_wrap,
+            batch_sigmas,
+            batch_noise,
+            batch_latent,
+            cond,  # Use flattened cond with absolute indices
+            absolute_indices,
+            extra_args,
+            self.num_images,
+            bwd=True,
+        )
+        if self.loss_callback:
+            self.loss_callback(loss.item())
+        pbar.set_postfix({"loss": f"{loss.item():.4f}", "bucket": bucket_idx})
+
+    def _train_step_standard_mode(self, model_wrap, cond, extra_args, noisegen, latent_image, dataset_size, pbar):
+        """Execute one training step in standard (non-bucket, non-multi-res) mode.
+
+        Draws up to `batch_size` random indices into `latent_image`, stacks
+        those latents into one batch and runs fwd_bwd with bwd=True.
+        """
+        indicies = torch.randperm(dataset_size)[: self.batch_size].tolist()
+        batch_latent = torch.stack([latent_image[i] for i in indicies])
+        batch_noise = noisegen.generate_noise({"samples": batch_latent}).to(
+            batch_latent.device
+        )
+        # min(...) matches the number of indices kept by the slice above
+        batch_sigmas = self._generate_batch_sigmas(model_wrap, min(self.batch_size, dataset_size), batch_latent.device)
+
+        loss = self.fwd_bwd(
+            model_wrap,
+            batch_sigmas,
+            batch_noise,
+            batch_latent,
+            cond,
+            indicies,
+            extra_args,
+            dataset_size,
+            bwd=True,
+        )
+        if self.loss_callback:
+            self.loss_callback(loss.item())
+        pbar.set_postfix({"loss": f"{loss.item():.4f}"})
+
+    def _train_step_multires_mode(self, model_wrap, cond, extra_args, noisegen, latent_image, dataset_size, pbar):
+        """Execute one training step in multi-resolution mode (real_dataset is set).
+
+        Each selected sample goes through fwd_bwd on its own with bwd=False;
+        the losses are summed, divided by grad_acc and by the number of
+        samples, and backward() is called once on the result.
+        """
+        indicies = torch.randperm(dataset_size)[: self.batch_size].tolist()
+        total_loss = 0
+        for index in indicies:
+            # .to(latent_image) matches latent_image's dtype and device
+            single_latent = self.real_dataset[index].to(latent_image)
+            batch_noise = noisegen.generate_noise(
+                {"samples": single_latent}
+            ).to(single_latent.device)
+            batch_sigmas = (
+                model_wrap.inner_model.model_sampling.percent_to_sigma(
+                    torch.rand((1,)).item()
+                )
+            )
+            batch_sigmas = torch.tensor([batch_sigmas]).to(single_latent.device)
+            loss = self.fwd_bwd(
+                model_wrap,
+                batch_sigmas,
+                batch_noise,
+                single_latent,
+                cond,
+                [index],
+                extra_args,
+                dataset_size,
+                bwd=False,
+            )
+            total_loss += loss
+        total_loss = total_loss / self.grad_acc / len(indicies)
+        total_loss.backward()
+        if self.loss_callback:
+            self.loss_callback(total_loss.item())
+        pbar.set_postfix({"loss": f"{total_loss.item():.4f}"})
+
    def sample(
        self,
        model_wrap,
@ -142,70 +330,18 @@ class TrainSampler(comfy.samplers.Sampler):
            noisegen = comfy_extras.nodes_custom_sampler.Noise_RandomNoise(
                self.seed + i * 1000
            )
-            indicies = torch.randperm(dataset_size)[: self.batch_size].tolist()

-            if self.real_dataset is None:
-                batch_latent = torch.stack([latent_image[i] for i in indicies])
-                batch_noise = noisegen.generate_noise({"samples": batch_latent}).to(
-                    batch_latent.device
-                )
-                batch_sigmas = [
-                    model_wrap.inner_model.model_sampling.percent_to_sigma(
-                        torch.rand((1,)).item()
-                    )
-                    for _ in range(min(self.batch_size, dataset_size))
-                ]
-                batch_sigmas = torch.tensor(batch_sigmas).to(batch_latent.device)
-
-                loss = self.fwd_bwd(
-                    model_wrap,
-                    batch_sigmas,
-                    batch_noise,
-                    batch_latent,
-                    cond,
-                    indicies,
-                    extra_args,
-                    dataset_size,
-                    bwd=True,
-                )
-                if self.loss_callback:
-                    self.loss_callback(loss.item())
-                pbar.set_postfix({"loss": f"{loss.item():.4f}"})
+            if self.bucket_latents is not None:
+                self._train_step_bucket_mode(model_wrap, cond, extra_args, noisegen, latent_image, pbar)
+            elif self.real_dataset is None:
+                self._train_step_standard_mode(model_wrap, cond, extra_args, noisegen, latent_image, dataset_size, pbar)
            else:
-                total_loss = 0
-                for index in indicies:
-                    single_latent = self.real_dataset[index].to(latent_image)
-                    batch_noise = noisegen.generate_noise(
-                        {"samples": single_latent}
-                    ).to(single_latent.device)
-                    batch_sigmas = (
-                        model_wrap.inner_model.model_sampling.percent_to_sigma(
-                            torch.rand((1,)).item()
-                        )
-                    )
-                    batch_sigmas = torch.tensor([batch_sigmas]).to(single_latent.device)
-                    loss = self.fwd_bwd(
-                        model_wrap,
-                        batch_sigmas,
-                        batch_noise,
-                        single_latent,
-                        cond,
-                        [index],
-                        extra_args,
-                        dataset_size,
-                        bwd=False,
-                    )
-                    total_loss += loss
-                total_loss = total_loss / self.grad_acc / len(indicies)
-                total_loss.backward()
-                if self.loss_callback:
-                    self.loss_callback(total_loss.item())
-                pbar.set_postfix({"loss": f"{total_loss.item():.4f}"})
+                self._train_step_multires_mode(model_wrap, cond, extra_args, noisegen, latent_image, dataset_size, pbar)

            if (i + 1) % self.grad_acc == 0:
                self.optimizer.step()
                self.optimizer.zero_grad()
-                ui_pbar.update(1)
+            ui_pbar.update(1)
        torch.cuda.empty_cache()
        return torch.zeros_like(latent_image)

@ -283,6 +419,364 @@ def unpatch(m):
        del m.org_forward


+def _process_latents_bucket_mode(latents):
+    """Process latents for bucket mode training.
+
+    Args:
+        latents: list[{"samples": tensor}] where each tensor is (Bi, C, Hi, Wi)
+
+    Returns:
+        list of latent tensors
+    """
+    # One tensor per input dict, in input order; the tensors are not copied.
+    bucket_latents = []
+    for latent_dict in latents:
+        bucket_latents.append(latent_dict["samples"])  # (Bi, C, Hi, Wi)
+    return bucket_latents
+
+
+def _process_latents_standard_mode(latents):
+    """Process latents for standard (non-bucket) mode training.
+
+    Args:
+        latents: list of latent dicts or single latent dict
+
+    Returns:
+        Processed latents (tensor or list of tensors)
+    """
+    if len(latents) == 1:
+        return latents[0]["samples"]  # Single latent dict
+
+    # Several dicts: split every batch so each list entry has a leading dim of 1.
+    latent_list = []
+    for latent in latents:
+        latent = latent["samples"]
+        bs = latent.shape[0]
+        if bs != 1:
+            for sub_latent in latent:
+                latent_list.append(sub_latent[None])  # [None] restores the batch dim
+        else:
+            latent_list.append(latent)
+    return latent_list
+
+
+def _process_conditioning(positive):
+    """Process conditioning - either single list or list of lists.
+
+    Args:
+        positive: list of conditioning
+
+    Returns:
+        Flattened conditioning list
+    """
+    if len(positive) == 1:
+        return positive[0]  # Single conditioning list
+
+    # Multiple conditioning lists - flatten
+    # Only one level is flattened; non-list entries are kept as single items.
+    flat_positive = []
+    for cond in positive:
+        if isinstance(cond, list):
+            flat_positive.extend(cond)
+        else:
+            flat_positive.append(cond)
+    return flat_positive
+
+
+def _prepare_latents_and_count(latents, dtype, bucket_mode):
+    """Convert latents to dtype and compute image counts.
+
+    Args:
+        latents: Latents (tensor, list of tensors, or bucket list)
+        dtype: Target dtype
+        bucket_mode: Whether bucket mode is enabled
+
+    Returns:
+        tuple: (processed_latents, num_images, multi_res)
+    """
+    if bucket_mode:
+        # In bucket mode, latents is list of tensors (Bi, C, Hi, Wi)
+        latents = [t.to(dtype) for t in latents]
+        num_buckets = len(latents)
+        num_images = sum(t.shape[0] for t in latents)
+        multi_res = False  # Not using multi_res path in bucket mode
+
+        logging.info(f"Bucket mode: {num_buckets} buckets, {num_images} total samples")
+        for i, lat in enumerate(latents):
+            logging.info(f"  Bucket {i}: shape {lat.shape}")
+        return latents, num_images, multi_res
+
+    # Non-bucket mode
+    if isinstance(latents, list):
+        all_shapes = set()
+        latents = [t.to(dtype) for t in latents]
+        for latent in latents:
+            all_shapes.add(latent.shape)
+        logging.info(f"Latent shapes: {all_shapes}")
+        if len(all_shapes) > 1:
+            # Mixed shapes cannot be concatenated; keep the list as is.
+            multi_res = True
+        else:
+            multi_res = False
+            latents = torch.cat(latents, dim=0)
+        # List length when multi_res, otherwise dim 0 of the concatenated tensor.
+        num_images = len(latents)
+    elif isinstance(latents, torch.Tensor):
+        latents = latents.to(dtype)
+        num_images = latents.shape[0]
+        multi_res = False
+    else:
+        # Unsupported type: logged, not raised; latents are returned unchanged with a count of 0.
+        logging.error(f"Invalid latents type: {type(latents)}")
+        num_images = 0
+        multi_res = False
+
+    return latents, num_images, multi_res
+
+
+def _validate_and_expand_conditioning(positive, num_images, bucket_mode):
+    """Validate conditioning count matches image count, expand if needed.
+
+    Args:
+        positive: Conditioning list
+        num_images: Number of images
+        bucket_mode: Whether bucket mode is enabled
+
+    Returns:
+        Validated/expanded conditioning list
+
+    Raises:
+        ValueError: If conditioning count doesn't match image count
+    """
+    if bucket_mode:
+        return positive  # Skip validation in bucket mode
+
+    logging.info(f"Total Images: {num_images}, Total Captions: {len(positive)}")
+    if len(positive) == 1 and num_images > 1:
+        # A single condition is repeated once per image (same object, not copies).
+        return positive * num_images
+    elif len(positive) != num_images:
+        raise ValueError(
+            f"Number of positive conditions ({len(positive)}) does not match number of images ({num_images})."
+        )
+    return positive
+
+
+def _load_existing_lora(existing_lora):
+    """Load existing LoRA weights if provided.
+
+    Args:
+        existing_lora: LoRA filename or "[None]"
+
+    Returns:
+        tuple: (existing_weights dict, existing_steps int)
+
+    The step count is parsed from the filename; a filename that does not
+    carry one yields 0 steps instead of raising.
+    """
+    if existing_lora == "[None]":
+        return {}, 0
+
+    lora_path = folder_paths.get_full_path_or_raise("loras", existing_lora)
+    # Extract steps from filename like "trained_lora_10_steps_20250225_203716"
+    try:
+        existing_steps = int(existing_lora.split("_steps_")[0].split("_")[-1])
+    except ValueError:
+        # Any LoRA file can be selected, not only ones named by the trainer.
+        logging.warning(
+            f"Could not parse a step count from LoRA filename '{existing_lora}', assuming 0 steps."
+        )
+        existing_steps = 0
+    existing_weights = {}
+    if lora_path:
+        existing_weights = comfy.utils.load_torch_file(lora_path)
+    return existing_weights, existing_steps
+
+
+def _create_weight_adapter(
+    module, module_name, existing_weights, algorithm, lora_dtype, rank
+):
+    """Create a weight adapter for a module with weight.
+
+    Args:
+        module: The module to create adapter for
+        module_name: Name of the module
+        existing_weights: Dict of existing LoRA weights
+        algorithm: Algorithm name for new adapters
+        lora_dtype: dtype for LoRA weights
+        rank: Rank for new LoRA adapters
+
+    Returns:
+        tuple: (train_adapter, lora_params dict)
+    """
+    key = f"{module_name}.weight"
+    shape = module.weight.shape
+    lora_params = {}
+
+    if len(shape) >= 2:
+        # alpha / dora_scale are only used when loading an existing adapter.
+        alpha = float(existing_weights.get(f"{key}.alpha", 1.0))
+        dora_scale = existing_weights.get(f"{key}.dora_scale", None)
+
+        # Try to load existing adapter
+        # The first adapter class whose load() returns a non-None result wins.
+        existing_adapter = None
+        for adapter_cls in adapters:
+            existing_adapter = adapter_cls.load(
+                module_name, existing_weights, alpha, dora_scale
+            )
+            if existing_adapter is not None:
+                break
+
+        if existing_adapter is None:
+            # Nothing loadable: fall back to the class selected by `algorithm`.
+            adapter_cls = adapter_maps[algorithm]
+
+        if existing_adapter is not None:
+            train_adapter = existing_adapter.to_train().to(lora_dtype)
+        else:
+            # Use LoRA with alpha=1.0 by default
+            train_adapter = adapter_cls.create_train(
+                module.weight, rank=rank, alpha=1.0
+            ).to(lora_dtype)
+
+        for name, parameter in train_adapter.named_parameters():
+            lora_params[f"{module_name}.{name}"] = parameter
+
+        return train_adapter.train().requires_grad_(True), lora_params
+    else:
+        # 1D weight - use BiasDiff
+        # The diff starts at zero and is registered under "<module_name>.diff".
+        diff = torch.nn.Parameter(
+            torch.zeros(module.weight.shape, dtype=lora_dtype, requires_grad=True)
+        )
+        diff_module = BiasDiff(diff).train().requires_grad_(True)
+        lora_params[f"{module_name}.diff"] = diff
+        return diff_module, lora_params
+
+
+def _create_bias_adapter(module, module_name, lora_dtype):
+    """Create a bias adapter for a module with bias.
+
+    Args:
+        module: The module with bias
+        module_name: Name of the module
+        lora_dtype: dtype for LoRA weights
+
+    Returns:
+        tuple: (bias_module, lora_params dict)
+    """
+    bias = torch.nn.Parameter(
+        torch.zeros(module.bias.shape, dtype=lora_dtype, requires_grad=True)
+    )
+    bias_module = BiasDiff(bias).train().requires_grad_(True)
+    lora_params = {f"{module_name}.diff_b": bias}
+    return bias_module, lora_params
+
+
+def _setup_lora_adapters(mp, existing_weights, algorithm, lora_dtype, rank):
+    """Setup all LoRA adapters on the model.
+
+    Args:
+        mp: Model patcher
+        existing_weights: Dict of existing LoRA weights
+        algorithm: Algorithm name for new adapters
+        lora_dtype: dtype for LoRA weights
+        rank: Rank for new LoRA adapters
+
+    Returns:
+        tuple: (lora_sd dict, all_weight_adapters list)
+    """
+    lora_sd = {}
+    all_weight_adapters = []
+
+    for n, m in mp.model.named_modules():
+        if hasattr(m, "weight_function"):
+            if m.weight is not None:
+                adapter, params = _create_weight_adapter(
+                    m, n, existing_weights, algorithm, lora_dtype, rank
+                )
+                lora_sd.update(params)
+                key = f"{n}.weight"
+                mp.add_weight_wrapper(key, adapter)
+                all_weight_adapters.append(adapter)
+
+            if hasattr(m, "bias") and m.bias is not None:
+                bias_adapter, bias_params = _create_bias_adapter(m, n, lora_dtype)
+                lora_sd.update(bias_params)
+                key = f"{n}.bias"
+                mp.add_weight_wrapper(key, bias_adapter)
+                all_weight_adapters.append(bias_adapter)
+
+    return lora_sd, all_weight_adapters
+
+
+def _create_optimizer(optimizer_name, parameters, learning_rate):
+    """Create optimizer based on name.
+
+    Args:
+        optimizer_name: Name of optimizer ("Adam", "AdamW", "SGD", "RMSprop")
+        parameters: Parameters to optimize
+        learning_rate: Learning rate
+
+    Returns:
+        Optimizer instance
+    """
+    if optimizer_name == "Adam":
+        return torch.optim.Adam(parameters, lr=learning_rate)
+    elif optimizer_name == "AdamW":
+        return torch.optim.AdamW(parameters, lr=learning_rate)
+    elif optimizer_name == "SGD":
+        return torch.optim.SGD(parameters, lr=learning_rate)
+    elif optimizer_name == "RMSprop":
+        return torch.optim.RMSprop(parameters, lr=learning_rate)
+
+
+def _create_loss_function(loss_function_name):
+    """Create loss function based on name.
+
+    Args:
+        loss_function_name: Name of loss function ("MSE", "L1", "Huber", "SmoothL1")
+
+    Returns:
+        Loss function instance
+    """
+    if loss_function_name == "MSE":
+        return torch.nn.MSELoss()
+    elif loss_function_name == "L1":
+        return torch.nn.L1Loss()
+    elif loss_function_name == "Huber":
+        return torch.nn.HuberLoss()
+    elif loss_function_name == "SmoothL1":
+        return torch.nn.SmoothL1Loss()
+
+
+def _run_training_loop(
+    guider, train_sampler, latents, num_images, seed, bucket_mode, multi_res
+):
+    """Execute the training loop.
+
+    Args:
+        guider: The guider object
+        train_sampler: The training sampler
+        latents: Latent tensors
+        num_images: Number of images
+        seed: Random seed
+        bucket_mode: Whether bucket mode is enabled
+        multi_res: Whether multi-resolution mode is enabled
+    """
+    sigmas = torch.tensor(range(num_images))
+    noise = comfy_extras.nodes_custom_sampler.Noise_RandomNoise(seed)
+
+    if bucket_mode:
+        # Use first bucket's first latent as dummy for guider
+        dummy_latent = latents[0][:1].repeat(num_images, 1, 1, 1)
+        guider.sample(
+            noise.generate_noise({"samples": dummy_latent}),
+            dummy_latent,
+            train_sampler,
+            sigmas,
+            seed=noise.seed,
+        )
+    elif multi_res:
+        # use first latent as dummy latent if multi_res
+        latents = latents[0].repeat(num_images, 1, 1, 1)
+        guider.sample(
+            noise.generate_noise({"samples": latents}),
+            latents,
+            train_sampler,
+            sigmas,
+            seed=noise.seed,
+        )
+    else:
+        guider.sample(
+            noise.generate_noise({"samples": latents}),
+            latents,
+            train_sampler,
+            sigmas,
+            seed=noise.seed,
+        )
+
+
 class TrainLoraNode(io.ComfyNode):
    @classmethod
    def define_schema(cls):
@ -385,6 +879,11 @@ class TrainLoraNode(io.ComfyNode):
                    default="[None]",
                    tooltip="The existing LoRA to append to. Set to None for new LoRA.",
                ),
+                io.Boolean.Input(
+                    "bucket_mode",
+                    default=False,
+                    tooltip="Enable resolution bucket mode. When enabled, expects pre-bucketed latents from ResolutionBucket node.",
+                ),
            ],
            outputs=[
                io.Model.Output(
@ -419,6 +918,7 @@ class TrainLoraNode(io.ComfyNode):
        algorithm,
        gradient_checkpointing,
        existing_lora,
+        bucket_mode,
    ):
        # Extract scalars from lists (due to is_input_list=True)
        model = model[0]
@ -427,215 +927,125 @@ class TrainLoraNode(io.ComfyNode):
        grad_accumulation_steps = grad_accumulation_steps[0]
        learning_rate = learning_rate[0]
        rank = rank[0]
-        optimizer = optimizer[0]
-        loss_function = loss_function[0]
+        optimizer_name = optimizer[0]
+        loss_function_name = loss_function[0]
        seed = seed[0]
        training_dtype = training_dtype[0]
        lora_dtype = lora_dtype[0]
        algorithm = algorithm[0]
        gradient_checkpointing = gradient_checkpointing[0]
        existing_lora = existing_lora[0]
+        bucket_mode = bucket_mode[0]

-        # Handle latents - either single dict or list of dicts
-        if len(latents) == 1:
-            latents = latents[0]["samples"]  # Single latent dict
+        # Process latents based on mode
+        if bucket_mode:
+            latents = _process_latents_bucket_mode(latents)
        else:
-            latent_list = []
-            for latent in latents:
-                latent = latent["samples"]
-                bs = latent.shape[0]
-                if bs != 1:
-                    for sub_latent in latent:
-                        latent_list.append(sub_latent[None])
-                else:
-                    latent_list.append(latent)
-            latents = latent_list
+            latents = _process_latents_standard_mode(latents)

-        # Handle conditioning - either single list or list of lists
-        if len(positive) == 1:
-            positive = positive[0]  # Single conditioning list
-        else:
-            # Multiple conditioning lists - flatten
-            flat_positive = []
-            for cond in positive:
-                if isinstance(cond, list):
-                    flat_positive.extend(cond)
-                else:
-                    flat_positive.append(cond)
-            positive = flat_positive
+        # Process conditioning
+        positive = _process_conditioning(positive)

+        # Setup model and dtype
        mp = model.clone()
        dtype = node_helpers.string_to_torch_dtype(training_dtype)
        lora_dtype = node_helpers.string_to_torch_dtype(lora_dtype)
        mp.set_model_compute_dtype(dtype)

-        # latents here can be list of different size latent or one large batch
-        if isinstance(latents, list):
-            all_shapes = set()
-            latents = [t.to(dtype) for t in latents]
-            for latent in latents:
-                all_shapes.add(latent.shape)
-            logging.info(f"Latent shapes: {all_shapes}")
-            if len(all_shapes) > 1:
-                multi_res = True
-            else:
-                multi_res = False
-                latents = torch.cat(latents, dim=0)
-            num_images = len(latents)
-        elif isinstance(latents, torch.Tensor):
-            latents = latents.to(dtype)
-            num_images = latents.shape[0]
-        else:
-            logging.error(f"Invalid latents type: {type(latents)}")
+        # Prepare latents and compute counts
+        latents, num_images, multi_res = _prepare_latents_and_count(
+            latents, dtype, bucket_mode
+        )

-        logging.info(f"Total Images: {num_images}, Total Captions: {len(positive)}")
-        if len(positive) == 1 and num_images > 1:
-            positive = positive * num_images
-        elif len(positive) != num_images:
-            raise ValueError(
-                f"Number of positive conditions ({len(positive)}) does not match number of images ({num_images})."
-            )
+        # Validate and expand conditioning
+        positive = _validate_and_expand_conditioning(positive, num_images, bucket_mode)

        with torch.inference_mode(False):
-            lora_sd = {}
-            generator = torch.Generator()
-            generator.manual_seed(seed)
+            # Setup models for training
+            mp.model.requires_grad_(False)

            # Load existing LoRA weights if provided
-            existing_weights = {}
-            existing_steps = 0
-            if existing_lora != "[None]":
-                lora_path = folder_paths.get_full_path_or_raise("loras", existing_lora)
-                # Extract steps from filename like "trained_lora_10_steps_20250225_203716"
-                existing_steps = int(existing_lora.split("_steps_")[0].split("_")[-1])
-                if lora_path:
-                    existing_weights = comfy.utils.load_torch_file(lora_path)
+            existing_weights, existing_steps = _load_existing_lora(existing_lora)

-            all_weight_adapters = []
-            for n, m in mp.model.named_modules():
-                if hasattr(m, "weight_function"):
-                    if m.weight is not None:
-                        key = "{}.weight".format(n)
-                        shape = m.weight.shape
-                        if len(shape) >= 2:
-                            alpha = float(existing_weights.get(f"{key}.alpha", 1.0))
-                            dora_scale = existing_weights.get(f"{key}.dora_scale", None)
-                            for adapter_cls in adapters:
-                                existing_adapter = adapter_cls.load(
-                                    n, existing_weights, alpha, dora_scale
-                                )
-                                if existing_adapter is not None:
-                                    break
-                            else:
-                                existing_adapter = None
-                                adapter_cls = adapter_maps[algorithm]
+            # Setup LoRA adapters
+            lora_sd, all_weight_adapters = _setup_lora_adapters(
+                mp, existing_weights, algorithm, lora_dtype, rank
+            )

-                            if existing_adapter is not None:
-                                train_adapter = existing_adapter.to_train().to(
-                                    lora_dtype
-                                )
-                            else:
-                                # Use LoRA with alpha=1.0 by default
-                                train_adapter = adapter_cls.create_train(
-                                    m.weight, rank=rank, alpha=1.0
-                                ).to(lora_dtype)
-                            for name, parameter in train_adapter.named_parameters():
-                                lora_sd[f"{n}.{name}"] = parameter
+            # Create optimizer and loss function
+            optimizer = _create_optimizer(
+                optimizer_name, lora_sd.values(), learning_rate
+            )
+            criterion = _create_loss_function(loss_function_name)

-                            mp.add_weight_wrapper(key, train_adapter)
-                            all_weight_adapters.append(train_adapter)
-                        else:
-                            diff = torch.nn.Parameter(
-                                torch.zeros(
-                                    m.weight.shape, dtype=lora_dtype, requires_grad=True
-                                )
-                            )
-                            diff_module = BiasDiff(diff)
-                            mp.add_weight_wrapper(key, BiasDiff(diff))
-                            all_weight_adapters.append(diff_module)
-                            lora_sd["{}.diff".format(n)] = diff
-                    if hasattr(m, "bias") and m.bias is not None:
-                        key = "{}.bias".format(n)
-                        bias = torch.nn.Parameter(
-                            torch.zeros(
-                                m.bias.shape, dtype=lora_dtype, requires_grad=True
-                            )
-                        )
-                        bias_module = BiasDiff(bias)
-                        lora_sd["{}.diff_b".format(n)] = bias
-                        mp.add_weight_wrapper(key, BiasDiff(bias))
-                        all_weight_adapters.append(bias_module)
-
-            if optimizer == "Adam":
-                optimizer = torch.optim.Adam(lora_sd.values(), lr=learning_rate)
-            elif optimizer == "AdamW":
-                optimizer = torch.optim.AdamW(lora_sd.values(), lr=learning_rate)
-            elif optimizer == "SGD":
-                optimizer = torch.optim.SGD(lora_sd.values(), lr=learning_rate)
-            elif optimizer == "RMSprop":
-                optimizer = torch.optim.RMSprop(lora_sd.values(), lr=learning_rate)
-
-            # Setup loss function based on selection
-            if loss_function == "MSE":
-                criterion = torch.nn.MSELoss()
-            elif loss_function == "L1":
-                criterion = torch.nn.L1Loss()
-            elif loss_function == "Huber":
-                criterion = torch.nn.HuberLoss()
-            elif loss_function == "SmoothL1":
-                criterion = torch.nn.SmoothL1Loss()
-
-            # setup models
+            # Setup gradient checkpointing
            if gradient_checkpointing:
                for m in find_all_highest_child_module_with_forward(
                    mp.model.diffusion_model
                ):
                    patch(m)
-            mp.model.requires_grad_(False)
+
+            torch.cuda.empty_cache()
+            # With force_full_load=False we should be able to have offloading
+            # But for offloading in training we need custom AutoGrad hooks for fwd/bwd
            comfy.model_management.load_models_gpu(
                [mp], memory_required=1e20, force_full_load=True
            )
+            torch.cuda.empty_cache()

-            # Setup sampler and guider like in test script
+            # Setup loss tracking
            loss_map = {"loss": []}

            def loss_callback(loss):
                loss_map["loss"].append(loss)

-            train_sampler = TrainSampler(
-                criterion,
-                optimizer,
-                loss_callback=loss_callback,
-                batch_size=batch_size,
-                grad_acc=grad_accumulation_steps,
-                total_steps=steps * grad_accumulation_steps,
-                seed=seed,
-                training_dtype=dtype,
-                real_dataset=latents if multi_res else None,
-            )
-            guider = comfy_extras.nodes_custom_sampler.Guider_Basic(mp)
-            guider.set_conds(positive)  # Set conditioning from input
+            # Create sampler
+            if bucket_mode:
+                train_sampler = TrainSampler(
+                    criterion,
+                    optimizer,
+                    loss_callback=loss_callback,
+                    batch_size=batch_size,
+                    grad_acc=grad_accumulation_steps,
+                    total_steps=steps * grad_accumulation_steps,
+                    seed=seed,
+                    training_dtype=dtype,
+                    bucket_latents=latents,
+                )
+            else:
+                train_sampler = TrainSampler(
+                    criterion,
+                    optimizer,
+                    loss_callback=loss_callback,
+                    batch_size=batch_size,
+                    grad_acc=grad_accumulation_steps,
+                    total_steps=steps * grad_accumulation_steps,
+                    seed=seed,
+                    training_dtype=dtype,
+                    real_dataset=latents if multi_res else None,
+                )

-            # Training loop
+            # Setup guider
+            guider = TrainGuider(mp)
+            guider.set_conds(positive)
+
+            # Run training loop
            try:
-                # Generate dummy sigmas and noise
-                sigmas = torch.tensor(range(num_images))
-                noise = comfy_extras.nodes_custom_sampler.Noise_RandomNoise(seed)
-                if multi_res:
-                    # use first latent as dummy latent if multi_res
-                    latents = latents[0].repeat((num_images,) + ((1,) * (latents[0].ndim - 1)))
-                guider.sample(
-                    noise.generate_noise({"samples": latents}),
-                    latents,
+                _run_training_loop(
+                    guider,
                    train_sampler,
-                    sigmas,
-                    seed=noise.seed,
+                    latents,
+                    num_images,
+                    seed,
+                    bucket_mode,
+                    multi_res,
                )
            finally:
                for m in mp.model.modules():
                    unpatch(m)
            del train_sampler, optimizer

+            # Finalize adapters
            for adapter in all_weight_adapters:
                adapter.requires_grad_(False)

@ -645,7 +1055,7 @@ class TrainLoraNode(io.ComfyNode):
            return io.NodeOutput(mp, lora_sd, loss_map, steps + existing_steps)


-class LoraModelLoader(io.ComfyNode):
+class LoraModelLoader(io.ComfyNode):#
    @classmethod
    def define_schema(cls):
        return io.Schema(
Author	SHA1	Message	Date
Kohaku-Blueleaf	4b7b933432	Merge `a6ae08b7ee` into `6592bffc60`	2025-12-14 13:06:52 +08:00
chaObserv	6592bffc60	seeds_2: add phi_2 variant and sampler node (#11309 ) Some checks are pending Python Linting / Run Ruff (push) Waiting to run Details Python Linting / Run Pylint (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.10, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.11, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.12, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-unix-nightly (12.1, , linux, 3.11, [self-hosted Linux], nightly) (push) Waiting to run Details Execution Tests / test (macos-latest) (push) Waiting to run Details Execution Tests / test (ubuntu-latest) (push) Waiting to run Details Execution Tests / test (windows-latest) (push) Waiting to run Details Test server launches without errors / test (push) Waiting to run Details Unit Tests / test (macos-latest) (push) Waiting to run Details Unit Tests / test (ubuntu-latest) (push) Waiting to run Details Unit Tests / test (windows-2022) (push) Waiting to run Details * Add phi_2 solver type to seeds_2 * Add sampler node of seeds_2	2025-12-14 00:03:29 -05:00
Kohaku-Blueleaf	a6ae08b7ee	modify existing node for needed feature instead of new node	2025-12-13 11:22:25 +08:00
Kohaku-Blueleaf	d330bb2a37	fix wrong ui pbar update	2025-12-09 23:06:53 +08:00
Kohaku-Blueleaf	305602c668	ensure the model train properly in both grad ckpt or not	2025-12-09 22:52:32 +08:00
Kohaku-Blueleaf	37139daa98	Merge branch 'master' into resolution-bucket	2025-12-05 17:26:15 +08:00
Kohaku-Blueleaf	4004af3290	Custom guider for correct offloading behavior	2025-12-05 17:24:55 +08:00
Kohaku-Blueleaf	bf573e94a2	Refactoring with better layout for maintainability	2025-12-01 23:53:20 +08:00
Kohaku-Blueleaf	7a93c55a9f	Add resolution bucketing	2025-12-01 23:31:26 +08:00