diff --git a/README.md b/README.md
index 55d745a10..ba8892b17 100644
--- a/README.md
+++ b/README.md
@@ -86,6 +86,7 @@ See what ComfyUI can do with the [example workflows](https://comfyanonymous.gith
 - Smart memory management: can automatically run models on GPUs with as low as 1GB vram.
 - Works even if you don't have a GPU with: ```--cpu``` (slow)
 - Can load ckpt, safetensors and diffusers models/checkpoints. Standalone VAEs and CLIP models.
+- Safe loading of ckpt, pt, pth, etc. files.
 - Embeddings/Textual inversion
 - [Loras (regular, locon and loha)](https://comfyanonymous.github.io/ComfyUI_examples/lora/)
 - [Hypernetworks](https://comfyanonymous.github.io/ComfyUI_examples/hypernetworks/)
@@ -101,7 +102,6 @@ See what ComfyUI can do with the [example workflows](https://comfyanonymous.gith
 - [Model Merging](https://comfyanonymous.github.io/ComfyUI_examples/model_merging/)
 - [LCM models and Loras](https://comfyanonymous.github.io/ComfyUI_examples/lcm/)
 - Latent previews with [TAESD](#how-to-show-high-quality-previews)
-- Starts up very fast.
 - Works fully offline: core will never download anything unless you want to.
 - Optional API nodes to use paid models from external providers through the online [Comfy API](https://docs.comfy.org/tutorials/api-nodes/overview).
 - [Config file](extra_model_paths.yaml.example) to set the search paths for models.
diff --git a/comfy/gligen.py b/comfy/gligen.py
index 161d8a5e5..1d7b6c2f4 100644
--- a/comfy/gligen.py
+++ b/comfy/gligen.py
@@ -1,55 +1,10 @@
 import math
 import torch
 from torch import nn
-from .ldm.modules.attention import CrossAttention
-from inspect import isfunction
+from .ldm.modules.attention import CrossAttention, FeedForward
 import comfy.ops
 ops = comfy.ops.manual_cast
 
-def exists(val):
-    return val is not None
-
-
-def uniq(arr):
-    return{el: True for el in arr}.keys()
-
-
-def default(val, d):
-    if exists(val):
-        return val
-    return d() if isfunction(d) else d
-
-
-# feedforward
-class GEGLU(nn.Module):
-    def __init__(self, dim_in, dim_out):
-        super().__init__()
-        self.proj = ops.Linear(dim_in, dim_out * 2)
-
-    def forward(self, x):
-        x, gate = self.proj(x).chunk(2, dim=-1)
-        return x * torch.nn.functional.gelu(gate)
-
-
-class FeedForward(nn.Module):
-    def __init__(self, dim, dim_out=None, mult=4, glu=False, dropout=0.):
-        super().__init__()
-        inner_dim = int(dim * mult)
-        dim_out = default(dim_out, dim)
-        project_in = nn.Sequential(
-            ops.Linear(dim, inner_dim),
-            nn.GELU()
-        ) if not glu else GEGLU(dim, inner_dim)
-
-        self.net = nn.Sequential(
-            project_in,
-            nn.Dropout(dropout),
-            ops.Linear(inner_dim, dim_out)
-        )
-
-    def forward(self, x):
-        return self.net(x)
-
 
 class GatedCrossAttentionDense(nn.Module):
     def __init__(self, query_dim, context_dim, n_heads, d_head):
diff --git a/comfy/k_diffusion/sampling.py b/comfy/k_diffusion/sampling.py
index e231d6a3d..34218337a 100644
--- a/comfy/k_diffusion/sampling.py
+++ b/comfy/k_diffusion/sampling.py
@@ -412,9 +412,13 @@ def sample_lms(model, x, sigmas, extra_args=None, callback=None, disable=None, o
             ds.pop(0)
         if callback is not None:
             callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised})
-        cur_order = min(i + 1, order)
-        coeffs = [linear_multistep_coeff(cur_order, sigmas_cpu, i, j) for j in range(cur_order)]
-        x = x + sum(coeff * d for coeff, d in zip(coeffs, reversed(ds)))
+        if sigmas[i + 1] == 0:
+            # Denoising step
+            x = denoised
+        else:
+            cur_order = min(i + 1, order)
+            coeffs = [linear_multistep_coeff(cur_order, sigmas_cpu, i, j) for j in range(cur_order)]
+            x = x + sum(coeff * d for coeff, d in zip(coeffs, reversed(ds)))
     return x
 
 
@@ -1067,7 +1071,9 @@ def sample_ipndm(model, x, sigmas, extra_args=None, callback=None, disable=None,
         d_cur = (x_cur - denoised) / t_cur
 
         order = min(max_order, i+1)
-        if order == 1:      # First Euler step.
+        if t_next == 0:     # Denoising step
+            x_next = denoised
+        elif order == 1:    # First Euler step.
             x_next = x_cur + (t_next - t_cur) * d_cur
         elif order == 2:    # Use one history point.
             x_next = x_cur + (t_next - t_cur) * (3 * d_cur - buffer_model[-1]) / 2
@@ -1085,6 +1091,7 @@ def sample_ipndm(model, x, sigmas, extra_args=None, callback=None, disable=None,
 
     return x_next
 
+
 #From https://github.com/zju-pi/diff-sampler/blob/main/diff-solvers-main/solvers.py
 #under Apache 2 license
 def sample_ipndm_v(model, x, sigmas, extra_args=None, callback=None, disable=None, max_order=4):
@@ -1108,7 +1115,9 @@ def sample_ipndm_v(model, x, sigmas, extra_args=None, callback=None, disable=Non
         d_cur = (x_cur - denoised) / t_cur
 
         order = min(max_order, i+1)
-        if order == 1:      # First Euler step.
+        if t_next == 0:     # Denoising step
+            x_next = denoised
+        elif order == 1:    # First Euler step.
             x_next = x_cur + (t_next - t_cur) * d_cur
         elif order == 2:    # Use one history point.
             h_n = (t_next - t_cur)
@@ -1148,6 +1157,7 @@ def sample_ipndm_v(model, x, sigmas, extra_args=None, callback=None, disable=Non
 
     return x_next
 
+
 #From https://github.com/zju-pi/diff-sampler/blob/main/diff-solvers-main/solvers.py
 #under Apache 2 license
 @torch.no_grad()
@@ -1198,6 +1208,7 @@ def sample_deis(model, x, sigmas, extra_args=None, callback=None, disable=None,
 
     return x_next
 
+
 @torch.no_grad()
 def sample_euler_cfg_pp(model, x, sigmas, extra_args=None, callback=None, disable=None):
     extra_args = {} if extra_args is None else extra_args
@@ -1404,6 +1415,7 @@ def sample_res_multistep_ancestral(model, x, sigmas, extra_args=None, callback=N
 def sample_res_multistep_ancestral_cfg_pp(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None):
     return res_multistep(model, x, sigmas, extra_args=extra_args, callback=callback, disable=disable, s_noise=s_noise, noise_sampler=noise_sampler, eta=eta, cfg_pp=True)
 
+
 @torch.no_grad()
 def sample_gradient_estimation(model, x, sigmas, extra_args=None, callback=None, disable=None, ge_gamma=2., cfg_pp=False):
     """Gradient-estimation sampler. Paper: https://openreview.net/pdf?id=o2ND9v0CeK"""
@@ -1430,19 +1442,19 @@ def sample_gradient_estimation(model, x, sigmas, extra_args=None, callback=None,
         if callback is not None:
             callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised})
         dt = sigmas[i + 1] - sigmas[i]
-        if i == 0:
+        if sigmas[i + 1] == 0:
+            # Denoising step
+            x = denoised
+        else:
             # Euler method
             if cfg_pp:
                 x = denoised + d * sigmas[i + 1]
             else:
                 x = x + d * dt
-        else:
-            # Gradient estimation
-            if cfg_pp:
+
+            if i >= 1:
+                # Gradient estimation
                 d_bar = (ge_gamma - 1) * (d - old_d)
-                x = denoised + d * sigmas[i + 1] + d_bar * dt
-            else:
-                d_bar = ge_gamma * d + (1 - ge_gamma) * old_d
                 x = x + d_bar * dt
         old_d = d
     return x
diff --git a/comfy/utils.py b/comfy/utils.py
index 1f8d71292..47981d8f6 100644
--- a/comfy/utils.py
+++ b/comfy/utils.py
@@ -77,6 +77,7 @@ def load_torch_file(ckpt, safe_load=False, device=None, return_metadata=False):
         if safe_load or ALWAYS_SAFE_LOAD:
             pl_sd = torch.load(ckpt, map_location=device, weights_only=True, **torch_args)
         else:
+            logging.warning("WARNING: loading {} unsafely, upgrade your pytorch to 2.4 or newer to load this file safely.".format(ckpt))
             pl_sd = torch.load(ckpt, map_location=device, pickle_module=comfy.checkpoint_pickle)
     if "state_dict" in pl_sd:
         sd = pl_sd["state_dict"]
diff --git a/comfy_extras/nodes_audio.py b/comfy_extras/nodes_audio.py
index 49af1eae4..8cd647846 100644
--- a/comfy_extras/nodes_audio.py
+++ b/comfy_extras/nodes_audio.py
@@ -133,14 +133,6 @@ def save_audio(self, audio, filename_prefix="ComfyUI", format="flac", prompt=Non
         if sample_rate != audio["sample_rate"]:
             waveform = torchaudio.functional.resample(waveform, audio["sample_rate"], sample_rate)
 
-        # Create in-memory WAV buffer
-        wav_buffer = io.BytesIO()
-        torchaudio.save(wav_buffer, waveform, sample_rate, format="WAV")
-        wav_buffer.seek(0)  # Rewind for reading
-
-        # Use PyAV to convert and add metadata
-        input_container = av.open(wav_buffer)
-
         # Create output with specified format
         output_buffer = io.BytesIO()
         output_container = av.open(output_buffer, mode='w', format=format)
@@ -150,7 +142,6 @@ def save_audio(self, audio, filename_prefix="ComfyUI", format="flac", prompt=Non
             output_container.metadata[key] = value
 
         # Set up the output stream with appropriate properties
-        input_container.streams.audio[0]
         if format == "opus":
             out_stream = output_container.add_stream("libopus", rate=sample_rate)
             if quality == "64k":
@@ -175,18 +166,16 @@ def save_audio(self, audio, filename_prefix="ComfyUI", format="flac", prompt=Non
         else: #format == "flac":
             out_stream = output_container.add_stream("flac", rate=sample_rate)
 
-
-        # Copy frames from input to output
-        for frame in input_container.decode(audio=0):
-            frame.pts = None  # Let PyAV handle timestamps
-            output_container.mux(out_stream.encode(frame))
+        frame = av.AudioFrame.from_ndarray(waveform.movedim(0, 1).reshape(1, -1).float().numpy(), format='flt', layout='mono' if waveform.shape[0] == 1 else 'stereo')
+        frame.sample_rate = sample_rate
+        frame.pts = 0
+        output_container.mux(out_stream.encode(frame))
 
         # Flush encoder
         output_container.mux(out_stream.encode(None))
 
         # Close containers
         output_container.close()
-        input_container.close()
 
         # Write the output to file
         output_buffer.seek(0)
diff --git a/comfy_extras/nodes_images.py b/comfy_extras/nodes_images.py
index ed54ccc57..fba80e2ae 100644
--- a/comfy_extras/nodes_images.py
+++ b/comfy_extras/nodes_images.py
@@ -583,6 +583,49 @@ class GetImageSize:
 
         return width, height, batch_size
 
+class ImageRotate:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": { "image": (IO.IMAGE,),
+                              "rotation": (["none", "90 degrees", "180 degrees", "270 degrees"],),
+                              }}
+    RETURN_TYPES = (IO.IMAGE,)
+    FUNCTION = "rotate"
+
+    CATEGORY = "image/transform"
+
+    def rotate(self, image, rotation):
+        rotate_by = 0
+        if rotation.startswith("90"):
+            rotate_by = 1
+        elif rotation.startswith("180"):
+            rotate_by = 2
+        elif rotation.startswith("270"):
+            rotate_by = 3
+
+        image = torch.rot90(image, k=rotate_by, dims=[2, 1])
+        return (image,)
+
+class ImageFlip:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": { "image": (IO.IMAGE,),
+                              "flip_method": (["x-axis: vertically", "y-axis: horizontally"],),
+                              }}
+    RETURN_TYPES = (IO.IMAGE,)
+    FUNCTION = "flip"
+
+    CATEGORY = "image/transform"
+
+    def flip(self, image, flip_method):
+        if flip_method.startswith("x"):
+            image = torch.flip(image, dims=[1])
+        elif flip_method.startswith("y"):
+            image = torch.flip(image, dims=[2])
+
+        return (image,)
+
+
 NODE_CLASS_MAPPINGS = {
     "ImageCrop": ImageCrop,
     "RepeatImageBatch": RepeatImageBatch,
@@ -594,4 +637,6 @@ NODE_CLASS_MAPPINGS = {
     "ImageStitch": ImageStitch,
     "ResizeAndPadImage": ResizeAndPadImage,
     "GetImageSize": GetImageSize,
+    "ImageRotate": ImageRotate,
+    "ImageFlip": ImageFlip,
 }
diff --git a/requirements.txt b/requirements.txt
index 7051bbfaf..afa790b4b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,6 @@
 comfyui-frontend-package==1.23.4
-comfyui-workflow-templates==0.1.32
-comfyui-embedded-docs==0.2.3
+comfyui-workflow-templates==0.1.33
+comfyui-embedded-docs==0.2.4
 comfyui_manager
 torch
 torchsde
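
Note on the safe-loading changes (README.md, comfy/utils.py): on PyTorch 2.4 and newer, ComfyUI loads pickled checkpoints with torch.load(..., weights_only=True), and the new logging.warning fires only on the legacy unsafe path. A minimal standalone sketch of the same pattern; the checkpoint path is assumed for illustration:

    import torch

    # weights_only=True refuses to unpickle arbitrary Python objects, so a
    # malicious .ckpt/.pt/.pth file cannot run code at load time (the flag
    # has existed since roughly PyTorch 1.13 and is the default from 2.6).
    state_dict = torch.load("model.ckpt", map_location="cpu", weights_only=True)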
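Note on the sampler changes (comfy/k_diffusion/sampling.py): sample_lms, sample_ipndm, sample_ipndm_v and sample_gradient_estimation all gain the same guard: when the next sigma is zero, the model's denoised prediction is taken directly as the result instead of extrapolating to sigma = 0 with the multistep formula. A minimal sketch of that guard; the function and argument names are illustrative placeholders, not ComfyUI API:

    import torch

    def lms_like_step(x, denoised, sigma_next, d_history, coeffs):
        # Denoising step: at sigma_next == 0 the integration target is the
        # model's own prediction, so return it directly instead of
        # extrapolating with the multistep update.
        if sigma_next == 0:
            return denoised
        # Otherwise take the usual linear-multistep update.
        return x + sum(c * d for c, d in zip(coeffs, reversed(d_history)))

    x = torch.zeros(4)
    denoised = torch.ones(4)
    print(lms_like_step(x, denoised, sigma_next=0.0, d_history=[], coeffs=[]))  # -> denoised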
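Note on the SaveAudio change (comfy_extras/nodes_audio.py): instead of round-tripping the waveform through an in-memory WAV file and decoding it back with PyAV, the tensor is packed into a single interleaved av.AudioFrame and encoded directly. A rough standalone version of the same idea; the output path and a CPU float tensor shaped [channels, samples] in [-1, 1] are assumed:

    import av
    import torch

    def encode_flac(waveform: torch.Tensor, sample_rate: int, path: str):
        container = av.open(path, mode='w', format='flac')
        stream = container.add_stream('flac', rate=sample_rate)
        # 'flt' is packed (interleaved) float, so fold the channels into one
        # row: [channels, samples] -> [samples, channels] -> [1, samples * channels]
        data = waveform.movedim(0, 1).reshape(1, -1).float().numpy()
        layout = 'mono' if waveform.shape[0] == 1 else 'stereo'
        frame = av.AudioFrame.from_ndarray(data, format='flt', layout=layout)
        frame.sample_rate = sample_rate
        frame.pts = 0
        container.mux(stream.encode(frame))
        container.mux(stream.encode(None))  # flush the encoder
        container.close()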
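Note on the new image nodes (comfy_extras/nodes_images.py): ComfyUI IMAGE tensors are laid out [batch, height, width, channels], so height is dim 1 and width is dim 2, which is why ImageRotate rotates in the (2, 1) plane and ImageFlip flips dim 1 or dim 2. The same transforms outside the node wrappers, on a dummy tensor:

    import torch

    img = torch.rand(1, 512, 768, 3)  # [batch, height, width, channels]

    rotated_90 = torch.rot90(img, k=1, dims=[2, 1])  # as in ImageRotate
    flipped_x = torch.flip(img, dims=[1])            # x-axis: vertically
    flipped_y = torch.flip(img, dims=[2])            # y-axis: horizontally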