diff --git a/comfy/model_management.py b/comfy/model_management.py
index 72348258b..b6291f340 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -1724,11 +1724,9 @@ def soft_empty_cache(force=False):
     elif is_mlu():
         torch.mlu.empty_cache()
     elif torch.cuda.is_available():
-        if comfy.memory_management.aimdo_allocator is None:
-            #Pytorch 2.7 and earlier crashes if you try and empty_cache when mempools exist
-            torch.cuda.synchronize()
-            torch.cuda.empty_cache()
-            torch.cuda.ipc_collect()
+        torch.cuda.synchronize()
+        torch.cuda.empty_cache()
+        torch.cuda.ipc_collect()
 
 def unload_all_models():
     free_memory(1e30, get_torch_device())
diff --git a/comfy/model_patcher.py b/comfy/model_patcher.py
index cdf289395..d888dbcfb 100644
--- a/comfy/model_patcher.py
+++ b/comfy/model_patcher.py
@@ -1400,7 +1400,7 @@ class ModelPatcher:
                 continue
             key = "diffusion_model." + k
             unet_state_dict[k] = LazyCastingParam(self, key, comfy.utils.get_attr(self.model, key))
-        return self.model.state_dict_for_saving(unet_state_dict)
+        return self.model.state_dict_for_saving(unet_state_dict, clip_state_dict=clip_state_dict, vae_state_dict=vae_state_dict, clip_vision_state_dict=clip_vision_state_dict)
 
     def __del__(self):
         self.unpin_all_weights()
diff --git a/comfy/ops.py b/comfy/ops.py
index 53c5e4dc3..0f4eca7c7 100644
--- a/comfy/ops.py
+++ b/comfy/ops.py
@@ -54,6 +54,8 @@ try:
             SDPA_BACKEND_PRIORITY.insert(0, SDPBackend.CUDNN_ATTENTION)
 
             def scaled_dot_product_attention(q, k, v, *args, **kwargs):
+                if q.nelement() < 1024 * 128:  # arbitrary number, for small inputs cudnn attention seems slower
+                    return torch.nn.functional.scaled_dot_product_attention(q, k, v, *args, **kwargs)
                 with sdpa_kernel(SDPA_BACKEND_PRIORITY, set_priority=True):
                     return torch.nn.functional.scaled_dot_product_attention(q, k, v, *args, **kwargs)
         else:
diff --git a/comfy/text_encoders/ace15.py b/comfy/text_encoders/ace15.py
index 73d710671..fce2b67ce 100644
--- a/comfy/text_encoders/ace15.py
+++ b/comfy/text_encoders/ace15.py
@@ -19,6 +19,7 @@ def sample_manual_loop_no_classes(
     min_tokens: int = 1,
     max_new_tokens: int = 2048,
     audio_start_id: int = 151669,  # The cutoff ID for audio codes
+    audio_end_id: int = 215669,
     eos_token_id: int = 151645,
 ):
     device = model.execution_device
@@ -60,6 +61,7 @@ def sample_manual_loop_no_classes(
         remove_logit_value = torch.finfo(cfg_logits.dtype).min
         # Only generate audio tokens
         cfg_logits[:, :audio_start_id] = remove_logit_value
+        cfg_logits[:, audio_end_id:] = remove_logit_value
 
         if eos_token_id is not None and eos_token_id < audio_start_id and min_tokens < step:
             cfg_logits[:, eos_token_id] = eos_score
diff --git a/comfy/text_encoders/llama.py b/comfy/text_encoders/llama.py
index 3afd094d1..b6735d210 100644
--- a/comfy/text_encoders/llama.py
+++ b/comfy/text_encoders/llama.py
@@ -651,10 +651,10 @@ class Llama2_(nn.Module):
         mask = None
         if attention_mask is not None:
             mask = 1.0 - attention_mask.to(x.dtype).reshape((attention_mask.shape[0], 1, -1, attention_mask.shape[-1])).expand(attention_mask.shape[0], 1, seq_len, attention_mask.shape[-1])
-            mask = mask.masked_fill(mask.to(torch.bool), torch.finfo(x.dtype).min)
+            mask = mask.masked_fill(mask.to(torch.bool), torch.finfo(x.dtype).min / 4)
 
         if seq_len > 1:
-            causal_mask = torch.empty(past_len + seq_len, past_len + seq_len, dtype=x.dtype, device=x.device).fill_(torch.finfo(x.dtype).min).triu_(1)
+            causal_mask = torch.empty(past_len + seq_len, past_len + seq_len, dtype=x.dtype, device=x.device).fill_(torch.finfo(x.dtype).min / 4).triu_(1)
             if mask is not None:
                 mask += causal_mask
             else:
diff --git a/comfy/utils.py b/comfy/utils.py
index c1b536833..1337e2205 100644
--- a/comfy/utils.py
+++ b/comfy/utils.py
@@ -82,14 +82,12 @@ _TYPES = {
 def load_safetensors(ckpt):
     f = open(ckpt, "rb")
     mapping = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
+    mv = memoryview(mapping)
 
     header_size = struct.unpack("<Q", mapping[:8])[0]
     header = json.loads(mapping[8:8+header_size].decode("utf-8"))
 
-    with warnings.catch_warnings():
-        #We are working with read-only RAM by design
-        warnings.filterwarnings("ignore", message="The given buffer is not writable")
-        data_area = torch.frombuffer(mapping, dtype=torch.uint8)[8 + header_size:]
+    mv = mv[8 + header_size:]
 
     sd = {}
     for name, info in header.items():
@@ -97,7 +95,13 @@ def load_safetensors(ckpt):
             continue
 
         start, end = info["data_offsets"]
-        sd[name] = data_area[start:end].view(_TYPES[info["dtype"]]).view(info["shape"])
+        if start == end:
+            sd[name] = torch.empty(info["shape"], dtype =_TYPES[info["dtype"]])
+        else:
+            with warnings.catch_warnings():
+                #We are working with read-only RAM by design
+                warnings.filterwarnings("ignore", message="The given buffer is not writable")
+                sd[name] = torch.frombuffer(mv[start:end], dtype=_TYPES[info["dtype"]]).view(info["shape"])
 
     return sd, header.get("__metadata__", {}),
 
diff --git a/comfy_extras/nodes_hunyuan3d.py b/comfy_extras/nodes_hunyuan3d.py
index eda1639ab..c2df3e859 100644
--- a/comfy_extras/nodes_hunyuan3d.py
+++ b/comfy_extras/nodes_hunyuan3d.py
@@ -618,6 +618,7 @@ class SaveGLB(IO.ComfyNode):
     def define_schema(cls):
         return IO.Schema(
             node_id="SaveGLB",
+            display_name="Save 3D Model",
             search_aliases=["export 3d model", "save mesh"],
             category="3d",
             is_output_node=True,
@@ -626,8 +627,14 @@ class SaveGLB(IO.ComfyNode):
                     IO.Mesh.Input("mesh"),
                     types=[
                         IO.File3DGLB,
+                        IO.File3DGLTF,
+                        IO.File3DOBJ,
+                        IO.File3DFBX,
+                        IO.File3DSTL,
+                        IO.File3DUSDZ,
+                        IO.File3DAny,
                     ],
-                    tooltip="Mesh or GLB file to save",
+                    tooltip="Mesh or 3D file to save",
                 ),
                 IO.String.Input("filename_prefix", default="mesh/ComfyUI"),
             ],
@@ -649,7 +656,8 @@ class SaveGLB(IO.ComfyNode):
 
         if isinstance(mesh, Types.File3D):
             # Handle File3D input - save BytesIO data to output folder
-            f = f"{filename}_{counter:05}_.glb"
+            ext = mesh.format or "glb"
+            f = f"{filename}_{counter:05}_.{ext}"
             mesh.save_to(os.path.join(full_output_folder, f))
             results.append({
                 "filename": f,
diff --git a/comfy_extras/nodes_load_3d.py b/comfy_extras/nodes_load_3d.py
index f29510488..edbb5cd40 100644
--- a/comfy_extras/nodes_load_3d.py
+++ b/comfy_extras/nodes_load_3d.py
@@ -45,6 +45,7 @@ class Load3D(IO.ComfyNode):
                 IO.Image.Output(display_name="normal"),
                 IO.Load3DCamera.Output(display_name="camera_info"),
                 IO.Video.Output(display_name="recording_video"),
+                IO.File3DAny.Output(display_name="model_3d"),
             ],
         )
 
@@ -66,7 +67,8 @@ class Load3D(IO.ComfyNode):
 
             video = InputImpl.VideoFromFile(recording_video_path)
 
-        return IO.NodeOutput(output_image, output_mask, model_file, normal_image, image['camera_info'], video)
+        file_3d = Types.File3D(folder_paths.get_annotated_filepath(model_file))
+        return IO.NodeOutput(output_image, output_mask, model_file, normal_image, image['camera_info'], video, file_3d)
 
     process = execute  # TODO: remove
 
diff --git a/comfyui_version.py b/comfyui_version.py
index 2e2c12ced..5d296cd1b 100644
--- a/comfyui_version.py
+++ b/comfyui_version.py
@@ -1,3 +1,3 @@
 # This file is automatically generated by the build process when version is
 # updated in pyproject.toml.
-__version__ = "0.12.1"
+__version__ = "0.12.2"
diff --git a/main.py b/main.py
index b8c951375..92d705b4d 100644
--- a/main.py
+++ b/main.py
@@ -192,7 +192,10 @@ import comfy_aimdo.control
 import comfy_aimdo.torch
 
 if enables_dynamic_vram():
-    if comfy_aimdo.control.init_device(comfy.model_management.get_torch_device().index):
+    if comfy.model_management.torch_version_numeric < (2, 8):
+        logging.warning("Unsupported Pytorch detected. DynamicVRAM support requires Pytorch version 2.8 or later. Falling back to legacy ModelPatcher. VRAM estimates may be unreliable especially on Windows")
+        comfy.memory_management.aimdo_allocator = None
+    elif comfy_aimdo.control.init_device(comfy.model_management.get_torch_device().index):
         if args.verbose == 'DEBUG':
             comfy_aimdo.control.set_log_debug()
         elif args.verbose == 'CRITICAL':
@@ -208,7 +211,7 @@ if enables_dynamic_vram():
         comfy.memory_management.aimdo_allocator = comfy_aimdo.torch.get_torch_allocator()
         logging.info("DynamicVRAM support detected and enabled")
     else:
-        logging.info("No working comfy-aimdo install detected. DynamicVRAM support disabled. Falling back to legacy ModelPatcher. VRAM estimates may be unreliable especially on Windows")
+        logging.warning("No working comfy-aimdo install detected. DynamicVRAM support disabled. Falling back to legacy ModelPatcher. VRAM estimates may be unreliable especially on Windows")
         comfy.memory_management.aimdo_allocator = None
 
 
diff --git a/pyproject.toml b/pyproject.toml
index c21ee03f1..1ddcc3596 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "ComfyUI"
-version = "0.12.1"
+version = "0.12.2"
 readme = "README.md"
 license = { file = "LICENSE" }
 requires-python = ">=3.10"