diff --git a/comfy/__init__.py b/comfy/__init__.py
index 848463ce4..6309216fc 100644
--- a/comfy/__init__.py
+++ b/comfy/__init__.py
@@ -1,6 +1,6 @@
 # This file is automatically generated by the build process when version is
 # updated in pyproject.toml.
-__version__ = "0.3.76"
+__version__ = "0.4.0"
 
 # This deals with workspace issues
 from comfy_compatibility.workspace import auto_patch_workspace_and_restart
diff --git a/comfy/ldm/hunyuan_video/model.py b/comfy/ldm/hunyuan_video/model.py
index 17f62db66..261c01b1e 100644
--- a/comfy/ldm/hunyuan_video/model.py
+++ b/comfy/ldm/hunyuan_video/model.py
@@ -35,6 +35,7 @@ class HunyuanVideoParams:
     meanflow: bool
     use_cond_type_embedding: bool
     vision_in_dim: int
+    meanflow_sum: bool
 
 
 class SelfAttentionRef(nn.Module):
@@ -316,7 +317,7 @@ class HunyuanVideo(nn.Module):
                 timesteps_r = transformer_options['sample_sigmas'][w[0] + 1]
                 timesteps_r = timesteps_r.unsqueeze(0).to(device=timesteps.device, dtype=timesteps.dtype)
                 vec_r = self.time_r_in(timestep_embedding(timesteps_r, 256, time_factor=1000.0).to(img.dtype))
-                vec = (vec + vec_r) / 2
+                vec = (vec + vec_r) if self.params.meanflow_sum else (vec + vec_r) / 2
 
         if ref_latent is not None:
             ref_latent_ids = self.img_ids(ref_latent)
diff --git a/comfy/model_detection.py b/comfy/model_detection.py
index adbbcdaf0..cae910bc3 100644
--- a/comfy/model_detection.py
+++ b/comfy/model_detection.py
@@ -189,8 +189,10 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
             dit_config["use_cond_type_embedding"] = False
         if '{}vision_in.proj.0.weight'.format(key_prefix) in state_dict_keys:
             dit_config["vision_in_dim"] = state_dict['{}vision_in.proj.0.weight'.format(key_prefix)].shape[0]
+            dit_config["meanflow_sum"] = True
         else:
             dit_config["vision_in_dim"] = None
+            dit_config["meanflow_sum"] = False
         return dit_config
 
     if '{}double_blocks.0.img_attn.norm.key_norm.scale'.format(key_prefix) in state_dict_keys and ('{}img_in.weight'.format(key_prefix) in state_dict_keys or f"{key_prefix}distilled_guidance_layer.norms.0.scale" in state_dict_keys):  # Flux, Chroma or Chroma Radiance (has no img_in.weight)
diff --git a/comfy/quant_ops.py b/comfy/quant_ops.py
index d0b8add23..ec6ac2013 100644
--- a/comfy/quant_ops.py
+++ b/comfy/quant_ops.py
@@ -2,7 +2,7 @@ import torch
 import logging
 logger = logging.getLogger(__name__)
 from typing import Tuple, Dict
-import comfy.float
+from .float import stochastic_rounding as stochastic_rounding_fn
 
 _LAYOUT_REGISTRY = {}
 _GENERIC_UTILS = {}
@@ -400,7 +400,10 @@ class TensorCoreFP8Layout(QuantizedLayout):
         orig_dtype = tensor.dtype
 
         if isinstance(scale, str) and scale == "recalculate":
-            scale = torch.amax(tensor.abs()) / torch.finfo(dtype).max
+            scale = torch.amax(tensor.abs()).to(dtype=torch.float32) / torch.finfo(dtype).max
+            if tensor.dtype not in [torch.float32, torch.bfloat16]:  # Prevent scale from being too small
+                tensor_info = torch.finfo(tensor.dtype)
+                scale = (1.0 / torch.clamp((1.0 / scale), min=tensor_info.min, max=tensor_info.max))
 
         if scale is not None:
             if not isinstance(scale, torch.Tensor):
@@ -415,7 +418,7 @@ class TensorCoreFP8Layout(QuantizedLayout):
             scale = torch.ones((), device=tensor.device, dtype=torch.float32)
 
         if stochastic_rounding > 0:
-            tensor = comfy.float.stochastic_rounding(tensor, dtype=dtype, seed=stochastic_rounding)
+            tensor = stochastic_rounding_fn(tensor, dtype=dtype, seed=stochastic_rounding)
         else:
             lp_amax = torch.finfo(dtype).max
             torch.clamp(tensor, min=-lp_amax, max=lp_amax, out=tensor)
diff --git a/pyproject.toml b/pyproject.toml
index 3da4a2559..694a62377 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "comfyui"
-version = "0.3.76"
+version = "0.4.0"
 description = "An installable version of ComfyUI"
 readme = "README.md"
 authors = [