diff --git a/README.md b/README.md
index 9b5f301c9..47514d1b4 100644
--- a/README.md
+++ b/README.md
@@ -197,11 +197,11 @@ Put your VAE in: models/vae
 
 ### AMD GPUs (Linux only)
 AMD users can install rocm and pytorch with pip if you don't have it already installed, this is the command to install the stable version:
 
-```pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.2.4```
+```pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.3```
 
-This is the command to install the nightly with ROCm 6.3 which might have some performance improvements:
+This is the command to install the nightly with ROCm 6.4 which might have some performance improvements:
 
-```pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/rocm6.3```
+```pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/rocm6.4```
 
 ### Intel GPUs (Windows and Linux)
diff --git a/comfy/cli_args.py b/comfy/cli_args.py
index 320f2aba8..7cb43eb27 100644
--- a/comfy/cli_args.py
+++ b/comfy/cli_args.py
@@ -88,6 +88,7 @@ parser.add_argument("--directml", type=int, nargs="?", metavar="DIRECTML_DEVICE"
 parser.add_argument("--oneapi-device-selector", type=str, default=None, metavar="SELECTOR_STRING", help="Sets the oneAPI device(s) this instance will use.")
 parser.add_argument("--disable-ipex-optimize", action="store_true", help="Disables ipex.optimize default when loading models with Intel's Extension for Pytorch.")
+parser.add_argument("--supports-fp8-compute", action="store_true", help="ComfyUI will act as if the device supports fp8 compute.")
 
 class LatentPreviewMethod(enum.Enum):
     NoPreviews = "none"
diff --git a/comfy/ldm/chroma/model.py b/comfy/ldm/chroma/model.py
index 636748fc5..c75023a31 100644
--- a/comfy/ldm/chroma/model.py
+++ b/comfy/ldm/chroma/model.py
@@ -163,7 +163,7 @@ class Chroma(nn.Module):
         distil_guidance = timestep_embedding(guidance.detach().clone(), 16).to(img.device, img.dtype)
 
         # get all modulation index
-        modulation_index = timestep_embedding(torch.arange(mod_index_length), 32).to(img.device, img.dtype)
+        modulation_index = timestep_embedding(torch.arange(mod_index_length, device=img.device), 32).to(img.device, img.dtype)
         # we need to broadcast the modulation index here so each batch has all of the index
         modulation_index = modulation_index.unsqueeze(0).repeat(img.shape[0], 1, 1).to(img.device, img.dtype)
         # and we need to broadcast timestep and guidance along too
diff --git a/comfy/ldm/modules/attention.py b/comfy/ldm/modules/attention.py
index 45f9e311e..2cb77d85d 100644
--- a/comfy/ldm/modules/attention.py
+++ b/comfy/ldm/modules/attention.py
@@ -20,8 +20,11 @@ if model_management.xformers_enabled():
 
 if model_management.sage_attention_enabled():
     try:
         from sageattention import sageattn
-    except ModuleNotFoundError:
-        logging.error(f"\n\nTo use the `--use-sage-attention` feature, the `sageattention` package must be installed first.\ncommand:\n\t{sys.executable} -m pip install sageattention")
+    except ModuleNotFoundError as e:
+        if e.name == "sageattention":
+            logging.error(f"\n\nTo use the `--use-sage-attention` feature, the `sageattention` package must be installed first.\ncommand:\n\t{sys.executable} -m pip install sageattention")
+        else:
+            raise e
         exit(-1)
 
 if model_management.flash_attention_enabled():
diff --git a/comfy/ldm/wan/model.py b/comfy/ldm/wan/model.py
index a996dedf4..1b51a4e4a 100644
--- a/comfy/ldm/wan/model.py
+++ b/comfy/ldm/wan/model.py
@@ -635,7 +635,7 @@ class VaceWanModel(WanModel):
         t,
         context,
         vace_context,
-        vace_strength=1.0,
+        vace_strength,
         clip_fea=None,
         freqs=None,
         transformer_options={},
@@ -661,8 +661,11 @@ class VaceWanModel(WanModel):
             context = torch.concat([context_clip, context], dim=1)
         context_img_len = clip_fea.shape[-2]
 
+        orig_shape = list(vace_context.shape)
+        vace_context = vace_context.movedim(0, 1).reshape([-1] + orig_shape[2:])
         c = self.vace_patch_embedding(vace_context.float()).to(vace_context.dtype)
         c = c.flatten(2).transpose(1, 2)
+        c = list(c.split(orig_shape[0], dim=0))
 
         # arguments
         x_orig = x
@@ -682,8 +685,9 @@ class VaceWanModel(WanModel):
 
             ii = self.vace_layers_mapping.get(i, None)
             if ii is not None:
-                c_skip, c = self.vace_blocks[ii](c, x=x_orig, e=e0, freqs=freqs, context=context, context_img_len=context_img_len)
-                x += c_skip * vace_strength
+                for iii in range(len(c)):
+                    c_skip, c[iii] = self.vace_blocks[ii](c[iii], x=x_orig, e=e0, freqs=freqs, context=context, context_img_len=context_img_len)
+                    x += c_skip * vace_strength[iii]
                 del c_skip
 
         # head
         x = self.head(x, e)
diff --git a/comfy/model_base.py b/comfy/model_base.py
index f475e837e..fb4724690 100644
--- a/comfy/model_base.py
+++ b/comfy/model_base.py
@@ -1062,20 +1062,25 @@ class WAN21_Vace(WAN21):
         vace_frames = kwargs.get("vace_frames", None)
         if vace_frames is None:
             noise_shape[1] = 32
-            vace_frames = torch.zeros(noise_shape, device=noise.device, dtype=noise.dtype)
-
-        for i in range(0, vace_frames.shape[1], 16):
-            vace_frames = vace_frames.clone()
-            vace_frames[:, i:i + 16] = self.process_latent_in(vace_frames[:, i:i + 16])
+            vace_frames = [torch.zeros(noise_shape, device=noise.device, dtype=noise.dtype)]
 
         mask = kwargs.get("vace_mask", None)
         if mask is None:
             noise_shape[1] = 64
-            mask = torch.ones(noise_shape, device=noise.device, dtype=noise.dtype)
+            mask = [torch.ones(noise_shape, device=noise.device, dtype=noise.dtype)] * len(vace_frames)
 
-        out['vace_context'] = comfy.conds.CONDRegular(torch.cat([vace_frames.to(noise), mask.to(noise)], dim=1))
+        vace_frames_out = []
+        for j in range(len(vace_frames)):
+            vf = vace_frames[j].clone()
+            for i in range(0, vf.shape[1], 16):
+                vf[:, i:i + 16] = self.process_latent_in(vf[:, i:i + 16])
+            vf = torch.cat([vf, mask[j]], dim=1)
+            vace_frames_out.append(vf)
 
-        vace_strength = kwargs.get("vace_strength", 1.0)
+        vace_frames = torch.stack(vace_frames_out, dim=1)
+        out['vace_context'] = comfy.conds.CONDRegular(vace_frames)
+
+        vace_strength = kwargs.get("vace_strength", [1.0] * len(vace_frames_out))
         out['vace_strength'] = comfy.conds.CONDConstant(vace_strength)
         return out
diff --git a/comfy/model_detection.py b/comfy/model_detection.py
index 20f287df9..74f539598 100644
--- a/comfy/model_detection.py
+++ b/comfy/model_detection.py
@@ -620,6 +620,9 @@ def convert_config(unet_config):
 
 
 def unet_config_from_diffusers_unet(state_dict, dtype=None):
+    if "conv_in.weight" not in state_dict:
+        return None
+
     match = {}
     transformer_depth = []
diff --git a/comfy/model_management.py b/comfy/model_management.py
index 44aff3762..a49ed83e6 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -1257,6 +1257,9 @@ def should_use_bf16(device=None, model_params=0, prioritize_performance=True, ma
     return False
 
 def supports_fp8_compute(device=None):
+    if args.supports_fp8_compute:
+        return True
+
     if not is_nvidia():
         return False
diff --git a/comfy_api/torch_helpers/__init__.py b/comfy_api/torch_helpers/__init__.py
new file mode 100644
index 000000000..be7ae7a61
--- /dev/null
+++ b/comfy_api/torch_helpers/__init__.py
@@ -0,0 +1,5 @@
+from .torch_compile import set_torch_compile_wrapper
+
+__all__ = [
+    "set_torch_compile_wrapper",
+]
diff --git a/comfy_api/torch_helpers/torch_compile.py b/comfy_api/torch_helpers/torch_compile.py
new file mode 100644
index 000000000..9223f58db
--- /dev/null
+++ b/comfy_api/torch_helpers/torch_compile.py
@@ -0,0 +1,69 @@
+from __future__ import annotations
+import torch
+
+import comfy.utils
+from comfy.patcher_extension import WrappersMP
+from typing import TYPE_CHECKING, Callable, Optional
+if TYPE_CHECKING:
+    from comfy.model_patcher import ModelPatcher
+    from comfy.patcher_extension import WrapperExecutor
+
+
+COMPILE_KEY = "torch.compile"
+TORCH_COMPILE_KWARGS = "torch_compile_kwargs"
+
+
+def apply_torch_compile_factory(compiled_module_dict: dict[str, Callable]) -> Callable:
+    '''
+    Create a wrapper that will refer to the compiled_diffusion_model.
+    '''
+    def apply_torch_compile_wrapper(executor: WrapperExecutor, *args, **kwargs):
+        try:
+            orig_modules = {}
+            for key, value in compiled_module_dict.items():
+                orig_modules[key] = comfy.utils.get_attr(executor.class_obj, key)
+                comfy.utils.set_attr(executor.class_obj, key, value)
+            return executor(*args, **kwargs)
+        finally:
+            for key, value in orig_modules.items():
+                comfy.utils.set_attr(executor.class_obj, key, value)
+    return apply_torch_compile_wrapper
+
+
+def set_torch_compile_wrapper(model: ModelPatcher, backend: str, options: Optional[dict[str,str]]=None,
+                              mode: Optional[str]=None, fullgraph=False, dynamic: Optional[bool]=None,
+                              keys: list[str]=["diffusion_model"], *args, **kwargs):
+    '''
+    Perform torch.compile that will be applied at sample time for either the whole model or specific params of the BaseModel instance.
+
+    When keys is None, it will default to using ["diffusion_model"], compiling the whole diffusion_model.
+    When a list of keys is provided, it will perform torch.compile on only the selected modules.
+    '''
+    # clear out any other torch.compile wrappers
+    model.remove_wrappers_with_key(WrappersMP.APPLY_MODEL, COMPILE_KEY)
+    # if no keys, default to 'diffusion_model'
+    if not keys:
+        keys = ["diffusion_model"]
+    # create kwargs dict that can be referenced later
+    compile_kwargs = {
+        "backend": backend,
+        "options": options,
+        "mode": mode,
+        "fullgraph": fullgraph,
+        "dynamic": dynamic,
+    }
+    # get a dict of compiled keys
+    compiled_modules = {}
+    for key in keys:
+        compiled_modules[key] = torch.compile(
+            model=model.get_model_object(key),
+            **compile_kwargs,
+        )
+    # add torch.compile wrapper
+    wrapper_func = apply_torch_compile_factory(
+        compiled_module_dict=compiled_modules,
+    )
+    # store wrapper to run on BaseModel's apply_model function
+    model.add_wrapper_with_key(WrappersMP.APPLY_MODEL, COMPILE_KEY, wrapper_func)
+    # keep compile kwargs for reference
+    model.model_options[TORCH_COMPILE_KWARGS] = compile_kwargs
diff --git a/comfy_api_nodes/nodes_kling.py b/comfy_api_nodes/nodes_kling.py
index 456a86905..641cd6353 100644
--- a/comfy_api_nodes/nodes_kling.py
+++ b/comfy_api_nodes/nodes_kling.py
@@ -65,6 +65,12 @@ from comfy_api_nodes.apinode_utils import (
     download_url_to_image_tensor,
 )
 from comfy_api_nodes.mapper_utils import model_field_to_node_input
+from comfy_api_nodes.util.validation_utils import (
+    validate_image_dimensions,
+    validate_image_aspect_ratio,
+    validate_video_dimensions,
+    validate_video_duration,
+)
 from comfy_api.input.basic_types import AudioInput
 from comfy_api.input.video_types import VideoInput
 from comfy_api.input_impl import VideoFromFile
@@ -80,18 +86,16 @@ PATH_CHARACTER_IMAGE = f"/proxy/kling/{KLING_API_VERSION}/images/generations"
 PATH_VIRTUAL_TRY_ON = f"/proxy/kling/{KLING_API_VERSION}/images/kolors-virtual-try-on"
 PATH_IMAGE_GENERATIONS = f"/proxy/kling/{KLING_API_VERSION}/images/generations"
 
-
 MAX_PROMPT_LENGTH_T2V = 2500
 MAX_PROMPT_LENGTH_I2V = 500
 MAX_PROMPT_LENGTH_IMAGE_GEN = 500
 MAX_NEGATIVE_PROMPT_LENGTH_IMAGE_GEN = 200
 MAX_PROMPT_LENGTH_LIP_SYNC = 120
 
-# TODO: adjust based on tests
-AVERAGE_DURATION_T2V = 319 # 319,
-AVERAGE_DURATION_I2V = 164 # 164,
-AVERAGE_DURATION_LIP_SYNC = 120
-AVERAGE_DURATION_VIRTUAL_TRY_ON = 19 # 19,
+AVERAGE_DURATION_T2V = 319
+AVERAGE_DURATION_I2V = 164
+AVERAGE_DURATION_LIP_SYNC = 455
+AVERAGE_DURATION_VIRTUAL_TRY_ON = 19
 AVERAGE_DURATION_IMAGE_GEN = 32
 AVERAGE_DURATION_VIDEO_EFFECTS = 320
 AVERAGE_DURATION_VIDEO_EXTEND = 320
@@ -211,23 +215,8 @@ def validate_input_image(image: torch.Tensor) -> None:
 
     See: https://app.klingai.com/global/dev/document-api/apiReference/model/imageToVideo
     """
-    if len(image.shape) == 4:
-        height, width = image.shape[1], image.shape[2]
-    elif len(image.shape) == 3:
-        height, width = image.shape[0], image.shape[1]
-    else:
-        raise ValueError("Invalid image tensor shape.")
-
-    # Ensure minimum resolution is met
-    if height < 300:
-        raise ValueError("Image height must be at least 300px")
-    if width < 300:
-        raise ValueError("Image width must be at least 300px")
-
-    # Ensure aspect ratio is within acceptable range
-    aspect_ratio = width / height
-    if aspect_ratio < 1 / 2.5 or aspect_ratio > 2.5:
-        raise ValueError("Image aspect ratio must be between 1:2.5 and 2.5:1")
+    validate_image_dimensions(image, min_width=300, min_height=300)
+    validate_image_aspect_ratio(image, min_aspect_ratio=1 / 2.5, max_aspect_ratio=2.5)
 
 
 def get_camera_control_input_config(
@@ -1243,6 +1232,17 @@ class KlingLipSyncBase(KlingNodeBase):
     RETURN_TYPES = ("VIDEO", "STRING", "STRING")
     RETURN_NAMES = ("VIDEO", "video_id", "duration")
"video_id", "duration") + def validate_lip_sync_video(self, video: VideoInput): + """ + Validates the input video adheres to the expectations of the Kling Lip Sync API: + - Video length does not exceed 10s and is not shorter than 2s + - Length and width dimensions should both be between 720px and 1920px + + See: https://app.klingai.com/global/dev/document-api/apiReference/model/videoTolip + """ + validate_video_dimensions(video, 720, 1920) + validate_video_duration(video, 2, 10) + def validate_text(self, text: str): if not text: raise ValueError("Text is required") @@ -1282,6 +1282,7 @@ class KlingLipSyncBase(KlingNodeBase): ) -> tuple[VideoFromFile, str, str]: if text: self.validate_text(text) + self.validate_lip_sync_video(video) # Upload video to Comfy API and get download URL video_url = upload_video_to_comfyapi(video, auth_kwargs=kwargs) @@ -1352,7 +1353,7 @@ class KlingLipSyncAudioToVideoNode(KlingLipSyncBase): }, } - DESCRIPTION = "Kling Lip Sync Audio to Video Node. Syncs mouth movements in a video file to the audio content of an audio file." + DESCRIPTION = "Kling Lip Sync Audio to Video Node. Syncs mouth movements in a video file to the audio content of an audio file. When using, ensure that the audio contains clearly distinguishable vocals and that the video contains a distinct face. The audio file should not be larger than 5MB. The video file should not be larger than 100MB, should have height/width between 720px and 1920px, and should be between 2s and 10s in length." def api_call( self, @@ -1464,7 +1465,7 @@ class KlingLipSyncTextToVideoNode(KlingLipSyncBase): }, } - DESCRIPTION = "Kling Lip Sync Text to Video Node. Syncs mouth movements in a video file to a text prompt." + DESCRIPTION = "Kling Lip Sync Text to Video Node. Syncs mouth movements in a video file to a text prompt. The video file should not be larger than 100MB, should have height/width between 720px and 1920px, and should be between 2s and 10s in length." 
 
     def api_call(
         self,
diff --git a/comfy_api_nodes/util/__init__.py b/comfy_api_nodes/util/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/comfy_api_nodes/util/validation_utils.py b/comfy_api_nodes/util/validation_utils.py
new file mode 100644
index 000000000..031b9fbd3
--- /dev/null
+++ b/comfy_api_nodes/util/validation_utils.py
@@ -0,0 +1,100 @@
+import logging
+from typing import Optional
+
+import torch
+from comfy_api.input.video_types import VideoInput
+
+
+def get_image_dimensions(image: torch.Tensor) -> tuple[int, int]:
+    if len(image.shape) == 4:
+        return image.shape[1], image.shape[2]
+    elif len(image.shape) == 3:
+        return image.shape[0], image.shape[1]
+    else:
+        raise ValueError("Invalid image tensor shape.")
+
+
+def validate_image_dimensions(
+    image: torch.Tensor,
+    min_width: Optional[int] = None,
+    max_width: Optional[int] = None,
+    min_height: Optional[int] = None,
+    max_height: Optional[int] = None,
+):
+    height, width = get_image_dimensions(image)
+
+    if min_width is not None and width < min_width:
+        raise ValueError(f"Image width must be at least {min_width}px, got {width}px")
+    if max_width is not None and width > max_width:
+        raise ValueError(f"Image width must be at most {max_width}px, got {width}px")
+    if min_height is not None and height < min_height:
+        raise ValueError(
+            f"Image height must be at least {min_height}px, got {height}px"
+        )
+    if max_height is not None and height > max_height:
+        raise ValueError(f"Image height must be at most {max_height}px, got {height}px")
+
+
+def validate_image_aspect_ratio(
+    image: torch.Tensor,
+    min_aspect_ratio: Optional[float] = None,
+    max_aspect_ratio: Optional[float] = None,
+):
+    width, height = get_image_dimensions(image)
+    aspect_ratio = width / height
+
+    if min_aspect_ratio is not None and aspect_ratio < min_aspect_ratio:
+        raise ValueError(
+            f"Image aspect ratio must be at least {min_aspect_ratio}, got {aspect_ratio}"
+        )
+    if max_aspect_ratio is not None and aspect_ratio > max_aspect_ratio:
+        raise ValueError(
+            f"Image aspect ratio must be at most {max_aspect_ratio}, got {aspect_ratio}"
+        )
+
+
+def validate_video_dimensions(
+    video: VideoInput,
+    min_width: Optional[int] = None,
+    max_width: Optional[int] = None,
+    min_height: Optional[int] = None,
+    max_height: Optional[int] = None,
+):
+    try:
+        width, height = video.get_dimensions()
+    except Exception as e:
+        logging.error("Error getting dimensions of video: %s", e)
+        return
+
+    if min_width is not None and width < min_width:
+        raise ValueError(f"Video width must be at least {min_width}px, got {width}px")
+    if max_width is not None and width > max_width:
+        raise ValueError(f"Video width must be at most {max_width}px, got {width}px")
+    if min_height is not None and height < min_height:
+        raise ValueError(
+            f"Video height must be at least {min_height}px, got {height}px"
+        )
+    if max_height is not None and height > max_height:
+        raise ValueError(f"Video height must be at most {max_height}px, got {height}px")
+
+
+def validate_video_duration(
+    video: VideoInput,
+    min_duration: Optional[float] = None,
+    max_duration: Optional[float] = None,
+):
+    try:
+        duration = video.get_duration()
+    except Exception as e:
+        logging.error("Error getting duration of video: %s", e)
+        return
+
+    epsilon = 0.0001
+    if min_duration is not None and min_duration - epsilon > duration:
+        raise ValueError(
+            f"Video duration must be at least {min_duration}s, got {duration}s"
+        )
+    if max_duration is not None and duration > max_duration + epsilon:
+        raise ValueError(
+            f"Video duration must be at most {max_duration}s, got {duration}s"
+        )
diff --git a/comfy_extras/nodes_images.py b/comfy_extras/nodes_images.py
index 77c305619..29a5d5b61 100644
--- a/comfy_extras/nodes_images.py
+++ b/comfy_extras/nodes_images.py
@@ -13,6 +13,7 @@ import os
 import re
 from io import BytesIO
 from inspect import cleandoc
+import torch
 
 from comfy.comfy_types import FileLocator
 
@@ -74,6 +75,24 @@ class ImageFromBatch:
         s = s_in[batch_index:batch_index + length].clone()
         return (s,)
 
+
+class ImageAddNoise:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": { "image": ("IMAGE",),
+                              "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff, "control_after_generate": True, "tooltip": "The random seed used for creating the noise."}),
+                              "strength": ("FLOAT", {"default": 0.5, "min": 0.0, "max": 1.0, "step": 0.01}),
+                              }}
+    RETURN_TYPES = ("IMAGE",)
+    FUNCTION = "repeat"
+
+    CATEGORY = "image"
+
+    def repeat(self, image, seed, strength):
+        generator = torch.manual_seed(seed)
+        s = torch.clip((image + strength * torch.randn(image.size(), generator=generator, device="cpu").to(image)), min=0.0, max=1.0)
+        return (s,)
+
 class SaveAnimatedWEBP:
     def __init__(self):
         self.output_dir = folder_paths.get_output_directory()
@@ -295,6 +314,7 @@ NODE_CLASS_MAPPINGS = {
     "ImageCrop": ImageCrop,
     "RepeatImageBatch": RepeatImageBatch,
     "ImageFromBatch": ImageFromBatch,
+    "ImageAddNoise": ImageAddNoise,
     "SaveAnimatedWEBP": SaveAnimatedWEBP,
     "SaveAnimatedPNG": SaveAnimatedPNG,
     "SaveSVGNode": SaveSVGNode,
diff --git a/comfy_extras/nodes_load_3d.py b/comfy_extras/nodes_load_3d.py
index d5b4d9111..40d03e18a 100644
--- a/comfy_extras/nodes_load_3d.py
+++ b/comfy_extras/nodes_load_3d.py
@@ -16,7 +16,7 @@ class Load3D():
 
         os.makedirs(input_dir, exist_ok=True)
 
-        files = [normalize_path(os.path.join("3d", f)) for f in os.listdir(input_dir) if f.endswith(('.gltf', '.glb', '.obj', '.mtl', '.fbx', '.stl'))]
+        files = [normalize_path(os.path.join("3d", f)) for f in os.listdir(input_dir) if f.endswith(('.gltf', '.glb', '.obj', '.fbx', '.stl'))]
 
         return {"required": {
             "model_file": (sorted(files), {"file_upload": True}),
diff --git a/comfy_extras/nodes_string.py b/comfy_extras/nodes_string.py
index a852326e5..b24222cee 100644
--- a/comfy_extras/nodes_string.py
+++ b/comfy_extras/nodes_string.py
@@ -8,7 +8,8 @@ class StringConcatenate():
         return {
             "required": {
                 "string_a": (IO.STRING, {"multiline": True}),
-                "string_b": (IO.STRING, {"multiline": True})
+                "string_b": (IO.STRING, {"multiline": True}),
+                "delimiter": (IO.STRING, {"multiline": False, "default": ""})
             }
         }
 
@@ -16,8 +17,8 @@ class StringConcatenate():
     FUNCTION = "execute"
     CATEGORY = "utils/string"
 
-    def execute(self, string_a, string_b, **kwargs):
-        return string_a + string_b,
+    def execute(self, string_a, string_b, delimiter, **kwargs):
+        return delimiter.join((string_a, string_b)),
 
 class StringSubstring():
     @classmethod
diff --git a/comfy_extras/nodes_torch_compile.py b/comfy_extras/nodes_torch_compile.py
index 1fe6f42c7..605536678 100644
--- a/comfy_extras/nodes_torch_compile.py
+++ b/comfy_extras/nodes_torch_compile.py
@@ -1,4 +1,5 @@
-import torch
+from comfy_api.torch_helpers import set_torch_compile_wrapper
+
 
 class TorchCompileModel:
     @classmethod
@@ -14,7 +15,7 @@ class TorchCompileModel:
 
     def patch(self, model, backend):
         m = model.clone()
-        m.add_object_patch("diffusion_model", torch.compile(model=m.get_model_object("diffusion_model"), backend=backend))
+        set_torch_compile_wrapper(model=m, backend=backend)
         return (m, )
 
 NODE_CLASS_MAPPINGS = {
diff --git a/comfy_extras/nodes_wan.py b/comfy_extras/nodes_wan.py
index a91b4aba9..c35c4871c 100644
--- a/comfy_extras/nodes_wan.py
+++ b/comfy_extras/nodes_wan.py
@@ -268,8 +268,9 @@ class WanVaceToVideo:
             trim_latent = reference_image.shape[2]
 
         mask = mask.unsqueeze(0)
-        positive = node_helpers.conditioning_set_values(positive, {"vace_frames": control_video_latent, "vace_mask": mask, "vace_strength": strength})
-        negative = node_helpers.conditioning_set_values(negative, {"vace_frames": control_video_latent, "vace_mask": mask, "vace_strength": strength})
+
+        positive = node_helpers.conditioning_set_values(positive, {"vace_frames": [control_video_latent], "vace_mask": [mask], "vace_strength": [strength]}, append=True)
+        negative = node_helpers.conditioning_set_values(negative, {"vace_frames": [control_video_latent], "vace_mask": [mask], "vace_strength": [strength]}, append=True)
 
         latent = torch.zeros([batch_size, 16, latent_length, height // 8, width // 8], device=comfy.model_management.intermediate_device())
         out_latent = {}
diff --git a/comfyui_version.py b/comfyui_version.py
index b740b378d..817b7d83b 100644
--- a/comfyui_version.py
+++ b/comfyui_version.py
@@ -1,3 +1,3 @@
 # This file is automatically generated by the build process when version is
 # updated in pyproject.toml.
-__version__ = "0.3.34"
+__version__ = "0.3.36"
diff --git a/execution.py b/execution.py
index e5d1c69d9..15ff7567c 100644
--- a/execution.py
+++ b/execution.py
@@ -909,7 +909,6 @@ class PromptQueue:
         self.currently_running = {}
         self.history = {}
         self.flags = {}
-        server.prompt_queue = self
 
     def put(self, item):
         with self.mutex:
@@ -954,6 +953,7 @@ class PromptQueue:
                 self.history[prompt[1]].update(history_result)
             self.server.queue_updated()
 
+    # Note: slow
     def get_current_queue(self):
         with self.mutex:
             out = []
@@ -961,6 +961,13 @@ class PromptQueue:
                 out += [x]
             return (out, copy.deepcopy(self.queue))
 
+    # read-safe as long as queue items are immutable
+    def get_current_queue_volatile(self):
+        with self.mutex:
+            running = [x for x in self.currently_running.values()]
+            queued = copy.copy(self.queue)
+            return (running, queued)
+
     def get_tasks_remaining(self):
         with self.mutex:
             return len(self.queue) + len(self.currently_running)
diff --git a/main.py b/main.py
index acb23423d..8533666b8 100644
--- a/main.py
+++ b/main.py
@@ -272,7 +272,6 @@ def start_comfyui(asyncio_loop=None):
         asyncio_loop = asyncio.new_event_loop()
         asyncio.set_event_loop(asyncio_loop)
     prompt_server = server.PromptServer(asyncio_loop)
-    q = execution.PromptQueue(prompt_server)
 
     if not args.disable_manager and not args.disable_manager_ui:
         comfyui_manager.start()
@@ -286,7 +285,7 @@ def start_comfyui(asyncio_loop=None):
     prompt_server.add_routes()
     hijack_progress(prompt_server)
 
-    threading.Thread(target=prompt_worker, daemon=True, args=(q, prompt_server,)).start()
+    threading.Thread(target=prompt_worker, daemon=True, args=(prompt_server.prompt_queue, prompt_server,)).start()
 
     if args.quick_test_for_ci:
         exit(0)
diff --git a/node_helpers.py b/node_helpers.py
index c3e1a14ca..4ff960ef8 100644
--- a/node_helpers.py
+++ b/node_helpers.py
@@ -5,12 +5,18 @@ from comfy.cli_args import args
 
 from PIL import ImageFile, UnidentifiedImageError
 
-def conditioning_set_values(conditioning, values={}):
+def conditioning_set_values(conditioning, values={}, append=False):
     c = []
     for t in conditioning:
         n = [t[0], t[1].copy()]
         for k in values:
-            n[1][k] = values[k]
+            val = values[k]
+            if append:
+                old_val = n[1].get(k, None)
+                if old_val is not None:
+                    val = old_val + val
+
+            n[1][k] = val
+
         c.append(n)
     return c
diff --git a/nodes.py b/nodes.py
index 0bc9ef2b9..0bf7a3472 100644
--- a/nodes.py
+++ b/nodes.py
@@ -1106,16 +1106,7 @@ class unCLIPConditioning:
         if strength == 0:
             return (conditioning, )
 
-        c = []
-        for t in conditioning:
-            o = t[1].copy()
-            x = {"clip_vision_output": clip_vision_output, "strength": strength, "noise_augmentation": noise_augmentation}
-            if "unclip_conditioning" in o:
-                o["unclip_conditioning"] = o["unclip_conditioning"][:] + [x]
-            else:
-                o["unclip_conditioning"] = [x]
-            n = [t[0], o]
-            c.append(n)
+        c = node_helpers.conditioning_set_values(conditioning, {"unclip_conditioning": [{"clip_vision_output": clip_vision_output, "strength": strength, "noise_augmentation": noise_augmentation}]}, append=True)
         return (c, )
 
 class GLIGENLoader:
diff --git a/pyproject.toml b/pyproject.toml
index 80061b39a..accf6f864 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "ComfyUI"
-version = "0.3.34"
+version = "0.3.36"
 readme = "README.md"
 license = { file = "LICENSE" }
 requires-python = ">=3.9"
diff --git a/requirements.txt b/requirements.txt
index 45f9b5c1b..2c299a2d4 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
-comfyui-frontend-package==1.19.9
-comfyui-workflow-templates==0.1.14
+comfyui-frontend-package==1.20.5
+comfyui-workflow-templates==0.1.18
 comfyui_manager
 torch
 torchsde
diff --git a/server.py b/server.py
index cb1c6a8fd..1b0a73601 100644
--- a/server.py
+++ b/server.py
@@ -29,6 +29,7 @@ import comfy.model_management
 import node_helpers
 from comfyui_version import __version__
 from app.frontend_management import FrontendManager
+
 from app.user_manager import UserManager
 from app.model_manager import ModelFileManager
 from app.custom_node_manager import CustomNodeManager
@@ -159,7 +160,7 @@ class PromptServer():
         self.custom_node_manager = CustomNodeManager()
         self.internal_routes = InternalRoutes(self)
         self.supports = ["custom_nodes_from_web"]
-        self.prompt_queue = None
+        self.prompt_queue = execution.PromptQueue(self)
         self.loop = loop
         self.messages = asyncio.Queue()
         self.client_session:Optional[aiohttp.ClientSession] = None
@@ -226,7 +227,7 @@ class PromptServer():
             return response
 
         @routes.get("/embeddings")
-        def get_embeddings(self):
+        def get_embeddings(request):
             embeddings = folder_paths.get_filename_list("embeddings")
             return web.json_response(list(map(lambda a: os.path.splitext(a)[0], embeddings)))
 
@@ -282,7 +283,6 @@ class PromptServer():
                     a.update(f.read())
                     b.update(image.file.read())
                     image.file.seek(0)
-                    f.close()
 
             return a.hexdigest() == b.hexdigest()
         return False
@@ -621,7 +621,7 @@ class PromptServer():
         @routes.get("/queue")
         async def get_queue(request):
             queue_info = {}
-            current_queue = self.prompt_queue.get_current_queue()
+            current_queue = self.prompt_queue.get_current_queue_volatile()
             queue_info['queue_running'] = current_queue[0]
             queue_info['queue_pending'] = current_queue[1]
             return web.json_response(queue_info)
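
Usage note (not part of the patch): `comfy_api.torch_helpers.set_torch_compile_wrapper` is what `TorchCompileModel` now calls instead of object-patching `diffusion_model` directly. A minimal sketch of how a custom node's patch function might call it, assuming the same `ModelPatcher` clone-then-wrap pattern as the built-in node; the function wrapper and the `backend` default below are illustrative, not taken from the diff:

```python
# Hypothetical patch function for a custom node; node registration is omitted.
from comfy_api.torch_helpers import set_torch_compile_wrapper


def patch(model, backend="inductor"):
    m = model.clone()  # model is a comfy.model_patcher.ModelPatcher
    # keys defaults to ["diffusion_model"]; a different list of attribute paths on the
    # BaseModel could be passed to compile only selected submodules.
    set_torch_compile_wrapper(model=m, backend=backend, keys=["diffusion_model"])
    return (m,)
```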
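
A second sketch, also illustrative rather than part of the patch, of the new `append=True` behavior in `node_helpers.conditioning_set_values` that `WanVaceToVideo` and `unCLIPConditioning` now rely on. It assumes it runs inside a ComfyUI checkout so that `node_helpers` is importable, and the tensor shape is just a placeholder:

```python
import torch
import node_helpers

# A stand-in conditioning entry: [cond_tensor, options_dict].
cond_tensor = torch.zeros(1, 77, 768)
conditioning = [[cond_tensor, {"vace_strength": [0.8]}]]

# With append=True an existing value is extended (old_val + val) instead of replaced,
# which is what lets chained WanVaceToVideo nodes accumulate their
# vace_frames/vace_mask/vace_strength lists rather than overwrite them.
conditioning = node_helpers.conditioning_set_values(
    conditioning, {"vace_strength": [1.0]}, append=True
)

print(conditioning[0][1]["vace_strength"])  # [0.8, 1.0]
```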