diff --git a/comfy_extras/nodes_kandinsky5.py b/comfy_extras/nodes_kandinsky5.py
new file mode 100644
index 000000000..dd9c73d3a
--- /dev/null
+++ b/comfy_extras/nodes_kandinsky5.py
@@ -0,0 +1,61 @@
+import nodes
+import node_helpers
+import torch
+import comfy.model_management
+import comfy.utils
+
+from typing_extensions import override
+from comfy_api.latest import ComfyExtension, io
+
+class Kandinsky5ImageToVideo(io.ComfyNode):
+    """Create a zero-filled latent and, if a start image is given, attach its encoding to the conditioning."""
+    @classmethod
+    def define_schema(cls):
+        max_res = nodes.MAX_RESOLUTION
+        node_inputs = [
+            io.Conditioning.Input("positive"),
+            io.Conditioning.Input("negative"),
+            io.Vae.Input("vae"),
+            io.Int.Input("width", default=768, min=16, max=max_res, step=16),
+            io.Int.Input("height", default=512, min=16, max=max_res, step=16),
+            io.Int.Input("length", default=121, min=1, max=max_res, step=4),
+            io.Int.Input("batch_size", default=1, min=1, max=4096),
+            io.Image.Input("start_image", optional=True),
+        ]
+        node_outputs = [
+            io.Conditioning.Output(display_name="positive"),
+            io.Conditioning.Output(display_name="negative"),
+            io.Latent.Output(display_name="latent"),
+        ]
+        return io.Schema(node_id="Kandinsky5ImageToVideo", category="conditioning/video_models", inputs=node_inputs, outputs=node_outputs)
+
+    @classmethod
+    def execute(cls, positive, negative, vae, width, height, length, batch_size, start_image=None) -> io.NodeOutput:
+        """Return (positive, negative, latent); the conditioning is only modified when start_image is provided."""
+        # Zero tensor of shape [batch_size, 16, (length - 1) // 4 + 1, height // 8, width // 8].
+        latent = torch.zeros([batch_size, 16, ((length - 1) // 4) + 1, height // 8, width // 8], device=comfy.model_management.intermediate_device())
+        if start_image is None:
+            return io.NodeOutput(positive, negative, {"samples": latent})
+        # Keep at most `length` frames, resize to width x height, then encode the first 3 entries of the last dim.
+        frames = comfy.utils.common_upscale(start_image[:length].movedim(-1, 1), width, height, "bilinear", "center").movedim(1, -1)
+        encoded = vae.encode(frames[:, :, :, :3])
+        concat_latent_image = latent.clone()
+        concat_latent_image[:, :, :encoded.shape[2], :, :] = encoded
+        # Mask is all ones except the leading (frame count - 1) // 4 + 1 entries along dim 2, which are zeroed.
+        mask = torch.ones((1, 1, latent.shape[2], *concat_latent_image.shape[-2:]), device=frames.device, dtype=frames.dtype)
+        mask[:, :, :((frames.shape[0] - 1) // 4) + 1] = 0.0
+        concat = {"concat_latent_image": concat_latent_image, "concat_mask": mask}
+        positive = node_helpers.conditioning_set_values(positive, concat)
+        negative = node_helpers.conditioning_set_values(negative, concat)
+        return io.NodeOutput(positive, negative, {"samples": latent})
+
+
+class Kandinsky5Extension(ComfyExtension):
+    """Extension whose node list holds the node classes defined in this file."""
+    @override
+    async def get_node_list(self) -> list[type[io.ComfyNode]]:
+        node_classes: list[type[io.ComfyNode]] = [Kandinsky5ImageToVideo]
+        return node_classes
+
+async def comfy_entrypoint() -> Kandinsky5Extension:  # NOTE(review): presumably the hook the extension loader awaits - confirm
+    return Kandinsky5Extension()  # a new extension instance is constructed on every call
diff --git a/nodes.py b/nodes.py
index 61f06eb92..cc1a88262 100644
--- a/nodes.py
+++ b/nodes.py
@@ -2357,6 +2357,7 @@ async def init_builtin_extra_nodes():
         "nodes_rope.py",
         "nodes_logic.py",
         "nodes_nop.py",
+        "nodes_kandinsky5.py",
     ]
 
     import_failed = []