ComfyUI/comfy_extras/nodes_hunyuan_foley.py
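
"""ComfyUI nodes for the Hunyuan Foley audio model.

EmptyLatentHunyuanFoley creates an empty audio latent, optionally sized to match a
video's duration, and HunyuanFoleyConditioning merges SigLIP and Synchformer video
encodings with a text encoding into a positive/negative conditioning pair.
"""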

import torch
import comfy.model_management


class EmptyLatentHunyuanFoley:
    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "length": ("INT", {"default": 12, "min": 1, "max": 15, "tooltip": "The length of the audio in seconds. Should match the length of the video."}),
                "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096, "tooltip": "The number of latent audios in the batch."}),
            },
            "optional": {"video": ("VIDEO",)},
        }

    RETURN_TYPES = ("LATENT",)
    FUNCTION = "generate"
    CATEGORY = "latent/audio"

    def generate(self, length, batch_size, video=None):
        if video is not None:
            # Derive the length from the video: frame count divided by an assumed 25 fps.
            _, length = video.get_duration(return_frames=True)
            length /= 25
        # 128 latent channels, 50 latent frames per second of audio.
        shape = (batch_size, 128, int(50 * length))
        latent = torch.randn(shape, device=comfy.model_management.intermediate_device())
        return ({"samples": latent, "type": "hunyuan_foley"},)


class HunyuanFoleyConditioning:
    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "video_encoding_siglip": ("CONDITIONING",),
                "video_encoding_synchformer": ("CONDITIONING",),
                "text_encoding": ("CONDITIONING",),
            },
        }

    RETURN_TYPES = ("CONDITIONING", "CONDITIONING")
    RETURN_NAMES = ("positive", "negative")
    FUNCTION = "encode"
    CATEGORY = "conditioning/video_models"

    def encode(self, video_encoding_siglip, video_encoding_synchformer, text_encoding):
        # Concatenate the SigLIP video features, Synchformer video features and the
        # text features into a single conditioning tensor.
        embeds = torch.cat([video_encoding_siglip, video_encoding_synchformer, text_encoding], dim=0)
        positive = [[embeds, {}]]
        # The negative conditioning is an all-zero tensor of the same shape.
        negative = [[torch.zeros_like(embeds), {}]]
        return (positive, negative)


NODE_CLASS_MAPPINGS = {
    "HunyuanFoleyConditioning": HunyuanFoleyConditioning,
    "EmptyLatentHunyuanFoley": EmptyLatentHunyuanFoley,
}