Mirror of https://github.com/comfyanonymous/ComfyUI.git
Merge branch 'master' into dr-support-pip-cm
commit fa51f0c60a
@@ -1551,6 +1551,9 @@ class HumoWanModel(WanModel):
         context_img_len = None

         if audio_embed is not None:
+            if reference_latent is not None:
+                zero_audio_pad = torch.zeros(audio_embed.shape[0], reference_latent.shape[-3], *audio_embed.shape[2:], device=audio_embed.device, dtype=audio_embed.dtype)
+                audio_embed = torch.cat([audio_embed, zero_audio_pad], dim=1)
             audio = self.audio_proj(audio_embed).permute(0, 3, 1, 2).flatten(2).transpose(1, 2)
         else:
             audio = None

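The new branch zero-pads the audio embedding along the frame axis so it still lines up one-to-one with the latent frames after reference frames are appended. A minimal, self-contained sketch of that step, with invented shapes (assuming audio_embed is [batch, frames, window, dim] and reference_latent carries its frame count in dim -3):

import torch

# Invented shapes: 16 audio frames, 2 reference-latent frames.
audio_embed = torch.randn(1, 16, 5, 1280)          # [batch, frames, window, dim]
reference_latent = torch.randn(1, 16, 2, 32, 32)   # [..., frames, h, w]

# Same padding as the added lines: zeros for the reference frames, so the
# audio sequence length matches the latent frame count.
zero_audio_pad = torch.zeros(audio_embed.shape[0], reference_latent.shape[-3],
                             *audio_embed.shape[2:],
                             device=audio_embed.device, dtype=audio_embed.dtype)
audio_embed = torch.cat([audio_embed, zero_audio_pad], dim=1)
print(audio_embed.shape)  # torch.Size([1, 18, 5, 1280])
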
@@ -348,7 +348,7 @@ try:
         # if any((a in arch) for a in ["gfx1201"]):
         #     ENABLE_PYTORCH_ATTENTION = True
         if torch_version_numeric >= (2, 7) and rocm_version >= (6, 4):
-            if any((a in arch) for a in ["gfx1201", "gfx942", "gfx950"]): # TODO: more arches
+            if any((a in arch) for a in ["gfx1200", "gfx1201", "gfx942", "gfx950"]): # TODO: more arches
                 SUPPORT_FP8_OPS = True

 except:

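The only functional change here is adding "gfx1200" to the allow-list. The gate itself is a tuple comparison on version numbers plus substring matching on the ROCm architecture name; a hedged sketch with invented sample values (in ComfyUI the real arch string comes from the device properties at startup):

# Invented sample values; the running environment supplies the real ones.
torch_version_numeric = (2, 7)
rocm_version = (6, 4)
arch = "gfx1200:sramecc+:xnack-"  # substring match below, so suffixes are fine

SUPPORT_FP8_OPS = False
if torch_version_numeric >= (2, 7) and rocm_version >= (6, 4):
    if any((a in arch) for a in ["gfx1200", "gfx1201", "gfx942", "gfx950"]):
        SUPPORT_FP8_OPS = True
print(SUPPORT_FP8_OPS)  # True
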
@@ -233,6 +233,7 @@ class Sharpen:

         kernel_size = sharpen_radius * 2 + 1
         kernel = gaussian_kernel(kernel_size, sigma, device=image.device) * -(alpha*10)
+        kernel = kernel.to(dtype=image.dtype)
         center = kernel_size // 2
         kernel[center, center] = kernel[center, center] - kernel.sum() + 1.0
         kernel = kernel.repeat(channels, 1, 1).unsqueeze(1)

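The added cast matters because torch convolution requires the kernel and the input to share a dtype; with a non-float32 image the float32 Gaussian kernel would otherwise raise a dtype-mismatch RuntimeError. A toy reproduction of the failure mode and the fix (shapes invented):

import torch
import torch.nn.functional as F

image = torch.randn(1, 3, 8, 8, dtype=torch.float16)  # e.g. an fp16 image tensor
kernel = torch.randn(3, 1, 3, 3)                       # float32 by default

kernel = kernel.to(dtype=image.dtype)  # the one-line fix from the hunk above
out = F.conv2d(image, kernel, padding=1, groups=3)     # depthwise: one kernel per channel
print(out.dtype)  # torch.float16
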
@@ -1095,10 +1095,6 @@ class WanHuMoImageToVideo(io.ComfyNode):
             audio_emb = torch.stack([feat0, feat1, feat2, feat3, feat4], dim=2)[0] # [T, 5, 1280]
             audio_emb, _ = get_audio_emb_window(audio_emb, length, frame0_idx=0)

-            # pad for ref latent
-            zero_audio_pad = torch.zeros(ref_latent.shape[2], *audio_emb.shape[1:], device=audio_emb.device, dtype=audio_emb.dtype)
-            audio_emb = torch.cat([audio_emb, zero_audio_pad], dim=0)
-
             audio_emb = audio_emb.unsqueeze(0)
             audio_emb_neg = torch.zeros_like(audio_emb)
             positive = node_helpers.conditioning_set_values(positive, {"audio_embed": audio_emb})

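These node-side lines are dropped because the padding now lives in HumoWanModel (first hunk above), keyed on reference_latent at model run time; doing it in the node as well would pad the sequence twice. A rough sketch of the resulting division of labour, with invented shapes and a hypothetical helper name standing in for the model-side logic:

import torch

def model_side_pad(audio_embed, reference_latent):
    # Hypothetical stand-in for the branch added to HumoWanModel above.
    if reference_latent is not None:
        pad = torch.zeros(audio_embed.shape[0], reference_latent.shape[-3],
                          *audio_embed.shape[2:],
                          device=audio_embed.device, dtype=audio_embed.dtype)
        audio_embed = torch.cat([audio_embed, pad], dim=1)
    return audio_embed

audio_emb = torch.randn(16, 5, 1280).unsqueeze(0)  # node output: unpadded [1, T, 5, 1280]
ref_latent = torch.randn(1, 16, 2, 32, 32)
print(model_side_pad(audio_emb, ref_latent).shape)  # torch.Size([1, 18, 5, 1280])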