Mirror of https://github.com/comfyanonymous/ComfyUI.git (synced 2025-12-20 03:23:00 +08:00)

Commit bc8418f55a: Merge branch 'master' into dr-support-pip-cm

@@ -1355,7 +1355,7 @@ class WanT2VCrossAttentionGather(WanSelfAttention):

         x = optimized_attention(q, k, v, heads=self.num_heads, skip_reshape=True, skip_output_reshape=True, transformer_options=transformer_options)

-        x = x.transpose(1, 2).view(b, -1, n, d).flatten(2)
+        x = x.transpose(1, 2).reshape(b, -1, n * d)
         x = self.o(x)
         return x

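Note: the changed line collapses the old view(...).flatten(2) pair into a single reshape; reshape copies when the transposed tensor's layout does not allow a view, and yields the same (batch, tokens, heads * head_dim) result. A minimal sketch of the equivalence, with illustrative shapes rather than the model's actual dimensions:

    import torch

    b, heads, seq, head_dim = 2, 8, 16, 64
    x = torch.randn(b, heads, seq, head_dim)   # per-head attention output

    old = x.transpose(1, 2).view(b, -1, heads, head_dim).flatten(2)
    new = x.transpose(1, 2).reshape(b, -1, heads * head_dim)

    print(old.shape, new.shape)   # both torch.Size([2, 16, 512])
    print(torch.equal(old, new))  # True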

@@ -645,7 +645,9 @@ def load_models_gpu(models, memory_required=0, force_patch_weights=False, minimu
             if loaded_model.model.is_clone(current_loaded_models[i].model):
                 to_unload = [i] + to_unload
         for i in to_unload:
-            current_loaded_models.pop(i).model.detach(unpatch_all=False)
+            model_to_unload = current_loaded_models.pop(i)
+            model_to_unload.model.detach(unpatch_all=False)
+            model_to_unload.model_finalizer.detach()

     total_memory_required = {}
     for loaded_model in models_to_load:

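The unload loop now pops the entry into a local so that both teardown calls run on the same object; the old chained pop(i).model.detach(...) left no reference for detaching the finalizer. A generic, runnable illustration of the pattern; the names below are hypothetical stand-ins, not ComfyUI's API:

    from types import SimpleNamespace

    def make_entry(name):
        # Hypothetical stand-in for a loaded-model record with two teardown hooks.
        return SimpleNamespace(
            model=SimpleNamespace(detach=lambda unpatch_all=False: print(f"detach model {name}")),
            model_finalizer=SimpleNamespace(detach=lambda: print(f"detach finalizer {name}")),
        )

    loaded_entries = [make_entry("a"), make_entry("b")]

    entry = loaded_entries.pop(0)           # pop once, keep the reference
    entry.model.detach(unpatch_all=False)   # release the patched model
    entry.model_finalizer.detach()          # and its finalizer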

@@ -11,6 +11,7 @@ import json
 import random
 import hashlib
 import node_helpers
+import logging
 from comfy.cli_args import args
 from comfy.comfy_types import FileLocator


@@ -364,6 +365,216 @@ class RecordAudio:
         return (audio, )


+class TrimAudioDuration:
+    @classmethod
+    def INPUT_TYPES(cls):
+        return {
+            "required": {
+                "audio": ("AUDIO",),
+                "start_index": ("FLOAT", {"default": 0.0, "min": -0xffffffffffffffff, "max": 0xffffffffffffffff, "step": 0.01, "tooltip": "Start time in seconds, can be negative to count from the end (supports sub-seconds)."}),
+                "duration": ("FLOAT", {"default": 60.0, "min": 0.0, "step": 0.01, "tooltip": "Duration in seconds."}),
+            },
+        }
+
+    FUNCTION = "trim"
+    RETURN_TYPES = ("AUDIO",)
+    CATEGORY = "audio"
+    DESCRIPTION = "Trim the audio to the chosen time range."
+
+    def trim(self, audio, start_index, duration):
+        waveform = audio["waveform"]
+        sample_rate = audio["sample_rate"]
+        audio_length = waveform.shape[-1]
+
+        if start_index < 0:
+            start_frame = audio_length + int(round(start_index * sample_rate))
+        else:
+            start_frame = int(round(start_index * sample_rate))
+        start_frame = max(0, min(start_frame, audio_length - 1))
+
+        end_frame = start_frame + int(round(duration * sample_rate))
+        end_frame = max(0, min(end_frame, audio_length))
+
+        if start_frame >= end_frame:
+            raise ValueError("AudioTrim: Start time must be less than end time and be within the audio length.")
+
+        return ({"waveform": waveform[..., start_frame:end_frame], "sample_rate": sample_rate},)
+
+
+class SplitAudioChannels:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": {
+            "audio": ("AUDIO",),
+        }}
+
+    RETURN_TYPES = ("AUDIO", "AUDIO")
+    RETURN_NAMES = ("left", "right")
+    FUNCTION = "separate"
+    CATEGORY = "audio"
+    DESCRIPTION = "Separates the audio into left and right channels."
+
+    def separate(self, audio):
+        waveform = audio["waveform"]
+        sample_rate = audio["sample_rate"]
+
+        if waveform.shape[1] != 2:
+            raise ValueError("AudioSplit: Input audio must have exactly 2 channels.")
+
+        left_channel = waveform[..., 0:1, :]
+        right_channel = waveform[..., 1:2, :]
+
+        return ({"waveform": left_channel, "sample_rate": sample_rate}, {"waveform": right_channel, "sample_rate": sample_rate})
+
+
+def match_audio_sample_rates(waveform_1, sample_rate_1, waveform_2, sample_rate_2):
+    if sample_rate_1 != sample_rate_2:
+        if sample_rate_1 > sample_rate_2:
+            waveform_2 = torchaudio.functional.resample(waveform_2, sample_rate_2, sample_rate_1)
+            output_sample_rate = sample_rate_1
+            logging.info(f"Resampling audio2 from {sample_rate_2}Hz to {sample_rate_1}Hz for merging.")
+        else:
+            waveform_1 = torchaudio.functional.resample(waveform_1, sample_rate_1, sample_rate_2)
+            output_sample_rate = sample_rate_2
+            logging.info(f"Resampling audio1 from {sample_rate_1}Hz to {sample_rate_2}Hz for merging.")
+    else:
+        output_sample_rate = sample_rate_1
+    return waveform_1, waveform_2, output_sample_rate
+
+
+class AudioConcat:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": {
+            "audio1": ("AUDIO",),
+            "audio2": ("AUDIO",),
+            "direction": (['after', 'before'], {"default": 'after', "tooltip": "Whether to append audio2 after or before audio1."}),
+        }}
+
+    RETURN_TYPES = ("AUDIO",)
+    FUNCTION = "concat"
+    CATEGORY = "audio"
+    DESCRIPTION = "Concatenates audio2 to audio1 in the specified direction."
+
+    def concat(self, audio1, audio2, direction):
+        waveform_1 = audio1["waveform"]
+        waveform_2 = audio2["waveform"]
+        sample_rate_1 = audio1["sample_rate"]
+        sample_rate_2 = audio2["sample_rate"]
+
+        if waveform_1.shape[1] == 1:
+            waveform_1 = waveform_1.repeat(1, 2, 1)
+            logging.info("AudioConcat: Converted mono audio1 to stereo by duplicating the channel.")
+        if waveform_2.shape[1] == 1:
+            waveform_2 = waveform_2.repeat(1, 2, 1)
+            logging.info("AudioConcat: Converted mono audio2 to stereo by duplicating the channel.")
+
+        waveform_1, waveform_2, output_sample_rate = match_audio_sample_rates(waveform_1, sample_rate_1, waveform_2, sample_rate_2)
+
+        if direction == 'after':
+            concatenated_audio = torch.cat((waveform_1, waveform_2), dim=2)
+        elif direction == 'before':
+            concatenated_audio = torch.cat((waveform_2, waveform_1), dim=2)
+
+        return ({"waveform": concatenated_audio, "sample_rate": output_sample_rate},)
+
+
+class AudioMerge:
+    @classmethod
+    def INPUT_TYPES(cls):
+        return {
+            "required": {
+                "audio1": ("AUDIO",),
+                "audio2": ("AUDIO",),
+                "merge_method": (["add", "mean", "subtract", "multiply"], {"tooltip": "The method used to combine the audio waveforms."}),
+            },
+        }
+
+    FUNCTION = "merge"
+    RETURN_TYPES = ("AUDIO",)
+    CATEGORY = "audio"
+    DESCRIPTION = "Combine two audio tracks by overlaying their waveforms."
+
+    def merge(self, audio1, audio2, merge_method):
+        waveform_1 = audio1["waveform"]
+        waveform_2 = audio2["waveform"]
+        sample_rate_1 = audio1["sample_rate"]
+        sample_rate_2 = audio2["sample_rate"]
+
+        waveform_1, waveform_2, output_sample_rate = match_audio_sample_rates(waveform_1, sample_rate_1, waveform_2, sample_rate_2)
+
+        length_1 = waveform_1.shape[-1]
+        length_2 = waveform_2.shape[-1]
+
+        if length_2 > length_1:
+            logging.info(f"AudioMerge: Trimming audio2 from {length_2} to {length_1} samples to match audio1 length.")
+            waveform_2 = waveform_2[..., :length_1]
+        elif length_2 < length_1:
+            logging.info(f"AudioMerge: Padding audio2 from {length_2} to {length_1} samples to match audio1 length.")
+            pad_shape = list(waveform_2.shape)
+            pad_shape[-1] = length_1 - length_2
+            pad_tensor = torch.zeros(pad_shape, dtype=waveform_2.dtype, device=waveform_2.device)
+            waveform_2 = torch.cat((waveform_2, pad_tensor), dim=-1)
+
+        if merge_method == "add":
+            waveform = waveform_1 + waveform_2
+        elif merge_method == "subtract":
+            waveform = waveform_1 - waveform_2
+        elif merge_method == "multiply":
+            waveform = waveform_1 * waveform_2
+        elif merge_method == "mean":
+            waveform = (waveform_1 + waveform_2) / 2
+
+        max_val = waveform.abs().max()
+        if max_val > 1.0:
+            waveform = waveform / max_val
+
+        return ({"waveform": waveform, "sample_rate": output_sample_rate},)
+
+
+class AudioAdjustVolume:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": {
+            "audio": ("AUDIO",),
+            "volume": ("INT", {"default": 1, "min": -100, "max": 100, "tooltip": "Volume adjustment in decibels (dB). 0 = no change, +6 ≈ double, -6 ≈ half."}),
+        }}
+
+    RETURN_TYPES = ("AUDIO",)
+    FUNCTION = "adjust_volume"
+    CATEGORY = "audio"
+
+    def adjust_volume(self, audio, volume):
+        if volume == 0:
+            return (audio,)
+        waveform = audio["waveform"]
+        sample_rate = audio["sample_rate"]
+
+        gain = 10 ** (volume / 20)
+        waveform = waveform * gain
+
+        return ({"waveform": waveform, "sample_rate": sample_rate},)
+
+
+class EmptyAudio:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": {
+            "duration": ("FLOAT", {"default": 60.0, "min": 0.0, "max": 0xffffffffffffffff, "step": 0.01, "tooltip": "Duration of the empty audio clip in seconds."}),
+            "sample_rate": ("INT", {"default": 44100, "tooltip": "Sample rate of the empty audio clip."}),
+            "channels": ("INT", {"default": 2, "min": 1, "max": 2, "tooltip": "Number of audio channels (1 for mono, 2 for stereo)."}),
+        }}
+
+    RETURN_TYPES = ("AUDIO",)
+    FUNCTION = "create_empty_audio"
+    CATEGORY = "audio"
+
+    def create_empty_audio(self, duration, sample_rate, channels):
+        num_samples = int(round(duration * sample_rate))
+        waveform = torch.zeros((1, channels, num_samples), dtype=torch.float32)
+        return ({"waveform": waveform, "sample_rate": sample_rate},)
+
+
 NODE_CLASS_MAPPINGS = {
     "EmptyLatentAudio": EmptyLatentAudio,
     "VAEEncodeAudio": VAEEncodeAudio,

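All of the new nodes use ComfyUI's AUDIO convention: a dict holding a float waveform shaped (batch, channels, samples) plus a sample_rate. A standalone sketch of the two pieces of arithmetic the nodes rely on, the seconds-to-samples conversion in TrimAudioDuration and the decibel-to-gain conversion in AudioAdjustVolume; the values are illustrative:

    import torch

    sample_rate = 44100
    audio = {"waveform": torch.zeros((1, 2, 5 * sample_rate)), "sample_rate": sample_rate}  # 5 s stereo silence

    # TrimAudioDuration: seconds -> sample indices; a negative start counts back from the end.
    start_index, duration = -2.0, 1.5
    length = audio["waveform"].shape[-1]
    start = length + int(round(start_index * sample_rate))         # 2 s before the end
    end = min(start + int(round(duration * sample_rate)), length)
    print(audio["waveform"][..., start:end].shape)                 # torch.Size([1, 2, 66150])

    # AudioAdjustVolume: decibels -> linear gain; +6 dB is roughly a doubling of amplitude.
    for db in (0, 6, -6):
        print(db, round(10 ** (db / 20), 3))                       # 1.0, 1.995, 0.501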

@@ -375,6 +586,12 @@ NODE_CLASS_MAPPINGS = {
     "PreviewAudio": PreviewAudio,
     "ConditioningStableAudio": ConditioningStableAudio,
     "RecordAudio": RecordAudio,
+    "TrimAudioDuration": TrimAudioDuration,
+    "SplitAudioChannels": SplitAudioChannels,
+    "AudioConcat": AudioConcat,
+    "AudioMerge": AudioMerge,
+    "AudioAdjustVolume": AudioAdjustVolume,
+    "EmptyAudio": EmptyAudio,
 }

 NODE_DISPLAY_NAME_MAPPINGS = {

@@ -387,4 +604,10 @@ NODE_DISPLAY_NAME_MAPPINGS = {
     "SaveAudioMP3": "Save Audio (MP3)",
     "SaveAudioOpus": "Save Audio (Opus)",
     "RecordAudio": "Record Audio",
+    "TrimAudioDuration": "Trim Audio Duration",
+    "SplitAudioChannels": "Split Audio Channels",
+    "AudioConcat": "Audio Concat",
+    "AudioMerge": "Audio Merge",
+    "AudioAdjustVolume": "Audio Adjust Volume",
+    "EmptyAudio": "Empty Audio",
 }


@@ -12,35 +12,38 @@ from nodes import MAX_RESOLUTION
 def composite(destination, source, x, y, mask = None, multiplier = 8, resize_source = False):
     source = source.to(destination.device)
     if resize_source:
-        source = torch.nn.functional.interpolate(source, size=(destination.shape[2], destination.shape[3]), mode="bilinear")
+        source = torch.nn.functional.interpolate(source, size=(destination.shape[-2], destination.shape[-1]), mode="bilinear")

     source = comfy.utils.repeat_to_batch_size(source, destination.shape[0])

-    x = max(-source.shape[3] * multiplier, min(x, destination.shape[3] * multiplier))
-    y = max(-source.shape[2] * multiplier, min(y, destination.shape[2] * multiplier))
+    x = max(-source.shape[-1] * multiplier, min(x, destination.shape[-1] * multiplier))
+    y = max(-source.shape[-2] * multiplier, min(y, destination.shape[-2] * multiplier))

     left, top = (x // multiplier, y // multiplier)
-    right, bottom = (left + source.shape[3], top + source.shape[2],)
+    right, bottom = (left + source.shape[-1], top + source.shape[-2],)

     if mask is None:
         mask = torch.ones_like(source)
     else:
         mask = mask.to(destination.device, copy=True)
-        mask = torch.nn.functional.interpolate(mask.reshape((-1, 1, mask.shape[-2], mask.shape[-1])), size=(source.shape[2], source.shape[3]), mode="bilinear")
+        mask = torch.nn.functional.interpolate(mask.reshape((-1, 1, mask.shape[-2], mask.shape[-1])), size=(source.shape[-2], source.shape[-1]), mode="bilinear")
         mask = comfy.utils.repeat_to_batch_size(mask, source.shape[0])

     # calculate the bounds of the source that will be overlapping the destination
     # this prevents the source trying to overwrite latent pixels that are out of bounds
     # of the destination
-    visible_width, visible_height = (destination.shape[3] - left + min(0, x), destination.shape[2] - top + min(0, y),)
+    visible_width, visible_height = (destination.shape[-1] - left + min(0, x), destination.shape[-2] - top + min(0, y),)

     mask = mask[:, :, :visible_height, :visible_width]
+    if mask.ndim < source.ndim:
+        mask = mask.unsqueeze(1)
+
     inverse_mask = torch.ones_like(mask) - mask

-    source_portion = mask * source[:, :, :visible_height, :visible_width]
-    destination_portion = inverse_mask * destination[:, :, top:bottom, left:right]
+    source_portion = mask * source[..., :visible_height, :visible_width]
+    destination_portion = inverse_mask * destination[..., top:bottom, left:right]

-    destination[:, :, top:bottom, left:right] = source_portion + destination_portion
+    destination[..., top:bottom, left:right] = source_portion + destination_portion
     return destination

 class LatentCompositeMasked:

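The composite() edits replace explicit [:, :, ...] slices and shape[2]/shape[3] lookups with ellipsis indexing and negative axes, so the same function works on 4-D latents and on tensors with extra leading dimensions (for example 5-D video latents), and the new ndim check inserts a broadcast dimension into the mask when the source has more axes. A small sketch of why the ellipsis form generalizes; the shapes are illustrative:

    import torch

    def crop_hw(t, h, w):
        # Ellipsis plus negative axes always address the last two (spatial) dims, whatever the rank.
        return t[..., :h, :w]

    img_latent = torch.randn(1, 4, 64, 64)       # (batch, channels, H, W)
    vid_latent = torch.randn(1, 4, 16, 64, 64)   # (batch, channels, frames, H, W)

    print(crop_hw(img_latent, 32, 32).shape)     # torch.Size([1, 4, 32, 32])
    print(crop_hw(vid_latent, 32, 32).shape)     # torch.Size([1, 4, 16, 32, 32])

    # shape[2]/shape[3] read frames/H on the 5-D tensor, while shape[-2]/shape[-1] always read H/W.
    print(vid_latent.shape[2], vid_latent.shape[3])    # 16 64
    print(vid_latent.shape[-2], vid_latent.shape[-1])  # 64 64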