diff --git a/README.md b/README.md index 0e021a687..d004364ee 100644 --- a/README.md +++ b/README.md @@ -69,6 +69,7 @@ See what ComfyUI can do with the [example workflows](https://comfyanonymous.gith - Image Editing Models - [Omnigen 2](https://comfyanonymous.github.io/ComfyUI_examples/omnigen/) - [Flux Kontext](https://comfyanonymous.github.io/ComfyUI_examples/flux/#flux-kontext-image-editing-model) + - [HiDream E1.1](https://comfyanonymous.github.io/ComfyUI_examples/hidream/#hidream-e11) - Video Models - [Stable Video Diffusion](https://comfyanonymous.github.io/ComfyUI_examples/video/) - [Mochi](https://comfyanonymous.github.io/ComfyUI_examples/mochi/) diff --git a/comfy_extras/nodes_audio.py b/comfy_extras/nodes_audio.py index 8cd647846..a90b31779 100644 --- a/comfy_extras/nodes_audio.py +++ b/comfy_extras/nodes_audio.py @@ -278,6 +278,42 @@ class PreviewAudio(SaveAudio): "hidden": {"prompt": "PROMPT", "extra_pnginfo": "EXTRA_PNGINFO"}, } +def f32_pcm(wav: torch.Tensor) -> torch.Tensor: + """Convert audio to float 32 bits PCM format.""" + if wav.dtype.is_floating_point: + return wav + elif wav.dtype == torch.int16: + return wav.float() / (2 ** 15) + elif wav.dtype == torch.int32: + return wav.float() / (2 ** 31) + raise ValueError(f"Unsupported wav dtype: {wav.dtype}") + +def load(filepath: str) -> tuple[torch.Tensor, int]: + with av.open(filepath) as af: + if not af.streams.audio: + raise ValueError("No audio stream found in the file.") + + stream = af.streams.audio[0] + sr = stream.codec_context.sample_rate + n_channels = stream.channels + + frames = [] + length = 0 + for frame in af.decode(streams=stream.index): + buf = torch.from_numpy(frame.to_ndarray()) + if buf.shape[0] != n_channels: + buf = buf.view(-1, n_channels).t() + + frames.append(buf) + length += buf.shape[1] + + if not frames: + raise ValueError("No audio frames decoded.") + + wav = torch.cat(frames, dim=1) + wav = f32_pcm(wav) + return wav, sr + class LoadAudio: @classmethod def INPUT_TYPES(s): @@ -292,7 +328,7 @@ class LoadAudio: def load(self, audio): audio_path = folder_paths.get_annotated_filepath(audio) - waveform, sample_rate = torchaudio.load(audio_path) + waveform, sample_rate = load(audio_path) audio = {"waveform": waveform.unsqueeze(0), "sample_rate": sample_rate} return (audio, ) diff --git a/comfyui_version.py b/comfyui_version.py index 7981fbaca..180ecaf8a 100644 --- a/comfyui_version.py +++ b/comfyui_version.py @@ -1,3 +1,3 @@ # This file is automatically generated by the build process when version is # updated in pyproject.toml. -__version__ = "0.3.44" +__version__ = "0.3.45" diff --git a/pyproject.toml b/pyproject.toml index 96ead2157..b1d6d9df6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "ComfyUI" -version = "0.3.44" +version = "0.3.45" readme = "README.md" license = { file = "LICENSE" } requires-python = ">=3.9"