From 306fcbaa0e91102a6611f1e7612195875ac34a06 Mon Sep 17 00:00:00 2001 From: doctorpangloss <@hiddenswitch.com> Date: Tue, 26 Aug 2025 14:12:40 -0700 Subject: [PATCH] audio_encoders now a package correctly, make imports relative --- comfy/audio_encoders/__init__.py | 0 comfy/audio_encoders/audio_encoders.py | 23 ++++++++++++----------- 2 files changed, 12 insertions(+), 11 deletions(-) create mode 100644 comfy/audio_encoders/__init__.py diff --git a/comfy/audio_encoders/__init__.py b/comfy/audio_encoders/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/comfy/audio_encoders/audio_encoders.py b/comfy/audio_encoders/audio_encoders.py index 538c21bd5..df355acc8 100644 --- a/comfy/audio_encoders/audio_encoders.py +++ b/comfy/audio_encoders/audio_encoders.py @@ -1,17 +1,16 @@ from .wav2vec2 import Wav2Vec2Model -import comfy.model_management -import comfy.ops -import comfy.utils -import logging -import torchaudio +from ..model_management import text_encoder_offload_device, text_encoder_device, load_model_gpu, text_encoder_dtype +from ..ops import manual_cast +from ..utils import state_dict_prefix_replace +import logging class AudioEncoderModel(): def __init__(self, config): - self.load_device = comfy.model_management.text_encoder_device() - offload_device = comfy.model_management.text_encoder_offload_device() - self.dtype = comfy.model_management.text_encoder_dtype(self.load_device) - self.model = Wav2Vec2Model(dtype=self.dtype, device=offload_device, operations=comfy.ops.manual_cast) + self.load_device = text_encoder_device() + offload_device = text_encoder_offload_device() + self.dtype = text_encoder_dtype(self.load_device) + self.model = Wav2Vec2Model(dtype=self.dtype, device=offload_device, operations=manual_cast) self.model.eval() self.patcher = comfy.model_patcher.ModelPatcher(self.model, load_device=self.load_device, offload_device=offload_device) self.model_sample_rate = 16000 @@ -23,7 +22,9 @@ class AudioEncoderModel(): return self.model.state_dict() def encode_audio(self, audio, sample_rate): - comfy.model_management.load_model_gpu(self.patcher) + # this one we will allow to just bubble up the exception + import torchaudio # pylint: disable=import-error + load_model_gpu(self.patcher) audio = torchaudio.functional.resample(audio, sample_rate, self.model_sample_rate) out, all_layers = self.model(audio.to(self.load_device)) outputs = {} @@ -34,7 +35,7 @@ class AudioEncoderModel(): def load_audio_encoder_from_sd(sd, prefix=""): audio_encoder = AudioEncoderModel(None) - sd = comfy.utils.state_dict_prefix_replace(sd, {"wav2vec2.": ""}) + sd = state_dict_prefix_replace(sd, {"wav2vec2.": ""}) m, u = audio_encoder.load_sd(sd) if len(m) > 0: logging.warning("missing audio encoder: {}".format(m))