import requests
import io
import librosa
import torch
import warnings


class AudioLoadPath:
    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "path": ("STRING", {"default": "X://insert/path/here.mp4"}),
                "sample_rate": ("INT", {"default": 22050, "min": 6000, "max": 192000, "step": 1}),
                "offset": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1e6, "step": 0.001}),
                "duration": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1e6, "step": 0.001}),
            }
        }

    RETURN_TYPES = ("AUDIO",)
    CATEGORY = "EasyAI"  # Or your preferred category
    FUNCTION = "load"

    def load(self, path: str, sample_rate: int, offset: float, duration: float):
        # A duration of 0.0 means "load to the end of the file".
        if duration == 0.0:
            duration = None

        try:
            if path.startswith(("http://", "https://")):
                # Network path: download and load from an in-memory buffer.
                response = requests.get(path, timeout=10)
                response.raise_for_status()  # Raise for bad HTTP status codes
                audio_data_source = io.BytesIO(response.content)
            else:
                # Local path (absolute or relative).
                audio_data_source = path

            # librosa resamples to the target 'sample_rate' if one is given.
            # With mono=False, multi-channel files come back as a 2D array
            # (channels, samples); mono files come back 1D (samples,) and are
            # expanded to (1, samples) below.
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")  # Suppress librosa warnings, if any
                audio_np, loaded_sr = librosa.load(
                    audio_data_source,
                    sr=sample_rate,
                    offset=offset,
                    duration=duration,
                    mono=False,
                )

            # Convert the numpy array to a PyTorch tensor.
            audio_tensor = torch.from_numpy(audio_np)
            if audio_tensor.dim() == 1:
                # Mono audio: add a channel dimension -> (1, samples)
                audio_tensor = audio_tensor.unsqueeze(0)

            # Add a batch dimension to conform to (batch_size, channels, samples).
            # batch_size is 1 since a single audio file is loaded.
            audio_tensor = audio_tensor.unsqueeze(0)

            # Prepare the output dictionary for the "AUDIO" type.
            output_audio_dict = {
                "waveform": audio_tensor,
                "sample_rate": loaded_sr,  # Actual loaded sample rate (matches the requested 'sample_rate')
            }

            # Return as a tuple, as ComfyUI expects.
            return (output_audio_dict,)

        except requests.exceptions.RequestException as e:
            # Network-specific errors
            raise Exception(f"Failed to load audio from URL: {e}")
        except FileNotFoundError as e:
            # Local file not found
            raise Exception(f"Audio file not found: {path} - {e}")
        except Exception as e:
            # Other errors (e.g., librosa failing to decode, invalid path)
            raise Exception(f"Failed to load audio: {e}")


# Node mappings for ComfyUI
NODE_CLASS_MAPPINGS = {
    "AudioLoadPath": AudioLoadPath
}

NODE_DISPLAY_NAME_MAPPINGS = {
    "AudioLoadPath": "Load Audio (Path/URL)"
}
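
# A minimal standalone sketch of how the node's output could be inspected
# outside of ComfyUI. This is illustrative only: "example.wav" is a
# hypothetical placeholder path, not part of the node above.
if __name__ == "__main__":
    node = AudioLoadPath()
    # Load 2 seconds from the start of a local file (placeholder path).
    (audio,) = node.load(path="example.wav", sample_rate=22050, offset=0.0, duration=2.0)
    # waveform has shape (batch, channels, samples); sample_rate is an int.
    print(audio["waveform"].shape, audio["sample_rate"])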