# ComfyUI-EasyAI/nodes.py

import requests
import io
import librosa # Changed from librosa.core
import torch
import numpy # For type hinting, though librosa.load returns numpy array
import warnings

class AudioLoadPath:
    @classmethod
    def INPUT_TYPES(cls): # Changed s to cls for convention
        return {
            "required": {
                "path": ("STRING", {"default": "X://insert/path/here.mp4"}),
                "sample_rate": ("INT", {"default": 22050, "min": 6000, "max": 192000, "step": 1}),
                "offset": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1e6, "step": 0.001}),
                "duration": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1e6, "step": 0.001})
            }
        }

    RETURN_TYPES = ("AUDIO",)
    CATEGORY = "EasyAI" # Or your preferred category
    FUNCTION = "load"

    def load(self, path: str, sample_rate: int, offset: float, duration: float | None):
        if duration == 0.0:
            duration = None

        audio_data_source = None
        try:
            if path.startswith(('http://', 'https://')):
                # For network paths, download and load from memory
                response = requests.get(path, timeout=10) # Added timeout
                response.raise_for_status() # Raises an exception for bad status codes
                audio_data_source = io.BytesIO(response.content)
            else:
                # For local paths (absolute or relative)
                audio_data_source = path

            # Use librosa to load audio.
            # mono=False ensures that the output numpy array is always 2D (channels, samples).
            # For mono audio, this will be (1, samples).
            # librosa.load will resample to the target 'sample_rate' if it's provided.
            with warnings.catch_warnings():
                warnings.simplefilter("ignore") # Suppress librosa warnings if any
                audio_np, loaded_sr = librosa.load(
                    audio_data_source,
                    sr=sample_rate,
                    offset=offset,
                    duration=duration,
                    mono=False # Ensures audio_np is 2D: (channels, samples)
                )

            # Convert numpy array to PyTorch tensor
            audio_tensor = torch.from_numpy(audio_np) # Shape: (channels, samples)

            # Add a batch dimension to conform to (batch_size, channels, samples)
            # Here, batch_size is 1 as we are loading a single audio file.
            audio_tensor = audio_tensor.unsqueeze(0) # Shape: (1, channels, samples)

            # Prepare the output dictionary for the "AUDIO" type
            output_audio_dict = {
                "waveform": audio_tensor,
                "sample_rate": loaded_sr # Use the actual loaded sample rate (should match input 'sample_rate')
            }

            # Return as a tuple, as ComfyUI expects
            return (output_audio_dict,)

        except requests.exceptions.RequestException as e:
            # Handle network-specific errors
            raise Exception(f"Failed to load audio from URL: {str(e)}")
        except FileNotFoundError as e:
            # Handle local file not found errors
            raise Exception(f"Audio file not found: {path} - {str(e)}")
        except Exception as e:
            # Handle other potential errors (e.g., librosa failing to decode, invalid path)
            raise Exception(f"Failed to load audio: {str(e)}")

# ComfyUI registration tables, both keyed by the node identifier:
# the first maps it to the implementing class, the second to its display name.
NODE_CLASS_MAPPINGS = dict(AudioLoadPath=AudioLoadPath)

NODE_DISPLAY_NAME_MAPPINGS = dict(AudioLoadPath="Load Audio (Path/URL)")