Fix audio dimension (维度) bug

This commit is contained in:
wangbo 2025-05-15 20:42:29 +08:00
parent cbe1fc951d
commit f93b880d3f

View File

@ -20,13 +20,11 @@ class AudioLoadPath:
duration = None
if path.startswith(('http://', 'https://')):
# 对于网络路径,直接从内存加载
try:
response = requests.get(path)
response.raise_for_status()
audio_data = io.BytesIO(response.content)
# 使用 librosa 直接从内存中读取音频数据
import warnings
with warnings.catch_warnings():
warnings.simplefilter("ignore")
@ -35,11 +33,14 @@ class AudioLoadPath:
except Exception as e:
raise Exception(f"加载网络音频失败: {str(e)}")
else:
# 本地文件使用原有的 librosa 方式加载
audio, _ = core.load(path, sr=sample_rate, offset=offset, duration=duration)
# 转换为 torch tensor 并调整维度
audio = torch.from_numpy(audio)[None,:,None]
# 修改维度处理方式
audio = torch.from_numpy(audio).float()
# 确保音频是二维张量 [channels, samples]
if audio.dim() == 1:
audio = audio.unsqueeze(0) # 添加channel维度
return (audio,)
NODE_CLASS_MAPPINGS = {