diff --git a/comfy_extras/nodes_audio.py b/comfy_extras/nodes_audio.py index 136ad615..49af1eae 100644 --- a/comfy_extras/nodes_audio.py +++ b/comfy_extras/nodes_audio.py @@ -1,5 +1,6 @@ from __future__ import annotations +import av import torchaudio import torch import comfy.model_management @@ -7,7 +8,6 @@ import folder_paths import os import io import json -import struct import random import hashlib import node_helpers @@ -90,60 +90,118 @@ class VAEDecodeAudio: return ({"waveform": audio, "sample_rate": 44100}, ) -def create_vorbis_comment_block(comment_dict, last_block): - vendor_string = b'ComfyUI' - vendor_length = len(vendor_string) +def save_audio(self, audio, filename_prefix="ComfyUI", format="flac", prompt=None, extra_pnginfo=None, quality="128k"): - comments = [] - for key, value in comment_dict.items(): - comment = f"{key}={value}".encode('utf-8') - comments.append(struct.pack('I', len(comment_data))[1:] + comment_data + # Opus supported sample rates + OPUS_RATES = [8000, 12000, 16000, 24000, 48000] - return comment_block + for (batch_number, waveform) in enumerate(audio["waveform"].cpu()): + filename_with_batch_num = filename.replace("%batch_num%", str(batch_number)) + file = f"{filename_with_batch_num}_{counter:05}_.{format}" + output_path = os.path.join(full_output_folder, file) -def insert_or_replace_vorbis_comment(flac_io, comment_dict): - if len(comment_dict) == 0: - return flac_io + # Use original sample rate initially + sample_rate = audio["sample_rate"] - flac_io.seek(4) + # Handle Opus sample rate requirements + if format == "opus": + if sample_rate > 48000: + sample_rate = 48000 + elif sample_rate not in OPUS_RATES: + # Find the next highest supported rate + for rate in sorted(OPUS_RATES): + if rate > sample_rate: + sample_rate = rate + break + if sample_rate not in OPUS_RATES: # Fallback if still not supported + sample_rate = 48000 - blocks = [] - last_block = False + # Resample if necessary + if sample_rate != audio["sample_rate"]: + waveform = torchaudio.functional.resample(waveform, audio["sample_rate"], sample_rate) - while not last_block: - header = flac_io.read(4) - last_block = (header[0] & 0x80) != 0 - block_type = header[0] & 0x7F - block_length = struct.unpack('>I', b'\x00' + header[1:])[0] - block_data = flac_io.read(block_length) + # Create in-memory WAV buffer + wav_buffer = io.BytesIO() + torchaudio.save(wav_buffer, waveform, sample_rate, format="WAV") + wav_buffer.seek(0) # Rewind for reading - if block_type == 4 or block_type == 1: - pass - else: - header = bytes([(header[0] & (~0x80))]) + header[1:] - blocks.append(header + block_data) + # Use PyAV to convert and add metadata + input_container = av.open(wav_buffer) - blocks.append(create_vorbis_comment_block(comment_dict, last_block=True)) + # Create output with specified format + output_buffer = io.BytesIO() + output_container = av.open(output_buffer, mode='w', format=format) - new_flac_io = io.BytesIO() - new_flac_io.write(b'fLaC') - for block in blocks: - new_flac_io.write(block) + # Set metadata on the container + for key, value in metadata.items(): + output_container.metadata[key] = value - new_flac_io.write(flac_io.read()) - return new_flac_io + # Set up the output stream with appropriate properties + input_container.streams.audio[0] + if format == "opus": + out_stream = output_container.add_stream("libopus", rate=sample_rate) + if quality == "64k": + out_stream.bit_rate = 64000 + elif quality == "96k": + out_stream.bit_rate = 96000 + elif quality == "128k": + out_stream.bit_rate = 128000 + elif quality == "192k": + out_stream.bit_rate = 192000 + elif quality == "320k": + out_stream.bit_rate = 320000 + elif format == "mp3": + out_stream = output_container.add_stream("libmp3lame", rate=sample_rate) + if quality == "V0": + #TODO i would really love to support V3 and V5 but there doesn't seem to be a way to set the qscale level, the property below is a bool + out_stream.codec_context.qscale = 1 + elif quality == "128k": + out_stream.bit_rate = 128000 + elif quality == "320k": + out_stream.bit_rate = 320000 + else: #format == "flac": + out_stream = output_container.add_stream("flac", rate=sample_rate) + # Copy frames from input to output + for frame in input_container.decode(audio=0): + frame.pts = None # Let PyAV handle timestamps + output_container.mux(out_stream.encode(frame)) + + # Flush encoder + output_container.mux(out_stream.encode(None)) + + # Close containers + output_container.close() + input_container.close() + + # Write the output to file + output_buffer.seek(0) + with open(output_path, 'wb') as f: + f.write(output_buffer.getbuffer()) + + results.append({ + "filename": file, + "subfolder": subfolder, + "type": self.type + }) + counter += 1 + + return { "ui": { "audio": results } } + class SaveAudio: def __init__(self): self.output_dir = folder_paths.get_output_directory() @@ -153,50 +211,70 @@ class SaveAudio: @classmethod def INPUT_TYPES(s): return {"required": { "audio": ("AUDIO", ), - "filename_prefix": ("STRING", {"default": "audio/ComfyUI"})}, + "filename_prefix": ("STRING", {"default": "audio/ComfyUI"}), + }, "hidden": {"prompt": "PROMPT", "extra_pnginfo": "EXTRA_PNGINFO"}, } RETURN_TYPES = () - FUNCTION = "save_audio" + FUNCTION = "save_flac" OUTPUT_NODE = True CATEGORY = "audio" - def save_audio(self, audio, filename_prefix="ComfyUI", prompt=None, extra_pnginfo=None): - filename_prefix += self.prefix_append - full_output_folder, filename, counter, subfolder, filename_prefix = folder_paths.get_save_image_path(filename_prefix, self.output_dir) - results: list[FileLocator] = [] + def save_flac(self, audio, filename_prefix="ComfyUI", format="flac", prompt=None, extra_pnginfo=None): + return save_audio(self, audio, filename_prefix, format, prompt, extra_pnginfo) - metadata = {} - if not args.disable_metadata: - if prompt is not None: - metadata["prompt"] = json.dumps(prompt) - if extra_pnginfo is not None: - for x in extra_pnginfo: - metadata[x] = json.dumps(extra_pnginfo[x]) +class SaveAudioMP3: + def __init__(self): + self.output_dir = folder_paths.get_output_directory() + self.type = "output" + self.prefix_append = "" - for (batch_number, waveform) in enumerate(audio["waveform"].cpu()): - filename_with_batch_num = filename.replace("%batch_num%", str(batch_number)) - file = f"{filename_with_batch_num}_{counter:05}_.flac" + @classmethod + def INPUT_TYPES(s): + return {"required": { "audio": ("AUDIO", ), + "filename_prefix": ("STRING", {"default": "audio/ComfyUI"}), + "quality": (["V0", "128k", "320k"], {"default": "V0"}), + }, + "hidden": {"prompt": "PROMPT", "extra_pnginfo": "EXTRA_PNGINFO"}, + } - buff = io.BytesIO() - torchaudio.save(buff, waveform, audio["sample_rate"], format="FLAC") + RETURN_TYPES = () + FUNCTION = "save_mp3" - buff = insert_or_replace_vorbis_comment(buff, metadata) + OUTPUT_NODE = True - with open(os.path.join(full_output_folder, file), 'wb') as f: - f.write(buff.getbuffer()) + CATEGORY = "audio" - results.append({ - "filename": file, - "subfolder": subfolder, - "type": self.type - }) - counter += 1 + def save_mp3(self, audio, filename_prefix="ComfyUI", format="mp3", prompt=None, extra_pnginfo=None, quality="128k"): + return save_audio(self, audio, filename_prefix, format, prompt, extra_pnginfo, quality) - return { "ui": { "audio": results } } +class SaveAudioOpus: + def __init__(self): + self.output_dir = folder_paths.get_output_directory() + self.type = "output" + self.prefix_append = "" + + @classmethod + def INPUT_TYPES(s): + return {"required": { "audio": ("AUDIO", ), + "filename_prefix": ("STRING", {"default": "audio/ComfyUI"}), + "quality": (["64k", "96k", "128k", "192k", "320k"], {"default": "128k"}), + }, + "hidden": {"prompt": "PROMPT", "extra_pnginfo": "EXTRA_PNGINFO"}, + } + + RETURN_TYPES = () + FUNCTION = "save_opus" + + OUTPUT_NODE = True + + CATEGORY = "audio" + + def save_opus(self, audio, filename_prefix="ComfyUI", format="opus", prompt=None, extra_pnginfo=None, quality="V3"): + return save_audio(self, audio, filename_prefix, format, prompt, extra_pnginfo, quality) class PreviewAudio(SaveAudio): def __init__(self): @@ -248,7 +326,20 @@ NODE_CLASS_MAPPINGS = { "VAEEncodeAudio": VAEEncodeAudio, "VAEDecodeAudio": VAEDecodeAudio, "SaveAudio": SaveAudio, + "SaveAudioMP3": SaveAudioMP3, + "SaveAudioOpus": SaveAudioOpus, "LoadAudio": LoadAudio, "PreviewAudio": PreviewAudio, "ConditioningStableAudio": ConditioningStableAudio, } + +NODE_DISPLAY_NAME_MAPPINGS = { + "EmptyLatentAudio": "Empty Latent Audio", + "VAEEncodeAudio": "VAE Encode Audio", + "VAEDecodeAudio": "VAE Decode Audio", + "PreviewAudio": "Preview Audio", + "LoadAudio": "Load Audio", + "SaveAudio": "Save Audio (FLAC)", + "SaveAudioMP3": "Save Audio (MP3)", + "SaveAudioOpus": "Save Audio (Opus)", +}