Innitial commit
This commit is contained in:
commit
166962deaf
3 changed files with 323 additions and 0 deletions
66
README.md
Normal file
66
README.md
Normal file
|
|
@ -0,0 +1,66 @@
|
||||||
|
# ComfyUI Custom Nodes
|
||||||
|
|
||||||
|
A collection of custom nodes for ComfyUI, focusing on audio manipulation and processing.
|
||||||
|
|
||||||
|
## Nodes
|
||||||
|
|
||||||
|
### Audio Repeat (with Crossfade)
|
||||||
|
|
||||||
|
Repeats an audio clip multiple times with optional crossfading between repetitions.
|
||||||
|
|
||||||
|
**Features:**
|
||||||
|
- Repeat audio clips any number of times (1-500)
|
||||||
|
- Configurable crossfade duration between repetitions
|
||||||
|
- MP3 and WAV output support
|
||||||
|
- Adjustable MP3 quality settings
|
||||||
|
- Direct file output to specified path
|
||||||
|
|
||||||
|
**Inputs:**
|
||||||
|
- `audio` (AUDIO): Input audio to repeat
|
||||||
|
- `repeat_count` (INT): Number of times to repeat (default: 20, range: 1-500)
|
||||||
|
- `output_audio_path` (STRING): Path where the output file will be saved
|
||||||
|
- `overwrite` (BOOLEAN, optional): Whether to overwrite existing files (default: True)
|
||||||
|
- `mp3_quality` (INT, optional): MP3 encoding quality (default: 0, range: 0-9, lower is better)
|
||||||
|
- `crossfade_seconds` (FLOAT, optional): Duration of crossfade between repetitions (default: 0.15s, range: 0.0-5.0s)
|
||||||
|
|
||||||
|
**Outputs:**
|
||||||
|
- `audio_out` (AUDIO): The repeated audio as a ComfyUI AUDIO object
|
||||||
|
- `output_audio_path` (STRING): Path to the saved output file
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
|
||||||
|
- ComfyUI
|
||||||
|
- FFmpeg (must be available in PATH)
|
||||||
|
- NumPy
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
|
||||||
|
1. Clone or download this repository to your ComfyUI `custom_nodes` folder:
|
||||||
|
```
|
||||||
|
cd ComfyUI/custom_nodes
|
||||||
|
git clone <repository-url> ComfyUI-CustomNodes
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Restart ComfyUI
|
||||||
|
|
||||||
|
3. The nodes will appear in the "audio" category
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
The Audio Repeat node is useful for:
|
||||||
|
- Creating looping audio backgrounds
|
||||||
|
- Extending short audio clips
|
||||||
|
- Creating seamless audio loops with crossfades
|
||||||
|
- Batch processing audio files
|
||||||
|
|
||||||
|
## Technical Details
|
||||||
|
|
||||||
|
- Supports multiple audio input formats through FFmpeg
|
||||||
|
- Internally normalizes audio to consistent channel/frame format
|
||||||
|
- Uses FFmpeg's `acrossfade` filter for smooth transitions
|
||||||
|
- Handles mono and multichannel audio
|
||||||
|
- Outputs at 44.1kHz sample rate by default
|
||||||
|
|
||||||
|
## License
|
||||||
|
|
||||||
|
This project is provided as-is for use with ComfyUI.
|
||||||
8
__init__.py
Normal file
8
__init__.py
Normal file
|
|
@ -0,0 +1,8 @@
|
||||||
|
from .repeat_node import AudioRepeatFromAudioNode
|
||||||
|
|
||||||
|
NODE_CLASS_MAPPINGS = {
|
||||||
|
"AudioRepeatFromAudioNode": AudioRepeatFromAudioNode
|
||||||
|
}
|
||||||
|
NODE_DISPLAY_NAME_MAPPINGS = {
|
||||||
|
"AudioRepeatFromAudioNode": "Audio Repeat (with Crossfade)"
|
||||||
|
}
|
||||||
249
repeat_node.py
Normal file
249
repeat_node.py
Normal file
|
|
@ -0,0 +1,249 @@
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
import tempfile
|
||||||
|
from shutil import which
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_audio(audio):
|
||||||
|
"""
|
||||||
|
Returns (samples_ch_first, sr)
|
||||||
|
samples_ch_first: np.ndarray shape (channels, frames)
|
||||||
|
"""
|
||||||
|
sr = None
|
||||||
|
samples = None
|
||||||
|
|
||||||
|
if isinstance(audio, dict):
|
||||||
|
# sample rate key variants
|
||||||
|
sr = audio.get("sample_rate", None)
|
||||||
|
if sr is None:
|
||||||
|
sr = audio.get("sr", None)
|
||||||
|
if sr is None:
|
||||||
|
sr = audio.get("rate", None)
|
||||||
|
|
||||||
|
# avoid tensor truthiness checks (no `or` chaining)
|
||||||
|
samples = audio.get("samples", None)
|
||||||
|
if samples is None:
|
||||||
|
samples = audio.get("waveform", None)
|
||||||
|
if samples is None:
|
||||||
|
samples = audio.get("audio", None)
|
||||||
|
|
||||||
|
elif isinstance(audio, (tuple, list)) and len(audio) >= 2:
|
||||||
|
samples, sr = audio[0], audio[1]
|
||||||
|
|
||||||
|
else:
|
||||||
|
raise TypeError(f"Unsupported AUDIO type: {type(audio)}")
|
||||||
|
|
||||||
|
if sr is None or samples is None:
|
||||||
|
raise ValueError(
|
||||||
|
f"Could not extract samples/sample_rate from AUDIO: "
|
||||||
|
f"keys={list(audio.keys()) if isinstance(audio, dict) else 'n/a'}"
|
||||||
|
)
|
||||||
|
|
||||||
|
# torch -> numpy
|
||||||
|
try:
|
||||||
|
import torch
|
||||||
|
if isinstance(samples, torch.Tensor):
|
||||||
|
samples = samples.detach().cpu().numpy()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
samples = np.asarray(samples)
|
||||||
|
|
||||||
|
# Normalize to (channels, frames)
|
||||||
|
# Possible shapes:
|
||||||
|
# (frames,) -> (1, frames)
|
||||||
|
# (channels, frames) -> ok
|
||||||
|
# (frames, channels) -> transpose
|
||||||
|
# (batch, channels, frames) -> take batch[0]
|
||||||
|
# (batch, frames, channels) -> take batch[0] then transpose
|
||||||
|
|
||||||
|
if samples.ndim == 1:
|
||||||
|
samples = samples[None, :]
|
||||||
|
|
||||||
|
elif samples.ndim == 3:
|
||||||
|
# take first batch
|
||||||
|
samples = samples[0]
|
||||||
|
|
||||||
|
elif samples.ndim > 3:
|
||||||
|
# super defensive: reduce until <= 3
|
||||||
|
while samples.ndim > 3:
|
||||||
|
samples = samples[0]
|
||||||
|
if samples.ndim == 3:
|
||||||
|
samples = samples[0]
|
||||||
|
|
||||||
|
if samples.ndim != 2:
|
||||||
|
raise ValueError(f"Unsupported samples shape after normalization: {samples.shape}")
|
||||||
|
|
||||||
|
# If it's (frames, channels) transpose -> (channels, frames)
|
||||||
|
if samples.shape[0] > 8 and samples.shape[1] <= 8:
|
||||||
|
samples = samples.T
|
||||||
|
|
||||||
|
return samples, int(sr)
|
||||||
|
|
||||||
|
|
||||||
|
def _write_wav_int16(path, samples_ch_first, sample_rate):
|
||||||
|
import wave
|
||||||
|
|
||||||
|
s = np.clip(samples_ch_first, -1.0, 1.0)
|
||||||
|
s_i16 = (s * 32767.0).astype(np.int16)
|
||||||
|
|
||||||
|
if s_i16.ndim != 2:
|
||||||
|
raise ValueError(f"_write_wav_int16 expects 2D (ch, frames), got {s_i16.shape}")
|
||||||
|
|
||||||
|
channels, frames = s_i16.shape
|
||||||
|
interleaved = s_i16.T.reshape(-1)
|
||||||
|
|
||||||
|
with wave.open(path, "wb") as wf:
|
||||||
|
wf.setnchannels(channels)
|
||||||
|
wf.setsampwidth(2)
|
||||||
|
wf.setframerate(int(sample_rate))
|
||||||
|
wf.writeframes(interleaved.tobytes())
|
||||||
|
|
||||||
|
|
||||||
|
def _load_audio_to_comfy(path, target_sr=44100, target_channels=2):
|
||||||
|
"""
|
||||||
|
Decode audio file -> ComfyUI AUDIO dict.
|
||||||
|
|
||||||
|
ComfyUI save nodes expect:
|
||||||
|
audio["waveform"] : torch.Tensor [batch, channels, frames]
|
||||||
|
audio["sample_rate"] : int
|
||||||
|
"""
|
||||||
|
ffmpeg = which("ffmpeg")
|
||||||
|
if ffmpeg is None:
|
||||||
|
raise RuntimeError("ffmpeg not found in PATH inside the ComfyUI container.")
|
||||||
|
|
||||||
|
# Force known SR/ch so parsing is deterministic
|
||||||
|
cmd = [
|
||||||
|
ffmpeg,
|
||||||
|
"-i", path,
|
||||||
|
"-f", "f32le",
|
||||||
|
"-ac", str(int(target_channels)),
|
||||||
|
"-ar", str(int(target_sr)),
|
||||||
|
"pipe:1",
|
||||||
|
]
|
||||||
|
|
||||||
|
proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||||
|
if proc.returncode != 0:
|
||||||
|
raise RuntimeError(
|
||||||
|
"ffmpeg decode failed:\n" + proc.stderr.decode("utf-8", errors="ignore")
|
||||||
|
)
|
||||||
|
|
||||||
|
raw = np.frombuffer(proc.stdout, dtype=np.float32)
|
||||||
|
|
||||||
|
ch = int(target_channels)
|
||||||
|
if raw.size % ch != 0:
|
||||||
|
raw = raw[: raw.size - (raw.size % ch)]
|
||||||
|
|
||||||
|
samples = raw.reshape(-1, ch).T # (channels, frames)
|
||||||
|
|
||||||
|
import torch
|
||||||
|
waveform = torch.from_numpy(samples).unsqueeze(0) # (1, channels, frames)
|
||||||
|
|
||||||
|
return {"waveform": waveform, "sample_rate": int(target_sr)}
|
||||||
|
|
||||||
|
|
||||||
|
class AudioRepeatFromAudioNode:
|
||||||
|
@classmethod
|
||||||
|
def INPUT_TYPES(cls):
|
||||||
|
return {
|
||||||
|
"required": {
|
||||||
|
"audio": ("AUDIO",),
|
||||||
|
"repeat_count": ("INT", {"default": 20, "min": 1, "max": 500}),
|
||||||
|
"output_audio_path": ("STRING", {"default": "/basedir/output/repeated.mp3"}),
|
||||||
|
},
|
||||||
|
"optional": {
|
||||||
|
"overwrite": ("BOOLEAN", {"default": True}),
|
||||||
|
"mp3_quality": ("INT", {"default": 0, "min": 0, "max": 9}),
|
||||||
|
"crossfade_seconds": ("FLOAT", {"default": 0.15, "min": 0.0, "max": 5.0, "step": 0.01}),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
RETURN_TYPES = ("AUDIO", "STRING")
|
||||||
|
RETURN_NAMES = ("audio_out", "output_audio_path")
|
||||||
|
FUNCTION = "repeat_audio"
|
||||||
|
CATEGORY = "audio"
|
||||||
|
|
||||||
|
def repeat_audio(self, audio, repeat_count, output_audio_path, overwrite=True, mp3_quality=0, crossfade_seconds=0.15):
|
||||||
|
ffmpeg = which("ffmpeg")
|
||||||
|
if ffmpeg is None:
|
||||||
|
raise RuntimeError("ffmpeg not found in PATH inside the ComfyUI container.")
|
||||||
|
|
||||||
|
repeat_count = int(repeat_count)
|
||||||
|
if repeat_count < 1:
|
||||||
|
raise ValueError("repeat_count must be >= 1")
|
||||||
|
|
||||||
|
crossfade_seconds = float(crossfade_seconds or 0.0)
|
||||||
|
|
||||||
|
out_dir = os.path.dirname(output_audio_path)
|
||||||
|
if out_dir and not os.path.isdir(out_dir):
|
||||||
|
os.makedirs(out_dir, exist_ok=True)
|
||||||
|
|
||||||
|
if os.path.exists(output_audio_path) and not overwrite:
|
||||||
|
audio_out = _load_audio_to_comfy(output_audio_path, target_sr=44100, target_channels=2)
|
||||||
|
return (audio_out, output_audio_path)
|
||||||
|
|
||||||
|
samples, sr = _extract_audio(audio)
|
||||||
|
|
||||||
|
frames = int(samples.shape[1])
|
||||||
|
duration_sec = frames / float(sr)
|
||||||
|
|
||||||
|
# Safety: crossfade must be shorter than clip
|
||||||
|
if crossfade_seconds >= duration_sec:
|
||||||
|
crossfade_seconds = max(0.0, duration_sec * 0.25)
|
||||||
|
|
||||||
|
with tempfile.TemporaryDirectory() as td:
|
||||||
|
in_wav = os.path.join(td, "input.wav")
|
||||||
|
_write_wav_int16(in_wav, samples, sr)
|
||||||
|
|
||||||
|
ext = os.path.splitext(output_audio_path)[1].lower()
|
||||||
|
|
||||||
|
# No crossfade (or only 1 repeat): simple transcode
|
||||||
|
if repeat_count == 1 or crossfade_seconds <= 0.0:
|
||||||
|
cmd = [ffmpeg, "-y" if overwrite else "-n", "-i", in_wav]
|
||||||
|
if ext == ".mp3":
|
||||||
|
cmd += ["-c:a", "libmp3lame", "-q:a", str(int(mp3_quality))]
|
||||||
|
elif ext == ".wav":
|
||||||
|
cmd += ["-c:a", "pcm_s16le"]
|
||||||
|
else:
|
||||||
|
cmd += ["-c:a", "libmp3lame", "-q:a", str(int(mp3_quality))]
|
||||||
|
cmd += [output_audio_path]
|
||||||
|
|
||||||
|
else:
|
||||||
|
# Chain acrossfade between repeated inputs
|
||||||
|
cmd = [ffmpeg, "-y" if overwrite else "-n"]
|
||||||
|
|
||||||
|
for _ in range(repeat_count):
|
||||||
|
cmd += ["-i", in_wav]
|
||||||
|
|
||||||
|
xf = crossfade_seconds
|
||||||
|
parts = [f"[0:a][1:a]acrossfade=d={xf}:c1=tri:c2=tri[a1]"]
|
||||||
|
for i in range(2, repeat_count):
|
||||||
|
parts.append(f"[a{i-1}][{i}:a]acrossfade=d={xf}:c1=tri:c2=tri[a{i}]")
|
||||||
|
|
||||||
|
aout = f"a{repeat_count-1}"
|
||||||
|
filter_complex = ";".join(parts)
|
||||||
|
|
||||||
|
cmd += ["-filter_complex", filter_complex, "-map", f"[{aout}]"]
|
||||||
|
|
||||||
|
if ext == ".mp3":
|
||||||
|
cmd += ["-c:a", "libmp3lame", "-q:a", str(int(mp3_quality))]
|
||||||
|
elif ext == ".wav":
|
||||||
|
cmd += ["-c:a", "pcm_s16le"]
|
||||||
|
else:
|
||||||
|
cmd += ["-c:a", "libmp3lame", "-q:a", str(int(mp3_quality))]
|
||||||
|
|
||||||
|
cmd += [output_audio_path]
|
||||||
|
|
||||||
|
proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
|
||||||
|
if proc.returncode != 0:
|
||||||
|
raise RuntimeError(
|
||||||
|
"ffmpeg processing failed.\n"
|
||||||
|
f"Command: {' '.join(cmd)}\n\n"
|
||||||
|
f"STDERR:\n{proc.stderr}"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Return a Comfy-compatible AUDIO dict
|
||||||
|
audio_out = _load_audio_to_comfy(output_audio_path, target_sr=sr, target_channels=int(samples.shape[0]))
|
||||||
|
return (audio_out, output_audio_path)
|
||||||
Loading…
Reference in a new issue