import os
import subprocess
import tempfile
from shutil import which

import numpy as np


def _extract_audio(audio):
    """
    Pull raw samples and the sample rate out of an AUDIO-like input.

    Accepted inputs:
      dict         : sample rate under "sample_rate", "sr" or "rate";
                     samples under "samples", "waveform" or "audio"
                     (keys are tried in that order, first non-None value wins)
      tuple / list : (samples, sample_rate, ...) - items past the second
                     are ignored

    Returns (samples_ch_first, sr)
      samples_ch_first: np.ndarray shape (channels, frames)
      sr: int, always > 0

    Raises:
      TypeError  : audio is neither a dict nor a sequence of length >= 2
      ValueError : samples or sample rate are missing, the sample rate is
                   not positive, or the samples do not reduce to a 2D array
    """
    if isinstance(audio, dict):
        # Compare against None explicitly (no `or` chaining): tensors and
        # arrays do not have a usable truth value.
        sr = None
        for key in ("sample_rate", "sr", "rate"):
            sr = audio.get(key)
            if sr is not None:
                break

        samples = None
        for key in ("samples", "waveform", "audio"):
            samples = audio.get(key)
            if samples is not None:
                break

    elif isinstance(audio, (tuple, list)) and len(audio) >= 2:
        samples, sr = audio[0], audio[1]

    else:
        raise TypeError(f"Unsupported AUDIO type: {type(audio)}")

    if sr is None or samples is None:
        raise ValueError(
            f"Could not extract samples/sample_rate from AUDIO: "
            f"keys={list(audio.keys()) if isinstance(audio, dict) else 'n/a'}"
        )

    sr = int(sr)
    if sr <= 0:
        # Callers divide by the sample rate; fail here with a clear message.
        raise ValueError(f"sample rate must be positive, got {sr}")

    # torch -> numpy. torch is optional: only a failed import is tolerated,
    # a failing tensor conversion is a real error and must surface.
    try:
        import torch
    except ImportError:
        torch = None
    if torch is not None and isinstance(samples, torch.Tensor):
        samples = samples.detach().cpu().numpy()

    samples = np.asarray(samples)

    # Normalize to (channels, frames)
    # Possible shapes:
    # (frames,) -> (1, frames)
    # (channels, frames) -> ok
    # (frames, channels) -> transpose
    # (batch, channels, frames) -> take batch[0]
    # (batch, frames, channels) -> take batch[0] then transpose
    # anything with more leading axes -> keep index 0 of each extra axis
    if samples.ndim == 1:
        samples = samples[None, :]
    else:
        while samples.ndim > 2:
            samples = samples[0]

    if samples.ndim != 2:
        raise ValueError(f"Unsupported samples shape after normalization: {samples.shape}")

    # Heuristic: more than 8 rows with at most 8 columns is treated as
    # (frames, channels) and transposed to (channels, frames).
    if samples.shape[0] > 8 and samples.shape[1] <= 8:
        samples = samples.T

    return samples, sr


def _write_wav_int16(path, samples_ch_first, sample_rate):
    """
    Write (channels, frames) samples to `path` as a 16-bit PCM WAV file.

    Samples are clipped to [-1.0, 1.0], scaled by 32767 and cast to int16
    (the cast truncates toward zero), then stored interleaved per frame.

    Raises ValueError if the converted data is not 2D.
    """
    import wave

    clipped = np.clip(samples_ch_first, -1.0, 1.0)
    pcm = (clipped * 32767.0).astype(np.int16)

    if pcm.ndim != 2:
        raise ValueError(f"_write_wav_int16 expects 2D (ch, frames), got {pcm.shape}")

    n_channels = pcm.shape[0]
    # Column-major flattening of (ch, frames) yields frame-by-frame
    # interleaving: ch0[0], ch1[0], ..., ch0[1], ch1[1], ...
    payload = pcm.ravel(order="F").tobytes()

    with wave.open(path, "wb") as wav_out:
        wav_out.setnchannels(n_channels)
        wav_out.setsampwidth(2)
        wav_out.setframerate(int(sample_rate))
        wav_out.writeframes(payload)


def _load_audio_to_comfy(path, target_sr=44100, target_channels=2):
    """
    Decode an audio file with ffmpeg -> ComfyUI AUDIO dict.

    ComfyUI save nodes expect:
      audio["waveform"] : torch.Tensor [batch, channels, frames]
      audio["sample_rate"] : int

    The file is resampled/remixed by ffmpeg to `target_sr` Hz and
    `target_channels` channels, so the returned waveform has shape
    (1, target_channels, frames) and dtype float32. The tensor is backed
    by its own writable, contiguous buffer.

    Raises:
      ValueError   : target_sr or target_channels is smaller than 1
      RuntimeError : ffmpeg is not on PATH, or ffmpeg exits with an error
    """
    sr = int(target_sr)
    ch = int(target_channels)
    if ch < 1:
        raise ValueError(f"target_channels must be >= 1, got {target_channels!r}")
    if sr < 1:
        raise ValueError(f"target_sr must be >= 1, got {target_sr!r}")

    ffmpeg = which("ffmpeg")
    if ffmpeg is None:
        raise RuntimeError("ffmpeg not found in PATH inside the ComfyUI container.")

    # Force known SR/ch so parsing is deterministic
    cmd = [
        ffmpeg,
        "-i", path,
        "-f", "f32le",
        "-ac", str(ch),
        "-ar", str(sr),
        "pipe:1",
    ]

    proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if proc.returncode != 0:
        raise RuntimeError(
            "ffmpeg decode failed:\n" + proc.stderr.decode("utf-8", errors="ignore")
        )

    # Keep whole frames only: one frame is `ch` float32 values (4 bytes each).
    # Trimming at byte level also covers output that ends mid-sample.
    frame_bytes = 4 * ch
    usable = len(proc.stdout) - (len(proc.stdout) % frame_bytes)

    # "f32le" is little-endian by definition, so say so explicitly.
    raw = np.frombuffer(memoryview(proc.stdout)[:usable], dtype="<f4")

    # frombuffer returns a read-only view over the bytes object and the
    # transpose is non-contiguous; copy into a writable, C-contiguous,
    # native-endian array so the tensor does not alias immutable memory.
    samples = np.array(raw.reshape(-1, ch).T, dtype=np.float32, order="C")  # (channels, frames)

    import torch
    waveform = torch.from_numpy(samples).unsqueeze(0)  # (1, channels, frames)

    return {"waveform": waveform, "sample_rate": sr}


class AudioRepeatFromAudioNode:
    """
    ComfyUI node: write an AUDIO input repeated `repeat_count` times to a
    file using ffmpeg and return the result as an AUDIO dict plus its path.

    Consecutive copies are blended with ffmpeg's `acrossfade` filter when
    `crossfade_seconds` is positive; otherwise they are placed back to back.
    """

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "audio": ("AUDIO",),
                "repeat_count": ("INT", {"default": 20, "min": 1, "max": 500}),
                "output_audio_path": ("STRING", {"default": "/basedir/output/repeated.mp3"}),
            },
            "optional": {
                "overwrite": ("BOOLEAN", {"default": True}),
                "mp3_quality": ("INT", {"default": 0, "min": 0, "max": 9}),
                "crossfade_seconds": ("FLOAT", {"default": 0.15, "min": 0.0, "max": 5.0, "step": 0.01}),
            },
        }

    RETURN_TYPES = ("AUDIO", "STRING")
    RETURN_NAMES = ("audio_out", "output_audio_path")
    FUNCTION = "repeat_audio"
    CATEGORY = "audio"

    @staticmethod
    def _codec_args(ext, mp3_quality):
        """Return the ffmpeg audio-codec arguments for a lower-cased file extension."""
        if ext == ".wav":
            return ["-c:a", "pcm_s16le"]
        # ".mp3" and every other extension are encoded with libmp3lame.
        return ["-c:a", "libmp3lame", "-q:a", str(int(mp3_quality))]

    @staticmethod
    def _build_ffmpeg_cmd(ffmpeg, in_wav, output_audio_path, repeat_count,
                          crossfade_seconds, overwrite, mp3_quality):
        """Build the ffmpeg argument list that renders `in_wav` repeated `repeat_count` times."""
        cmd = [ffmpeg, "-y" if overwrite else "-n"]

        if repeat_count == 1 or crossfade_seconds <= 0.0:
            # No crossfade: plain transcode. For more than one repetition let
            # ffmpeg loop the input; -stream_loop counts the *extra* passes,
            # so N copies need N-1 loops.
            if repeat_count > 1:
                cmd += ["-stream_loop", str(repeat_count - 1)]
            cmd += ["-i", in_wav]
        else:
            # Chain acrossfade between repeated inputs:
            # [0][1] -> a1, [a1][2] -> a2, ... the last label is the output.
            cmd += ["-i", in_wav] * repeat_count

            xf = crossfade_seconds
            parts = [f"[0:a][1:a]acrossfade=d={xf}:c1=tri:c2=tri[a1]"]
            parts.extend(
                f"[a{i-1}][{i}:a]acrossfade=d={xf}:c1=tri:c2=tri[a{i}]"
                for i in range(2, repeat_count)
            )
            cmd += ["-filter_complex", ";".join(parts), "-map", f"[a{repeat_count-1}]"]

        ext = os.path.splitext(output_audio_path)[1].lower()
        cmd += AudioRepeatFromAudioNode._codec_args(ext, mp3_quality)
        cmd.append(output_audio_path)
        return cmd

    def repeat_audio(self, audio, repeat_count, output_audio_path, overwrite=True, mp3_quality=0, crossfade_seconds=0.15):
        """
        Render `audio` repeated `repeat_count` times into `output_audio_path`.

        Args:
          audio: AUDIO input understood by `_extract_audio`.
          repeat_count: number of copies in the output (>= 1).
          output_audio_path: destination file; ".wav" is written as 16-bit
            PCM, any other extension is encoded with libmp3lame. Missing
            parent directories are created.
          overwrite: when false and the file already exists, nothing is
            rendered; the existing file is decoded at 44100 Hz stereo and
            returned.
          mp3_quality: value passed to ffmpeg's `-q:a` for MP3 output.
          crossfade_seconds: overlap between consecutive copies. A value
            <= 0 (or None) disables crossfading; a value that is not
            shorter than the clip is replaced by a quarter of the clip
            duration.

        Returns:
          (audio_out, output_audio_path) where audio_out is the rendered
          file decoded at the input's sample rate and channel count.

        Raises:
          RuntimeError: ffmpeg is not on PATH or ffmpeg fails.
          ValueError: repeat_count < 1 or the input sample rate is not positive.
        """
        ffmpeg = which("ffmpeg")
        if ffmpeg is None:
            raise RuntimeError("ffmpeg not found in PATH inside the ComfyUI container.")

        repeat_count = int(repeat_count)
        if repeat_count < 1:
            raise ValueError("repeat_count must be >= 1")

        crossfade_seconds = float(crossfade_seconds or 0.0)

        out_dir = os.path.dirname(output_audio_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

        if os.path.exists(output_audio_path) and not overwrite:
            # NOTE: the existing file is returned as-is, decoded at a fixed
            # 44100 Hz / 2 channels rather than the input's format.
            audio_out = _load_audio_to_comfy(output_audio_path, target_sr=44100, target_channels=2)
            return (audio_out, output_audio_path)

        samples, sr = _extract_audio(audio)
        if sr <= 0:
            raise ValueError(f"sample rate must be positive, got {sr}")

        frames = int(samples.shape[1])
        duration_sec = frames / float(sr)

        # Safety: crossfade must be shorter than clip
        if crossfade_seconds >= duration_sec:
            crossfade_seconds = max(0.0, duration_sec * 0.25)

        with tempfile.TemporaryDirectory() as td:
            in_wav = os.path.join(td, "input.wav")
            _write_wav_int16(in_wav, samples, sr)

            cmd = AudioRepeatFromAudioNode._build_ffmpeg_cmd(
                ffmpeg, in_wav, output_audio_path, repeat_count,
                crossfade_seconds, overwrite, mp3_quality,
            )

            # errors="replace": ffmpeg may echo bytes (e.g. file names) that
            # are not valid in the locale encoding; never fail on decoding.
            proc = subprocess.run(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                errors="replace",
            )
            if proc.returncode != 0:
                raise RuntimeError(
                    "ffmpeg processing failed.\n"
                    f"Command: {' '.join(cmd)}\n\n"
                    f"STDERR:\n{proc.stderr}"
                )

        # Return a Comfy-compatible AUDIO dict
        audio_out = _load_audio_to_comfy(output_audio_path, target_sr=sr, target_channels=int(samples.shape[0]))
        return (audio_out, output_audio_path)