import { useState } from "react";
import { useModel } from "./hooks/useModel";
import Waveform from "./components/Waveform";
import PulseBars from "./components/PulseBars";
// Demo presets for the in-browser ACE-Step music generator.
// Each entry supplies one generation request:
//   name/emoji — label and icon for the preset card
//   duration   — requested output length in seconds
//   caption    — style / key / BPM description fed to the text encoder
//   lyrics     — section-tagged lyric text ([verse]/[chorus]), or
//                "[instrumental]" for a vocal-free track
const PRESETS = [
  {
    name: "Pop Ballad",
    emoji: "💗",
    duration: 60,
    caption:
      "A gentle pop ballad with piano and soft vocals, key of C major, 80 BPM, emotional and dreamy",
    // Built line-by-line; join("\n") yields the exact same string as the
    // original escaped literal (the "" element is the blank separator line).
    lyrics: [
      "[verse]",
      "Underneath the stars tonight",
      "We dance beneath the pale moonlight",
      "Every moment feels so right",
      "Holding you so close and tight",
      "",
      "[chorus]",
      "This is where I want to be",
      "Right here with you next to me",
      "Let the world just fade away",
      "In your arms I want to stay",
    ].join("\n"),
  },
  {
    name: "Rock Anthem",
    emoji: "🎸",
    duration: 60,
    caption:
      "An energetic rock anthem with electric guitars and powerful drums, key of E minor, 140 BPM, aggressive and intense",
    lyrics: [
      "[verse]",
      "Fire burning in my veins",
      "Breaking free from all these chains",
      "Nothing left to hold me back",
      "Riding down the beaten track",
      "",
      "[chorus]",
      "We are the ones who rise",
      "With thunder in our eyes",
      "We'll never be denied",
      "We're burning up the sky",
    ].join("\n"),
  },
  {
    name: "Lo-fi Chill",
    emoji: "☕",
    duration: 20,
    caption:
      "A relaxing lo-fi hip hop beat with jazz piano samples and vinyl crackle, key of F major, 75 BPM, mellow and nostalgic",
    lyrics: "[instrumental]",
  },
];
// Renders `children` only when the browser exposes the WebGPU API
// (navigator.gpu); otherwise shows a fallback notice naming supported
// browsers.
// NOTE(review): the JSX element markup in the return below appears to have
// been stripped during extraction — only the text content survives. Restore
// the original tags from version control; do not ship as-is.
function WebGPUGate({ children }) {
// typeof guard keeps the check safe where `navigator` is undefined
// (e.g. non-browser / SSR contexts); !! coerces the capability to a boolean.
const supported = typeof navigator !== "undefined" && !!navigator.gpu;
if (supported) return children;
return (
🎹
WebGPU not available
This demo needs WebGPU to run ACE-Step in your browser. Try Chrome 113+, Edge 113+, or Safari 26+ on desktop.
);
}
// Pre-load screen shown before the models are downloaded.
// Props:
//   onLoad   — handler that starts the model load (exact wiring not visible;
//              the JSX markup appears stripped — TODO confirm against VCS)
//   status   — load-state string; "loading" switches the UI to progress mode
//   message  — progress text shown while loading
//   progress — truthy while a progress value is available
//   error    — error text; when set it is shown instead of the loader
// NOTE(review): JSX element markup in the return appears stripped — only the
// text content remains. Also, "~8 GB" in the copy below contradicts the
// "~2 GB" download figures stated twice later in this file — confirm the
// real model size before release.
function LoadGate({ onLoad, status, message, progress, error }) {
// Only the exact string "loading" counts as in-progress.
const loading = status === "loading";
return (
🎹
Load models
Loads ~8 GB of ONNX models. Everything runs in your browser — your prompts never leave this device.
Built with{" "}
🤗 Transformers.js
{" + "}
ONNX Runtime Web
.
{error ? (
{error}
) : loading ? (
{message && (
{message}
)}
{progress && }
) : (
)}
);
}
// Clickable card for one PRESETS entry.
// Props: preset (a preset object), active (selection flag), onClick (handler).
// NOTE(review): the returned JSX has been lost entirely in this extraction —
// as written the component renders nothing; restore the markup from version
// control.
function PresetCard({ preset, active, onClick }) {
return (
);
}
// Inline status line shown only while a generation is in flight.
// Returns null unless `status` is exactly "generating"; then shows `message`
// (or a default) plus an expected-duration hint.
// NOTE(review): JSX element markup in the return appears stripped — only the
// text content remains; restore from version control.
function GenerationStatus({ status, message }) {
if (status !== "generating") return null;
return (
{message || "Generating…"}
this takes 1–4 min
);
}
function OutputCard({ audioUrl, audioInfo }) {
if (!audioUrl) return null;
return (
)}
>
)}
{/* About / methodology */}
How it works & known limitations
Pipeline
Text encoder (Qwen3-Embedding-0.6B, fp16) turns the caption into conditioning hidden states; the same model provides token embeddings for the lyric path.
5 Hz LM (ACE-Step acestep-5Hz-lm-0.6B, 4-bit MatMulNBits) writes a short chain-of-thought, then emits ~50 audio codes per 10 s of output.
FSQ → detokenizer expands the codes into 25 Hz acoustic features used as cross-attention hints.
DiT decoder (2B parameters, fp16) runs 8 Euler flow-matching steps (shift=3.0) over a random latent conditioned on text, lyrics, and hints.
Oobleck VAE (fp16) decodes the 25 Hz latent into stereo 48 kHz audio.
Why it runs in the browser
Everything executes on-device via onnxruntime-web with the WebGPU execution provider. Two Web Workers keep the LM and the diffusion+VAE graphs in separate WASM heaps so neither hits the 4 GB single-heap limit. Total download is ~2 GB (cached in the browser after the first load).
Methodology notes
Compared stage-by-stage against the PyTorch fp32 reference: every tensor agrees to within 0.2% relative L2, and the generated waveforms sound identical.
FP16 DiT is exported natively (model.half() + dynamo). An earlier fp32→fp16 conversion with post-hoc Cast insertion produced a 25 Hz helicopter artifact, now resolved.
4-bit quantization is MatMulNBits with block_size=64, asymmetric, accuracy_level=1 (fp32 accumulate).
Known limitations
First load is slow. ~2 GB of weights must be fetched and cached; subsequent runs start fast.
Vocals need ≥60 s. The 0.6B LM often refuses to emit lyric-aligned audio codes for short durations — instrumentals work at any length.
Turbo quality ceiling. We run 8 diffusion steps (shift=3.0). More steps nudge quality up but aren't supported by the turbo weights we ship.
Condition-encoder drift. The ONNX condition_encoder has a small drift (~0.4 max_diff) vs PyTorch on real inputs — inaudible today but a known residual we haven't closed.
WebGPU only. No fallback path; the demo gates on WebGPU support (Chrome/Edge 113+, Safari 26+ desktop).
Memory. Two workers each hold ~1–2 GB; low-RAM devices may hit std::bad_alloc during model creation.
No seed control. Each generation uses a fresh RNG, so re-runs with the same prompt will differ.