From 11ee02c0d269865bbee2a3a0e9c2216e85658efa Mon Sep 17 00:00:00 2001 From: Andrew Belt Date: Mon, 29 Dec 2025 03:09:08 -0500 Subject: [PATCH 01/18] VCO: Increase frequency range to about 3 to 20,000 Hz. Make sine pure instead of quadratic. Add DC blocker to all outputs. Improve performance. --- src/VCO.cpp | 411 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 267 insertions(+), 144 deletions(-) diff --git a/src/VCO.cpp b/src/VCO.cpp index 57b5683..f65237a 100644 --- a/src/VCO.cpp +++ b/src/VCO.cpp @@ -1,66 +1,222 @@ #include "plugin.hpp" -using simd::float_4; - +// TODO: Remove these DSP classes after released in Rack SDK -// Accurate only on [0, 1] -template -T sin2pi_pade_05_7_6(T x) { - x -= 0.5f; - return (T(-6.28319) * x + T(35.353) * simd::pow(x, 3) - T(44.9043) * simd::pow(x, 5) + T(16.0951) * simd::pow(x, 7)) - / (1 + T(0.953136) * simd::pow(x, 2) + T(0.430238) * simd::pow(x, 4) + T(0.0981408) * simd::pow(x, 6)); +inline simd::float_4 ifelse(simd::float_4 mask, simd::float_4 a, simd::float_4 b) { + return simd::float_4(_mm_blendv_ps(b.v, a.v, mask.v)); } +/** Approximates sin(2pi x) in the domain [0, 1]. +Evaluates a 7th order odd polynomial on the range [0, 0.25] after folding the argument. +Optimized for THD and max error. +THD -84 dB, max error 1e-04 +*/ template -T sin2pi_pade_05_5_4(T x) { - x -= 0.5f; - return (T(-6.283185307) * x + T(33.19863968) * simd::pow(x, 3) - T(32.44191367) * simd::pow(x, 5)) - / (1 + T(1.296008659) * simd::pow(x, 2) + T(0.7028072946) * simd::pow(x, 4)); +T sin2pi_fast(T x) { + // Get sign of result + T sign = ::ifelse(x >= T(0.5), -1, 1); + // Shift if negative + x = ::ifelse(x >= T(0.5), x - T(0.5), x); + // Flip if >0.25 + x = ::ifelse(x >= T(0.25), T(0.5) - x, x); + T x2 = x * x; + const T c1 = 6.281422578399; + const T c3 = -41.122058514502; + const T c5 = 74.010711837743; + return sign * x * (c1 + x2 * (c3 + x2 * c5)); } -template -T expCurve(T x) { - return (3 + x * (-13 + 5 * x)) / (3 + 2 * x); -} +/** One-pole low-pass filter, exponential moving average. +-6 dB/octave slope. +Useful for leaky integrators and smoothing control signals. +Has a pole at (1 - alpha) and a zero at 0. +*/ +struct OnePoleLowpass { + float alpha = 0.f; -template -struct VoltageControlledOscillator { - bool analog = false; - bool soft = false; - bool syncEnabled = false; - // For optimizing in serial code - int channels = 0; + /** Sets the cutoff frequency where gain is -3 dB. + f = f_c / f_s, the normalized frequency in the range [0, 0.5]. + */ + void setCutoff(float f) { + alpha = 1.f - std::exp(-2.f * M_PI * f); + } - T lastSyncValue = 0.f; + template + struct State { + T y = 0.f; + }; + + /** Advances the state with input x. Returns the output. */ + template + T process(State& s, T x) { + s.y += alpha * (x - s.y); + return s.y; + } + + /** Computes the frequency response at normalized frequency f. */ + std::complex getResponse(float f) const { + float omega = 2.f * M_PI * f; + std::complex z = std::exp(std::complex(0.f, omega)); + return alpha * z / (z - (1.f - alpha)); + } + float getMagnitude(float f) const { + return std::abs(getResponse(f)); + } + float getPhase(float f) const { + return std::arg(getResponse(f)); + } +}; + + +/** One-pole high-pass filter. +6 dB/octave slope. +Useful for DC-blocking. +Has a pole at (1 - alpha) and a zero at 1. +*/ +struct OnePoleHighpass : OnePoleLowpass { + template + T process(State& s, T x) { + return x - OnePoleLowpass::process(s, x); + } + + std::complex getResponse(float f) const { + return 1.f - OnePoleLowpass::getResponse(f); + } + float getMagnitude(float f) const { + return std::abs(getResponse(f)); + } + float getPhase(float f) const { + return std::arg(getResponse(f)); + } +}; + + +/** Computes the minimum-phase bandlimited step (MinBLEP) +Z: number of zero-crossings on each side of the original symmetric sync signal +O: oversample factor +output: must be length `(2 * Z) * O`. +First sample is >0. Last sample is 1. + +Algorithm from "Hard Sync Without Aliasing" by Eli Brandt (2001). +https://www.cs.cmu.edu/~eli/papers/icmc01-hardsync.pdf +*/ +void minBlepImpulse(int Z, int O, float* output); + + +template +struct MinBlepGenerator { + T buffer[2 * Z] = {}; + int bufferIndex = 0; + + /** Reordered impulse response for adjacent access, minus 1.0. + Padded at end with zeros so impulseReordered[o][2 * Z] can be loaded into a simd::float_4. + */ + alignas(16) float impulseReordered[O][2 * Z + 4] = {}; + + MinBlepGenerator() { + float impulse[2 * Z][O]; + dsp::minBlepImpulse(Z, O, &impulse[0][0]); + + // Transpose array and pre-subtract 1. + for (int o = 0; o < O; o++) { + for (int z = 0; z < 2 * Z; z++) { + impulseReordered[o][z] = impulse[z][o] - 1.f; + } + } + } + + /** Places a discontinuity with magnitude `x` at -1 < p <= 0 relative to the current frame. + For example if a square wave jumps from 1 to -1 at the subsample time 0.1 frames ago, use insertDiscontinuity(-0.1, -2.0) + */ + void insertDiscontinuity(float p, T x) { + if (!(-1 < p && p <= 0)) + return; + + // Calculate oversampling index and fractional part + float subsample = -p * O; + int o = (int) subsample; + float frac = subsample - o; + + // For each zero crossing, interpolate impulse response between oversample indices + // Compute 4 at a time with SIMD + for (int z = 0; z < 2 * Z; z += 4) { + simd::float_4 ir0 = simd::float_4::load(&impulseReordered[o][z]); + // If oversample index reaches next zero crossing, wrap to next z + simd::float_4 ir1 = simd::float_4::load((o + 1 < O) ? &impulseReordered[o + 1][z] : &impulseReordered[0][z + 1]); + simd::float_4 ir = (ir0 + (ir1 - ir0) * frac); + + for (int zz = 0; zz < 4; zz++) { + buffer[(bufferIndex + z + zz) % (2 * Z)] += ir[zz] * x; + } + } + // for (int z = 0; z < 2 * Z; z++) { + // float ir0 = impulseReordered[o][z]; + // float ir1 = (o + 1 < O) ? impulseReordered[o + 1][z] : impulseReordered[0][z + 1]; + // float ir = (ir0 + (ir1 - ir0) * frac); + // buffer[(bufferIndex + z) % (2 * Z)] += ir * x; + // } + } + + /** Should be called every frame after inserting any discontinuities. */ + T process() { + T v = buffer[bufferIndex]; + buffer[bufferIndex] = T(0); + bufferIndex = (bufferIndex + 1) % (2 * Z); + return v; + } +}; + + +template +struct VCOProcessor { T phase = 0.f; - T freq = 0.f; - T pulseWidth = 0.5f; + T lastSyncValue = 0.f; T syncDirection = 1.f; T sqrState = 1.f; - dsp::TRCFilter sqrFilter; - - dsp::MinBlepGenerator sqrMinBlep; - dsp::MinBlepGenerator sawMinBlep; - dsp::MinBlepGenerator triMinBlep; - dsp::MinBlepGenerator sinMinBlep; + OnePoleHighpass dcFilter; + OnePoleHighpass::State dcFilterStateSqr; + OnePoleHighpass::State dcFilterStateSaw; + OnePoleHighpass::State dcFilterStateTri; + OnePoleHighpass::State dcFilterStateSin; - T sqrValue = 0.f; - T sawValue = 0.f; - T triValue = 0.f; - T sinValue = 0.f; + MinBlepGenerator<16, 16, T> sqrMinBlep; + MinBlepGenerator<16, 16, T> sawMinBlep; + MinBlepGenerator<16, 16, T> triMinBlep; + MinBlepGenerator<16, 16, T> sinMinBlep; - void setPulseWidth(T pulseWidth) { - const float pwMin = 0.01f; - this->pulseWidth = simd::clamp(pulseWidth, pwMin, 1.f - pwMin); + void setSampleTime(float sampleTime) { + dcFilter.setCutoff(std::min(0.4f, 40.f * sampleTime)); } - void process(float deltaTime, T syncValue) { + struct Frame { + /** Number of channels valid in SIMD type + For optimizing serial operations. + */ + uint8_t channels = 0; + bool soft = false; + bool syncEnabled = false; + bool sqrEnabled = false; + bool sawEnabled = false; + bool triEnabled = false; + bool sinEnabled = false; + T pulseWidth = 0.5f; + T sync = 0.f; + T freq = 0.f; + + // Outputs + T sqr = 0.f; + T saw = 0.f; + T tri = 0.f; + T sin = 0.f; + }; + + void process(Frame& frame, float deltaTime) { // Advance phase - T deltaPhase = simd::clamp(freq * deltaTime, 0.f, 0.35f); - if (soft) { + T deltaPhase = simd::clamp(frame.freq * deltaTime, 0.f, 0.49f); + if (frame.soft) { // Reverse direction deltaPhase *= syncDirection; } @@ -80,7 +236,7 @@ struct VoltageControlledOscillator { if (wrapM) { T wrapPhase = (syncDirection == -1.f) & 1.f; T wrapCrossing = (wrapPhase - (phase - deltaPhase)) / deltaPhase; - for (int i = 0; i < channels; i++) { + for (int i = 0; i < frame.channels; i++) { if (wrapM & (1 << i)) { T mask = simd::movemaskInverse(1 << i); // TODO: MinBlepGenerator::insertDiscontinuity() should handle subframes outside the range -1 < p <= 0 instead of failing silently. @@ -92,12 +248,16 @@ struct VoltageControlledOscillator { } sqrState = simd::ifelse(wrapMask, syncDirection, sqrState); + // Pulse width + const float pwMin = 0.01f; + T pulseWidth = simd::clamp(frame.pulseWidth, pwMin, 1.f - pwMin); + // Jump sqr when crossing `pulseWidth` T pwMask = (syncDirection == sqrState) & ((syncDirection == 1.f) ^ (phase < pulseWidth)); int pw = simd::movemask(pwMask); if (pw) { T pulseCrossing = (pulseWidth - (phase - deltaPhase)) / deltaPhase; - for (int i = 0; i < channels; i++) { + for (int i = 0; i < frame.channels; i++) { if (pw & (1 << i)) { T mask = simd::movemaskInverse(1 << i); float p = clamp(pulseCrossing[i] - 1.f, -1.f, 0.f); @@ -112,7 +272,7 @@ struct VoltageControlledOscillator { T halfCrossing = (0.5f - (phase - deltaPhase)) / deltaPhase; int halfMask = simd::movemask((0 < halfCrossing) & (halfCrossing <= 1.f)); if (halfMask) { - for (int i = 0; i < channels; i++) { + for (int i = 0; i < frame.channels; i++) { if (halfMask & (1 << i)) { T mask = simd::movemaskInverse(1 << i); float p = halfCrossing[i] - 1.f; @@ -124,20 +284,20 @@ struct VoltageControlledOscillator { // Detect sync // Might be NAN or outside of [0, 1) range - if (syncEnabled) { - T deltaSync = syncValue - lastSyncValue; + if (frame.syncEnabled) { + T deltaSync = frame.sync - lastSyncValue; T syncCrossing = -lastSyncValue / deltaSync; - lastSyncValue = syncValue; - T sync = (0.f < syncCrossing) & (syncCrossing <= 1.f) & (syncValue >= 0.f); + lastSyncValue = frame.sync; + T sync = (0.f < syncCrossing) & (syncCrossing <= 1.f) & (frame.sync >= 0.f); int syncMask = simd::movemask(sync); if (syncMask) { - if (soft) { + if (frame.soft) { syncDirection = simd::ifelse(sync, -syncDirection, syncDirection); } else { T newPhase = simd::ifelse(sync, (1.f - syncCrossing) * deltaPhase, phase); // Insert minBLEP for sync - for (int i = 0; i < channels; i++) { + for (int i = 0; i < frame.channels; i++) { if (syncMask & (1 << i)) { T mask = simd::movemaskInverse(1 << i); float p = syncCrossing[i] - 1.f; @@ -160,93 +320,56 @@ struct VoltageControlledOscillator { } // Square - sqrValue = sqrState; - sqrValue += sqrMinBlep.process(); - - if (analog) { - sqrFilter.setCutoffFreq(20.f * deltaTime); - sqrFilter.process(sqrValue); - sqrValue = sqrFilter.highpass() * 0.95f; + if (frame.sqrEnabled) { + frame.sqr = sqrState; + frame.sqr += sqrMinBlep.process(); + frame.sqr = dcFilter.process(dcFilterStateSqr, frame.sqr); } // Saw - sawValue = saw(phase); - sawValue += sawMinBlep.process(); + if (frame.sawEnabled) { + frame.saw = saw(phase); + frame.saw += sawMinBlep.process(); + frame.saw = dcFilter.process(dcFilterStateSaw, frame.saw); + } // Tri - triValue = tri(phase); - triValue += triMinBlep.process(); + if (frame.triEnabled) { + frame.tri = tri(phase); + frame.tri += triMinBlep.process(); + frame.tri = dcFilter.process(dcFilterStateTri, frame.tri); + } // Sin - sinValue = sin(phase); - sinValue += sinMinBlep.process(); - } - - T sin(T phase) { - T v; - if (analog) { - // Quadratic approximation of sine, slightly richer harmonics - T halfPhase = (phase < 0.5f); - T x = phase - simd::ifelse(halfPhase, 0.25f, 0.75f); - v = 1.f - 16.f * simd::pow(x, 2); - v *= simd::ifelse(halfPhase, 1.f, -1.f); + if (frame.sinEnabled) { + frame.sin = sin(phase); + frame.sin += sinMinBlep.process(); + frame.sin = dcFilter.process(dcFilterStateSin, frame.sin); } - else { - v = sin2pi_pade_05_5_4(phase); - // v = sin2pi_pade_05_7_6(phase); - // v = simd::sin(2 * T(M_PI) * phase); - } - return v; - } - T sin() { - return sinValue; } - T tri(T phase) { - T v; - if (analog) { - T x = phase + 0.25f; - x -= simd::trunc(x); - T halfX = (x >= 0.5f); - x *= 2; - x -= simd::trunc(x); - v = expCurve(x) * simd::ifelse(halfX, 1.f, -1.f); - } - else { - v = 1 - 4 * simd::fmin(simd::fabs(phase - 0.25f), simd::fabs(phase - 1.25f)); - } - return v; - } - T tri() { - return triValue; + T light() const { + return sin(phase); } - T saw(T phase) { - T v; + static T saw(T phase) { T x = phase + 0.5f; x -= simd::trunc(x); - if (analog) { - v = -expCurve(x); - } - else { - v = 2 * x - 1; - } - return v; + return 2 * x - 1; } - T saw() { - return sawValue; + static T tri(T phase) { + return 1 - 4 * simd::fmin(simd::fabs(phase - 0.25f), simd::fabs(phase - 1.25f)); } - - T sqr() { - return sqrValue; - } - - T light() { - return simd::sin(2 * T(M_PI) * phase); + static T sin(T phase) { + return sin2pi_fast(phase); + // return simd::sin(2.f * M_PI * phase); } }; +using simd::float_4; + + struct VCO : Module { enum ParamIds { MODE_PARAM, // removed @@ -281,14 +404,14 @@ struct VCO : Module { NUM_LIGHTS }; - VoltageControlledOscillator<16, 16, float_4> oscillators[4]; + VCOProcessor processors[4]; dsp::ClockDivider lightDivider; VCO() { config(NUM_PARAMS, NUM_INPUTS, NUM_OUTPUTS, NUM_LIGHTS); configSwitch(LINEAR_PARAM, 0.f, 1.f, 0.f, "FM mode", {"1V/octave", "Linear"}); configSwitch(SYNC_PARAM, 0.f, 1.f, 1.f, "Sync mode", {"Soft", "Hard"}); - configParam(FREQ_PARAM, -54.f, 54.f, 0.f, "Frequency", " Hz", dsp::FREQ_SEMITONE, dsp::FREQ_C4); + configParam(FREQ_PARAM, -75.f, 75.f, 0.f, "Frequency", " Hz", dsp::FREQ_SEMITONE, dsp::FREQ_C4); configParam(FM_PARAM, -1.f, 1.f, 0.f, "Frequency modulation", "%", 0.f, 100.f); getParamQuantity(FM_PARAM)->randomizeEnabled = false; configParam(PW_PARAM, 0.01f, 0.99f, 0.5f, "Pulse width", "%", 0.f, 100.f); @@ -308,22 +431,29 @@ struct VCO : Module { lightDivider.setDivision(16); } + void onSampleRateChange(const SampleRateChangeEvent& e) override { + for (int c = 0; c < 16; c += 4) { + processors[c / 4].setSampleTime(e.sampleTime); + } + } + void process(const ProcessArgs& args) override { + VCOProcessor::Frame frame; float freqParam = params[FREQ_PARAM].getValue() / 12.f; float fmParam = params[FM_PARAM].getValue(); float pwParam = params[PW_PARAM].getValue(); float pwCvParam = params[PW_CV_PARAM].getValue(); bool linear = params[LINEAR_PARAM].getValue() > 0.f; - bool soft = params[SYNC_PARAM].getValue() <= 0.f; - + frame.soft = params[SYNC_PARAM].getValue() <= 0.f; + frame.syncEnabled = inputs[SYNC_INPUT].isConnected(); + frame.sqrEnabled = outputs[SQR_OUTPUT].isConnected(); + frame.sawEnabled = outputs[SAW_OUTPUT].isConnected(); + frame.triEnabled = outputs[TRI_OUTPUT].isConnected(); + frame.sinEnabled = outputs[SIN_OUTPUT].isConnected(); int channels = std::max(inputs[PITCH_INPUT].getChannels(), 1); for (int c = 0; c < channels; c += 4) { - auto& oscillator = oscillators[c / 4]; - oscillator.channels = std::min(channels - c, 4); - // removed - oscillator.analog = true; - oscillator.soft = soft; + frame.channels = std::min(channels - c, 4); // Get frequency float_4 pitch = freqParam + inputs[PITCH_INPUT].getPolyVoltageSimd(c); @@ -336,26 +466,19 @@ struct VCO : Module { freq = dsp::FREQ_C4 * dsp::exp2_taylor5(pitch); freq += dsp::FREQ_C4 * inputs[FM_INPUT].getPolyVoltageSimd(c) * fmParam; } - freq = clamp(freq, 0.f, args.sampleRate / 2.f); - oscillator.freq = freq; + frame.freq = clamp(freq, 0.f, args.sampleRate / 2.f); // Get pulse width - float_4 pw = pwParam + inputs[PW_INPUT].getPolyVoltageSimd(c) / 10.f * pwCvParam; - oscillator.setPulseWidth(pw); + frame.pulseWidth = pwParam + inputs[PW_INPUT].getPolyVoltageSimd(c) / 10.f * pwCvParam; - oscillator.syncEnabled = inputs[SYNC_INPUT].isConnected(); - float_4 sync = inputs[SYNC_INPUT].getPolyVoltageSimd(c); - oscillator.process(args.sampleTime, sync); + frame.sync = inputs[SYNC_INPUT].getPolyVoltageSimd(c); + processors[c / 4].process(frame, args.sampleTime); // Set output - if (outputs[SIN_OUTPUT].isConnected()) - outputs[SIN_OUTPUT].setVoltageSimd(5.f * oscillator.sin(), c); - if (outputs[TRI_OUTPUT].isConnected()) - outputs[TRI_OUTPUT].setVoltageSimd(5.f * oscillator.tri(), c); - if (outputs[SAW_OUTPUT].isConnected()) - outputs[SAW_OUTPUT].setVoltageSimd(5.f * oscillator.saw(), c); - if (outputs[SQR_OUTPUT].isConnected()) - outputs[SQR_OUTPUT].setVoltageSimd(5.f * oscillator.sqr(), c); + outputs[SQR_OUTPUT].setVoltageSimd(5.f * frame.sqr, c); + outputs[SAW_OUTPUT].setVoltageSimd(5.f * frame.saw, c); + outputs[TRI_OUTPUT].setVoltageSimd(5.f * frame.tri, c); + outputs[SIN_OUTPUT].setVoltageSimd(5.f * frame.sin, c); } outputs[SIN_OUTPUT].setChannels(channels); @@ -366,7 +489,7 @@ struct VCO : Module { // Light if (lightDivider.process()) { if (channels == 1) { - float lightValue = oscillators[0].light()[0]; + float lightValue = processors[0].light()[0]; lights[PHASE_LIGHT + 0].setSmoothBrightness(-lightValue, args.sampleTime * lightDivider.getDivision()); lights[PHASE_LIGHT + 1].setSmoothBrightness(lightValue, args.sampleTime * lightDivider.getDivision()); lights[PHASE_LIGHT + 2].setBrightness(0.f); @@ -377,7 +500,7 @@ struct VCO : Module { lights[PHASE_LIGHT + 2].setBrightness(1.f); } lights[LINEAR_LIGHT].setBrightness(linear); - lights[SOFT_LIGHT].setBrightness(soft); + lights[SOFT_LIGHT].setBrightness(frame.soft); } } }; From b71bf3abcfc21a0b3670cbc8fffc58974ce926af Mon Sep 17 00:00:00 2001 From: Andrew Belt Date: Mon, 29 Dec 2025 04:05:31 -0500 Subject: [PATCH 02/18] VCO: Make triangle output an integrator of the square. Don't insert minBLEPs unless an output is enabled (connected). --- src/VCO.cpp | 146 +++++++++++++++++++++++++++++----------------------- 1 file changed, 81 insertions(+), 65 deletions(-) diff --git a/src/VCO.cpp b/src/VCO.cpp index f65237a..b1eea10 100644 --- a/src/VCO.cpp +++ b/src/VCO.cpp @@ -175,6 +175,7 @@ struct VCOProcessor { T lastSyncValue = 0.f; T syncDirection = 1.f; T sqrState = 1.f; + T triFilterState = 0.f; OnePoleHighpass dcFilter; OnePoleHighpass::State dcFilterStateSqr; @@ -184,7 +185,6 @@ struct VCOProcessor { MinBlepGenerator<16, 16, T> sqrMinBlep; MinBlepGenerator<16, 16, T> sawMinBlep; - MinBlepGenerator<16, 16, T> triMinBlep; MinBlepGenerator<16, 16, T> sinMinBlep; void setSampleTime(float sampleTime) { @@ -230,54 +230,58 @@ struct VCOProcessor { T phaseFloor = simd::floor(phase); phase -= phaseFloor; - // Jump sqr when phase crosses 1, or crosses 0 if running backwards - T wrapMask = (phaseFloor != 0.f); - int wrapM = simd::movemask(wrapMask); - if (wrapM) { - T wrapPhase = (syncDirection == -1.f) & 1.f; - T wrapCrossing = (wrapPhase - (phase - deltaPhase)) / deltaPhase; - for (int i = 0; i < frame.channels; i++) { - if (wrapM & (1 << i)) { - T mask = simd::movemaskInverse(1 << i); - // TODO: MinBlepGenerator::insertDiscontinuity() should handle subframes outside the range -1 < p <= 0 instead of failing silently. - float p = clamp(wrapCrossing[i] - 1.f, -1.f, 0.f); - T x = mask & (2.f * syncDirection); - sqrMinBlep.insertDiscontinuity(p, x); + if (frame.sqrEnabled || frame.triEnabled) { + // Jump sqr when phase crosses 1, or crosses 0 if running backwards + T wrapMask = (phaseFloor != 0.f); + int wrapM = simd::movemask(wrapMask); + if (wrapM) { + T wrapPhase = (syncDirection == -1.f) & 1.f; + T wrapCrossing = (wrapPhase - (phase - deltaPhase)) / deltaPhase; + for (int i = 0; i < frame.channels; i++) { + if (wrapM & (1 << i)) { + T mask = simd::movemaskInverse(1 << i); + // TODO: MinBlepGenerator::insertDiscontinuity() should handle subframes outside the range -1 < p <= 0 instead of failing silently. + float p = clamp(wrapCrossing[i] - 1.f, -1.f, 0.f); + T x = mask & (2.f * syncDirection); + sqrMinBlep.insertDiscontinuity(p, x); + } } } - } - sqrState = simd::ifelse(wrapMask, syncDirection, sqrState); - - // Pulse width - const float pwMin = 0.01f; - T pulseWidth = simd::clamp(frame.pulseWidth, pwMin, 1.f - pwMin); - - // Jump sqr when crossing `pulseWidth` - T pwMask = (syncDirection == sqrState) & ((syncDirection == 1.f) ^ (phase < pulseWidth)); - int pw = simd::movemask(pwMask); - if (pw) { - T pulseCrossing = (pulseWidth - (phase - deltaPhase)) / deltaPhase; - for (int i = 0; i < frame.channels; i++) { - if (pw & (1 << i)) { - T mask = simd::movemaskInverse(1 << i); - float p = clamp(pulseCrossing[i] - 1.f, -1.f, 0.f); - T x = mask & (-2.f * syncDirection); - sqrMinBlep.insertDiscontinuity(p, x); + sqrState = simd::ifelse(wrapMask, syncDirection, sqrState); + + // Pulse width + const float pwMin = 0.01f; + T pulseWidth = simd::clamp(frame.pulseWidth, pwMin, 1.f - pwMin); + + // Jump sqr when crossing `pulseWidth` + T pwMask = (syncDirection == sqrState) & ((syncDirection == 1.f) ^ (phase < pulseWidth)); + int pw = simd::movemask(pwMask); + if (pw) { + T pulseCrossing = (pulseWidth - (phase - deltaPhase)) / deltaPhase; + for (int i = 0; i < frame.channels; i++) { + if (pw & (1 << i)) { + T mask = simd::movemaskInverse(1 << i); + float p = clamp(pulseCrossing[i] - 1.f, -1.f, 0.f); + T x = mask & (-2.f * syncDirection); + sqrMinBlep.insertDiscontinuity(p, x); + } } } + sqrState = simd::ifelse(pwMask, -syncDirection, sqrState); } - sqrState = simd::ifelse(pwMask, -syncDirection, sqrState); - - // Jump saw when crossing 0.5 - T halfCrossing = (0.5f - (phase - deltaPhase)) / deltaPhase; - int halfMask = simd::movemask((0 < halfCrossing) & (halfCrossing <= 1.f)); - if (halfMask) { - for (int i = 0; i < frame.channels; i++) { - if (halfMask & (1 << i)) { - T mask = simd::movemaskInverse(1 << i); - float p = halfCrossing[i] - 1.f; - T x = mask & (-2.f * syncDirection); - sawMinBlep.insertDiscontinuity(p, x); + + if (frame.sawEnabled) { + // Jump saw when crossing 0.5 + T halfCrossing = (0.5f - (phase - deltaPhase)) / deltaPhase; + int halfMask = simd::movemask((0 < halfCrossing) & (halfCrossing <= 1.f)); + if (halfMask) { + for (int i = 0; i < frame.channels; i++) { + if (halfMask & (1 << i)) { + T mask = simd::movemaskInverse(1 << i); + float p = halfCrossing[i] - 1.f; + T x = mask & (-2.f * syncDirection); + sawMinBlep.insertDiscontinuity(p, x); + } } } } @@ -303,15 +307,19 @@ struct VCOProcessor { float p = syncCrossing[i] - 1.f; T x; // Assume that hard-syncing a square always resets it to HIGH - x = mask & (1.f - sqrState); - sqrState = simd::ifelse(mask, 1.f, sqrState); - sqrMinBlep.insertDiscontinuity(p, x); - x = mask & (saw(newPhase) - saw(phase)); - sawMinBlep.insertDiscontinuity(p, x); - x = mask & (tri(newPhase) - tri(phase)); - triMinBlep.insertDiscontinuity(p, x); - x = mask & (sin(newPhase) - sin(phase)); - sinMinBlep.insertDiscontinuity(p, x); + if (frame.sqrEnabled || frame.triEnabled) { + x = mask & (1.f - sqrState); + sqrState = simd::ifelse(mask, 1.f, sqrState); + sqrMinBlep.insertDiscontinuity(p, x); + } + if (frame.sawEnabled) { + x = mask & (saw(newPhase) - saw(phase)); + sawMinBlep.insertDiscontinuity(p, x); + } + if (frame.sinEnabled) { + x = mask & (sin(newPhase) - sin(phase)); + sinMinBlep.insertDiscontinuity(p, x); + } } } phase = newPhase; @@ -319,13 +327,6 @@ struct VCOProcessor { } } - // Square - if (frame.sqrEnabled) { - frame.sqr = sqrState; - frame.sqr += sqrMinBlep.process(); - frame.sqr = dcFilter.process(dcFilterStateSqr, frame.sqr); - } - // Saw if (frame.sawEnabled) { frame.saw = saw(phase); @@ -333,11 +334,26 @@ struct VCOProcessor { frame.saw = dcFilter.process(dcFilterStateSaw, frame.saw); } - // Tri - if (frame.triEnabled) { - frame.tri = tri(phase); - frame.tri += triMinBlep.process(); - frame.tri = dcFilter.process(dcFilterStateTri, frame.tri); + // Square + if (frame.sqrEnabled || frame.triEnabled) { + frame.sqr = sqrState; + frame.sqr += sqrMinBlep.process(); + T triSqr = frame.sqr; + frame.sqr = dcFilter.process(dcFilterStateSqr, frame.sqr); + + // Tri + if (frame.triEnabled) { + // Integrate square wave + const float triShape = 0.2f; + T triFreq = deltaTime * triShape * frame.freq; + // T alpha = 1.f - simd::exp(-2.f * M_PI * triFreq); + // Use bilinear transform to derive alpha + T alpha = 1 / (1 + 1 / (M_PI * triFreq)); + triFilterState += alpha * (triSqr - triFilterState); + // Apply gain to roughly have unit amplitude at 0.5 pulseWidth. Depends on triShape. + frame.tri = triFilterState * 6.6f; + frame.tri = dcFilter.process(dcFilterStateTri, frame.tri); + } } // Sin From e819498fd388755efcb876b37d1e33fddf4a29ac Mon Sep 17 00:00:00 2001 From: Andrew Belt Date: Tue, 30 Dec 2025 22:19:42 -0500 Subject: [PATCH 03/18] VCO: Use cubic interpolation for minBLEP impulse. Use more accurate sine. --- src/VCO.cpp | 204 ++++++++++++++++++++++++++++------------------------ 1 file changed, 111 insertions(+), 93 deletions(-) diff --git a/src/VCO.cpp b/src/VCO.cpp index b1eea10..5cb955e 100644 --- a/src/VCO.cpp +++ b/src/VCO.cpp @@ -3,30 +3,27 @@ // TODO: Remove these DSP classes after released in Rack SDK -inline simd::float_4 ifelse(simd::float_4 mask, simd::float_4 a, simd::float_4 b) { - return simd::float_4(_mm_blendv_ps(b.v, a.v, mask.v)); -} - -/** Approximates sin(2pi x) in the domain [0, 1]. -Evaluates a 7th order odd polynomial on the range [0, 0.25] after folding the argument. -Optimized for THD and max error. -THD -84 dB, max error 1e-04 +/** Evaluates sin(pi x) for x in [-1, 1]. +9th order polynomial with roots at 0, -1, and 1. +Optimized coefficients for best THD (-103.7 dB) and max absolute error (6.68e-06). */ template -T sin2pi_fast(T x) { - // Get sign of result - T sign = ::ifelse(x >= T(0.5), -1, 1); - // Shift if negative - x = ::ifelse(x >= T(0.5), x - T(0.5), x); - // Flip if >0.25 - x = ::ifelse(x >= T(0.25), T(0.5) - x, x); +inline T sin_pi_9(T x) { T x2 = x * x; - const T c1 = 6.281422578399; - const T c3 = -41.122058514502; - const T c5 = 74.010711837743; - return sign * x * (c1 + x2 * (c3 + x2 * c5)); + return x * (T(1) - x2) * (T(3.141521108990) + x2 * (T(-2.024773594411) + x2 * (T(0.517493161073) + x2 * T(-0.063691343423)))); } +/** Catmull-Rom cubic interpolation +t is the fractional position between y1 and y2. +*/ +template +T cubicInterp(T y0, T y1, T y2, T y3, T t) { + T t2 = t * t; + T t3 = t2 * t; + return y1 + T(0.5) * t * (y2 - y0) + + t2 * (y0 - T(2.5)*y1 + T(2)*y2 - T(0.5)*y3) + + t3 * (T(-0.5)*y0 + T(1.5)*y1 - T(1.5)*y2 + T(0.5)*y3); +} /** One-pole low-pass filter, exponential moving average. -6 dB/octave slope. @@ -50,7 +47,7 @@ struct OnePoleLowpass { /** Advances the state with input x. Returns the output. */ template - T process(State& s, T x) { + T process(State& s, T x) const { s.y += alpha * (x - s.y); return s.y; } @@ -77,7 +74,7 @@ Has a pole at (1 - alpha) and a zero at 1. */ struct OnePoleHighpass : OnePoleLowpass { template - T process(State& s, T x) { + T process(State& s, T x) const { return x - OnePoleLowpass::process(s, x); } @@ -93,82 +90,103 @@ struct OnePoleHighpass : OnePoleLowpass { }; -/** Computes the minimum-phase bandlimited step (MinBLEP) -Z: number of zero-crossings on each side of the original symmetric sync signal -O: oversample factor -output: must be length `(2 * Z) * O`. -First sample is >0. Last sample is 1. - -Algorithm from "Hard Sync Without Aliasing" by Eli Brandt (2001). -https://www.cs.cmu.edu/~eli/papers/icmc01-hardsync.pdf -*/ -void minBlepImpulse(int Z, int O, float* output); - - -template -struct MinBlepGenerator { - T buffer[2 * Z] = {}; - int bufferIndex = 0; - - /** Reordered impulse response for adjacent access, minus 1.0. - Padded at end with zeros so impulseReordered[o][2 * Z] can be loaded into a simd::float_4. +template +struct MinBlep { + /** Reordered impulse response for cubic interpolation, minus 1.0. + Z dimension has +1 padding at start (for z-1 wrap) and +4 at end (for SIMD + z+1 wrap). + Access via getImpulse() which handles o wrapping and z offset. */ - alignas(16) float impulseReordered[O][2 * Z + 4] = {}; + float impulseReordered[O][1 + 2 * Z + 4] = {}; - MinBlepGenerator() { + MinBlep() { float impulse[2 * Z][O]; dsp::minBlepImpulse(Z, O, &impulse[0][0]); - // Transpose array and pre-subtract 1. + // Fill impulseReordered with transposed data, minus 1. + // Storage index = z + 1 (to allow z = -1 access at index 0) for (int o = 0; o < O; o++) { + // z = -1: before impulse starts + impulseReordered[o][0] = -1.f; + // z = 0 to 2*Z-1: actual impulse data for (int z = 0; z < 2 * Z; z++) { - impulseReordered[o][z] = impulse[z][o] - 1.f; + impulseReordered[o][1 + z] = impulse[z][o] - 1.f; + } + // z = 2*Z to 2*Z+3: after impulse ends (for SIMD padding) + for (int z = 2 * Z; z < 2 * Z + 4; z++) { + impulseReordered[o][1 + z] = 0.f; } } } - /** Places a discontinuity with magnitude `x` at -1 < p <= 0 relative to the current frame. - For example if a square wave jumps from 1 to -1 at the subsample time 0.1 frames ago, use insertDiscontinuity(-0.1, -2.0) + /** Get pointer to impulse data, handling o wrapping and z offset. */ + const float* getImpulse(int o, int z) const { + int index = z * O + o; + z = index / O; + o = index % O; + if (o < 0) { + z -= 1; + o += O; + } + return &impulseReordered[o][z + 1]; + } + + template + struct State { + // Double buffer of length 2 * Z + T buffer[4 * Z] = {}; + int32_t bufferIndex = 0; + }; + + /** Places a discontinuity with magnitude `x` at 0 < p <= 1 relative to the current frame. + For example if a square wave will jump from 1 to -1 in 0.1 frames, use insertDiscontinuity(0.1, -2.0). + + Note: In the deprecated MinBlepGenerator, `p` was in the range (-1, 0], so add 1 to p if updating to MinBlep. */ - void insertDiscontinuity(float p, T x) { - if (!(-1 < p && p <= 0)) + template + void insertDiscontinuity(State& s, float p, T x) const { + if (!(0 < p && p <= 1)) return; - // Calculate oversampling index and fractional part - float subsample = -p * O; + // Calculate impulse array index and fractional part + float subsample = (1 - p) * O; int o = (int) subsample; - float frac = subsample - o; + float t = subsample - o; - // For each zero crossing, interpolate impulse response between oversample indices - // Compute 4 at a time with SIMD + // For each zero crossing, cubic interpolate impulse response for (int z = 0; z < 2 * Z; z += 4) { - simd::float_4 ir0 = simd::float_4::load(&impulseReordered[o][z]); - // If oversample index reaches next zero crossing, wrap to next z - simd::float_4 ir1 = simd::float_4::load((o + 1 < O) ? &impulseReordered[o + 1][z] : &impulseReordered[0][z + 1]); - simd::float_4 ir = (ir0 + (ir1 - ir0) * frac); + simd::float_4 y0 = simd::float_4::load(getImpulse(o - 1, z)); + simd::float_4 y1 = simd::float_4::load(getImpulse(o, z)); + simd::float_4 y2 = simd::float_4::load(getImpulse(o + 1, z)); + simd::float_4 y3 = simd::float_4::load(getImpulse(o + 2, z)); + simd::float_4 ir = cubicInterp(y0, y1, y2, y3, T(t)); + // Write to double buffer for (int zz = 0; zz < 4; zz++) { - buffer[(bufferIndex + z + zz) % (2 * Z)] += ir[zz] * x; + s.buffer[s.bufferIndex + z + zz] += ir[zz] * x; } } - // for (int z = 0; z < 2 * Z; z++) { - // float ir0 = impulseReordered[o][z]; - // float ir1 = (o + 1 < O) ? impulseReordered[o + 1][z] : impulseReordered[0][z + 1]; - // float ir = (ir0 + (ir1 - ir0) * frac); - // buffer[(bufferIndex + z) % (2 * Z)] += ir * x; - // } } /** Should be called every frame after inserting any discontinuities. */ - T process() { - T v = buffer[bufferIndex]; - buffer[bufferIndex] = T(0); - bufferIndex = (bufferIndex + 1) % (2 * Z); + template + T process(State& s) const { + T v = s.buffer[s.bufferIndex]; + s.buffer[s.bufferIndex] = T(0); + s.bufferIndex++; + if (s.bufferIndex >= 2 * Z) { + // Move second half of buffer to beginning + std::memcpy(s.buffer, s.buffer + 2 * Z, 2 * Z * sizeof(T)); + std::memset(s.buffer + 2 * Z, 0, 2 * Z * sizeof(T)); + s.bufferIndex = 0; + } return v; } }; +static MinBlep<16, 16> minBlep; + + template struct VCOProcessor { T phase = 0.f; @@ -183,12 +201,12 @@ struct VCOProcessor { OnePoleHighpass::State dcFilterStateTri; OnePoleHighpass::State dcFilterStateSin; - MinBlepGenerator<16, 16, T> sqrMinBlep; - MinBlepGenerator<16, 16, T> sawMinBlep; - MinBlepGenerator<16, 16, T> sinMinBlep; + MinBlep<16, 16>::State sqrMinBlep; + MinBlep<16, 16>::State sawMinBlep; + MinBlep<16, 16>::State sinMinBlep; void setSampleTime(float sampleTime) { - dcFilter.setCutoff(std::min(0.4f, 40.f * sampleTime)); + dcFilter.setCutoff(std::min(0.4f, 20.f * sampleTime)); } struct Frame { @@ -241,9 +259,9 @@ struct VCOProcessor { if (wrapM & (1 << i)) { T mask = simd::movemaskInverse(1 << i); // TODO: MinBlepGenerator::insertDiscontinuity() should handle subframes outside the range -1 < p <= 0 instead of failing silently. - float p = clamp(wrapCrossing[i] - 1.f, -1.f, 0.f); + float p = clamp(wrapCrossing[i], 0.f, 1.f); T x = mask & (2.f * syncDirection); - sqrMinBlep.insertDiscontinuity(p, x); + minBlep.insertDiscontinuity(sqrMinBlep, p, x); } } } @@ -261,9 +279,9 @@ struct VCOProcessor { for (int i = 0; i < frame.channels; i++) { if (pw & (1 << i)) { T mask = simd::movemaskInverse(1 << i); - float p = clamp(pulseCrossing[i] - 1.f, -1.f, 0.f); + float p = clamp(pulseCrossing[i], 0.f, 1.f); T x = mask & (-2.f * syncDirection); - sqrMinBlep.insertDiscontinuity(p, x); + minBlep.insertDiscontinuity(sqrMinBlep, p, x); } } } @@ -278,18 +296,19 @@ struct VCOProcessor { for (int i = 0; i < frame.channels; i++) { if (halfMask & (1 << i)) { T mask = simd::movemaskInverse(1 << i); - float p = halfCrossing[i] - 1.f; + float p = halfCrossing[i]; T x = mask & (-2.f * syncDirection); - sawMinBlep.insertDiscontinuity(p, x); + minBlep.insertDiscontinuity(sawMinBlep, p, x); } } } } // Detect sync - // Might be NAN or outside of [0, 1) range if (frame.syncEnabled) { T deltaSync = frame.sync - lastSyncValue; + // Sample position where sync crosses 0. + // Might be NAN or outside of (0, 1] range T syncCrossing = -lastSyncValue / deltaSync; lastSyncValue = frame.sync; T sync = (0.f < syncCrossing) & (syncCrossing <= 1.f) & (frame.sync >= 0.f); @@ -304,21 +323,21 @@ struct VCOProcessor { for (int i = 0; i < frame.channels; i++) { if (syncMask & (1 << i)) { T mask = simd::movemaskInverse(1 << i); - float p = syncCrossing[i] - 1.f; - T x; - // Assume that hard-syncing a square always resets it to HIGH + float p = syncCrossing[i]; if (frame.sqrEnabled || frame.triEnabled) { - x = mask & (1.f - sqrState); + // Assume that hard-syncing a square always resets it to HIGH + T x = mask & (1.f - sqrState); sqrState = simd::ifelse(mask, 1.f, sqrState); - sqrMinBlep.insertDiscontinuity(p, x); + minBlep.insertDiscontinuity(sqrMinBlep, p, x); + DEBUG("%f %f %f", p, x[i], sqrState[i]); } if (frame.sawEnabled) { - x = mask & (saw(newPhase) - saw(phase)); - sawMinBlep.insertDiscontinuity(p, x); + T x = mask & (saw(newPhase) - saw(phase)); + minBlep.insertDiscontinuity(sawMinBlep, p, x); } if (frame.sinEnabled) { - x = mask & (sin(newPhase) - sin(phase)); - sinMinBlep.insertDiscontinuity(p, x); + T x = mask & (sin(newPhase) - sin(phase)); + minBlep.insertDiscontinuity(sinMinBlep, p, x); } } } @@ -330,14 +349,14 @@ struct VCOProcessor { // Saw if (frame.sawEnabled) { frame.saw = saw(phase); - frame.saw += sawMinBlep.process(); + frame.saw += minBlep.process(sawMinBlep); frame.saw = dcFilter.process(dcFilterStateSaw, frame.saw); } // Square if (frame.sqrEnabled || frame.triEnabled) { frame.sqr = sqrState; - frame.sqr += sqrMinBlep.process(); + frame.sqr += minBlep.process(sqrMinBlep); T triSqr = frame.sqr; frame.sqr = dcFilter.process(dcFilterStateSqr, frame.sqr); @@ -359,7 +378,7 @@ struct VCOProcessor { // Sin if (frame.sinEnabled) { frame.sin = sin(phase); - frame.sin += sinMinBlep.process(); + frame.sin += minBlep.process(sinMinBlep); frame.sin = dcFilter.process(dcFilterStateSin, frame.sin); } } @@ -377,8 +396,7 @@ struct VCOProcessor { return 1 - 4 * simd::fmin(simd::fabs(phase - 0.25f), simd::fabs(phase - 1.25f)); } static T sin(T phase) { - return sin2pi_fast(phase); - // return simd::sin(2.f * M_PI * phase); + return sin_pi_9(2 * phase - 1); } }; From eae28b72c040b9ece5fb85f634f6c826c79f7399 Mon Sep 17 00:00:00 2001 From: Andrew Belt Date: Wed, 31 Dec 2025 07:35:00 -0500 Subject: [PATCH 04/18] VCO: Correctly handle events like phase crossing, pulse width crossing, 0.5 saw crossing, and sync in any order. WIP. --- src/VCO.cpp | 321 ++++++++++++++++++++++++++++------------------------ 1 file changed, 176 insertions(+), 145 deletions(-) diff --git a/src/VCO.cpp b/src/VCO.cpp index 5cb955e..bd5c7e1 100644 --- a/src/VCO.cpp +++ b/src/VCO.cpp @@ -17,7 +17,7 @@ inline T sin_pi_9(T x) { t is the fractional position between y1 and y2. */ template -T cubicInterp(T y0, T y1, T y2, T y3, T t) { +T catmullRomInterpolate(T y0, T y1, T y2, T y3, T t) { T t2 = t * t; T t3 = t2 * t; return y1 + T(0.5) * t * (y2 - y0) @@ -90,11 +90,13 @@ struct OnePoleHighpass : OnePoleLowpass { }; +/** Holds a precomputed minBLEP impulse response, reordered to efficiently insert into an audio buffer. +*/ template struct MinBlep { /** Reordered impulse response for cubic interpolation, minus 1.0. Z dimension has +1 padding at start (for z-1 wrap) and +4 at end (for SIMD + z+1 wrap). - Access via getImpulse() which handles o wrapping and z offset. + Access via getImpulse() which handles O wrapping and Z offset. */ float impulseReordered[O][1 + 2 * Z + 4] = {}; @@ -102,7 +104,7 @@ struct MinBlep { float impulse[2 * Z][O]; dsp::minBlepImpulse(Z, O, &impulse[0][0]); - // Fill impulseReordered with transposed data, minus 1. + // Fill impulseReordered with transposed and pre-subtracted data // Storage index = z + 1 (to allow z = -1 access at index 0) for (int o = 0; o < O; o++) { // z = -1: before impulse starts @@ -120,30 +122,25 @@ struct MinBlep { /** Get pointer to impulse data, handling o wrapping and z offset. */ const float* getImpulse(int o, int z) const { - int index = z * O + o; - z = index / O; - o = index % O; + z += o / O; + o = o % O; if (o < 0) { z -= 1; o += O; } + // assert(0 <= o && o < O); + // assert(-1 <= z && z < 2 * Z + 4); return &impulseReordered[o][z + 1]; } - template - struct State { - // Double buffer of length 2 * Z - T buffer[4 * Z] = {}; - int32_t bufferIndex = 0; - }; - - /** Places a discontinuity with magnitude `x` at 0 < p <= 1 relative to the current frame. - For example if a square wave will jump from 1 to -1 in 0.1 frames, use insertDiscontinuity(0.1, -2.0). + /** Places a discontinuity at 0 < p <= 1 relative to the current frame. + `out` must have enough space to write 2*Z floats, spaced by `stride` floats. + `p` is the subsample position to insert a discontinuity of `magnitude`. + For example if a square wave will jump from 1 to -1 in 0.1 frames, use insertDiscontinuity(out, 1, 0.1f, -2.f). Note: In the deprecated MinBlepGenerator, `p` was in the range (-1, 0], so add 1 to p if updating to MinBlep. */ - template - void insertDiscontinuity(State& s, float p, T x) const { + void insertDiscontinuity(float* out, int stride, float p, float magnitude) const { if (!(0 < p && p <= 1)) return; @@ -152,35 +149,61 @@ struct MinBlep { int o = (int) subsample; float t = subsample - o; - // For each zero crossing, cubic interpolate impulse response + // For each zero crossing, interpolate impulse response from oversample points + using simd::float_4; for (int z = 0; z < 2 * Z; z += 4) { - simd::float_4 y0 = simd::float_4::load(getImpulse(o - 1, z)); - simd::float_4 y1 = simd::float_4::load(getImpulse(o, z)); - simd::float_4 y2 = simd::float_4::load(getImpulse(o + 1, z)); - simd::float_4 y3 = simd::float_4::load(getImpulse(o + 2, z)); - simd::float_4 ir = cubicInterp(y0, y1, y2, y3, T(t)); - - // Write to double buffer - for (int zz = 0; zz < 4; zz++) { - s.buffer[s.bufferIndex + z + zz] += ir[zz] * x; + float_4 y0 = float_4::load(getImpulse(o - 1, z)); + float_4 y1 = float_4::load(getImpulse(o, z)); + float_4 y2 = float_4::load(getImpulse(o + 1, z)); + float_4 y3 = float_4::load(getImpulse(o + 2, z)); + float_4 y = catmullRomInterpolate(y0, y1, y2, y3, float_4(t)); + y *= magnitude; + + // Write all 4 samples to buffer + for (int zi = 0; zi < 4; zi++) { + out[(z + zi) * stride] += y[zi]; } } } +}; - /** Should be called every frame after inserting any discontinuities. */ - template - T process(State& s) const { - T v = s.buffer[s.bufferIndex]; - s.buffer[s.bufferIndex] = T(0); - s.bufferIndex++; - if (s.bufferIndex >= 2 * Z) { + +/** Buffer that allows reading/writing up to N future elements contiguously. +*/ +template +struct MinBlepBuffer { + T buffer[2 * N] = {}; + int32_t bufferIndex = 0; + + T* startData() { + return &buffer[bufferIndex]; + } + + /** Returns the current element and advances the buffer. + */ + T shift() { + T v = buffer[bufferIndex]; + bufferIndex++; + if (bufferIndex >= N) { // Move second half of buffer to beginning - std::memcpy(s.buffer, s.buffer + 2 * Z, 2 * Z * sizeof(T)); - std::memset(s.buffer + 2 * Z, 0, 2 * Z * sizeof(T)); - s.bufferIndex = 0; + std::memcpy(buffer, buffer + N, N * sizeof(T)); + std::memset(buffer + N, 0, N * sizeof(T)); + bufferIndex = 0; } return v; } + + /** Copies `n` elements to `out` and advances the buffer. + */ + void shiftBuffer(T* out, size_t n) { + std::memcpy(out, buffer + bufferIndex, n * sizeof(T)); + bufferIndex += n; + if (bufferIndex >= N) { + std::memcpy(buffer, buffer + N, N * sizeof(T)); + std::memset(buffer + N, 0, N * sizeof(T)); + bufferIndex = 0; + } + } }; @@ -190,9 +213,8 @@ static MinBlep<16, 16> minBlep; template struct VCOProcessor { T phase = 0.f; - T lastSyncValue = 0.f; + T lastSync = 0.f; T syncDirection = 1.f; - T sqrState = 1.f; T triFilterState = 0.f; OnePoleHighpass dcFilter; @@ -201,9 +223,9 @@ struct VCOProcessor { OnePoleHighpass::State dcFilterStateTri; OnePoleHighpass::State dcFilterStateSin; - MinBlep<16, 16>::State sqrMinBlep; - MinBlep<16, 16>::State sawMinBlep; - MinBlep<16, 16>::State sinMinBlep; + MinBlepBuffer<16*2, T> sqrMinBlep; + MinBlepBuffer<16*2, T> sawMinBlep; + MinBlepBuffer<16*2, T> sinMinBlep; void setSampleTime(float sampleTime) { dcFilter.setCutoff(std::min(0.4f, 20.f * sampleTime)); @@ -231,143 +253,153 @@ struct VCOProcessor { T sin = 0.f; }; - void process(Frame& frame, float deltaTime) { - // Advance phase - T deltaPhase = simd::clamp(frame.freq * deltaTime, 0.f, 0.49f); + void process(Frame& frame, float sampleTime) { + // Compute deltaPhase for full frame + T deltaPhase = simd::clamp(frame.freq * sampleTime, 0.f, 0.49f); if (frame.soft) { - // Reverse direction deltaPhase *= syncDirection; } else { - // Reset back to forward syncDirection = 1.f; } - phase += deltaPhase; - - // Wrap phase - T phaseFloor = simd::floor(phase); - phase -= phaseFloor; - if (frame.sqrEnabled || frame.triEnabled) { - // Jump sqr when phase crosses 1, or crosses 0 if running backwards - T wrapMask = (phaseFloor != 0.f); - int wrapM = simd::movemask(wrapMask); - if (wrapM) { - T wrapPhase = (syncDirection == -1.f) & 1.f; - T wrapCrossing = (wrapPhase - (phase - deltaPhase)) / deltaPhase; - for (int i = 0; i < frame.channels; i++) { - if (wrapM & (1 << i)) { - T mask = simd::movemaskInverse(1 << i); - // TODO: MinBlepGenerator::insertDiscontinuity() should handle subframes outside the range -1 < p <= 0 instead of failing silently. - float p = clamp(wrapCrossing[i], 0.f, 1.f); - T x = mask & (2.f * syncDirection); - minBlep.insertDiscontinuity(sqrMinBlep, p, x); - } + T prevPhase = phase; + const float pwMin = 0.01f; + T pulseWidth = simd::clamp(frame.pulseWidth, pwMin, 1.f - pwMin); + + // Inserts minBLEP for each channel where mask is true. + // Serial but does nothing if mask is all zero. + auto insertMinBlep = [&](T mask, T p, T magnitude, MinBlepBuffer<16*2, T>& minBlepBuffer) { + int m = simd::movemask(mask); + if (!m) + return; + float* buffer = (float*) minBlepBuffer.startData(); + for (int i = 0; i < frame.channels; i++) { + if (m & (1 << i)) { + minBlep.insertDiscontinuity(&buffer[i], 4, p[i], magnitude[i]); } } - sqrState = simd::ifelse(wrapMask, syncDirection, sqrState); - - // Pulse width - const float pwMin = 0.01f; - T pulseWidth = simd::clamp(frame.pulseWidth, pwMin, 1.f - pwMin); - - // Jump sqr when crossing `pulseWidth` - T pwMask = (syncDirection == sqrState) & ((syncDirection == 1.f) ^ (phase < pulseWidth)); - int pw = simd::movemask(pwMask); - if (pw) { - T pulseCrossing = (pulseWidth - (phase - deltaPhase)) / deltaPhase; - for (int i = 0; i < frame.channels; i++) { - if (pw & (1 << i)) { - T mask = simd::movemaskInverse(1 << i); - float p = clamp(pulseCrossing[i], 0.f, 1.f); - T x = mask & (-2.f * syncDirection); - minBlep.insertDiscontinuity(sqrMinBlep, p, x); - } - } + }; + + // Computes subsample time where phase crosses threshold. + auto getCrossing = [](T threshold, T startPhase, T endPhase, T startSubsample, T endSubsample) -> T { + // Wrap threshold mod 1 to be in (startPhase, startPhase + 1] + threshold -= simd::floor(threshold - startPhase); + // Map to (0, 1] + T p = (threshold - startPhase) / (endPhase - startPhase); + // Map to (startSubsample, endSubsample] + return startSubsample + p * (endSubsample - startSubsample); + }; + + // Processes wrap/pulse/saw crossings between startPhase and endPhase. + // startSubsample and endSubsample define the time range within the frame. + // channelMask limits processing to specific channels. + auto processCrossings = [&](T startPhase, T endPhase, T startSubsample, T endSubsample, T channelMask) { + if (frame.sqrEnabled || frame.triEnabled) { + // Wrap crossing (phase crosses 0 mod 1) + T wrapSubsample = getCrossing(1.f, startPhase, endPhase, startSubsample, endSubsample); + T wrapMask = channelMask & (startSubsample < wrapSubsample) & (wrapSubsample <= endSubsample); + insertMinBlep(wrapMask, wrapSubsample, 2.f * syncDirection, sqrMinBlep); + + // Pulse width crossing + T pulseSubsample = getCrossing(pulseWidth, startPhase, endPhase, startSubsample, endSubsample); + T pulseMask = channelMask & (startSubsample < pulseSubsample) & (pulseSubsample <= endSubsample); + insertMinBlep(pulseMask, pulseSubsample, -2.f * syncDirection, sqrMinBlep); } - sqrState = simd::ifelse(pwMask, -syncDirection, sqrState); - } - if (frame.sawEnabled) { - // Jump saw when crossing 0.5 - T halfCrossing = (0.5f - (phase - deltaPhase)) / deltaPhase; - int halfMask = simd::movemask((0 < halfCrossing) & (halfCrossing <= 1.f)); - if (halfMask) { - for (int i = 0; i < frame.channels; i++) { - if (halfMask & (1 << i)) { - T mask = simd::movemaskInverse(1 << i); - float p = halfCrossing[i]; - T x = mask & (-2.f * syncDirection); - minBlep.insertDiscontinuity(sawMinBlep, p, x); - } - } + if (frame.sawEnabled) { + // Saw crossing at 0.5 + T sawSubsample = getCrossing(0.5f, startPhase, endPhase, startSubsample, endSubsample); + T sawMask = channelMask & (startSubsample < sawSubsample) & (sawSubsample <= endSubsample); + insertMinBlep(sawMask, sawSubsample, -2.f * syncDirection, sawMinBlep); } + }; + + // Main logic + if (!frame.syncEnabled) { + // No sync. Process full frame + T endPhase = prevPhase + deltaPhase; + processCrossings(prevPhase, endPhase, 0.f, 1.f, T::mask()); + phase = endPhase; } + else { + // Compute sync subsample position + T deltaSync = frame.sync - lastSync; + T syncSubsample = -lastSync / deltaSync; + lastSync = frame.sync; + T syncOccurred = (0.f < syncSubsample) & (syncSubsample <= 1.f) & (frame.sync >= 0.f); + T noSync = ~syncOccurred; + + if (simd::movemask(noSync)) { + // No sync for these channels. Process full frame + T endPhase = prevPhase + deltaPhase; + processCrossings(prevPhase, endPhase, 0.f, 1.f, noSync); + phase = simd::ifelse(noSync, endPhase, phase); + } + + if (simd::movemask(syncOccurred)) { + // Process crossings before sync + T syncPhase = prevPhase + deltaPhase * syncSubsample; + processCrossings(prevPhase, syncPhase, 0.f, syncSubsample, syncOccurred); - // Detect sync - if (frame.syncEnabled) { - T deltaSync = frame.sync - lastSyncValue; - // Sample position where sync crosses 0. - // Might be NAN or outside of (0, 1] range - T syncCrossing = -lastSyncValue / deltaSync; - lastSyncValue = frame.sync; - T sync = (0.f < syncCrossing) & (syncCrossing <= 1.f) & (frame.sync >= 0.f); - int syncMask = simd::movemask(sync); - if (syncMask) { if (frame.soft) { - syncDirection = simd::ifelse(sync, -syncDirection, syncDirection); + // Soft sync: Reverse direction, continue from syncPhase + syncDirection = simd::ifelse(syncOccurred, -syncDirection, syncDirection); + T endPhase = syncPhase + (-deltaPhase) * (1.f - syncSubsample); + processCrossings(syncPhase, endPhase, syncSubsample, 1.f, syncOccurred); + phase = simd::ifelse(syncOccurred, endPhase, phase); } else { - T newPhase = simd::ifelse(sync, (1.f - syncCrossing) * deltaPhase, phase); - // Insert minBLEP for sync - for (int i = 0; i < frame.channels; i++) { - if (syncMask & (1 << i)) { - T mask = simd::movemaskInverse(1 << i); - float p = syncCrossing[i]; - if (frame.sqrEnabled || frame.triEnabled) { - // Assume that hard-syncing a square always resets it to HIGH - T x = mask & (1.f - sqrState); - sqrState = simd::ifelse(mask, 1.f, sqrState); - minBlep.insertDiscontinuity(sqrMinBlep, p, x); - DEBUG("%f %f %f", p, x[i], sqrState[i]); - } - if (frame.sawEnabled) { - T x = mask & (saw(newPhase) - saw(phase)); - minBlep.insertDiscontinuity(sawMinBlep, p, x); - } - if (frame.sinEnabled) { - T x = mask & (sin(newPhase) - sin(phase)); - minBlep.insertDiscontinuity(sinMinBlep, p, x); - } - } + // Hard sync: reset to 0, insert discontinuities + if (frame.sqrEnabled || frame.triEnabled) { + // Sqr jumps from current state to +1 + T sqrJump = (syncPhase - simd::floor(syncPhase) < pulseWidth); + insertMinBlep(syncOccurred & sqrJump, syncSubsample, 2.f, sqrMinBlep); + } + if (frame.triEnabled) { + // TODO Insert minBLEP to Tri + } + if (frame.sawEnabled) { + // Saw jumps from saw(syncPhase) to saw(0) = 0 + insertMinBlep(syncOccurred, syncSubsample, -saw(syncPhase), sawMinBlep); + } + if (frame.sinEnabled) { + // sin jumps from sin(syncPhase) to sin(0) = 0 + insertMinBlep(syncOccurred, syncSubsample, -sin(syncPhase), sinMinBlep); } - phase = newPhase; + + // Process crossings after sync (starting from phase 0) + T endPhase = deltaPhase * (1.f - syncSubsample); + processCrossings(0.f, endPhase, syncSubsample, 1.f, syncOccurred); + phase = simd::ifelse(syncOccurred, endPhase, phase); } } } - // Saw + // Wrap phase to [0, 1) + phase -= simd::floor(phase); + + // Generate outputs if (frame.sawEnabled) { frame.saw = saw(phase); - frame.saw += minBlep.process(sawMinBlep); + frame.saw += sawMinBlep.shift(); frame.saw = dcFilter.process(dcFilterStateSaw, frame.saw); } - // Square if (frame.sqrEnabled || frame.triEnabled) { - frame.sqr = sqrState; - frame.sqr += minBlep.process(sqrMinBlep); + T sqrHigh = (phase < pulseWidth) ^ syncDirection; + frame.sqr = simd::ifelse(sqrHigh, 1.f, -1.f); + frame.sqr += sqrMinBlep.shift(); T triSqr = frame.sqr; frame.sqr = dcFilter.process(dcFilterStateSqr, frame.sqr); - // Tri if (frame.triEnabled) { // Integrate square wave const float triShape = 0.2f; - T triFreq = deltaTime * triShape * frame.freq; - // T alpha = 1.f - simd::exp(-2.f * M_PI * triFreq); + T triFreq = sampleTime * triShape * frame.freq; // Use bilinear transform to derive alpha T alpha = 1 / (1 + 1 / (M_PI * triFreq)); + // T alpha = 1.f - simd::exp(-2.f * M_PI * triFreq); triFilterState += alpha * (triSqr - triFilterState); // Apply gain to roughly have unit amplitude at 0.5 pulseWidth. Depends on triShape. frame.tri = triFilterState * 6.6f; @@ -375,10 +407,9 @@ struct VCOProcessor { } } - // Sin if (frame.sinEnabled) { frame.sin = sin(phase); - frame.sin += minBlep.process(sinMinBlep); + frame.sin += sinMinBlep.shift(); frame.sin = dcFilter.process(dcFilterStateSin, frame.sin); } } From a3bc9a9e4b453c2fd76e2772d8402e028c59e3b2 Mon Sep 17 00:00:00 2001 From: Andrew Belt Date: Thu, 1 Jan 2026 23:52:41 -0500 Subject: [PATCH 05/18] VCO: Fix square glitching when pulse width changes. --- src/VCO.cpp | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/src/VCO.cpp b/src/VCO.cpp index bd5c7e1..cf0f2b7 100644 --- a/src/VCO.cpp +++ b/src/VCO.cpp @@ -213,8 +213,12 @@ static MinBlep<16, 16> minBlep; template struct VCOProcessor { T phase = 0.f; - T lastSync = 0.f; + /** 1 for forward, -1 for backward. */ T syncDirection = 1.f; + T lastSync = 0.f; + /** 1 or -1 */ + T lastSqrState = 1.f; + /** Leaky integrator result. */ T triFilterState = 0.f; OnePoleHighpass dcFilter; @@ -315,7 +319,14 @@ struct VCOProcessor { } }; - // Main logic + // Check if square value changed due to pulseWidth changing since last frame + if (frame.sqrEnabled || frame.triEnabled) { + T sqrState = simd::ifelse(prevPhase < pulseWidth, 1.f, -1.f); + T changed = (sqrState != lastSqrState); + T magnitude = (sqrState - lastSqrState) * syncDirection; + insertMinBlep(changed, 1e-6f, magnitude, sqrMinBlep); + } + if (!frame.syncEnabled) { // No sync. Process full frame T endPhase = prevPhase + deltaPhase; @@ -387,8 +398,8 @@ struct VCOProcessor { } if (frame.sqrEnabled || frame.triEnabled) { - T sqrHigh = (phase < pulseWidth) ^ syncDirection; - frame.sqr = simd::ifelse(sqrHigh, 1.f, -1.f); + frame.sqr = simd::ifelse(phase < pulseWidth, 1.f, -1.f); + lastSqrState = frame.sqr; frame.sqr += sqrMinBlep.shift(); T triSqr = frame.sqr; frame.sqr = dcFilter.process(dcFilterStateSqr, frame.sqr); From dec738ffde090bd4d912218c7551739040604dd1 Mon Sep 17 00:00:00 2001 From: Andrew Belt Date: Fri, 2 Jan 2026 01:59:46 -0500 Subject: [PATCH 06/18] VCO: Fix minBLEP cross detection for backwards phase and narrow pulse width. --- src/VCO.cpp | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/VCO.cpp b/src/VCO.cpp index cf0f2b7..e987b97 100644 --- a/src/VCO.cpp +++ b/src/VCO.cpp @@ -286,12 +286,13 @@ struct VCOProcessor { }; // Computes subsample time where phase crosses threshold. - auto getCrossing = [](T threshold, T startPhase, T endPhase, T startSubsample, T endSubsample) -> T { - // Wrap threshold mod 1 to be in (startPhase, startPhase + 1] - threshold -= simd::floor(threshold - startPhase); - // Map to (0, 1] - T p = (threshold - startPhase) / (endPhase - startPhase); - // Map to (startSubsample, endSubsample] + auto getCrossing = [](T thresholdPhase, T startPhase, T endPhase, T startSubsample, T endSubsample) -> T { + T delta = endPhase - startPhase; + T diff = thresholdPhase - startPhase; + // Forward: wrap thresholdPhase to (startPhase, startPhase+1] + // Backward: wrap thresholdPhase to [startPhase-1, startPhase) + thresholdPhase -= simd::ifelse(delta >= 0.f, simd::floor(diff), simd::ceil(diff)); + T p = (thresholdPhase - startPhase) / delta; return startSubsample + p * (endSubsample - startSubsample); }; @@ -300,19 +301,19 @@ struct VCOProcessor { // channelMask limits processing to specific channels. auto processCrossings = [&](T startPhase, T endPhase, T startSubsample, T endSubsample, T channelMask) { if (frame.sqrEnabled || frame.triEnabled) { - // Wrap crossing (phase crosses 0 mod 1) + // Insert minBLEP to square when phase crosses 0 (mod 1) T wrapSubsample = getCrossing(1.f, startPhase, endPhase, startSubsample, endSubsample); T wrapMask = channelMask & (startSubsample < wrapSubsample) & (wrapSubsample <= endSubsample); insertMinBlep(wrapMask, wrapSubsample, 2.f * syncDirection, sqrMinBlep); - // Pulse width crossing + // Insert minBLEP to square when phase crosses pulse width T pulseSubsample = getCrossing(pulseWidth, startPhase, endPhase, startSubsample, endSubsample); T pulseMask = channelMask & (startSubsample < pulseSubsample) & (pulseSubsample <= endSubsample); insertMinBlep(pulseMask, pulseSubsample, -2.f * syncDirection, sqrMinBlep); } if (frame.sawEnabled) { - // Saw crossing at 0.5 + // Insert minBLEP to saw when crossing 0.5 T sawSubsample = getCrossing(0.5f, startPhase, endPhase, startSubsample, endSubsample); T sawMask = channelMask & (startSubsample < sawSubsample) & (sawSubsample <= endSubsample); insertMinBlep(sawMask, sawSubsample, -2.f * syncDirection, sawMinBlep); From ebfd4fc64a2264f56e208ef3872f86b24fa3fc6c Mon Sep 17 00:00:00 2001 From: Andrew Belt Date: Fri, 2 Jan 2026 04:30:33 -0500 Subject: [PATCH 07/18] VCO: Fix sine phase. --- src/VCO.cpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/VCO.cpp b/src/VCO.cpp index e987b97..65d7e20 100644 --- a/src/VCO.cpp +++ b/src/VCO.cpp @@ -324,7 +324,7 @@ struct VCOProcessor { if (frame.sqrEnabled || frame.triEnabled) { T sqrState = simd::ifelse(prevPhase < pulseWidth, 1.f, -1.f); T changed = (sqrState != lastSqrState); - T magnitude = (sqrState - lastSqrState) * syncDirection; + T magnitude = sqrState - lastSqrState; insertMinBlep(changed, 1e-6f, magnitude, sqrMinBlep); } @@ -339,7 +339,8 @@ struct VCOProcessor { T deltaSync = frame.sync - lastSync; T syncSubsample = -lastSync / deltaSync; lastSync = frame.sync; - T syncOccurred = (0.f < syncSubsample) & (syncSubsample <= 1.f) & (frame.sync >= 0.f); + // Check if sync rises through 0 + T syncOccurred = (0.f < syncSubsample) & (syncSubsample <= 1.f) & (frame.sync >= lastSync); T noSync = ~syncOccurred; if (simd::movemask(noSync)) { @@ -353,6 +354,8 @@ struct VCOProcessor { // Process crossings before sync T syncPhase = prevPhase + deltaPhase * syncSubsample; processCrossings(prevPhase, syncPhase, 0.f, syncSubsample, syncOccurred); + // Wrap sync phase + syncPhase -= simd::floor(syncPhase); if (frame.soft) { // Soft sync: Reverse direction, continue from syncPhase @@ -362,10 +365,10 @@ struct VCOProcessor { phase = simd::ifelse(syncOccurred, endPhase, phase); } else { - // Hard sync: reset to 0, insert discontinuities + // Hard sync: Reset phase from syncPhase to 0 at syncSubsample, insert discontinuities if (frame.sqrEnabled || frame.triEnabled) { - // Sqr jumps from current state to +1 - T sqrJump = (syncPhase - simd::floor(syncPhase) < pulseWidth); + // Check if square jumps from -1 to +1 + T sqrJump = (syncPhase >= pulseWidth); insertMinBlep(syncOccurred & sqrJump, syncSubsample, 2.f, sqrMinBlep); } if (frame.triEnabled) { @@ -439,7 +442,7 @@ struct VCOProcessor { return 1 - 4 * simd::fmin(simd::fabs(phase - 0.25f), simd::fabs(phase - 1.25f)); } static T sin(T phase) { - return sin_pi_9(2 * phase - 1); + return sin_pi_9(1 - 2 * phase); } }; From c8ccf9162b1d06ca17fcca2cef2351ad527407c0 Mon Sep 17 00:00:00 2001 From: Andrew Belt Date: Fri, 2 Jan 2026 04:44:34 -0500 Subject: [PATCH 08/18] VCO: Fix sync triggering when falling through 0. --- src/VCO.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/VCO.cpp b/src/VCO.cpp index 65d7e20..6a612bc 100644 --- a/src/VCO.cpp +++ b/src/VCO.cpp @@ -340,7 +340,7 @@ struct VCOProcessor { T syncSubsample = -lastSync / deltaSync; lastSync = frame.sync; // Check if sync rises through 0 - T syncOccurred = (0.f < syncSubsample) & (syncSubsample <= 1.f) & (frame.sync >= lastSync); + T syncOccurred = (0.f < syncSubsample) & (syncSubsample <= 1.f) & (deltaSync >= 0.f); T noSync = ~syncOccurred; if (simd::movemask(noSync)) { From 6e908fd93b716f1249d2de4fee7d51677bdbe14b Mon Sep 17 00:00:00 2001 From: Andrew Belt Date: Sun, 4 Jan 2026 02:14:05 -0500 Subject: [PATCH 09/18] VCO: Use linear interpolation instead of Catmull-Rom. Generate minBLEP impulse with doubles instead of floats. --- src/VCO.cpp | 166 ++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 136 insertions(+), 30 deletions(-) diff --git a/src/VCO.cpp b/src/VCO.cpp index 6a612bc..71f6c0a 100644 --- a/src/VCO.cpp +++ b/src/VCO.cpp @@ -13,18 +13,6 @@ inline T sin_pi_9(T x) { return x * (T(1) - x2) * (T(3.141521108990) + x2 * (T(-2.024773594411) + x2 * (T(0.517493161073) + x2 * T(-0.063691343423)))); } -/** Catmull-Rom cubic interpolation -t is the fractional position between y1 and y2. -*/ -template -T catmullRomInterpolate(T y0, T y1, T y2, T y3, T t) { - T t2 = t * t; - T t3 = t2 * t; - return y1 + T(0.5) * t * (y2 - y0) - + t2 * (y0 - T(2.5)*y1 + T(2)*y2 - T(0.5)*y3) - + t3 * (T(-0.5)*y0 + T(1.5)*y1 - T(1.5)*y2 + T(0.5)*y3); -} - /** One-pole low-pass filter, exponential moving average. -6 dB/octave slope. Useful for leaky integrators and smoothing control signals. @@ -90,37 +78,157 @@ struct OnePoleHighpass : OnePoleLowpass { }; +/** Simple radix-2 Cooley-Tukey FFT. n must be a power of 2. */ +template +void fft(std::complex* x, int n, bool inverse = false) { + // Bit reversal permutation + for (int i = 1, j = 0; i < n; i++) { + int bit = n >> 1; + for (; j & bit; bit >>= 1) + j ^= bit; + j ^= bit; + if (i < j) + std::swap(x[i], x[j]); + } + + // Cooley-Tukey butterflies + for (int len = 2; len <= n; len <<= 1) { + T ang = (inverse ? 1 : -1) * 2 * M_PI / len; + std::complex wn(std::cos(ang), std::sin(ang)); + for (int i = 0; i < n; i += len) { + std::complex w(1); + for (int j = 0; j < len / 2; j++) { + std::complex u = x[i + j]; + std::complex v = x[i + j + len / 2] * w; + x[i + j] = u + v; + x[i + j + len / 2] = u - v; + w *= wn; + } + } + } + + if (inverse) { + for (int i = 0; i < n; i++) + x[i] /= T(n); + } +} + + +template +inline T blackmanHarris(T p) { + return + + T(0.35875) + - T(0.48829) * simd::cos(T(2 * M_PI) * p) + + T(0.14128) * simd::cos(T(4 * M_PI) * p) + - T(0.01168) * simd::cos(T(6 * M_PI) * p); +} + + +/** Computes the minimum-phase bandlimited step (minBLEP), an impulse step response that rises from 0 to 1. +Z: number of zero-crossings on each side of the original symmetric sync signal +O: oversample factor +output: must be length `(2 * Z) * O`. + +Algorithm from "Hard Sync Without Aliasing" by Eli Brandt (2001). +https://www.cs.cmu.edu/~eli/papers/icmc01-hardsync.pdf +*/ +inline void minBlepImpulse(int z, int o, float* output) { + // Symmetric sinc impulse with `z` zero-crossings on each side + int n = 2 * z * o; + std::complex* x = new std::complex[n]; + for (int i = 0; i < n; i++) { + double p = (double) i / o - z; + x[i] = (p == 0.0) ? 1.0 : std::sin(M_PI * p) / (M_PI * p); + } + + // Apply window + for (int i = 0; i < n; i++) { + x[i] *= blackmanHarris(i / (double) (n - 1)); + } + + // Reconstruct impulse response with minimum phase + fft(x, n); + + // Take log of magnitude and set phase to zero. + // Limit frequency bin to e^-10 + for (int i = 0; i < n; i++) { + x[i] = std::log(std::abs(x[i])); + } + + + // Transform to cepstral domain. + fft(x, n, true); + + // Apply minimum-phase window in cepstral domain + // Double positive quefrencies, zero negative quefrencies + for (int i = 1; i < n / 2; i++) { + x[i] *= 2.0; + } + for (int i = n / 2; i < n; i++) { + x[i] = 0.0; + } + + // Transform back to frequency domain + fft(x, n); + + // Take complex exponential of each bin + for (int i = 0; i < n; i++) { + x[i] = std::exp(x[i]); + } + + // Transform to time domain + fft(x, n, true); + + // Integrate + double total = 0.0; + for (int i = 0; i < n; i++) { + total += x[i].real(); + x[i] = total; + } + + // Renormalize so last sample is 1 + for (int i = 0; i < n; i++) { + output[i] = x[i].real() / total; + } + + FILE* f = fopen("plugins/Fundamental/minblep.txt", "w"); + DEFER({fclose(f);}); + for (int i = 0; i < n; i++) { + fprintf(f, "%.12f\n", output[i]); + } + + delete[] x; +} + + /** Holds a precomputed minBLEP impulse response, reordered to efficiently insert into an audio buffer. */ template struct MinBlep { - /** Reordered impulse response for cubic interpolation, minus 1.0. - Z dimension has +1 padding at start (for z-1 wrap) and +4 at end (for SIMD + z+1 wrap). - Access via getImpulse() which handles O wrapping and Z offset. + /** Reordered impulse response for linear interpolation, minus 1.0. + Z dimension has +4 padding at end for SIMD and o+1 wrap. + Access via getImpulse() which handles O wrapping. */ - float impulseReordered[O][1 + 2 * Z + 4] = {}; + float impulseReordered[O][2 * Z + 4] = {}; MinBlep() { float impulse[2 * Z][O]; - dsp::minBlepImpulse(Z, O, &impulse[0][0]); + minBlepImpulse(Z, O, &impulse[0][0]); // Fill impulseReordered with transposed and pre-subtracted data - // Storage index = z + 1 (to allow z = -1 access at index 0) for (int o = 0; o < O; o++) { - // z = -1: before impulse starts - impulseReordered[o][0] = -1.f; // z = 0 to 2*Z-1: actual impulse data for (int z = 0; z < 2 * Z; z++) { - impulseReordered[o][1 + z] = impulse[z][o] - 1.f; + impulseReordered[o][z] = impulse[z][o] - 1.f; } // z = 2*Z to 2*Z+3: after impulse ends (for SIMD padding) for (int z = 2 * Z; z < 2 * Z + 4; z++) { - impulseReordered[o][1 + z] = 0.f; + impulseReordered[o][z] = 0.f; } } } - /** Get pointer to impulse data, handling o wrapping and z offset. */ + /** Get pointer to impulse data, handling o wrapping. */ const float* getImpulse(int o, int z) const { z += o / O; o = o % O; @@ -129,8 +237,8 @@ struct MinBlep { o += O; } // assert(0 <= o && o < O); - // assert(-1 <= z && z < 2 * Z + 4); - return &impulseReordered[o][z + 1]; + // assert(0 <= z && z < 2 * Z + 4); + return &impulseReordered[o][z]; } /** Places a discontinuity at 0 < p <= 1 relative to the current frame. @@ -149,14 +257,12 @@ struct MinBlep { int o = (int) subsample; float t = subsample - o; - // For each zero crossing, interpolate impulse response from oversample points - using simd::float_4; + // For each zero crossing, linearly interpolate impulse response from oversample points for (int z = 0; z < 2 * Z; z += 4) { - float_4 y0 = float_4::load(getImpulse(o - 1, z)); + using simd::float_4; float_4 y1 = float_4::load(getImpulse(o, z)); float_4 y2 = float_4::load(getImpulse(o + 1, z)); - float_4 y3 = float_4::load(getImpulse(o + 2, z)); - float_4 y = catmullRomInterpolate(y0, y1, y2, y3, float_4(t)); + float_4 y = y1 + t * (y2 - y1); y *= magnitude; // Write all 4 samples to buffer From 2f3892bc71f3662db642dd549af90ceab3c4f997 Mon Sep 17 00:00:00 2001 From: Andrew Belt Date: Mon, 5 Jan 2026 01:33:51 -0500 Subject: [PATCH 10/18] VCO: Make triangle output based on phase, not a square integrator. Add minBLAMP impulse to MinBlep class. --- src/VCO.cpp | 186 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 106 insertions(+), 80 deletions(-) diff --git a/src/VCO.cpp b/src/VCO.cpp index 71f6c0a..fda2f17 100644 --- a/src/VCO.cpp +++ b/src/VCO.cpp @@ -179,22 +179,16 @@ inline void minBlepImpulse(int z, int o, float* output) { // Transform to time domain fft(x, n, true); - // Integrate + // Integrate. First sample x[0] should be 0. double total = 0.0; for (int i = 0; i < n; i++) { - total += x[i].real(); - x[i] = total; + output[i] = total; + total += x[i].real() / o; } - // Renormalize so last sample is 1 + // Renormalize so last virtual sample x[n] should be exactly 1 for (int i = 0; i < n; i++) { - output[i] = x[i].real() / total; - } - - FILE* f = fopen("plugins/Fundamental/minblep.txt", "w"); - DEFER({fclose(f);}); - for (int i = 0; i < n; i++) { - fprintf(f, "%.12f\n", output[i]); + output[i] /= total; } delete[] x; @@ -207,61 +201,76 @@ template struct MinBlep { /** Reordered impulse response for linear interpolation, minus 1.0. Z dimension has +4 padding at end for SIMD and o+1 wrap. - Access via getImpulse() which handles O wrapping. */ float impulseReordered[O][2 * Z + 4] = {}; + float rampReordered[O][2 * Z + 4] = {}; MinBlep() { - float impulse[2 * Z][O]; - minBlepImpulse(Z, O, &impulse[0][0]); + float impulse[2 * Z * O]; + minBlepImpulse(Z, O, impulse); + + // Subtract 1 so our step impulse goes from -1 to 0. + for (int i = 0; i < 2*Z*O; i++) { + impulse[i] -= 1.f; + } - // Fill impulseReordered with transposed and pre-subtracted data + // Integrate minBLEP to obtain minBLAMP + float ramp[2 * Z * O]; + double total = 0.0; + for (int i = 0; i < 2*Z*O; i++) { + ramp[i] = total; + total += impulse[i] / O; + } + // Subtract ideal ramp so ramp[0] and the virtual ramp[n] are 0 + for (int i = 0; i < 2*Z*O; i++) { + ramp[i] -= (float) i / (2*Z*O) * total; + } + + // Transpose samples by making z values contiguous for each o for (int o = 0; o < O; o++) { - // z = 0 to 2*Z-1: actual impulse data for (int z = 0; z < 2 * Z; z++) { - impulseReordered[o][z] = impulse[z][o] - 1.f; - } - // z = 2*Z to 2*Z+3: after impulse ends (for SIMD padding) - for (int z = 2 * Z; z < 2 * Z + 4; z++) { - impulseReordered[o][z] = 0.f; + impulseReordered[o][z] = impulse[z * O + o]; } } - } - - /** Get pointer to impulse data, handling o wrapping. */ - const float* getImpulse(int o, int z) const { - z += o / O; - o = o % O; - if (o < 0) { - z -= 1; - o += O; + for (int o = 0; o < O; o++) { + for (int z = 0; z < 2 * Z; z++) { + rampReordered[o][z] = ramp[z * O + o]; + } } - // assert(0 <= o && o < O); - // assert(0 <= z && z < 2 * Z + 4); - return &impulseReordered[o][z]; } - /** Places a discontinuity at 0 < p <= 1 relative to the current frame. + /** Places a discontinuity at 0 < subsample <= 1 relative to the current frame. `out` must have enough space to write 2*Z floats, spaced by `stride` floats. - `p` is the subsample position to insert a discontinuity of `magnitude`. + `subsample` is the subsample position to insert a discontinuity of `magnitude`. For example if a square wave will jump from 1 to -1 in 0.1 frames, use insertDiscontinuity(out, 1, 0.1f, -2.f). - Note: In the deprecated MinBlepGenerator, `p` was in the range (-1, 0], so add 1 to p if updating to MinBlep. + Note: In the deprecated MinBlepGenerator, `subsample` was in the range (-1, 0], so add 1 to subsample if updating to MinBlep. */ - void insertDiscontinuity(float* out, int stride, float p, float magnitude) const { - if (!(0 < p && p <= 1)) + void insertDiscontinuity(float subsample, float magnitude, float* out, int stride = 1) const { + insert(impulseReordered, subsample, magnitude, out, stride); + } + + void insertRampDiscontinuity(float subsample, float magnitude, float* out, int stride = 1) const { + insert(rampReordered, subsample, magnitude, out, stride); + } + +private: + void insert(const float table[O][2 * Z + 4], float subsample, float magnitude, float* out, int stride = 1) const { + if (!(0.f < subsample && subsample <= 1.f)) return; // Calculate impulse array index and fractional part - float subsample = (1 - p) * O; - int o = (int) subsample; - float t = subsample - o; + float t = (1.f - subsample) * O; + int o = (int) t; + t -= o; // For each zero crossing, linearly interpolate impulse response from oversample points for (int z = 0; z < 2 * Z; z += 4) { using simd::float_4; - float_4 y1 = float_4::load(getImpulse(o, z)); - float_4 y2 = float_4::load(getImpulse(o + 1, z)); + float_4 y1 = float_4::load(&table[o][z]); + int o2 = (o + 1) % O; + int z2 = z + (o + 1) / O; + float_4 y2 = float_4::load(&table[o2][z2]); float_4 y = y1 + t * (y2 - y1); y *= magnitude; @@ -313,7 +322,10 @@ struct MinBlepBuffer { }; -static MinBlep<16, 16> minBlep; +static const MinBlep<16, 16>& getMinBlep() { + static MinBlep<16, 16> minBlep; + return minBlep; +} template @@ -324,8 +336,6 @@ struct VCOProcessor { T lastSync = 0.f; /** 1 or -1 */ T lastSqrState = 1.f; - /** Leaky integrator result. */ - T triFilterState = 0.f; OnePoleHighpass dcFilter; OnePoleHighpass::State dcFilterStateSqr; @@ -335,6 +345,7 @@ struct VCOProcessor { MinBlepBuffer<16*2, T> sqrMinBlep; MinBlepBuffer<16*2, T> sawMinBlep; + MinBlepBuffer<16*2, T> triMinBlep; MinBlepBuffer<16*2, T> sinMinBlep; void setSampleTime(float sampleTime) { @@ -379,14 +390,26 @@ struct VCOProcessor { // Inserts minBLEP for each channel where mask is true. // Serial but does nothing if mask is all zero. - auto insertMinBlep = [&](T mask, T p, T magnitude, MinBlepBuffer<16*2, T>& minBlepBuffer) { + auto insertDiscontinuity = [&](T mask, T subsample, T magnitude, MinBlepBuffer<16*2, T>& buffer) { int m = simd::movemask(mask); if (!m) return; - float* buffer = (float*) minBlepBuffer.startData(); for (int i = 0; i < frame.channels; i++) { if (m & (1 << i)) { - minBlep.insertDiscontinuity(&buffer[i], 4, p[i], magnitude[i]); + float* x = (float*) buffer.startData(); + getMinBlep().insertDiscontinuity(subsample[i], magnitude[i], &x[i], 4); + } + } + }; + + auto insertRampDiscontinuity = [&](T mask, T subsample, T magnitude, MinBlepBuffer<16*2, T>& buffer) { + int m = simd::movemask(mask); + if (!m) + return; + for (int i = 0; i < frame.channels; i++) { + if (m & (1 << i)) { + float* x = (float*) buffer.startData(); + getMinBlep().insertRampDiscontinuity(subsample[i], magnitude[i], &x[i], 4); } } }; @@ -406,32 +429,43 @@ struct VCOProcessor { // startSubsample and endSubsample define the time range within the frame. // channelMask limits processing to specific channels. auto processCrossings = [&](T startPhase, T endPhase, T startSubsample, T endSubsample, T channelMask) { - if (frame.sqrEnabled || frame.triEnabled) { + if (frame.sqrEnabled) { // Insert minBLEP to square when phase crosses 0 (mod 1) T wrapSubsample = getCrossing(1.f, startPhase, endPhase, startSubsample, endSubsample); - T wrapMask = channelMask & (startSubsample < wrapSubsample) & (wrapSubsample <= endSubsample); - insertMinBlep(wrapMask, wrapSubsample, 2.f * syncDirection, sqrMinBlep); + T mask = channelMask & (startSubsample < wrapSubsample) & (wrapSubsample <= endSubsample); + insertDiscontinuity(mask, wrapSubsample, 2.f * syncDirection, sqrMinBlep); // Insert minBLEP to square when phase crosses pulse width T pulseSubsample = getCrossing(pulseWidth, startPhase, endPhase, startSubsample, endSubsample); - T pulseMask = channelMask & (startSubsample < pulseSubsample) & (pulseSubsample <= endSubsample); - insertMinBlep(pulseMask, pulseSubsample, -2.f * syncDirection, sqrMinBlep); + mask = channelMask & (startSubsample < pulseSubsample) & (pulseSubsample <= endSubsample); + insertDiscontinuity(mask, pulseSubsample, -2.f * syncDirection, sqrMinBlep); } if (frame.sawEnabled) { // Insert minBLEP to saw when crossing 0.5 T sawSubsample = getCrossing(0.5f, startPhase, endPhase, startSubsample, endSubsample); - T sawMask = channelMask & (startSubsample < sawSubsample) & (sawSubsample <= endSubsample); - insertMinBlep(sawMask, sawSubsample, -2.f * syncDirection, sawMinBlep); + T mask = channelMask & (startSubsample < sawSubsample) & (sawSubsample <= endSubsample); + insertDiscontinuity(mask, sawSubsample, -2.f * syncDirection, sawMinBlep); + } + + if (frame.triEnabled) { + // Insert minBLAMP to tri when crossing 0.25, slope from +4 to -4 + T triSubsample = getCrossing(0.25f, startPhase, endPhase, startSubsample, endSubsample); + T mask = channelMask & (startSubsample < triSubsample) & (triSubsample <= endSubsample); + insertRampDiscontinuity(mask, triSubsample, -8.f * deltaPhase, triMinBlep); + // Insert minBLAMP to tri when crossing 0.75, slope from -4 to +4 + triSubsample = getCrossing(0.75f, startPhase, endPhase, startSubsample, endSubsample); + mask = channelMask & (startSubsample < triSubsample) & (triSubsample <= endSubsample); + insertRampDiscontinuity(mask, triSubsample, 8.f * deltaPhase, triMinBlep); } }; // Check if square value changed due to pulseWidth changing since last frame - if (frame.sqrEnabled || frame.triEnabled) { + if (frame.sqrEnabled) { T sqrState = simd::ifelse(prevPhase < pulseWidth, 1.f, -1.f); - T changed = (sqrState != lastSqrState); T magnitude = sqrState - lastSqrState; - insertMinBlep(changed, 1e-6f, magnitude, sqrMinBlep); + T changed = (magnitude != 0.f); + insertDiscontinuity(changed, 1e-6f, magnitude, sqrMinBlep); } if (!frame.syncEnabled) { @@ -472,21 +506,21 @@ struct VCOProcessor { } else { // Hard sync: Reset phase from syncPhase to 0 at syncSubsample, insert discontinuities - if (frame.sqrEnabled || frame.triEnabled) { + if (frame.sqrEnabled) { // Check if square jumps from -1 to +1 T sqrJump = (syncPhase >= pulseWidth); - insertMinBlep(syncOccurred & sqrJump, syncSubsample, 2.f, sqrMinBlep); - } - if (frame.triEnabled) { - // TODO Insert minBLEP to Tri + insertDiscontinuity(syncOccurred & sqrJump, syncSubsample, 2.f, sqrMinBlep); } if (frame.sawEnabled) { // Saw jumps from saw(syncPhase) to saw(0) = 0 - insertMinBlep(syncOccurred, syncSubsample, -saw(syncPhase), sawMinBlep); + insertDiscontinuity(syncOccurred, syncSubsample, -saw(syncPhase), sawMinBlep); + } + if (frame.triEnabled) { + insertDiscontinuity(syncOccurred, syncSubsample, -tri(syncPhase), triMinBlep); } if (frame.sinEnabled) { // sin jumps from sin(syncPhase) to sin(0) = 0 - insertMinBlep(syncOccurred, syncSubsample, -sin(syncPhase), sinMinBlep); + insertDiscontinuity(syncOccurred, syncSubsample, -sin(syncPhase), sinMinBlep); } // Process crossings after sync (starting from phase 0) @@ -507,25 +541,17 @@ struct VCOProcessor { frame.saw = dcFilter.process(dcFilterStateSaw, frame.saw); } - if (frame.sqrEnabled || frame.triEnabled) { + if (frame.sqrEnabled) { frame.sqr = simd::ifelse(phase < pulseWidth, 1.f, -1.f); lastSqrState = frame.sqr; frame.sqr += sqrMinBlep.shift(); - T triSqr = frame.sqr; frame.sqr = dcFilter.process(dcFilterStateSqr, frame.sqr); + } - if (frame.triEnabled) { - // Integrate square wave - const float triShape = 0.2f; - T triFreq = sampleTime * triShape * frame.freq; - // Use bilinear transform to derive alpha - T alpha = 1 / (1 + 1 / (M_PI * triFreq)); - // T alpha = 1.f - simd::exp(-2.f * M_PI * triFreq); - triFilterState += alpha * (triSqr - triFilterState); - // Apply gain to roughly have unit amplitude at 0.5 pulseWidth. Depends on triShape. - frame.tri = triFilterState * 6.6f; - frame.tri = dcFilter.process(dcFilterStateTri, frame.tri); - } + if (frame.triEnabled) { + frame.tri = tri(phase); + frame.tri += triMinBlep.shift(); + frame.tri = dcFilter.process(dcFilterStateTri, frame.tri); } if (frame.sinEnabled) { From 81b0db8a37181b567234141d21b5ae398d3250c7 Mon Sep 17 00:00:00 2001 From: Andrew Belt Date: Mon, 5 Jan 2026 03:23:18 -0500 Subject: [PATCH 11/18] VCO: When hard or soft syncing and the slope of tri/saw change, insert a minBLAMP. --- src/VCO.cpp | 64 ++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 54 insertions(+), 10 deletions(-) diff --git a/src/VCO.cpp b/src/VCO.cpp index fda2f17..d858967 100644 --- a/src/VCO.cpp +++ b/src/VCO.cpp @@ -146,6 +146,7 @@ inline void minBlepImpulse(int z, int o, float* output) { x[i] *= blackmanHarris(i / (double) (n - 1)); } +#if 1 // Reconstruct impulse response with minimum phase fft(x, n); @@ -178,6 +179,7 @@ inline void minBlepImpulse(int z, int o, float* output) { // Transform to time domain fft(x, n, true); +#endif // Integrate. First sample x[0] should be 0. double total = 0.0; @@ -226,6 +228,12 @@ struct MinBlep { ramp[i] -= (float) i / (2*Z*O) * total; } + // FILE* f = fopen("plugins/Fundamental/minblep.txt", "w"); + // for (int i = 0; i < 2*Z*O; i++) { + // fprintf(f, "%.12f\n", ramp[i]); + // } + // fclose(f); + // Transpose samples by making z values contiguous for each o for (int o = 0; o < O; o++) { for (int z = 0; z < 2 * Z; z++) { @@ -250,7 +258,7 @@ struct MinBlep { insert(impulseReordered, subsample, magnitude, out, stride); } - void insertRampDiscontinuity(float subsample, float magnitude, float* out, int stride = 1) const { + void insertSlopeDiscontinuity(float subsample, float magnitude, float* out, int stride = 1) const { insert(rampReordered, subsample, magnitude, out, stride); } @@ -402,14 +410,14 @@ struct VCOProcessor { } }; - auto insertRampDiscontinuity = [&](T mask, T subsample, T magnitude, MinBlepBuffer<16*2, T>& buffer) { + auto insertSlopeDiscontinuity = [&](T mask, T subsample, T magnitude, MinBlepBuffer<16*2, T>& buffer) { int m = simd::movemask(mask); if (!m) return; for (int i = 0; i < frame.channels; i++) { if (m & (1 << i)) { float* x = (float*) buffer.startData(); - getMinBlep().insertRampDiscontinuity(subsample[i], magnitude[i], &x[i], 4); + getMinBlep().insertSlopeDiscontinuity(subsample[i], magnitude[i], &x[i], 4); } } }; @@ -449,20 +457,23 @@ struct VCOProcessor { } if (frame.triEnabled) { - // Insert minBLAMP to tri when crossing 0.25, slope from +4 to -4 + // Insert minBLAMP to tri when crossing 0.25 T triSubsample = getCrossing(0.25f, startPhase, endPhase, startSubsample, endSubsample); T mask = channelMask & (startSubsample < triSubsample) & (triSubsample <= endSubsample); - insertRampDiscontinuity(mask, triSubsample, -8.f * deltaPhase, triMinBlep); + // Slope goes from +4 to -4, so slope jump is -8 * abs(deltaPhase) + insertSlopeDiscontinuity(mask, triSubsample, -8.f * deltaPhase * syncDirection, triMinBlep); + // Insert minBLAMP to tri when crossing 0.75, slope from -4 to +4 triSubsample = getCrossing(0.75f, startPhase, endPhase, startSubsample, endSubsample); mask = channelMask & (startSubsample < triSubsample) & (triSubsample <= endSubsample); - insertRampDiscontinuity(mask, triSubsample, 8.f * deltaPhase, triMinBlep); + // Slope goes from -4 to +4, so slope jump is 8 * abs(deltaPhase) + insertSlopeDiscontinuity(mask, triSubsample, 8.f * deltaPhase * syncDirection, triMinBlep); } }; // Check if square value changed due to pulseWidth changing since last frame if (frame.sqrEnabled) { - T sqrState = simd::ifelse(prevPhase < pulseWidth, 1.f, -1.f); + T sqrState = sqr(prevPhase, pulseWidth); T magnitude = sqrState - lastSqrState; T changed = (magnitude != 0.f); insertDiscontinuity(changed, 1e-6f, magnitude, sqrMinBlep); @@ -499,13 +510,31 @@ struct VCOProcessor { if (frame.soft) { // Soft sync: Reverse direction, continue from syncPhase + + if (frame.sawEnabled) { + // Saw slope reverses + // +2 slope becomes -2 if deltaPhase > 0 + // -2 slope becomes +2 if deltaPhase < 0 + insertSlopeDiscontinuity(syncOccurred, syncSubsample, -4.f * deltaPhase, sawMinBlep); + } + if (frame.triEnabled) { + // Tri slope reverses + // -4 slope becomes +4 if 0.25 < phase < 0.75 + // -4 slope becomes +4 otherwise + T descending = (0.25f < syncPhase) & (syncPhase < 0.75f); + T slopeJump = simd::ifelse(descending, 8.f, -8.f); + insertSlopeDiscontinuity(syncOccurred, syncSubsample, slopeJump * deltaPhase, triMinBlep); + } + syncDirection = simd::ifelse(syncOccurred, -syncDirection, syncDirection); - T endPhase = syncPhase + (-deltaPhase) * (1.f - syncSubsample); + deltaPhase = simd::ifelse(syncOccurred, -deltaPhase, deltaPhase); + T endPhase = syncPhase + deltaPhase * (1.f - syncSubsample); processCrossings(syncPhase, endPhase, syncSubsample, 1.f, syncOccurred); phase = simd::ifelse(syncOccurred, endPhase, phase); } else { // Hard sync: Reset phase from syncPhase to 0 at syncSubsample, insert discontinuities + if (frame.sqrEnabled) { // Check if square jumps from -1 to +1 T sqrJump = (syncPhase >= pulseWidth); @@ -516,11 +545,18 @@ struct VCOProcessor { insertDiscontinuity(syncOccurred, syncSubsample, -saw(syncPhase), sawMinBlep); } if (frame.triEnabled) { + // Tri jumps from tri(syncPhase) to tri(0) = 0 insertDiscontinuity(syncOccurred, syncSubsample, -tri(syncPhase), triMinBlep); + // If descending slope (-4), reset to ascending slope (+4) + T wasDescending = (0.25f < syncPhase) & (syncPhase < 0.75f); + insertSlopeDiscontinuity(syncOccurred & wasDescending, syncSubsample, 8.f * deltaPhase, triMinBlep); } if (frame.sinEnabled) { // sin jumps from sin(syncPhase) to sin(0) = 0 insertDiscontinuity(syncOccurred, syncSubsample, -sin(syncPhase), sinMinBlep); + // Slope changes from sinDerivative(syncPhase) to sinDerivative(0) = 2pi + // T slopeJump = T(2 * M_PI) - sinDerivative(syncPhase); + // insertSlopeDiscontinuity(syncOccurred, syncSubsample, slopeJump * deltaPhase, sinMinBlep); } // Process crossings after sync (starting from phase 0) @@ -542,7 +578,7 @@ struct VCOProcessor { } if (frame.sqrEnabled) { - frame.sqr = simd::ifelse(phase < pulseWidth, 1.f, -1.f); + frame.sqr = sqr(phase, pulseWidth); lastSqrState = frame.sqr; frame.sqr += sqrMinBlep.shift(); frame.sqr = dcFilter.process(dcFilterStateSqr, frame.sqr); @@ -565,17 +601,25 @@ struct VCOProcessor { return sin(phase); } + static T sqr(T phase, T pulseWidth) { + return simd::ifelse(phase < pulseWidth, 1.f, -1.f); + } static T saw(T phase) { T x = phase + 0.5f; x -= simd::trunc(x); return 2 * x - 1; } static T tri(T phase) { - return 1 - 4 * simd::fmin(simd::fabs(phase - 0.25f), simd::fabs(phase - 1.25f)); + return 1 - 4 * simd::fmin(simd::fabs(phase - 0.25f), 1.25f - phase); } static T sin(T phase) { return sin_pi_9(1 - 2 * phase); } + static T sinDerivative(T phase) { + phase += 0.25f; + phase -= simd::floor(phase); + return T(2 * M_PI) * sin(phase); + } }; From bc4156d2183712e98ae6c6b8067be5ed517dd790 Mon Sep 17 00:00:00 2001 From: Andrew Belt Date: Mon, 5 Jan 2026 21:58:19 -0500 Subject: [PATCH 12/18] VCF: Use TPT/ZDF solver with first-order AADA. --- src/VCF.cpp | 250 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 160 insertions(+), 90 deletions(-) diff --git a/src/VCF.cpp b/src/VCF.cpp index 022a77d..5db0615 100644 --- a/src/VCF.cpp +++ b/src/VCF.cpp @@ -4,66 +4,164 @@ using simd::float_4; +/** Approximates tan(x) for x in [0, π*0.49]. +Max error: 1.8e-7 +*/ template -static T clip(T x) { - // return std::tanh(x); - // Pade approximant of tanh - x = simd::clamp(x, -3.f, 3.f); - return x * (27 + x * x) / (27 + 9 * x * x); +inline T tan_5_4(T x) { + T x2 = x * x; + return x * (T(1) + x2 * (T(-0.1122835153) + T(0.00114264086) * x2)) + / (T(1) + x2 * (T(-0.4456155807) + T(0.01634547741) * x2)); } +/** Approximates tanh(x) for x in [-4, 4]. +Max error: 1.1e-5 +*/ +template +inline T tanh_5_6(T x) { + x = simd::clamp(x, T(-4), T(4)); + T x2 = x * x; + return x * (T(1450.73) + x2 * (T(152.494) + T(1.12357) * x2)) + / (T(1450.80) + x2 * (T(635.839) + x2 * (T(19.8913) + T(0.00188) * x2))); +} + + +/** Approximates ln(cosh(x)) for x >= 0. +Max error: 6.2e-5 over [0, 10]. +*/ +template +inline T lncosh_5_3(T x) { + T x2 = x * x; + return x2 * T(0.5) * (T(1) + x * (T(0.5825995649809) + x * (T(0.29698351914621) + x * T(-0.0001879624507617)))) + / (T(1) + x * (T(0.5938203513329) + x * (T(0.4266725633273) + x * T(0.1456088792904)))); +} + + +/** First antiderivative of tanh: F(x) = ln(cosh(x)). +For numerical stability, uses asymptotic form for |x| > 10. +*/ +template +inline T tanh_F1(T x) { + x = simd::abs(x); + // For |x| > 10, ln(cosh(x)) ≈ |x| - ln(2) + T asymptotic = x - T(0.6931471805599453); + T exact = lncosh_5_3(x); + return simd::ifelse(x < T(10), exact, asymptotic); +} + + +/** First-order ADAA (Antiderivative Anti-Aliasing) processor for tanh. +Computes (F(x[n]) - F(x[n-1])) / (x[n] - x[n-1]) where F is the antiderivative. +Falls back to tanh(midpoint) when x[n] ≈ x[n-1] (L'Hôpital's rule). +*/ +template +struct TanhADAA1 { + T xPrev = T(0); + + void reset() { + xPrev = T(0); + } + + T process(T x) { + const T eps = T(1e-5); + T diff = x - xPrev; + + // Normal case: finite difference of antiderivative + T Fx = tanh_F1(x); + T FxPrev = tanh_F1(xPrev); + T adaaResult = (Fx - FxPrev) / diff; + + // Fallback: use tanh at midpoint when diff is small + T midpoint = (x + xPrev) * T(0.5); + T fallbackResult = tanh_5_6(midpoint); + + T y = simd::ifelse(simd::abs(diff) > eps, adaaResult, fallbackResult); + + xPrev = x; + return y; + } +}; + + +/** 4-pole (24dB/oct) transistor ladder filter using TPT/ZDF (Zero-Delay Feedback). +Uses first-order ADAA on tanh saturation. +*/ template struct LadderFilter { - T omega0; - T resonance = 1; T state[4]; - T input; + TanhADAA1 adaa[5]; + + struct Frame { + T input; + /** Normalized cutoff frequency: 0 <= fc/fs < 0.5. */ + T cutoff; + /** Resonance: 0-1, self-oscillation begins around 0.7. */ + T resonance; + + /** 4-pole (24dB/oct) lowpass output. */ + T lowpass4; + /** 4-pole (24dB/oct) highpass output. */ + T highpass4; + }; LadderFilter() { reset(); - setCutoff(0); } void reset() { for (int i = 0; i < 4; i++) { - state[i] = 0; + state[i] = T(0); + } + for (int i = 0; i < 5; i++) { + adaa[i].reset(); } } - void setCutoff(T cutoff) { - omega0 = 2 * T(M_PI) * cutoff; - } - - void process(T input, T dt) { - dsp::stepRK4(T(0), dt, state, 4, [&](T t, const T x[], T dxdt[]) { - T inputt = crossfade(this->input, input, t / dt); - T inputc = clip(inputt - resonance * x[3]); - T yc0 = clip(x[0]); - T yc1 = clip(x[1]); - T yc2 = clip(x[2]); - T yc3 = clip(x[3]); - - dxdt[0] = omega0 * (inputc - yc0); - dxdt[1] = omega0 * (yc0 - yc1); - dxdt[2] = omega0 * (yc1 - yc2); - dxdt[3] = omega0 * (yc2 - yc3); - }); - - this->input = input; - } - - T lowpass() { - return state[3]; - } - T highpass() { - return clip((input - resonance * state[3]) - 4 * state[0] + 6 * state[1] - 4 * state[2] + state[3]); + void process(Frame& frame) { + // Pre-warp cutoff frequency using bilinear transform + T g = tan_5_4(T(M_PI) * frame.cutoff); + // Compute the one-pole lowpass gain coefficient G = g/(1+g) + T G = g / (T(1) + g); + + // Global feedback path + // Apply resonance scaling and soft-clip with tanh (with ADAA) + T feedback = adaa[4].process(frame.resonance * state[3]); + T u = frame.input - feedback; + + // Stage 0 + T sat0 = adaa[0].process(u); + T v0 = G * (sat0 - state[0]); + T y0 = v0 + state[0]; + state[0] = y0 + v0; + + // Stage 1 + T sat1 = adaa[1].process(y0); + T v1 = G * (sat1 - state[1]); + T y1 = v1 + state[1]; + state[1] = y1 + v1; + + // Stage 2 + T sat2 = adaa[2].process(y1); + T v2 = G * (sat2 - state[2]); + T y2 = v2 + state[2]; + state[2] = y2 + v2; + + // Stage 3 + T sat3 = adaa[3].process(y2); + T v3 = G * (sat3 - state[3]); + T y3 = v3 + state[3]; + state[3] = y3 + v3; + + frame.lowpass4 = y3; + + // Binomial expansion (1-H)^4 = 1 - 4H + 6H^2 - 4H^3 + H^4 + // Using saturated input and stage outputs + frame.highpass4 = sat0 - T(4) * y0 + T(6) * y1 - T(4) * y2 + y3; } }; -static const int UPSAMPLE = 2; - struct VCF : Module { enum ParamIds { FREQ_PARAM, @@ -90,9 +188,6 @@ struct VCF : Module { }; LadderFilter filters[4]; - // Upsampler inputUpsampler; - // Decimator lowpassDecimator; - // Decimator highpassDecimator; VCF() { config(NUM_PARAMS, NUM_INPUTS, NUM_OUTPUTS); @@ -100,9 +195,9 @@ struct VCF : Module { // freq = C4 * 2^(10 * param - 5) // or // param = (log2(freq / C4) + 5) / 10 - const float minFreq = (std::log2(dsp::FREQ_C4 / 8000.f) + 5) / 10; - const float maxFreq = (std::log2(8000.f / dsp::FREQ_C4) + 5) / 10; - const float defaultFreq = (0.f + 5) / 10; + const float minFreq = (std::log2(8.f / dsp::FREQ_C4) + 5) / 10; + const float maxFreq = (std::log2(20000.f / dsp::FREQ_C4) + 5) / 10; + const float defaultFreq = (minFreq + maxFreq) / 2.f; configParam(FREQ_PARAM, minFreq, maxFreq, defaultFreq, "Cutoff frequency", " Hz", std::pow(2, 10.f), dsp::FREQ_C4 / std::pow(2, 5.f)); configParam(RES_PARAM, 0.f, 1.f, 0.f, "Resonance", "%", 0.f, 100.f); configParam(RES_CV_PARAM, -1.f, 1.f, 0.f, "Resonance CV", "%", 0.f, 100.f); @@ -127,8 +222,9 @@ struct VCF : Module { } void onReset() override { - for (int i = 0; i < 4; i++) + for (int i = 0; i < 4; i++) { filters[i].reset(); + } } void process(const ProcessArgs& args) override { @@ -148,63 +244,37 @@ struct VCF : Module { int channels = std::max(1, inputs[IN_INPUT].getChannels()); for (int c = 0; c < channels; c += 4) { - auto& filter = filters[c / 4]; + LadderFilter::Frame frame; + // Input float_4 input = inputs[IN_INPUT].getVoltageSimd(c) / 5.f; - // Drive gain + // Drive float_4 drive = driveParam + inputs[DRIVE_INPUT].getPolyVoltageSimd(c) / 10.f * driveCvParam; - drive = clamp(drive, -1.f, 1.f); + drive = simd::clamp(drive, -1.f, 1.f); float_4 gain = simd::pow(1.f + drive, 5); input *= gain; // Add -120dB noise to bootstrap self-oscillation input += 1e-6f * (2.f * random::uniform() - 1.f); + frame.input = input; - // Set resonance + // Resonance float_4 resonance = resParam + inputs[RES_INPUT].getPolyVoltageSimd(c) / 10.f * resCvParam; - resonance = clamp(resonance, 0.f, 1.f); - filter.resonance = simd::pow(resonance, 2) * 10.f; + resonance = simd::clamp(resonance, 0.f, 1.f); + frame.resonance = resonance * resonance * 10.f; - // Get pitch + // Cutoff frequency float_4 pitch = freqParam + inputs[FREQ_INPUT].getPolyVoltageSimd(c) * freqCvParam; - // Set cutoff float_4 cutoff = dsp::FREQ_C4 * dsp::exp2_taylor5(pitch); - // Without oversampling, we must limit to 8000 Hz or so @ 44100 Hz - cutoff = clamp(cutoff, 1.f, args.sampleRate * 0.18f); - filter.setCutoff(cutoff); - - // Upsample input - // float dt = args.sampleTime / UPSAMPLE; - // float inputBuf[UPSAMPLE]; - // float lowpassBuf[UPSAMPLE]; - // float highpassBuf[UPSAMPLE]; - // inputUpsampler.process(input, inputBuf); - // for (int i = 0; i < UPSAMPLE; i++) { - // // Step the filter - // filter.process(inputBuf[i], dt); - // if (outputs[LPF_OUTPUT].isConnected()) - // lowpassBuf[i] = filter.lowpass(); - // if (outputs[HPF_OUTPUT].isConnected()) - // highpassBuf[i] = filter.highpass(); - // } - - // // Set outputs - // if (outputs[LPF_OUTPUT].isConnected()) { - // outputs[LPF_OUTPUT].setVoltage(5.f * lowpassDecimator.process(lowpassBuf)); - // } - // if (outputs[HPF_OUTPUT].isConnected()) { - // outputs[HPF_OUTPUT].setVoltage(5.f * highpassDecimator.process(highpassBuf)); - // } - - // Set outputs - filter.process(input, args.sampleTime); - if (outputs[LPF_OUTPUT].isConnected()) { - outputs[LPF_OUTPUT].setVoltageSimd(5.f * filter.lowpass(), c); - } - if (outputs[HPF_OUTPUT].isConnected()) { - outputs[HPF_OUTPUT].setVoltageSimd(5.f * filter.highpass(), c); - } + frame.cutoff = simd::clamp(cutoff * args.sampleTime, 0.f, 0.49f); + + // Process + filters[c / 4].process(frame); + + // Outputs + outputs[LPF_OUTPUT].setVoltageSimd(frame.lowpass4 * 5.f, c); + outputs[HPF_OUTPUT].setVoltageSimd(frame.highpass4 * 5.f, c); } outputs[LPF_OUTPUT].setChannels(channels); @@ -212,7 +282,7 @@ struct VCF : Module { } void paramsFromJson(json_t* rootJ) override { - // These attenuators didn't exist in version <2.0, so set to 1 in case they are not overwritten. + // These attenuators didn't exist in version <2, so set to 1 in case they are not overwritten. params[RES_CV_PARAM].setValue(1.f); params[DRIVE_CV_PARAM].setValue(1.f); From ba328ae9cc2316d7716419e5217c619afebf58c2 Mon Sep 17 00:00:00 2001 From: Andrew Belt Date: Tue, 6 Jan 2026 01:13:08 -0500 Subject: [PATCH 13/18] VCF: Optimize function approximations. Increase maximum frequency to 22,000 Hz. --- src/VCF.cpp | 67 ++++++++++++++++++++++++++++------------------------- 1 file changed, 36 insertions(+), 31 deletions(-) diff --git a/src/VCF.cpp b/src/VCF.cpp index 5db0615..932a282 100644 --- a/src/VCF.cpp +++ b/src/VCF.cpp @@ -1,53 +1,56 @@ #include "plugin.hpp" -using simd::float_4; - - -/** Approximates tan(x) for x in [0, π*0.49]. -Max error: 1.8e-7 +/** Approximates tan(pi*x) for x in [0, 0.5). +Optimized coefficients for max rel error: 1.18e-05. */ template -inline T tan_5_4(T x) { +inline T tan_pi_1_2(T x) { T x2 = x * x; - return x * (T(1) + x2 * (T(-0.1122835153) + T(0.00114264086) * x2)) - / (T(1) + x2 * (T(-0.4456155807) + T(0.01634547741) * x2)); + T num = T(1) + T(-0.9622845476351338) * x2; + T den = T(1) + x2 * (T(-4.252711329946427) + T(1.0108965067534537) * x2); + return T(M_PI) * x * num / den; } /** Approximates tanh(x) for x in [-4, 4]. -Max error: 1.1e-5 +Optimized coefficients for max rel error: 5.56e-07. */ template -inline T tanh_5_6(T x) { +inline T tanh_2_3(T x) { x = simd::clamp(x, T(-4), T(4)); T x2 = x * x; - return x * (T(1450.73) + x2 * (T(152.494) + T(1.12357) * x2)) - / (T(1450.80) + x2 * (T(635.839) + x2 * (T(19.8913) + T(0.00188) * x2))); + T num = T(1) + x2 * (T(0.11823399425250458) + x2 * T(0.0017593473306865004)); + T den = T(1) + x2 * (T(0.4515649138214517) + x2 * (T(0.01895237471013944) + x2 * T(7.340776670083926e-05))); + return x * num / den; } -/** Approximates ln(cosh(x)) for x >= 0. -Max error: 6.2e-5 over [0, 10]. +/** Approximates ln(cosh(x)) for all x. +Optimized coefficients for max deriv rel error: 2.18e-04. +Max func abs error: 2.33e-04. */ template -inline T lncosh_5_3(T x) { +inline T ln_cosh_3_3(T x) { + x = simd::abs(x); T x2 = x * x; - return x2 * T(0.5) * (T(1) + x * (T(0.5825995649809) + x * (T(0.29698351914621) + x * T(-0.0001879624507617)))) - / (T(1) + x * (T(0.5938203513329) + x * (T(0.4266725633273) + x * T(0.1456088792904)))); + T num = T(1) + x * (T(0.5536821172234367) + x * (T(0.25181887093756017) + x * T(-0.00022483065845568806))); + T den = T(1) + x * (T(0.5571167780242312) + x * (T(0.4011383699340152) + x * T(0.12250932588907415))); + return x2 * T(0.5) * num / den; } -/** First antiderivative of tanh: F(x) = ln(cosh(x)). -For numerical stability, uses asymptotic form for |x| > 10. +/** Approximates ln(cosh(x)) for all x. +Optimized coefficients for max deriv rel error: 1.73e-05. +Max func abs error: 1.59e-05. */ template -inline T tanh_F1(T x) { +inline T ln_cosh_3_4(T x) { x = simd::abs(x); - // For |x| > 10, ln(cosh(x)) ≈ |x| - ln(2) - T asymptotic = x - T(0.6931471805599453); - T exact = lncosh_5_3(x); - return simd::ifelse(x < T(10), exact, asymptotic); + T x2 = x * x; + T num = T(1) + x * (T(0.6818733052030992) + x * (T(0.3310714761981197) + x * T(0.07328250587472884))); + T den = T(1) + x * (T(0.6818667996836973) + x * (T(0.4970931090131887) + x * (T(0.18905704720922234) + x * T(0.0366985697841935)))); + return x2 * T(0.5) * num / den; } @@ -68,13 +71,13 @@ struct TanhADAA1 { T diff = x - xPrev; // Normal case: finite difference of antiderivative - T Fx = tanh_F1(x); - T FxPrev = tanh_F1(xPrev); + T Fx = ln_cosh_3_4(x); + T FxPrev = ln_cosh_3_4(xPrev); T adaaResult = (Fx - FxPrev) / diff; // Fallback: use tanh at midpoint when diff is small T midpoint = (x + xPrev) * T(0.5); - T fallbackResult = tanh_5_6(midpoint); + T fallbackResult = tanh_2_3(midpoint); T y = simd::ifelse(simd::abs(diff) > eps, adaaResult, fallbackResult); @@ -120,7 +123,7 @@ struct LadderFilter { void process(Frame& frame) { // Pre-warp cutoff frequency using bilinear transform - T g = tan_5_4(T(M_PI) * frame.cutoff); + T g = tan_pi_1_2(frame.cutoff); // Compute the one-pole lowpass gain coefficient G = g/(1+g) T G = g / (T(1) + g); @@ -156,12 +159,14 @@ struct LadderFilter { frame.lowpass4 = y3; // Binomial expansion (1-H)^4 = 1 - 4H + 6H^2 - 4H^3 + H^4 - // Using saturated input and stage outputs frame.highpass4 = sat0 - T(4) * y0 + T(6) * y1 - T(4) * y2 + y3; } }; +using simd::float_4; + + struct VCF : Module { enum ParamIds { FREQ_PARAM, @@ -196,7 +201,7 @@ struct VCF : Module { // or // param = (log2(freq / C4) + 5) / 10 const float minFreq = (std::log2(8.f / dsp::FREQ_C4) + 5) / 10; - const float maxFreq = (std::log2(20000.f / dsp::FREQ_C4) + 5) / 10; + const float maxFreq = (std::log2(22000.f / dsp::FREQ_C4) + 5) / 10; const float defaultFreq = (minFreq + maxFreq) / 2.f; configParam(FREQ_PARAM, minFreq, maxFreq, defaultFreq, "Cutoff frequency", " Hz", std::pow(2, 10.f), dsp::FREQ_C4 / std::pow(2, 5.f)); configParam(RES_PARAM, 0.f, 1.f, 0.f, "Resonance", "%", 0.f, 100.f); @@ -267,7 +272,7 @@ struct VCF : Module { // Cutoff frequency float_4 pitch = freqParam + inputs[FREQ_INPUT].getPolyVoltageSimd(c) * freqCvParam; float_4 cutoff = dsp::FREQ_C4 * dsp::exp2_taylor5(pitch); - frame.cutoff = simd::clamp(cutoff * args.sampleTime, 0.f, 0.49f); + frame.cutoff = simd::clamp(cutoff * args.sampleTime, 0.f, 0.499f); // Process filters[c / 4].process(frame); From 48c26c5198a842beb29006a03757972efc3428b5 Mon Sep 17 00:00:00 2001 From: Andrew Belt Date: Tue, 6 Jan 2026 03:21:36 -0500 Subject: [PATCH 14/18] VCF: Use faster ln(cosh(x)) approximation. Cache previous ADAA antiderivative value. --- src/VCF.cpp | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/src/VCF.cpp b/src/VCF.cpp index 932a282..6dd0e19 100644 --- a/src/VCF.cpp +++ b/src/VCF.cpp @@ -1,8 +1,16 @@ #include "plugin.hpp" +/** Approximates 1/x, using the rcp() instruction and 1 Newton-Raphson refinement */ +template +inline T rcp_newton1(T x) { + T r = simd::rcp(x); + return r * (T(2) - x * r); +} + + /** Approximates tan(pi*x) for x in [0, 0.5). -Optimized coefficients for max rel error: 1.18e-05. +Optimized coefficients for max relative error: 1.18e-05. */ template inline T tan_pi_1_2(T x) { @@ -14,7 +22,7 @@ inline T tan_pi_1_2(T x) { /** Approximates tanh(x) for x in [-4, 4]. -Optimized coefficients for max rel error: 5.56e-07. +Optimized coefficients for max relative error: 5.56e-07. */ template inline T tanh_2_3(T x) { @@ -27,8 +35,8 @@ inline T tanh_2_3(T x) { /** Approximates ln(cosh(x)) for all x. -Optimized coefficients for max deriv rel error: 2.18e-04. -Max func abs error: 2.33e-04. +Optimized coefficients for max derivative relative error: 2.18e-04. +Max absolute error: 2.33e-04. */ template inline T ln_cosh_3_3(T x) { @@ -41,8 +49,8 @@ inline T ln_cosh_3_3(T x) { /** Approximates ln(cosh(x)) for all x. -Optimized coefficients for max deriv rel error: 1.73e-05. -Max func abs error: 1.59e-05. +Optimized coefficients for max derivative relative error: 1.73e-05. +Max absolute error: 1.59e-05. */ template inline T ln_cosh_3_4(T x) { @@ -54,34 +62,31 @@ inline T ln_cosh_3_4(T x) { } -/** First-order ADAA (Antiderivative Anti-Aliasing) processor for tanh. -Computes (F(x[n]) - F(x[n-1])) / (x[n] - x[n-1]) where F is the antiderivative. -Falls back to tanh(midpoint) when x[n] ≈ x[n-1] (L'Hôpital's rule). -*/ +/** First-order ADAA for tanh, caching F(x) between samples. */ template struct TanhADAA1 { T xPrev = T(0); + T FxPrev = T(0); void reset() { xPrev = T(0); + FxPrev = T(0); } T process(T x) { const T eps = T(1e-5); T diff = x - xPrev; - // Normal case: finite difference of antiderivative - T Fx = ln_cosh_3_4(x); - T FxPrev = ln_cosh_3_4(xPrev); + T Fx = ln_cosh_3_3(x); T adaaResult = (Fx - FxPrev) / diff; - // Fallback: use tanh at midpoint when diff is small T midpoint = (x + xPrev) * T(0.5); T fallbackResult = tanh_2_3(midpoint); T y = simd::ifelse(simd::abs(diff) > eps, adaaResult, fallbackResult); xPrev = x; + FxPrev = Fx; return y; } }; @@ -134,24 +139,28 @@ struct LadderFilter { // Stage 0 T sat0 = adaa[0].process(u); + // T sat0 = tanh_2_3(u); T v0 = G * (sat0 - state[0]); T y0 = v0 + state[0]; state[0] = y0 + v0; // Stage 1 T sat1 = adaa[1].process(y0); + // T sat1 = tanh_2_3(y0); T v1 = G * (sat1 - state[1]); T y1 = v1 + state[1]; state[1] = y1 + v1; // Stage 2 T sat2 = adaa[2].process(y1); + // T sat2 = tanh_2_3(y1); T v2 = G * (sat2 - state[2]); T y2 = v2 + state[2]; state[2] = y2 + v2; // Stage 3 T sat3 = adaa[3].process(y2); + // T sat3 = tanh_2_3(y2); T v3 = G * (sat3 - state[3]); T y3 = v3 + state[3]; state[3] = y3 + v3; From cb6b1b9d814625276b7dde752d3ba6f34ad5149d Mon Sep 17 00:00:00 2001 From: Andrew Belt Date: Tue, 6 Jan 2026 03:55:31 -0500 Subject: [PATCH 15/18] VCO: Increase maximum frequency to around 21,000 Hz. --- src/VCO.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/VCO.cpp b/src/VCO.cpp index d858967..51811fa 100644 --- a/src/VCO.cpp +++ b/src/VCO.cpp @@ -667,7 +667,7 @@ struct VCO : Module { config(NUM_PARAMS, NUM_INPUTS, NUM_OUTPUTS, NUM_LIGHTS); configSwitch(LINEAR_PARAM, 0.f, 1.f, 0.f, "FM mode", {"1V/octave", "Linear"}); configSwitch(SYNC_PARAM, 0.f, 1.f, 1.f, "Sync mode", {"Soft", "Hard"}); - configParam(FREQ_PARAM, -75.f, 75.f, 0.f, "Frequency", " Hz", dsp::FREQ_SEMITONE, dsp::FREQ_C4); + configParam(FREQ_PARAM, -76.f, 76.f, 0.f, "Frequency", " Hz", dsp::FREQ_SEMITONE, dsp::FREQ_C4); configParam(FM_PARAM, -1.f, 1.f, 0.f, "Frequency modulation", "%", 0.f, 100.f); getParamQuantity(FM_PARAM)->randomizeEnabled = false; configParam(PW_PARAM, 0.01f, 0.99f, 0.5f, "Pulse width", "%", 0.f, 100.f); From 40ae8916387609ac9a563e02f039177fac65ea15 Mon Sep 17 00:00:00 2001 From: Andrew Belt Date: Tue, 6 Jan 2026 04:51:19 -0500 Subject: [PATCH 16/18] VCF: Clean up comments. --- src/VCF.cpp | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/src/VCF.cpp b/src/VCF.cpp index 6dd0e19..daafb61 100644 --- a/src/VCF.cpp +++ b/src/VCF.cpp @@ -62,7 +62,7 @@ inline T ln_cosh_3_4(T x) { } -/** First-order ADAA for tanh, caching F(x) between samples. */ +/** 1st-order antiderivative anti-aliasing (ADAA) for tanh. */ template struct TanhADAA1 { T xPrev = T(0); @@ -92,8 +92,7 @@ struct TanhADAA1 { }; -/** 4-pole (24dB/oct) transistor ladder filter using TPT/ZDF (Zero-Delay Feedback). -Uses first-order ADAA on tanh saturation. +/** 4-pole transistor ladder filter using TPT/ZDF (Zero-Delay Feedback). */ template struct LadderFilter { @@ -102,14 +101,13 @@ struct LadderFilter { struct Frame { T input; - /** Normalized cutoff frequency: 0 <= fc/fs < 0.5. */ + /** Normalized cutoff frequency fc/fs in [0, 0.5) */ T cutoff; - /** Resonance: 0-1, self-oscillation begins around 0.7. */ T resonance; - /** 4-pole (24dB/oct) lowpass output. */ + /** 4-pole lowpass output. */ T lowpass4; - /** 4-pole (24dB/oct) highpass output. */ + /** 4-pole highpass output. */ T highpass4; }; @@ -127,13 +125,13 @@ struct LadderFilter { } void process(Frame& frame) { - // Pre-warp cutoff frequency using bilinear transform + // Pre-warped frequency T g = tan_pi_1_2(frame.cutoff); - // Compute the one-pole lowpass gain coefficient G = g/(1+g) + // Integrator gain T G = g / (T(1) + g); - // Global feedback path - // Apply resonance scaling and soft-clip with tanh (with ADAA) + // Feedback path + // Apply resonance scaling and soft-clip with tanh T feedback = adaa[4].process(frame.resonance * state[3]); T u = frame.input - feedback; @@ -166,8 +164,6 @@ struct LadderFilter { state[3] = y3 + v3; frame.lowpass4 = y3; - - // Binomial expansion (1-H)^4 = 1 - 4H + 6H^2 - 4H^3 + H^4 frame.highpass4 = sat0 - T(4) * y0 + T(6) * y1 - T(4) * y2 + y3; } }; @@ -276,7 +272,7 @@ struct VCF : Module { // Resonance float_4 resonance = resParam + inputs[RES_INPUT].getPolyVoltageSimd(c) / 10.f * resCvParam; resonance = simd::clamp(resonance, 0.f, 1.f); - frame.resonance = resonance * resonance * 10.f; + frame.resonance = simd::pow(resonance, 2) * 10.f; // Cutoff frequency float_4 pitch = freqParam + inputs[FREQ_INPUT].getPolyVoltageSimd(c) * freqCvParam; From 5fe39fa5be340c1eed278d15246c82d0579e4c8f Mon Sep 17 00:00:00 2001 From: Andrew Belt Date: Tue, 6 Jan 2026 15:17:41 -0500 Subject: [PATCH 17/18] VCF: Add citations for TPT and ADAA. --- src/VCF.cpp | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/src/VCF.cpp b/src/VCF.cpp index daafb61..2c343f5 100644 --- a/src/VCF.cpp +++ b/src/VCF.cpp @@ -9,15 +9,15 @@ inline T rcp_newton1(T x) { } -/** Approximates tan(pi*x) for x in [0, 0.5). -Optimized coefficients for max relative error: 1.18e-05. +/** Approximates tan(x) for x in [0, pi*0.5). +Optimized coefficients for max relative error: 2.78e-05. */ template -inline T tan_pi_1_2(T x) { +inline T tan_1_2(T x) { T x2 = x * x; - T num = T(1) + T(-0.9622845476351338) * x2; - T den = T(1) + x2 * (T(-4.252711329946427) + T(1.0108965067534537) * x2); - return T(M_PI) * x * num / den; + T num = T(1) + x2 * T(-0.09776575533683811); + T den = T(1) + x2 * (T(-0.43119539396382) + x2 * T(0.0105011966117302)); + return x * num / den; } @@ -62,7 +62,7 @@ inline T ln_cosh_3_4(T x) { } -/** 1st-order antiderivative anti-aliasing (ADAA) for tanh. */ +/** 1st-order ADAA for tanh. */ template struct TanhADAA1 { T xPrev = T(0); @@ -92,7 +92,19 @@ struct TanhADAA1 { }; -/** 4-pole transistor ladder filter using TPT/ZDF (Zero-Delay Feedback). +/** 4-pole transistor ladder filter using TPT (Topology-Preserving Transform). +Feedback uses previous sample (i.e. 1 sample delayed) to avoid iterative solving. + +For the equivalent state / trapezoidal integrator approach: +Zavalishin, V. "The Art of VA Filter Design". 2018 +https://www.native-instruments.com/fileadmin/ni_media/downloads/pdf/VAFilterDesign_2.1.2.pdf + +For Cytomic's formulation: +https://cytomic.com/files/dsp/SvfLinearTrapOptimised2.pdf + +For antiderivative anti-aliasing (ADAA): +Parker, J., Zavalishin, V., Le Bihan, E. "Reducing the Aliasing of Nonlinear Waveshaping Using Continuous-Time Convolution". DAFx 2016 +https://dafx.de/paper-archive/2016/dafxpapers/20-DAFx-16_paper_41-PN.pdf */ template struct LadderFilter { @@ -126,13 +138,14 @@ struct LadderFilter { void process(Frame& frame) { // Pre-warped frequency - T g = tan_pi_1_2(frame.cutoff); + T g = tan_1_2(T(M_PI) * frame.cutoff); // Integrator gain T G = g / (T(1) + g); // Feedback path // Apply resonance scaling and soft-clip with tanh T feedback = adaa[4].process(frame.resonance * state[3]); + // T feedback = tanh_2_3(frame.resonance * state[3]); T u = frame.input - feedback; // Stage 0 From cca3480c14d89758a0e3a0e44387698761130a62 Mon Sep 17 00:00:00 2001 From: Andrew Belt Date: Tue, 6 Jan 2026 17:03:20 -0500 Subject: [PATCH 18/18] VCF: Use x/sqrt(x^2+1) soft clipping instead of tanh(x). --- src/VCF.cpp | 81 ++++++++++++++++------------------------------------- 1 file changed, 24 insertions(+), 57 deletions(-) diff --git a/src/VCF.cpp b/src/VCF.cpp index 2c343f5..8f8f399 100644 --- a/src/VCF.cpp +++ b/src/VCF.cpp @@ -9,6 +9,14 @@ inline T rcp_newton1(T x) { } +/** Approximates 1/sqrt(x), using the rsqrt() instruction and 1 Newton-Raphson refinement */ +template +inline T rsqrt_newton1(T x) { + T y = simd::rsqrt(x); + return y * (T(3) - x * y * y) * T(0.5); +} + + /** Approximates tan(x) for x in [0, pi*0.5). Optimized coefficients for max relative error: 2.78e-05. */ @@ -21,72 +29,37 @@ inline T tan_1_2(T x) { } -/** Approximates tanh(x) for x in [-4, 4]. -Optimized coefficients for max relative error: 5.56e-07. -*/ -template -inline T tanh_2_3(T x) { - x = simd::clamp(x, T(-4), T(4)); - T x2 = x * x; - T num = T(1) + x2 * (T(0.11823399425250458) + x2 * T(0.0017593473306865004)); - T den = T(1) + x2 * (T(0.4515649138214517) + x2 * (T(0.01895237471013944) + x2 * T(7.340776670083926e-05))); - return x * num / den; -} - - -/** Approximates ln(cosh(x)) for all x. -Optimized coefficients for max derivative relative error: 2.18e-04. -Max absolute error: 2.33e-04. -*/ -template -inline T ln_cosh_3_3(T x) { - x = simd::abs(x); - T x2 = x * x; - T num = T(1) + x * (T(0.5536821172234367) + x * (T(0.25181887093756017) + x * T(-0.00022483065845568806))); - T den = T(1) + x * (T(0.5571167780242312) + x * (T(0.4011383699340152) + x * T(0.12250932588907415))); - return x2 * T(0.5) * num / den; -} - - -/** Approximates ln(cosh(x)) for all x. -Optimized coefficients for max derivative relative error: 1.73e-05. -Max absolute error: 1.59e-05. -*/ -template -inline T ln_cosh_3_4(T x) { - x = simd::abs(x); - T x2 = x * x; - T num = T(1) + x * (T(0.6818733052030992) + x * (T(0.3310714761981197) + x * T(0.07328250587472884))); - T den = T(1) + x * (T(0.6818667996836973) + x * (T(0.4970931090131887) + x * (T(0.18905704720922234) + x * T(0.0366985697841935)))); - return x2 * T(0.5) * num / den; -} - - -/** 1st-order ADAA for tanh. */ +/** 1st-order ADAA for softclip: f(x) = x/sqrt(x²+1), F(x) = sqrt(x²+1). */ template -struct TanhADAA1 { +struct SoftclipADAA1 { T xPrev = T(0); - T FxPrev = T(0); + T fPrev = T(0); // f(0) = 0 + T FPrev = T(1); // F(0) = sqrt(0+1) = 1 void reset() { xPrev = T(0); - FxPrev = T(0); + fPrev = T(0); + FPrev = T(1); } T process(T x) { const T eps = T(1e-5); T diff = x - xPrev; - T Fx = ln_cosh_3_3(x); - T adaaResult = (Fx - FxPrev) / diff; + // f = x/sqrt(x^2+1), F = sqrt(x^2+1) + T x2p1 = x * x + T(1); + T r = rsqrt_newton1(x2p1); + T f = x * r; + T F = x2p1 * r; - T midpoint = (x + xPrev) * T(0.5); - T fallbackResult = tanh_2_3(midpoint); + T adaaResult = (F - FPrev) * rcp_newton1(diff); + T fallbackResult = (f + fPrev) * T(0.5); T y = simd::ifelse(simd::abs(diff) > eps, adaaResult, fallbackResult); xPrev = x; - FxPrev = Fx; + fPrev = f; + FPrev = F; return y; } }; @@ -109,7 +82,7 @@ https://dafx.de/paper-archive/2016/dafxpapers/20-DAFx-16_paper_41-PN.pdf template struct LadderFilter { T state[4]; - TanhADAA1 adaa[5]; + SoftclipADAA1 adaa[5]; struct Frame { T input; @@ -143,35 +116,29 @@ struct LadderFilter { T G = g / (T(1) + g); // Feedback path - // Apply resonance scaling and soft-clip with tanh T feedback = adaa[4].process(frame.resonance * state[3]); - // T feedback = tanh_2_3(frame.resonance * state[3]); T u = frame.input - feedback; // Stage 0 T sat0 = adaa[0].process(u); - // T sat0 = tanh_2_3(u); T v0 = G * (sat0 - state[0]); T y0 = v0 + state[0]; state[0] = y0 + v0; // Stage 1 T sat1 = adaa[1].process(y0); - // T sat1 = tanh_2_3(y0); T v1 = G * (sat1 - state[1]); T y1 = v1 + state[1]; state[1] = y1 + v1; // Stage 2 T sat2 = adaa[2].process(y1); - // T sat2 = tanh_2_3(y1); T v2 = G * (sat2 - state[2]); T y2 = v2 + state[2]; state[2] = y2 + v2; // Stage 3 T sat3 = adaa[3].process(y2); - // T sat3 = tanh_2_3(y2); T v3 = G * (sat3 - state[3]); T y3 = v3 + state[3]; state[3] = y3 + v3;