From 11ee02c0d269865bbee2a3a0e9c2216e85658efa Mon Sep 17 00:00:00 2001
From: Andrew Belt <andrewpbelt@gmail.com>
Date: Mon, 29 Dec 2025 03:09:08 -0500
Subject: [PATCH 01/18] VCO: Increase frequency range to about 3 to 20,000 Hz.
 Make sine pure instead of quadratic. Add DC blocker to all outputs. Improve
 performance.

---
 src/VCO.cpp | 411 ++++++++++++++++++++++++++++++++++------------------
 1 file changed, 267 insertions(+), 144 deletions(-)
diff --git a/src/VCO.cpp b/src/VCO.cpp
index 57b5683..f65237a 100644
--- a/src/VCO.cpp
+++ b/src/VCO.cpp
@@ -1,66 +1,222 @@
 #include "plugin.hpp"
 
 
-using simd::float_4;
-
+// TODO: Remove these DSP classes after released in Rack SDK
 
-// Accurate only on [0, 1]
-template <typename T>
-T sin2pi_pade_05_7_6(T x) {
-	x -= 0.5f;
-	return (T(-6.28319) * x + T(35.353) * simd::pow(x, 3) - T(44.9043) * simd::pow(x, 5) + T(16.0951) * simd::pow(x, 7))
-	       / (1 + T(0.953136) * simd::pow(x, 2) + T(0.430238) * simd::pow(x, 4) + T(0.0981408) * simd::pow(x, 6));
+inline simd::float_4 ifelse(simd::float_4 mask, simd::float_4 a, simd::float_4 b) {
+	return simd::float_4(_mm_blendv_ps(b.v, a.v, mask.v));
 }
 
+/** Approximates sin(2pi x) in the domain [0, 1].
+Evaluates a 7th order odd polynomial on the range [0, 0.25] after folding the argument.
+Optimized for THD and max error.
+THD -84 dB, max error 1e-04
+*/
 template <typename T>
-T sin2pi_pade_05_5_4(T x) {
-	x -= 0.5f;
-	return (T(-6.283185307) * x + T(33.19863968) * simd::pow(x, 3) - T(32.44191367) * simd::pow(x, 5))
-	       / (1 + T(1.296008659) * simd::pow(x, 2) + T(0.7028072946) * simd::pow(x, 4));
+T sin2pi_fast(T x) {
+	// Get sign of result
+	T sign = ::ifelse(x >= T(0.5), -1, 1);
+	// Shift if negative
+	x = ::ifelse(x >= T(0.5), x - T(0.5), x);
+	// Flip if >0.25
+	x = ::ifelse(x >= T(0.25), T(0.5) - x, x);
+	T x2 = x * x;
+	const T c1 = 6.281422578399;
+	const T c3 = -41.122058514502;
+	const T c5 = 74.010711837743;
+	return sign * x * (c1 + x2 * (c3 + x2 * c5));
 }
 
-template <typename T>
-T expCurve(T x) {
-	return (3 + x * (-13 + 5 * x)) / (3 + 2 * x);
-}
 
+/** One-pole low-pass filter, exponential moving average.
+-6 dB/octave slope.
+Useful for leaky integrators and smoothing control signals.
+Has a pole at (1 - alpha) and a zero at 0.
+*/
+struct OnePoleLowpass {
+	float alpha = 0.f;
 
-template <int OVERSAMPLE, int QUALITY, typename T>
-struct VoltageControlledOscillator {
-	bool analog = false;
-	bool soft = false;
-	bool syncEnabled = false;
-	// For optimizing in serial code
-	int channels = 0;
+	/** Sets the cutoff frequency where gain is -3 dB.
+	f = f_c / f_s, the normalized frequency in the range [0, 0.5].
+	*/
+	void setCutoff(float f) {
+		alpha = 1.f - std::exp(-2.f * M_PI * f);
+	}
 
-	T lastSyncValue = 0.f;
+	template <typename T = float>
+	struct State {
+		T y = 0.f;
+	};
+
+	/** Advances the state with input x. Returns the output. */
+	template <typename T>
+	T process(State<T>& s, T x) {
+		s.y += alpha * (x - s.y);
+		return s.y;
+	}
+
+	/** Computes the frequency response at normalized frequency f. */
+	std::complex<float> getResponse(float f) const {
+		float omega = 2.f * M_PI * f;
+		std::complex<float> z = std::exp(std::complex<float>(0.f, omega));
+		return alpha * z / (z - (1.f - alpha));
+	}
+	float getMagnitude(float f) const {
+		return std::abs(getResponse(f));
+	}
+	float getPhase(float f) const {
+		return std::arg(getResponse(f));
+	}
+};
+
+
+/** One-pole high-pass filter.
+6 dB/octave slope.
+Useful for DC-blocking.
+Has a pole at (1 - alpha) and a zero at 1.
+*/
+struct OnePoleHighpass : OnePoleLowpass {
+	template <typename T>
+	T process(State<T>& s, T x) {
+		return x - OnePoleLowpass::process(s, x);
+	}
+
+	std::complex<float> getResponse(float f) const {
+		return 1.f - OnePoleLowpass::getResponse(f);
+	}
+	float getMagnitude(float f) const {
+		return std::abs(getResponse(f));
+	}
+	float getPhase(float f) const {
+		return std::arg(getResponse(f));
+	}
+};
+
+
+/** Computes the minimum-phase bandlimited step (MinBLEP)
+Z: number of zero-crossings on each side of the original symmetric sync signal
+O: oversample factor
+output: must be length `(2 * Z) * O`.
+First sample is >0. Last sample is 1.
+
+Algorithm from "Hard Sync Without Aliasing" by Eli Brandt (2001).
+https://www.cs.cmu.edu/~eli/papers/icmc01-hardsync.pdf
+*/
+void minBlepImpulse(int Z, int O, float* output);
+
+
+template <int Z, int O, typename T = float>
+struct MinBlepGenerator {
+	T buffer[2 * Z] = {};
+	int bufferIndex = 0;
+
+	/** Reordered impulse response for adjacent access, minus 1.0.
+	Padded at end with zeros so impulseReordered[o][2 * Z] can be loaded into a simd::float_4.
+	*/
+	alignas(16) float impulseReordered[O][2 * Z + 4] = {};
+
+	MinBlepGenerator() {
+		float impulse[2 * Z][O];
+		dsp::minBlepImpulse(Z, O, &impulse[0][0]);
+
+		// Transpose array and pre-subtract 1.
+		for (int o = 0; o < O; o++) {
+			for (int z = 0; z < 2 * Z; z++) {
+				impulseReordered[o][z] = impulse[z][o] - 1.f;
+			}
+		}
+	}
+
+	/** Places a discontinuity with magnitude `x` at -1 < p <= 0 relative to the current frame.
+	For example if a square wave jumps from 1 to -1 at the subsample time 0.1 frames ago, use insertDiscontinuity(-0.1, -2.0)
+	*/
+	void insertDiscontinuity(float p, T x) {
+		if (!(-1 < p && p <= 0))
+			return;
+
+		// Calculate oversampling index and fractional part
+		float subsample = -p * O;
+		int o = (int) subsample;
+		float frac = subsample - o;
+
+		// For each zero crossing, interpolate impulse response between oversample indices
+		// Compute 4 at a time with SIMD
+		for (int z = 0; z < 2 * Z; z += 4) {
+			simd::float_4 ir0 = simd::float_4::load(&impulseReordered[o][z]);
+			// If oversample index reaches next zero crossing, wrap to next z
+			simd::float_4 ir1 = simd::float_4::load((o + 1 < O) ? &impulseReordered[o + 1][z] : &impulseReordered[0][z + 1]);
+			simd::float_4 ir = (ir0 + (ir1 - ir0) * frac);
+
+			for (int zz = 0; zz < 4; zz++) {
+				buffer[(bufferIndex + z + zz) % (2 * Z)] += ir[zz] * x;
+			}
+		}
+		// for (int z = 0; z < 2 * Z; z++) {
+		// 	float ir0 = impulseReordered[o][z];
+		// 	float ir1 = (o + 1 < O) ? impulseReordered[o + 1][z] : impulseReordered[0][z + 1];
+		// 	float ir = (ir0 + (ir1 - ir0) * frac);
+		// 	buffer[(bufferIndex + z) % (2 * Z)] += ir * x;
+		// }
+	}
+
+	/** Should be called every frame after inserting any discontinuities. */
+	T process() {
+		T v = buffer[bufferIndex];
+		buffer[bufferIndex] = T(0);
+		bufferIndex = (bufferIndex + 1) % (2 * Z);
+		return v;
+	}
+};
+
+
+template <typename T>
+struct VCOProcessor {
 	T phase = 0.f;
-	T freq = 0.f;
-	T pulseWidth = 0.5f;
+	T lastSyncValue = 0.f;
 	T syncDirection = 1.f;
 	T sqrState = 1.f;
 
-	dsp::TRCFilter<T> sqrFilter;
-
-	dsp::MinBlepGenerator<QUALITY, OVERSAMPLE, T> sqrMinBlep;
-	dsp::MinBlepGenerator<QUALITY, OVERSAMPLE, T> sawMinBlep;
-	dsp::MinBlepGenerator<QUALITY, OVERSAMPLE, T> triMinBlep;
-	dsp::MinBlepGenerator<QUALITY, OVERSAMPLE, T> sinMinBlep;
+	OnePoleHighpass dcFilter;
+	OnePoleHighpass::State<T> dcFilterStateSqr;
+	OnePoleHighpass::State<T> dcFilterStateSaw;
+	OnePoleHighpass::State<T> dcFilterStateTri;
+	OnePoleHighpass::State<T> dcFilterStateSin;
 
-	T sqrValue = 0.f;
-	T sawValue = 0.f;
-	T triValue = 0.f;
-	T sinValue = 0.f;
+	MinBlepGenerator<16, 16, T> sqrMinBlep;
+	MinBlepGenerator<16, 16, T> sawMinBlep;
+	MinBlepGenerator<16, 16, T> triMinBlep;
+	MinBlepGenerator<16, 16, T> sinMinBlep;
 
-	void setPulseWidth(T pulseWidth) {
-		const float pwMin = 0.01f;
-		this->pulseWidth = simd::clamp(pulseWidth, pwMin, 1.f - pwMin);
+	void setSampleTime(float sampleTime) {
+		dcFilter.setCutoff(std::min(0.4f, 40.f * sampleTime));
 	}
 
-	void process(float deltaTime, T syncValue) {
+	struct Frame {
+		/** Number of channels valid in SIMD type
+		For optimizing serial operations.
+		*/
+		uint8_t channels = 0;
+		bool soft = false;
+		bool syncEnabled = false;
+		bool sqrEnabled = false;
+		bool sawEnabled = false;
+		bool triEnabled = false;
+		bool sinEnabled = false;
+		T pulseWidth = 0.5f;
+		T sync = 0.f;
+		T freq = 0.f;
+
+		// Outputs
+		T sqr = 0.f;
+		T saw = 0.f;
+		T tri = 0.f;
+		T sin = 0.f;
+	};
+
+	void process(Frame& frame, float deltaTime) {
 		// Advance phase
-		T deltaPhase = simd::clamp(freq * deltaTime, 0.f, 0.35f);
-		if (soft) {
+		T deltaPhase = simd::clamp(frame.freq * deltaTime, 0.f, 0.49f);
+		if (frame.soft) {
 			// Reverse direction
 			deltaPhase *= syncDirection;
 		}
@@ -80,7 +236,7 @@ struct VoltageControlledOscillator {
 		if (wrapM) {
 			T wrapPhase = (syncDirection == -1.f) & 1.f;
 			T wrapCrossing = (wrapPhase - (phase - deltaPhase)) / deltaPhase;
-			for (int i = 0; i < channels; i++) {
+			for (int i = 0; i < frame.channels; i++) {
 				if (wrapM & (1 << i)) {
 					T mask = simd::movemaskInverse<T>(1 << i);
 					// TODO: MinBlepGenerator::insertDiscontinuity() should handle subframes outside the range -1 < p <= 0 instead of failing silently.
@@ -92,12 +248,16 @@ struct VoltageControlledOscillator {
 		}
 		sqrState = simd::ifelse(wrapMask, syncDirection, sqrState);
 
+		// Pulse width
+		const float pwMin = 0.01f;
+		T pulseWidth = simd::clamp(frame.pulseWidth, pwMin, 1.f - pwMin);
+
 		// Jump sqr when crossing `pulseWidth`
 		T pwMask = (syncDirection == sqrState) & ((syncDirection == 1.f) ^ (phase < pulseWidth));
 		int pw = simd::movemask(pwMask);
 		if (pw) {
 			T pulseCrossing = (pulseWidth - (phase - deltaPhase)) / deltaPhase;
-			for (int i = 0; i < channels; i++) {
+			for (int i = 0; i < frame.channels; i++) {
 				if (pw & (1 << i)) {
 					T mask = simd::movemaskInverse<T>(1 << i);
 					float p = clamp(pulseCrossing[i] - 1.f, -1.f, 0.f);
@@ -112,7 +272,7 @@ struct VoltageControlledOscillator {
 		T halfCrossing = (0.5f - (phase - deltaPhase)) / deltaPhase;
 		int halfMask = simd::movemask((0 < halfCrossing) & (halfCrossing <= 1.f));
 		if (halfMask) {
-			for (int i = 0; i < channels; i++) {
+			for (int i = 0; i < frame.channels; i++) {
 				if (halfMask & (1 << i)) {
 					T mask = simd::movemaskInverse<T>(1 << i);
 					float p = halfCrossing[i] - 1.f;
@@ -124,20 +284,20 @@ struct VoltageControlledOscillator {
 
 		// Detect sync
 		// Might be NAN or outside of [0, 1) range
-		if (syncEnabled) {
-			T deltaSync = syncValue - lastSyncValue;
+		if (frame.syncEnabled) {
+			T deltaSync = frame.sync - lastSyncValue;
 			T syncCrossing = -lastSyncValue / deltaSync;
-			lastSyncValue = syncValue;
-			T sync = (0.f < syncCrossing) & (syncCrossing <= 1.f) & (syncValue >= 0.f);
+			lastSyncValue = frame.sync;
+			T sync = (0.f < syncCrossing) & (syncCrossing <= 1.f) & (frame.sync >= 0.f);
 			int syncMask = simd::movemask(sync);
 			if (syncMask) {
-				if (soft) {
+				if (frame.soft) {
 					syncDirection = simd::ifelse(sync, -syncDirection, syncDirection);
 				}
 				else {
 					T newPhase = simd::ifelse(sync, (1.f - syncCrossing) * deltaPhase, phase);
 					// Insert minBLEP for sync
-					for (int i = 0; i < channels; i++) {
+					for (int i = 0; i < frame.channels; i++) {
 						if (syncMask & (1 << i)) {
 							T mask = simd::movemaskInverse<T>(1 << i);
 							float p = syncCrossing[i] - 1.f;
@@ -160,93 +320,56 @@ struct VoltageControlledOscillator {
 		}
 
 		// Square
-		sqrValue = sqrState;
-		sqrValue += sqrMinBlep.process();
-
-		if (analog) {
-			sqrFilter.setCutoffFreq(20.f * deltaTime);
-			sqrFilter.process(sqrValue);
-			sqrValue = sqrFilter.highpass() * 0.95f;
+		if (frame.sqrEnabled) {
+			frame.sqr = sqrState;
+			frame.sqr += sqrMinBlep.process();
+			frame.sqr = dcFilter.process(dcFilterStateSqr, frame.sqr);
 		}
 
 		// Saw
-		sawValue = saw(phase);
-		sawValue += sawMinBlep.process();
+		if (frame.sawEnabled) {
+			frame.saw = saw(phase);
+			frame.saw += sawMinBlep.process();
+			frame.saw = dcFilter.process(dcFilterStateSaw, frame.saw);
+		}
 
 		// Tri
-		triValue = tri(phase);
-		triValue += triMinBlep.process();
+		if (frame.triEnabled) {
+			frame.tri = tri(phase);
+			frame.tri += triMinBlep.process();
+			frame.tri = dcFilter.process(dcFilterStateTri, frame.tri);
+		}
 
 		// Sin
-		sinValue = sin(phase);
-		sinValue += sinMinBlep.process();
-	}
-
-	T sin(T phase) {
-		T v;
-		if (analog) {
-			// Quadratic approximation of sine, slightly richer harmonics
-			T halfPhase = (phase < 0.5f);
-			T x = phase - simd::ifelse(halfPhase, 0.25f, 0.75f);
-			v = 1.f - 16.f * simd::pow(x, 2);
-			v *= simd::ifelse(halfPhase, 1.f, -1.f);
+		if (frame.sinEnabled) {
+			frame.sin = sin(phase);
+			frame.sin += sinMinBlep.process();
+			frame.sin = dcFilter.process(dcFilterStateSin, frame.sin);
 		}
-		else {
-			v = sin2pi_pade_05_5_4(phase);
-			// v = sin2pi_pade_05_7_6(phase);
-			// v = simd::sin(2 * T(M_PI) * phase);
-		}
-		return v;
-	}
-	T sin() {
-		return sinValue;
 	}
 
-	T tri(T phase) {
-		T v;
-		if (analog) {
-			T x = phase + 0.25f;
-			x -= simd::trunc(x);
-			T halfX = (x >= 0.5f);
-			x *= 2;
-			x -= simd::trunc(x);
-			v = expCurve(x) * simd::ifelse(halfX, 1.f, -1.f);
-		}
-		else {
-			v = 1 - 4 * simd::fmin(simd::fabs(phase - 0.25f), simd::fabs(phase - 1.25f));
-		}
-		return v;
-	}
-	T tri() {
-		return triValue;
+	T light() const {
+		return sin(phase);
 	}
 
-	T saw(T phase) {
-		T v;
+	static T saw(T phase) {
 		T x = phase + 0.5f;
 		x -= simd::trunc(x);
-		if (analog) {
-			v = -expCurve(x);
-		}
-		else {
-			v = 2 * x - 1;
-		}
-		return v;
+		return 2 * x - 1;
 	}
-	T saw() {
-		return sawValue;
+	static T tri(T phase) {
+		return 1 - 4 * simd::fmin(simd::fabs(phase - 0.25f), simd::fabs(phase - 1.25f));
 	}
-
-	T sqr() {
-		return sqrValue;
-	}
-
-	T light() {
-		return simd::sin(2 * T(M_PI) * phase);
+	static T sin(T phase) {
+		return sin2pi_fast(phase);
+		// return simd::sin(2.f * M_PI * phase);
 	}
 };
 
 
+using simd::float_4;
+
+
 struct VCO : Module {
 	enum ParamIds {
 		MODE_PARAM, // removed
@@ -281,14 +404,14 @@ struct VCO : Module {
 		NUM_LIGHTS
 	};
 
-	VoltageControlledOscillator<16, 16, float_4> oscillators[4];
+	VCOProcessor<float_4> processors[4];
 	dsp::ClockDivider lightDivider;
 
 	VCO() {
 		config(NUM_PARAMS, NUM_INPUTS, NUM_OUTPUTS, NUM_LIGHTS);
 		configSwitch(LINEAR_PARAM, 0.f, 1.f, 0.f, "FM mode", {"1V/octave", "Linear"});
 		configSwitch(SYNC_PARAM, 0.f, 1.f, 1.f, "Sync mode", {"Soft", "Hard"});
-		configParam(FREQ_PARAM, -54.f, 54.f, 0.f, "Frequency", " Hz", dsp::FREQ_SEMITONE, dsp::FREQ_C4);
+		configParam(FREQ_PARAM, -75.f, 75.f, 0.f, "Frequency", " Hz", dsp::FREQ_SEMITONE, dsp::FREQ_C4);
 		configParam(FM_PARAM, -1.f, 1.f, 0.f, "Frequency modulation", "%", 0.f, 100.f);
 		getParamQuantity(FM_PARAM)->randomizeEnabled = false;
 		configParam(PW_PARAM, 0.01f, 0.99f, 0.5f, "Pulse width", "%", 0.f, 100.f);
@@ -308,22 +431,29 @@ struct VCO : Module {
 		lightDivider.setDivision(16);
 	}
 
+	void onSampleRateChange(const SampleRateChangeEvent& e) override {
+		for (int c = 0; c < 16; c += 4) {
+			processors[c / 4].setSampleTime(e.sampleTime);
+		}
+	}
+
 	void process(const ProcessArgs& args) override {
+		VCOProcessor<float_4>::Frame frame;
 		float freqParam = params[FREQ_PARAM].getValue() / 12.f;
 		float fmParam = params[FM_PARAM].getValue();
 		float pwParam = params[PW_PARAM].getValue();
 		float pwCvParam = params[PW_CV_PARAM].getValue();
 		bool linear = params[LINEAR_PARAM].getValue() > 0.f;
-		bool soft = params[SYNC_PARAM].getValue() <= 0.f;
-
+		frame.soft = params[SYNC_PARAM].getValue() <= 0.f;
+		frame.syncEnabled = inputs[SYNC_INPUT].isConnected();
+		frame.sqrEnabled = outputs[SQR_OUTPUT].isConnected();
+		frame.sawEnabled = outputs[SAW_OUTPUT].isConnected();
+		frame.triEnabled = outputs[TRI_OUTPUT].isConnected();
+		frame.sinEnabled = outputs[SIN_OUTPUT].isConnected();
 		int channels = std::max(inputs[PITCH_INPUT].getChannels(), 1);
 
 		for (int c = 0; c < channels; c += 4) {
-			auto& oscillator = oscillators[c / 4];
-			oscillator.channels = std::min(channels - c, 4);
-			// removed
-			oscillator.analog = true;
-			oscillator.soft = soft;
+			frame.channels = std::min(channels - c, 4);
 
 			// Get frequency
 			float_4 pitch = freqParam + inputs[PITCH_INPUT].getPolyVoltageSimd<float_4>(c);
@@ -336,26 +466,19 @@ struct VCO : Module {
 				freq = dsp::FREQ_C4 * dsp::exp2_taylor5(pitch);
 				freq += dsp::FREQ_C4 * inputs[FM_INPUT].getPolyVoltageSimd<float_4>(c) * fmParam;
 			}
-			freq = clamp(freq, 0.f, args.sampleRate / 2.f);
-			oscillator.freq = freq;
+			frame.freq = clamp(freq, 0.f, args.sampleRate / 2.f);
 
 			// Get pulse width
-			float_4 pw = pwParam + inputs[PW_INPUT].getPolyVoltageSimd<float_4>(c) / 10.f * pwCvParam;
-			oscillator.setPulseWidth(pw);
+			frame.pulseWidth = pwParam + inputs[PW_INPUT].getPolyVoltageSimd<float_4>(c) / 10.f * pwCvParam;
 
-			oscillator.syncEnabled = inputs[SYNC_INPUT].isConnected();
-			float_4 sync = inputs[SYNC_INPUT].getPolyVoltageSimd<float_4>(c);
-			oscillator.process(args.sampleTime, sync);
+			frame.sync = inputs[SYNC_INPUT].getPolyVoltageSimd<float_4>(c);
+			processors[c / 4].process(frame, args.sampleTime);
 
 			// Set output
-			if (outputs[SIN_OUTPUT].isConnected())
-				outputs[SIN_OUTPUT].setVoltageSimd(5.f * oscillator.sin(), c);
-			if (outputs[TRI_OUTPUT].isConnected())
-				outputs[TRI_OUTPUT].setVoltageSimd(5.f * oscillator.tri(), c);
-			if (outputs[SAW_OUTPUT].isConnected())
-				outputs[SAW_OUTPUT].setVoltageSimd(5.f * oscillator.saw(), c);
-			if (outputs[SQR_OUTPUT].isConnected())
-				outputs[SQR_OUTPUT].setVoltageSimd(5.f * oscillator.sqr(), c);
+			outputs[SQR_OUTPUT].setVoltageSimd(5.f * frame.sqr, c);
+			outputs[SAW_OUTPUT].setVoltageSimd(5.f * frame.saw, c);
+			outputs[TRI_OUTPUT].setVoltageSimd(5.f * frame.tri, c);
+			outputs[SIN_OUTPUT].setVoltageSimd(5.f * frame.sin, c);
 		}
 
 		outputs[SIN_OUTPUT].setChannels(channels);
@@ -366,7 +489,7 @@ struct VCO : Module {
 		// Light
 		if (lightDivider.process()) {
 			if (channels == 1) {
-				float lightValue = oscillators[0].light()[0];
+				float lightValue = processors[0].light()[0];
 				lights[PHASE_LIGHT + 0].setSmoothBrightness(-lightValue, args.sampleTime * lightDivider.getDivision());
 				lights[PHASE_LIGHT + 1].setSmoothBrightness(lightValue, args.sampleTime * lightDivider.getDivision());
 				lights[PHASE_LIGHT + 2].setBrightness(0.f);
@@ -377,7 +500,7 @@ struct VCO : Module {
 				lights[PHASE_LIGHT + 2].setBrightness(1.f);
 			}
 			lights[LINEAR_LIGHT].setBrightness(linear);
-			lights[SOFT_LIGHT].setBrightness(soft);
+			lights[SOFT_LIGHT].setBrightness(frame.soft);
 		}
 	}
 };

From b71bf3abcfc21a0b3670cbc8fffc58974ce926af Mon Sep 17 00:00:00 2001
From: Andrew Belt <andrewpbelt@gmail.com>
Date: Mon, 29 Dec 2025 04:05:31 -0500
Subject: [PATCH 02/18] VCO: Make triangle output an integrator of the square.
 Don't insert minBLEPs unless an output is enabled (connected).

---
 src/VCO.cpp | 146 +++++++++++++++++++++++++++++-----------------------
 1 file changed, 81 insertions(+), 65 deletions(-)

diff --git a/src/VCO.cpp b/src/VCO.cpp
index f65237a..b1eea10 100644
--- a/src/VCO.cpp
+++ b/src/VCO.cpp
@@ -175,6 +175,7 @@ struct VCOProcessor {
 	T lastSyncValue = 0.f;
 	T syncDirection = 1.f;
 	T sqrState = 1.f;
+	T triFilterState = 0.f;
 
 	OnePoleHighpass dcFilter;
 	OnePoleHighpass::State<T> dcFilterStateSqr;
@@ -184,7 +185,6 @@ struct VCOProcessor {
 
 	MinBlepGenerator<16, 16, T> sqrMinBlep;
 	MinBlepGenerator<16, 16, T> sawMinBlep;
-	MinBlepGenerator<16, 16, T> triMinBlep;
 	MinBlepGenerator<16, 16, T> sinMinBlep;
 
 	void setSampleTime(float sampleTime) {
@@ -230,54 +230,58 @@ struct VCOProcessor {
 		T phaseFloor = simd::floor(phase);
 		phase -= phaseFloor;
 
-		// Jump sqr when phase crosses 1, or crosses 0 if running backwards
-		T wrapMask = (phaseFloor != 0.f);
-		int wrapM = simd::movemask(wrapMask);
-		if (wrapM) {
-			T wrapPhase = (syncDirection == -1.f) & 1.f;
-			T wrapCrossing = (wrapPhase - (phase - deltaPhase)) / deltaPhase;
-			for (int i = 0; i < frame.channels; i++) {
-				if (wrapM & (1 << i)) {
-					T mask = simd::movemaskInverse<T>(1 << i);
-					// TODO: MinBlepGenerator::insertDiscontinuity() should handle subframes outside the range -1 < p <= 0 instead of failing silently.
-					float p = clamp(wrapCrossing[i] - 1.f, -1.f, 0.f);
-					T x = mask & (2.f * syncDirection);
-					sqrMinBlep.insertDiscontinuity(p, x);
+		if (frame.sqrEnabled || frame.triEnabled) {
+			// Jump sqr when phase crosses 1, or crosses 0 if running backwards
+			T wrapMask = (phaseFloor != 0.f);
+			int wrapM = simd::movemask(wrapMask);
+			if (wrapM) {
+				T wrapPhase = (syncDirection == -1.f) & 1.f;
+				T wrapCrossing = (wrapPhase - (phase - deltaPhase)) / deltaPhase;
+				for (int i = 0; i < frame.channels; i++) {
+					if (wrapM & (1 << i)) {
+						T mask = simd::movemaskInverse<T>(1 << i);
+						// TODO: MinBlepGenerator::insertDiscontinuity() should handle subframes outside the range -1 < p <= 0 instead of failing silently.
+						float p = clamp(wrapCrossing[i] - 1.f, -1.f, 0.f);
+						T x = mask & (2.f * syncDirection);
+						sqrMinBlep.insertDiscontinuity(p, x);
+					}
 				}
 			}
-		}
-		sqrState = simd::ifelse(wrapMask, syncDirection, sqrState);
-
-		// Pulse width
-		const float pwMin = 0.01f;
-		T pulseWidth = simd::clamp(frame.pulseWidth, pwMin, 1.f - pwMin);
-
-		// Jump sqr when crossing `pulseWidth`
-		T pwMask = (syncDirection == sqrState) & ((syncDirection == 1.f) ^ (phase < pulseWidth));
-		int pw = simd::movemask(pwMask);
-		if (pw) {
-			T pulseCrossing = (pulseWidth - (phase - deltaPhase)) / deltaPhase;
-			for (int i = 0; i < frame.channels; i++) {
-				if (pw & (1 << i)) {
-					T mask = simd::movemaskInverse<T>(1 << i);
-					float p = clamp(pulseCrossing[i] - 1.f, -1.f, 0.f);
-					T x = mask & (-2.f * syncDirection);
-					sqrMinBlep.insertDiscontinuity(p, x);
+			sqrState = simd::ifelse(wrapMask, syncDirection, sqrState);
+
+			// Pulse width
+			const float pwMin = 0.01f;
+			T pulseWidth = simd::clamp(frame.pulseWidth, pwMin, 1.f - pwMin);
+
+			// Jump sqr when crossing `pulseWidth`
+			T pwMask = (syncDirection == sqrState) & ((syncDirection == 1.f) ^ (phase < pulseWidth));
+			int pw = simd::movemask(pwMask);
+			if (pw) {
+				T pulseCrossing = (pulseWidth - (phase - deltaPhase)) / deltaPhase;
+				for (int i = 0; i < frame.channels; i++) {
+					if (pw & (1 << i)) {
+						T mask = simd::movemaskInverse<T>(1 << i);
+						float p = clamp(pulseCrossing[i] - 1.f, -1.f, 0.f);
+						T x = mask & (-2.f * syncDirection);
+						sqrMinBlep.insertDiscontinuity(p, x);
+					}
 				}
 			}
+			sqrState = simd::ifelse(pwMask, -syncDirection, sqrState);
 		}
-		sqrState = simd::ifelse(pwMask, -syncDirection, sqrState);
-
-		// Jump saw when crossing 0.5
-		T halfCrossing = (0.5f - (phase - deltaPhase)) / deltaPhase;
-		int halfMask = simd::movemask((0 < halfCrossing) & (halfCrossing <= 1.f));
-		if (halfMask) {
-			for (int i = 0; i < frame.channels; i++) {
-				if (halfMask & (1 << i)) {
-					T mask = simd::movemaskInverse<T>(1 << i);
-					float p = halfCrossing[i] - 1.f;
-					T x = mask & (-2.f * syncDirection);
-					sawMinBlep.insertDiscontinuity(p, x);
+
+		if (frame.sawEnabled) {
+			// Jump saw when crossing 0.5
+			T halfCrossing = (0.5f - (phase - deltaPhase)) / deltaPhase;
+			int halfMask = simd::movemask((0 < halfCrossing) & (halfCrossing <= 1.f));
+			if (halfMask) {
+				for (int i = 0; i < frame.channels; i++) {
+					if (halfMask & (1 << i)) {
+						T mask = simd::movemaskInverse<T>(1 << i);
+						float p = halfCrossing[i] - 1.f;
+						T x = mask & (-2.f * syncDirection);
+						sawMinBlep.insertDiscontinuity(p, x);
+					}
 				}
 			}
 		}
@@ -303,15 +307,19 @@ struct VCOProcessor {
 							float p = syncCrossing[i] - 1.f;
 							T x;
 							// Assume that hard-syncing a square always resets it to HIGH
-							x = mask & (1.f - sqrState);
-							sqrState = simd::ifelse(mask, 1.f, sqrState);
-							sqrMinBlep.insertDiscontinuity(p, x);
-							x = mask & (saw(newPhase) - saw(phase));
-							sawMinBlep.insertDiscontinuity(p, x);
-							x = mask & (tri(newPhase) - tri(phase));
-							triMinBlep.insertDiscontinuity(p, x);
-							x = mask & (sin(newPhase) - sin(phase));
-							sinMinBlep.insertDiscontinuity(p, x);
+							if (frame.sqrEnabled || frame.triEnabled) {
+								x = mask & (1.f - sqrState);
+								sqrState = simd::ifelse(mask, 1.f, sqrState);
+								sqrMinBlep.insertDiscontinuity(p, x);
+							}
+							if (frame.sawEnabled) {
+								x = mask & (saw(newPhase) - saw(phase));
+								sawMinBlep.insertDiscontinuity(p, x);
+							}
+							if (frame.sinEnabled) {
+								x = mask & (sin(newPhase) - sin(phase));
+								sinMinBlep.insertDiscontinuity(p, x);
+							}
 						}
 					}
 					phase = newPhase;
@@ -319,13 +327,6 @@ struct VCOProcessor {
 			}
 		}
 
-		// Square
-		if (frame.sqrEnabled) {
-			frame.sqr = sqrState;
-			frame.sqr += sqrMinBlep.process();
-			frame.sqr = dcFilter.process(dcFilterStateSqr, frame.sqr);
-		}
-
 		// Saw
 		if (frame.sawEnabled) {
 			frame.saw = saw(phase);
@@ -333,11 +334,26 @@ struct VCOProcessor {
 			frame.saw = dcFilter.process(dcFilterStateSaw, frame.saw);
 		}
 
-		// Tri
-		if (frame.triEnabled) {
-			frame.tri = tri(phase);
-			frame.tri += triMinBlep.process();
-			frame.tri = dcFilter.process(dcFilterStateTri, frame.tri);
+		// Square
+		if (frame.sqrEnabled || frame.triEnabled) {
+			frame.sqr = sqrState;
+			frame.sqr += sqrMinBlep.process();
+			T triSqr = frame.sqr;
+			frame.sqr = dcFilter.process(dcFilterStateSqr, frame.sqr);
+
+			// Tri
+			if (frame.triEnabled) {
+				// Integrate square wave
+				const float triShape = 0.2f;
+				T triFreq = deltaTime * triShape * frame.freq;
+				// T alpha = 1.f - simd::exp(-2.f * M_PI * triFreq);
+				// Use bilinear transform to derive alpha
+				T alpha = 1 / (1 + 1 / (M_PI * triFreq));
+				triFilterState += alpha * (triSqr - triFilterState);
+				// Apply gain to roughly have unit amplitude at 0.5 pulseWidth. Depends on triShape.
+				frame.tri = triFilterState * 6.6f;
+				frame.tri = dcFilter.process(dcFilterStateTri, frame.tri);
+			}
 		}
 
 		// Sin

From e819498fd388755efcb876b37d1e33fddf4a29ac Mon Sep 17 00:00:00 2001
From: Andrew Belt <andrewpbelt@gmail.com>
Date: Tue, 30 Dec 2025 22:19:42 -0500
Subject: [PATCH 03/18] VCO: Use cubic interpolation for minBLEP impulse. Use
 more accurate sine.

---
 src/VCO.cpp | 204 ++++++++++++++++++++++++++++------------------------
 1 file changed, 111 insertions(+), 93 deletions(-)

diff --git a/src/VCO.cpp b/src/VCO.cpp
index b1eea10..5cb955e 100644
--- a/src/VCO.cpp
+++ b/src/VCO.cpp
@@ -3,30 +3,27 @@
 
 // TODO: Remove these DSP classes after released in Rack SDK
 
-inline simd::float_4 ifelse(simd::float_4 mask, simd::float_4 a, simd::float_4 b) {
-	return simd::float_4(_mm_blendv_ps(b.v, a.v, mask.v));
-}
-
-/** Approximates sin(2pi x) in the domain [0, 1].
-Evaluates a 7th order odd polynomial on the range [0, 0.25] after folding the argument.
-Optimized for THD and max error.
-THD -84 dB, max error 1e-04
+/** Evaluates sin(pi x) for x in [-1, 1].
+9th order polynomial with roots at 0, -1, and 1.
+Optimized coefficients for best THD (-103.7 dB) and max absolute error (6.68e-06).
 */
 template <typename T>
-T sin2pi_fast(T x) {
-	// Get sign of result
-	T sign = ::ifelse(x >= T(0.5), -1, 1);
-	// Shift if negative
-	x = ::ifelse(x >= T(0.5), x - T(0.5), x);
-	// Flip if >0.25
-	x = ::ifelse(x >= T(0.25), T(0.5) - x, x);
+inline T sin_pi_9(T x) {
 	T x2 = x * x;
-	const T c1 = 6.281422578399;
-	const T c3 = -41.122058514502;
-	const T c5 = 74.010711837743;
-	return sign * x * (c1 + x2 * (c3 + x2 * c5));
+	return x * (T(1) - x2) * (T(3.141521108990) + x2 * (T(-2.024773594411) + x2 * (T(0.517493161073) + x2 * T(-0.063691343423))));
 }
 
+/** Catmull-Rom cubic interpolation
+t is the fractional position between y1 and y2.
+*/
+template <typename T>
+T cubicInterp(T y0, T y1, T y2, T y3, T t) {
+	T t2 = t * t;
+	T t3 = t2 * t;
+	return y1 + T(0.5) * t * (y2 - y0)
+		+ t2 * (y0 - T(2.5)*y1 + T(2)*y2 - T(0.5)*y3)
+		+ t3 * (T(-0.5)*y0 + T(1.5)*y1 - T(1.5)*y2 + T(0.5)*y3);
+}
 
 /** One-pole low-pass filter, exponential moving average.
 -6 dB/octave slope.
@@ -50,7 +47,7 @@ struct OnePoleLowpass {
 
 	/** Advances the state with input x. Returns the output. */
 	template <typename T>
-	T process(State<T>& s, T x) {
+	T process(State<T>& s, T x) const {
 		s.y += alpha * (x - s.y);
 		return s.y;
 	}
@@ -77,7 +74,7 @@ Has a pole at (1 - alpha) and a zero at 1.
 */
 struct OnePoleHighpass : OnePoleLowpass {
 	template <typename T>
-	T process(State<T>& s, T x) {
+	T process(State<T>& s, T x) const {
 		return x - OnePoleLowpass::process(s, x);
 	}
 
@@ -93,82 +90,103 @@ struct OnePoleHighpass : OnePoleLowpass {
 };
 
 
-/** Computes the minimum-phase bandlimited step (MinBLEP)
-Z: number of zero-crossings on each side of the original symmetric sync signal
-O: oversample factor
-output: must be length `(2 * Z) * O`.
-First sample is >0. Last sample is 1.
-
-Algorithm from "Hard Sync Without Aliasing" by Eli Brandt (2001).
-https://www.cs.cmu.edu/~eli/papers/icmc01-hardsync.pdf
-*/
-void minBlepImpulse(int Z, int O, float* output);
-
-
-template <int Z, int O, typename T = float>
-struct MinBlepGenerator {
-	T buffer[2 * Z] = {};
-	int bufferIndex = 0;
-
-	/** Reordered impulse response for adjacent access, minus 1.0.
-	Padded at end with zeros so impulseReordered[o][2 * Z] can be loaded into a simd::float_4.
+template <int Z, int O>
+struct MinBlep {
+	/** Reordered impulse response for cubic interpolation, minus 1.0.
+	Z dimension has +1 padding at start (for z-1 wrap) and +4 at end (for SIMD + z+1 wrap).
+	Access via getImpulse() which handles o wrapping and z offset.
 	*/
-	alignas(16) float impulseReordered[O][2 * Z + 4] = {};
+	float impulseReordered[O][1 + 2 * Z + 4] = {};
 
-	MinBlepGenerator() {
+	MinBlep() {
 		float impulse[2 * Z][O];
 		dsp::minBlepImpulse(Z, O, &impulse[0][0]);
 
-		// Transpose array and pre-subtract 1.
+		// Fill impulseReordered with transposed data, minus 1.
+		// Storage index = z + 1 (to allow z = -1 access at index 0)
 		for (int o = 0; o < O; o++) {
+			// z = -1: before impulse starts
+			impulseReordered[o][0] = -1.f;
+			// z = 0 to 2*Z-1: actual impulse data
 			for (int z = 0; z < 2 * Z; z++) {
-				impulseReordered[o][z] = impulse[z][o] - 1.f;
+				impulseReordered[o][1 + z] = impulse[z][o] - 1.f;
+			}
+			// z = 2*Z to 2*Z+3: after impulse ends (for SIMD padding)
+			for (int z = 2 * Z; z < 2 * Z + 4; z++) {
+				impulseReordered[o][1 + z] = 0.f;
 			}
 		}
 	}
 
-	/** Places a discontinuity with magnitude `x` at -1 < p <= 0 relative to the current frame.
-	For example if a square wave jumps from 1 to -1 at the subsample time 0.1 frames ago, use insertDiscontinuity(-0.1, -2.0)
+	/** Get pointer to impulse data, handling o wrapping and z offset. */
+	const float* getImpulse(int o, int z) const {
+		int index = z * O + o;
+		z = index / O;
+		o = index % O;
+		if (o < 0) {
+			z -= 1;
+			o += O;
+		}
+		return &impulseReordered[o][z + 1];
+	}
+
+	template <typename T = float>
+	struct State {
+		// Double buffer of length 2 * Z
+		T buffer[4 * Z] = {};
+		int32_t bufferIndex = 0;
+	};
+
+	/** Places a discontinuity with magnitude `x` at 0 < p <= 1 relative to the current frame.
+	For example if a square wave will jump from 1 to -1 in 0.1 frames, use insertDiscontinuity(0.1, -2.0).
+
+	Note: In the deprecated MinBlepGenerator, `p` was in the range (-1, 0], so add 1 to p if updating to MinBlep.
 	*/
-	void insertDiscontinuity(float p, T x) {
-		if (!(-1 < p && p <= 0))
+	template <typename T>
+	void insertDiscontinuity(State<T>& s, float p, T x) const {
+		if (!(0 < p && p <= 1))
 			return;
 
-		// Calculate oversampling index and fractional part
-		float subsample = -p * O;
+		// Calculate impulse array index and fractional part
+		float subsample = (1 - p) * O;
 		int o = (int) subsample;
-		float frac = subsample - o;
+		float t = subsample - o;
 
-		// For each zero crossing, interpolate impulse response between oversample indices
-		// Compute 4 at a time with SIMD
+		// For each zero crossing, cubic interpolate impulse response
 		for (int z = 0; z < 2 * Z; z += 4) {
-			simd::float_4 ir0 = simd::float_4::load(&impulseReordered[o][z]);
-			// If oversample index reaches next zero crossing, wrap to next z
-			simd::float_4 ir1 = simd::float_4::load((o + 1 < O) ? &impulseReordered[o + 1][z] : &impulseReordered[0][z + 1]);
-			simd::float_4 ir = (ir0 + (ir1 - ir0) * frac);
+			simd::float_4 y0 = simd::float_4::load(getImpulse(o - 1, z));
+			simd::float_4 y1 = simd::float_4::load(getImpulse(o, z));
+			simd::float_4 y2 = simd::float_4::load(getImpulse(o + 1, z));
+			simd::float_4 y3 = simd::float_4::load(getImpulse(o + 2, z));
+			simd::float_4 ir = cubicInterp(y0, y1, y2, y3, T(t));
 
+			// Write to double buffer
 			for (int zz = 0; zz < 4; zz++) {
-				buffer[(bufferIndex + z + zz) % (2 * Z)] += ir[zz] * x;
+				s.buffer[s.bufferIndex + z + zz] += ir[zz] * x;
 			}
 		}
-		// for (int z = 0; z < 2 * Z; z++) {
-		// 	float ir0 = impulseReordered[o][z];
-		// 	float ir1 = (o + 1 < O) ? impulseReordered[o + 1][z] : impulseReordered[0][z + 1];
-		// 	float ir = (ir0 + (ir1 - ir0) * frac);
-		// 	buffer[(bufferIndex + z) % (2 * Z)] += ir * x;
-		// }
 	}
 
 	/** Should be called every frame after inserting any discontinuities. */
-	T process() {
-		T v = buffer[bufferIndex];
-		buffer[bufferIndex] = T(0);
-		bufferIndex = (bufferIndex + 1) % (2 * Z);
+	template <typename T>
+	T process(State<T>& s) const {
+		T v = s.buffer[s.bufferIndex];
+		s.buffer[s.bufferIndex] = T(0);
+		s.bufferIndex++;
+		if (s.bufferIndex >= 2 * Z) {
+			// Move second half of buffer to beginning
+			std::memcpy(s.buffer, s.buffer + 2 * Z, 2 * Z * sizeof(T));
+			std::memset(s.buffer + 2 * Z, 0, 2 * Z * sizeof(T));
+			s.bufferIndex = 0;
+		}
 		return v;
 	}
 };
 
 
+static MinBlep<16, 16> minBlep;
+
+
 template <typename T>
 struct VCOProcessor {
 	T phase = 0.f;
@@ -183,12 +201,12 @@ struct VCOProcessor {
 	OnePoleHighpass::State<T> dcFilterStateTri;
 	OnePoleHighpass::State<T> dcFilterStateSin;
 
-	MinBlepGenerator<16, 16, T> sqrMinBlep;
-	MinBlepGenerator<16, 16, T> sawMinBlep;
-	MinBlepGenerator<16, 16, T> sinMinBlep;
+	MinBlep<16, 16>::State<T> sqrMinBlep;
+	MinBlep<16, 16>::State<T> sawMinBlep;
+	MinBlep<16, 16>::State<T> sinMinBlep;
 
 	void setSampleTime(float sampleTime) {
-		dcFilter.setCutoff(std::min(0.4f, 40.f * sampleTime));
+		dcFilter.setCutoff(std::min(0.4f, 20.f * sampleTime));
 	}
 
 	struct Frame {
@@ -241,9 +259,9 @@ struct VCOProcessor {
 					if (wrapM & (1 << i)) {
 						T mask = simd::movemaskInverse<T>(1 << i);
 						// TODO: MinBlepGenerator::insertDiscontinuity() should handle subframes outside the range -1 < p <= 0 instead of failing silently.
-						float p = clamp(wrapCrossing[i] - 1.f, -1.f, 0.f);
+						float p = clamp(wrapCrossing[i], 0.f, 1.f);
 						T x = mask & (2.f * syncDirection);
-						sqrMinBlep.insertDiscontinuity(p, x);
+						minBlep.insertDiscontinuity(sqrMinBlep, p, x);
 					}
 				}
 			}
@@ -261,9 +279,9 @@ struct VCOProcessor {
 				for (int i = 0; i < frame.channels; i++) {
 					if (pw & (1 << i)) {
 						T mask = simd::movemaskInverse<T>(1 << i);
-						float p = clamp(pulseCrossing[i] - 1.f, -1.f, 0.f);
+						float p = clamp(pulseCrossing[i], 0.f, 1.f);
 						T x = mask & (-2.f * syncDirection);
-						sqrMinBlep.insertDiscontinuity(p, x);
+						minBlep.insertDiscontinuity(sqrMinBlep, p, x);
 					}
 				}
 			}
@@ -278,18 +296,19 @@ struct VCOProcessor {
 				for (int i = 0; i < frame.channels; i++) {
 					if (halfMask & (1 << i)) {
 						T mask = simd::movemaskInverse<T>(1 << i);
-						float p = halfCrossing[i] - 1.f;
+						float p = halfCrossing[i];
 						T x = mask & (-2.f * syncDirection);
-						sawMinBlep.insertDiscontinuity(p, x);
+						minBlep.insertDiscontinuity(sawMinBlep, p, x);
 					}
 				}
 			}
 		}
 
 		// Detect sync
-		// Might be NAN or outside of [0, 1) range
 		if (frame.syncEnabled) {
 			T deltaSync = frame.sync - lastSyncValue;
+			// Sample position where sync crosses 0.
+			// Might be NAN or outside of (0, 1] range
 			T syncCrossing = -lastSyncValue / deltaSync;
 			lastSyncValue = frame.sync;
 			T sync = (0.f < syncCrossing) & (syncCrossing <= 1.f) & (frame.sync >= 0.f);
@@ -304,21 +323,21 @@ struct VCOProcessor {
 					for (int i = 0; i < frame.channels; i++) {
 						if (syncMask & (1 << i)) {
 							T mask = simd::movemaskInverse<T>(1 << i);
-							float p = syncCrossing[i] - 1.f;
-							T x;
-							// Assume that hard-syncing a square always resets it to HIGH
+							float p = syncCrossing[i];
 							if (frame.sqrEnabled || frame.triEnabled) {
-								x = mask & (1.f - sqrState);
+								// Assume that hard-syncing a square always resets it to HIGH
+								T x = mask & (1.f - sqrState);
 								sqrState = simd::ifelse(mask, 1.f, sqrState);
-								sqrMinBlep.insertDiscontinuity(p, x);
+								minBlep.insertDiscontinuity(sqrMinBlep, p, x);
+								DEBUG("%f %f %f", p, x[i], sqrState[i]);
 							}
 							if (frame.sawEnabled) {
-								x = mask & (saw(newPhase) - saw(phase));
-								sawMinBlep.insertDiscontinuity(p, x);
+								T x = mask & (saw(newPhase) - saw(phase));
+								minBlep.insertDiscontinuity(sawMinBlep, p, x);
 							}
 							if (frame.sinEnabled) {
-								x = mask & (sin(newPhase) - sin(phase));
-								sinMinBlep.insertDiscontinuity(p, x);
+								T x = mask & (sin(newPhase) - sin(phase));
+								minBlep.insertDiscontinuity(sinMinBlep, p, x);
 							}
 						}
 					}
@@ -330,14 +349,14 @@ struct VCOProcessor {
 		// Saw
 		if (frame.sawEnabled) {
 			frame.saw = saw(phase);
-			frame.saw += sawMinBlep.process();
+			frame.saw += minBlep.process(sawMinBlep);
 			frame.saw = dcFilter.process(dcFilterStateSaw, frame.saw);
 		}
 
 		// Square
 		if (frame.sqrEnabled || frame.triEnabled) {
 			frame.sqr = sqrState;
-			frame.sqr += sqrMinBlep.process();
+			frame.sqr += minBlep.process(sqrMinBlep);
 			T triSqr = frame.sqr;
 			frame.sqr = dcFilter.process(dcFilterStateSqr, frame.sqr);
 
@@ -359,7 +378,7 @@ struct VCOProcessor {
 		// Sin
 		if (frame.sinEnabled) {
 			frame.sin = sin(phase);
-			frame.sin += sinMinBlep.process();
+			frame.sin += minBlep.process(sinMinBlep);
 			frame.sin = dcFilter.process(dcFilterStateSin, frame.sin);
 		}
 	}
@@ -377,8 +396,7 @@ struct VCOProcessor {
 		return 1 - 4 * simd::fmin(simd::fabs(phase - 0.25f), simd::fabs(phase - 1.25f));
 	}
 	static T sin(T phase) {
-		return sin2pi_fast(phase);
-		// return simd::sin(2.f * M_PI * phase);
+		return sin_pi_9(2 * phase - 1);
 	}
 };
 

From eae28b72c040b9ece5fb85f634f6c826c79f7399 Mon Sep 17 00:00:00 2001
From: Andrew Belt <andrewpbelt@gmail.com>
Date: Wed, 31 Dec 2025 07:35:00 -0500
Subject: [PATCH 04/18] VCO: Correctly handle events like phase crossing, pulse
 width crossing, 0.5 saw crossing, and sync in any order. WIP.

---
 src/VCO.cpp | 321 ++++++++++++++++++++++++++++------------------------
 1 file changed, 176 insertions(+), 145 deletions(-)

diff --git a/src/VCO.cpp b/src/VCO.cpp
index 5cb955e..bd5c7e1 100644
--- a/src/VCO.cpp
+++ b/src/VCO.cpp
@@ -17,7 +17,7 @@ inline T sin_pi_9(T x) {
 t is the fractional position between y1 and y2.
 */
 template <typename T>
-T cubicInterp(T y0, T y1, T y2, T y3, T t) {
+T catmullRomInterpolate(T y0, T y1, T y2, T y3, T t) {
 	T t2 = t * t;
 	T t3 = t2 * t;
 	return y1 + T(0.5) * t * (y2 - y0)
@@ -90,11 +90,13 @@ struct OnePoleHighpass : OnePoleLowpass {
 };
 
 
+/** Holds a precomputed minBLEP impulse response, reordered to efficiently insert into an audio buffer.
+*/
 template <int Z, int O>
 struct MinBlep {
 	/** Reordered impulse response for cubic interpolation, minus 1.0.
 	Z dimension has +1 padding at start (for z-1 wrap) and +4 at end (for SIMD + z+1 wrap).
-	Access via getImpulse() which handles o wrapping and z offset.
+	Access via getImpulse() which handles O wrapping and Z offset.
 	*/
 	float impulseReordered[O][1 + 2 * Z + 4] = {};
 
@@ -102,7 +104,7 @@ struct MinBlep {
 		float impulse[2 * Z][O];
 		dsp::minBlepImpulse(Z, O, &impulse[0][0]);
 
-		// Fill impulseReordered with transposed data, minus 1.
+		// Fill impulseReordered with transposed and pre-subtracted data
 		// Storage index = z + 1 (to allow z = -1 access at index 0)
 		for (int o = 0; o < O; o++) {
 			// z = -1: before impulse starts
@@ -120,30 +122,25 @@ struct MinBlep {
 
 	/** Get pointer to impulse data, handling o wrapping and z offset. */
 	const float* getImpulse(int o, int z) const {
-		int index = z * O + o;
-		z = index / O;
-		o = index % O;
+		z += o / O;
+		o = o % O;
 		if (o < 0) {
 			z -= 1;
 			o += O;
 		}
+		// assert(0 <= o && o < O);
+		// assert(-1 <= z && z < 2 * Z + 4);
 		return &impulseReordered[o][z + 1];
 	}
 
-	template <typename T = float>
-	struct State {
-		// Double buffer of length 2 * Z
-		T buffer[4 * Z] = {};
-		int32_t bufferIndex = 0;
-	};
-
-	/** Places a discontinuity with magnitude `x` at 0 < p <= 1 relative to the current frame.
-	For example if a square wave will jump from 1 to -1 in 0.1 frames, use insertDiscontinuity(0.1, -2.0).
+	/** Places a discontinuity at 0 < p <= 1 relative to the current frame.
+	`out` must have enough space to write 2*Z floats, spaced by `stride` floats.
+	`p` is the subsample position to insert a discontinuity of `magnitude`.
+	For example if a square wave will jump from 1 to -1 in 0.1 frames, use insertDiscontinuity(out, 1, 0.1f, -2.f).
 
 	Note: In the deprecated MinBlepGenerator, `p` was in the range (-1, 0], so add 1 to p if updating to MinBlep.
 	*/
-	template <typename T>
-	void insertDiscontinuity(State<T>& s, float p, T x) const {
+	void insertDiscontinuity(float* out, int stride, float p, float magnitude) const {
 		if (!(0 < p && p <= 1))
 			return;
 
@@ -152,35 +149,61 @@ struct MinBlep {
 		int o = (int) subsample;
 		float t = subsample - o;
 
-		// For each zero crossing, cubic interpolate impulse response
+		// For each zero crossing, interpolate impulse response from oversample points
+		using simd::float_4;
 		for (int z = 0; z < 2 * Z; z += 4) {
-			simd::float_4 y0 = simd::float_4::load(getImpulse(o - 1, z));
-			simd::float_4 y1 = simd::float_4::load(getImpulse(o, z));
-			simd::float_4 y2 = simd::float_4::load(getImpulse(o + 1, z));
-			simd::float_4 y3 = simd::float_4::load(getImpulse(o + 2, z));
-			simd::float_4 ir = cubicInterp(y0, y1, y2, y3, T(t));
-
-			// Write to double buffer
-			for (int zz = 0; zz < 4; zz++) {
-				s.buffer[s.bufferIndex + z + zz] += ir[zz] * x;
+			float_4 y0 = float_4::load(getImpulse(o - 1, z));
+			float_4 y1 = float_4::load(getImpulse(o, z));
+			float_4 y2 = float_4::load(getImpulse(o + 1, z));
+			float_4 y3 = float_4::load(getImpulse(o + 2, z));
+			float_4 y = catmullRomInterpolate(y0, y1, y2, y3, float_4(t));
+			y *= magnitude;
+
+			// Write all 4 samples to buffer
+			for (int zi = 0; zi < 4; zi++) {
+				out[(z + zi) * stride] += y[zi];
 			}
 		}
 	}
+};
 
-	/** Should be called every frame after inserting any discontinuities. */
-	template <typename T>
-	T process(State<T>& s) const {
-		T v = s.buffer[s.bufferIndex];
-		s.buffer[s.bufferIndex] = T(0);
-		s.bufferIndex++;
-		if (s.bufferIndex >= 2 * Z) {
+
+/** Buffer that allows reading/writing up to N future elements contiguously.
+*/
+template <int N, typename T>
+struct MinBlepBuffer {
+	T buffer[2 * N] = {};
+	int32_t bufferIndex = 0;
+
+	T* startData() {
+		return &buffer[bufferIndex];
+	}
+
+	/** Returns the current element and advances the buffer.
+	*/
+	T shift() {
+		T v = buffer[bufferIndex];
+		bufferIndex++;
+		if (bufferIndex >= N) {
 			// Move second half of buffer to beginning
-			std::memcpy(s.buffer, s.buffer + 2 * Z, 2 * Z * sizeof(T));
-			std::memset(s.buffer + 2 * Z, 0, 2 * Z * sizeof(T));
-			s.bufferIndex = 0;
+			std::memcpy(buffer, buffer + N, N * sizeof(T));
+			std::memset(buffer + N, 0, N * sizeof(T));
+			bufferIndex = 0;
 		}
 		return v;
 	}
+
+	/** Copies `n` elements to `out` and advances the buffer.
+	*/
+	void shiftBuffer(T* out, size_t n) {
+		std::memcpy(out, buffer + bufferIndex, n * sizeof(T));
+		bufferIndex += n;
+		if (bufferIndex >= N) {
+			std::memcpy(buffer, buffer + N, N * sizeof(T));
+			std::memset(buffer + N, 0, N * sizeof(T));
+			bufferIndex = 0;
+		}
+	}
 };
 
 
@@ -190,9 +213,8 @@ static MinBlep<16, 16> minBlep;
 template <typename T>
 struct VCOProcessor {
 	T phase = 0.f;
-	T lastSyncValue = 0.f;
+	T lastSync = 0.f;
 	T syncDirection = 1.f;
-	T sqrState = 1.f;
 	T triFilterState = 0.f;
 
 	OnePoleHighpass dcFilter;
@@ -201,9 +223,9 @@ struct VCOProcessor {
 	OnePoleHighpass::State<T> dcFilterStateTri;
 	OnePoleHighpass::State<T> dcFilterStateSin;
 
-	MinBlep<16, 16>::State<T> sqrMinBlep;
-	MinBlep<16, 16>::State<T> sawMinBlep;
-	MinBlep<16, 16>::State<T> sinMinBlep;
+	MinBlepBuffer<16*2, T> sqrMinBlep;
+	MinBlepBuffer<16*2, T> sawMinBlep;
+	MinBlepBuffer<16*2, T> sinMinBlep;
 
 	void setSampleTime(float sampleTime) {
 		dcFilter.setCutoff(std::min(0.4f, 20.f * sampleTime));
@@ -231,143 +253,153 @@ struct VCOProcessor {
 		T sin = 0.f;
 	};
 
-	void process(Frame& frame, float deltaTime) {
-		// Advance phase
-		T deltaPhase = simd::clamp(frame.freq * deltaTime, 0.f, 0.49f);
+	void process(Frame& frame, float sampleTime) {
+		// Compute deltaPhase for full frame
+		T deltaPhase = simd::clamp(frame.freq * sampleTime, 0.f, 0.49f);
 		if (frame.soft) {
-			// Reverse direction
 			deltaPhase *= syncDirection;
 		}
 		else {
-			// Reset back to forward
 			syncDirection = 1.f;
 		}
-		phase += deltaPhase;
-
-		// Wrap phase
-		T phaseFloor = simd::floor(phase);
-		phase -= phaseFloor;
 
-		if (frame.sqrEnabled || frame.triEnabled) {
-			// Jump sqr when phase crosses 1, or crosses 0 if running backwards
-			T wrapMask = (phaseFloor != 0.f);
-			int wrapM = simd::movemask(wrapMask);
-			if (wrapM) {
-				T wrapPhase = (syncDirection == -1.f) & 1.f;
-				T wrapCrossing = (wrapPhase - (phase - deltaPhase)) / deltaPhase;
-				for (int i = 0; i < frame.channels; i++) {
-					if (wrapM & (1 << i)) {
-						T mask = simd::movemaskInverse<T>(1 << i);
-						// TODO: MinBlepGenerator::insertDiscontinuity() should handle subframes outside the range -1 < p <= 0 instead of failing silently.
-						float p = clamp(wrapCrossing[i], 0.f, 1.f);
-						T x = mask & (2.f * syncDirection);
-						minBlep.insertDiscontinuity(sqrMinBlep, p, x);
-					}
+		T prevPhase = phase;
+		const float pwMin = 0.01f;
+		T pulseWidth = simd::clamp(frame.pulseWidth, pwMin, 1.f - pwMin);
+
+		// Inserts minBLEP for each channel where mask is true.
+		// Serial but does nothing if mask is all zero.
+		auto insertMinBlep = [&](T mask, T p, T magnitude, MinBlepBuffer<16*2, T>& minBlepBuffer) {
+			int m = simd::movemask(mask);
+			if (!m)
+				return;
+			float* buffer = (float*) minBlepBuffer.startData();
+			for (int i = 0; i < frame.channels; i++) {
+				if (m & (1 << i)) {
+					minBlep.insertDiscontinuity(&buffer[i], 4, p[i], magnitude[i]);
 				}
 			}
-			sqrState = simd::ifelse(wrapMask, syncDirection, sqrState);
-
-			// Pulse width
-			const float pwMin = 0.01f;
-			T pulseWidth = simd::clamp(frame.pulseWidth, pwMin, 1.f - pwMin);
-
-			// Jump sqr when crossing `pulseWidth`
-			T pwMask = (syncDirection == sqrState) & ((syncDirection == 1.f) ^ (phase < pulseWidth));
-			int pw = simd::movemask(pwMask);
-			if (pw) {
-				T pulseCrossing = (pulseWidth - (phase - deltaPhase)) / deltaPhase;
-				for (int i = 0; i < frame.channels; i++) {
-					if (pw & (1 << i)) {
-						T mask = simd::movemaskInverse<T>(1 << i);
-						float p = clamp(pulseCrossing[i], 0.f, 1.f);
-						T x = mask & (-2.f * syncDirection);
-						minBlep.insertDiscontinuity(sqrMinBlep, p, x);
-					}
-				}
+		};
+
+		// Computes subsample time where phase crosses threshold.
+		auto getCrossing = [](T threshold, T startPhase, T endPhase, T startSubsample, T endSubsample) -> T {
+			// Wrap threshold mod 1 to be in (startPhase, startPhase + 1]
+			threshold -= simd::floor(threshold - startPhase);
+			// Map to (0, 1]
+			T p = (threshold - startPhase) / (endPhase - startPhase);
+			// Map to (startSubsample, endSubsample]
+			return startSubsample + p * (endSubsample - startSubsample);
+		};
+
+		// Processes wrap/pulse/saw crossings between startPhase and endPhase.
+		// startSubsample and endSubsample define the time range within the frame.
+		// channelMask limits processing to specific channels.
+		auto processCrossings = [&](T startPhase, T endPhase, T startSubsample, T endSubsample, T channelMask) {
+			if (frame.sqrEnabled || frame.triEnabled) {
+				// Wrap crossing (phase crosses 0 mod 1)
+				T wrapSubsample = getCrossing(1.f, startPhase, endPhase, startSubsample, endSubsample);
+				T wrapMask = channelMask & (startSubsample < wrapSubsample) & (wrapSubsample <= endSubsample);
+				insertMinBlep(wrapMask, wrapSubsample, 2.f * syncDirection, sqrMinBlep);
+
+				// Pulse width crossing
+				T pulseSubsample = getCrossing(pulseWidth, startPhase, endPhase, startSubsample, endSubsample);
+				T pulseMask = channelMask & (startSubsample < pulseSubsample) & (pulseSubsample <= endSubsample);
+				insertMinBlep(pulseMask, pulseSubsample, -2.f * syncDirection, sqrMinBlep);
 			}
-			sqrState = simd::ifelse(pwMask, -syncDirection, sqrState);
-		}
 
-		if (frame.sawEnabled) {
-			// Jump saw when crossing 0.5
-			T halfCrossing = (0.5f - (phase - deltaPhase)) / deltaPhase;
-			int halfMask = simd::movemask((0 < halfCrossing) & (halfCrossing <= 1.f));
-			if (halfMask) {
-				for (int i = 0; i < frame.channels; i++) {
-					if (halfMask & (1 << i)) {
-						T mask = simd::movemaskInverse<T>(1 << i);
-						float p = halfCrossing[i];
-						T x = mask & (-2.f * syncDirection);
-						minBlep.insertDiscontinuity(sawMinBlep, p, x);
-					}
-				}
+			if (frame.sawEnabled) {
+				// Saw crossing at 0.5
+				T sawSubsample = getCrossing(0.5f, startPhase, endPhase, startSubsample, endSubsample);
+				T sawMask = channelMask & (startSubsample < sawSubsample) & (sawSubsample <= endSubsample);
+				insertMinBlep(sawMask, sawSubsample, -2.f * syncDirection, sawMinBlep);
 			}
+		};
+
+		// Main logic
+		if (!frame.syncEnabled) {
+			// No sync. Process full frame
+			T endPhase = prevPhase + deltaPhase;
+			processCrossings(prevPhase, endPhase, 0.f, 1.f, T::mask());
+			phase = endPhase;
 		}
+		else {
+			// Compute sync subsample position
+			T deltaSync = frame.sync - lastSync;
+			T syncSubsample = -lastSync / deltaSync;
+			lastSync = frame.sync;
+			T syncOccurred = (0.f < syncSubsample) & (syncSubsample <= 1.f) & (frame.sync >= 0.f);
+			T noSync = ~syncOccurred;
+
+			if (simd::movemask(noSync)) {
+				// No sync for these channels. Process full frame
+				T endPhase = prevPhase + deltaPhase;
+				processCrossings(prevPhase, endPhase, 0.f, 1.f, noSync);
+				phase = simd::ifelse(noSync, endPhase, phase);
+			}
+
+			if (simd::movemask(syncOccurred)) {
+				// Process crossings before sync
+				T syncPhase = prevPhase + deltaPhase * syncSubsample;
+				processCrossings(prevPhase, syncPhase, 0.f, syncSubsample, syncOccurred);
 
-		// Detect sync
-		if (frame.syncEnabled) {
-			T deltaSync = frame.sync - lastSyncValue;
-			// Sample position where sync crosses 0.
-			// Might be NAN or outside of (0, 1] range
-			T syncCrossing = -lastSyncValue / deltaSync;
-			lastSyncValue = frame.sync;
-			T sync = (0.f < syncCrossing) & (syncCrossing <= 1.f) & (frame.sync >= 0.f);
-			int syncMask = simd::movemask(sync);
-			if (syncMask) {
 				if (frame.soft) {
-					syncDirection = simd::ifelse(sync, -syncDirection, syncDirection);
+					// Soft sync: Reverse direction, continue from syncPhase
+					syncDirection = simd::ifelse(syncOccurred, -syncDirection, syncDirection);
+					T endPhase = syncPhase + (-deltaPhase) * (1.f - syncSubsample);
+					processCrossings(syncPhase, endPhase, syncSubsample, 1.f, syncOccurred);
+					phase = simd::ifelse(syncOccurred, endPhase, phase);
 				}
 				else {
-					T newPhase = simd::ifelse(sync, (1.f - syncCrossing) * deltaPhase, phase);
-					// Insert minBLEP for sync
-					for (int i = 0; i < frame.channels; i++) {
-						if (syncMask & (1 << i)) {
-							T mask = simd::movemaskInverse<T>(1 << i);
-							float p = syncCrossing[i];
-							if (frame.sqrEnabled || frame.triEnabled) {
-								// Assume that hard-syncing a square always resets it to HIGH
-								T x = mask & (1.f - sqrState);
-								sqrState = simd::ifelse(mask, 1.f, sqrState);
-								minBlep.insertDiscontinuity(sqrMinBlep, p, x);
-								DEBUG("%f %f %f", p, x[i], sqrState[i]);
-							}
-							if (frame.sawEnabled) {
-								T x = mask & (saw(newPhase) - saw(phase));
-								minBlep.insertDiscontinuity(sawMinBlep, p, x);
-							}
-							if (frame.sinEnabled) {
-								T x = mask & (sin(newPhase) - sin(phase));
-								minBlep.insertDiscontinuity(sinMinBlep, p, x);
-							}
-						}
+					// Hard sync: reset to 0, insert discontinuities
+					if (frame.sqrEnabled || frame.triEnabled) {
+						// Sqr jumps from current state to +1
+						T sqrJump = (syncPhase - simd::floor(syncPhase) < pulseWidth);
+						insertMinBlep(syncOccurred & sqrJump, syncSubsample, 2.f, sqrMinBlep);
+					}
+					if (frame.triEnabled) {
+						// TODO Insert minBLEP to Tri
+					}
+					if (frame.sawEnabled) {
+						// Saw jumps from saw(syncPhase) to saw(0) = 0
+						insertMinBlep(syncOccurred, syncSubsample, -saw(syncPhase), sawMinBlep);
+					}
+					if (frame.sinEnabled) {
+						// sin jumps from sin(syncPhase) to sin(0) = 0
+						insertMinBlep(syncOccurred, syncSubsample, -sin(syncPhase), sinMinBlep);
 					}
-					phase = newPhase;
+
+					// Process crossings after sync (starting from phase 0)
+					T endPhase = deltaPhase * (1.f - syncSubsample);
+					processCrossings(0.f, endPhase, syncSubsample, 1.f, syncOccurred);
+					phase = simd::ifelse(syncOccurred, endPhase, phase);
 				}
 			}
 		}
 
-		// Saw
+		// Wrap phase to [0, 1)
+		phase -= simd::floor(phase);
+
+		// Generate outputs
 		if (frame.sawEnabled) {
 			frame.saw = saw(phase);
-			frame.saw += minBlep.process(sawMinBlep);
+			frame.saw += sawMinBlep.shift();
 			frame.saw = dcFilter.process(dcFilterStateSaw, frame.saw);
 		}
 
-		// Square
 		if (frame.sqrEnabled || frame.triEnabled) {
-			frame.sqr = sqrState;
-			frame.sqr += minBlep.process(sqrMinBlep);
+			T sqrHigh = (phase < pulseWidth) ^ syncDirection;
+			frame.sqr = simd::ifelse(sqrHigh, 1.f, -1.f);
+			frame.sqr += sqrMinBlep.shift();
 			T triSqr = frame.sqr;
 			frame.sqr = dcFilter.process(dcFilterStateSqr, frame.sqr);
 
-			// Tri
 			if (frame.triEnabled) {
 				// Integrate square wave
 				const float triShape = 0.2f;
-				T triFreq = deltaTime * triShape * frame.freq;
-				// T alpha = 1.f - simd::exp(-2.f * M_PI * triFreq);
+				T triFreq = sampleTime * triShape * frame.freq;
 				// Use bilinear transform to derive alpha
 				T alpha = 1 / (1 + 1 / (M_PI * triFreq));
+				// T alpha = 1.f - simd::exp(-2.f * M_PI * triFreq);
 				triFilterState += alpha * (triSqr - triFilterState);
 				// Apply gain to roughly have unit amplitude at 0.5 pulseWidth. Depends on triShape.
 				frame.tri = triFilterState * 6.6f;
@@ -375,10 +407,9 @@ struct VCOProcessor {
 			}
 		}
 
-		// Sin
 		if (frame.sinEnabled) {
 			frame.sin = sin(phase);
-			frame.sin += minBlep.process(sinMinBlep);
+			frame.sin += sinMinBlep.shift();
 			frame.sin = dcFilter.process(dcFilterStateSin, frame.sin);
 		}
 	}

From a3bc9a9e4b453c2fd76e2772d8402e028c59e3b2 Mon Sep 17 00:00:00 2001
From: Andrew Belt <andrewpbelt@gmail.com>
Date: Thu, 1 Jan 2026 23:52:41 -0500
Subject: [PATCH 05/18] VCO: Fix square glitching when pulse width changes.

---
 src/VCO.cpp | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/src/VCO.cpp b/src/VCO.cpp
index bd5c7e1..cf0f2b7 100644
--- a/src/VCO.cpp
+++ b/src/VCO.cpp
@@ -213,8 +213,12 @@ static MinBlep<16, 16> minBlep;
 template <typename T>
 struct VCOProcessor {
 	T phase = 0.f;
-	T lastSync = 0.f;
+	/** 1 for forward, -1 for backward. */
 	T syncDirection = 1.f;
+	T lastSync = 0.f;
+	/** 1 or -1 */
+	T lastSqrState = 1.f;
+	/** Leaky integrator result. */
 	T triFilterState = 0.f;
 
 	OnePoleHighpass dcFilter;
@@ -315,7 +319,14 @@ struct VCOProcessor {
 			}
 		};
 
-		// Main logic
+		// Check if square value changed due to pulseWidth changing since last frame
+		if (frame.sqrEnabled || frame.triEnabled) {
+			T sqrState = simd::ifelse(prevPhase < pulseWidth, 1.f, -1.f);
+			T changed = (sqrState != lastSqrState);
+			T magnitude = (sqrState - lastSqrState) * syncDirection;
+			insertMinBlep(changed, 1e-6f, magnitude, sqrMinBlep);
+		}
+
 		if (!frame.syncEnabled) {
 			// No sync. Process full frame
 			T endPhase = prevPhase + deltaPhase;
@@ -387,8 +398,8 @@ struct VCOProcessor {
 		}
 
 		if (frame.sqrEnabled || frame.triEnabled) {
-			T sqrHigh = (phase < pulseWidth) ^ syncDirection;
-			frame.sqr = simd::ifelse(sqrHigh, 1.f, -1.f);
+			frame.sqr = simd::ifelse(phase < pulseWidth, 1.f, -1.f);
+			lastSqrState = frame.sqr;
 			frame.sqr += sqrMinBlep.shift();
 			T triSqr = frame.sqr;
 			frame.sqr = dcFilter.process(dcFilterStateSqr, frame.sqr);

From dec738ffde090bd4d912218c7551739040604dd1 Mon Sep 17 00:00:00 2001
From: Andrew Belt <andrewpbelt@gmail.com>
Date: Fri, 2 Jan 2026 01:59:46 -0500
Subject: [PATCH 06/18] VCO: Fix minBLEP cross detection for backwards phase
 and narrow pulse width.

---
 src/VCO.cpp | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/src/VCO.cpp b/src/VCO.cpp
index cf0f2b7..e987b97 100644
--- a/src/VCO.cpp
+++ b/src/VCO.cpp
@@ -286,12 +286,13 @@ struct VCOProcessor {
 		};
 
 		// Computes subsample time where phase crosses threshold.
-		auto getCrossing = [](T threshold, T startPhase, T endPhase, T startSubsample, T endSubsample) -> T {
-			// Wrap threshold mod 1 to be in (startPhase, startPhase + 1]
-			threshold -= simd::floor(threshold - startPhase);
-			// Map to (0, 1]
-			T p = (threshold - startPhase) / (endPhase - startPhase);
-			// Map to (startSubsample, endSubsample]
+		auto getCrossing = [](T thresholdPhase, T startPhase, T endPhase, T startSubsample, T endSubsample) -> T {
+			T delta = endPhase - startPhase;
+			T diff = thresholdPhase - startPhase;
+			// Forward: wrap thresholdPhase to (startPhase, startPhase+1]
+			// Backward: wrap thresholdPhase to [startPhase-1, startPhase)
+			thresholdPhase -= simd::ifelse(delta >= 0.f, simd::floor(diff), simd::ceil(diff));
+			T p = (thresholdPhase - startPhase) / delta;
 			return startSubsample + p * (endSubsample - startSubsample);
 		};
 
@@ -300,19 +301,19 @@ struct VCOProcessor {
 		// channelMask limits processing to specific channels.
 		auto processCrossings = [&](T startPhase, T endPhase, T startSubsample, T endSubsample, T channelMask) {
 			if (frame.sqrEnabled || frame.triEnabled) {
-				// Wrap crossing (phase crosses 0 mod 1)
+				// Insert minBLEP to square when phase crosses 0 (mod 1)
 				T wrapSubsample = getCrossing(1.f, startPhase, endPhase, startSubsample, endSubsample);
 				T wrapMask = channelMask & (startSubsample < wrapSubsample) & (wrapSubsample <= endSubsample);
 				insertMinBlep(wrapMask, wrapSubsample, 2.f * syncDirection, sqrMinBlep);
 
-				// Pulse width crossing
+				// Insert minBLEP to square when phase crosses pulse width
 				T pulseSubsample = getCrossing(pulseWidth, startPhase, endPhase, startSubsample, endSubsample);
 				T pulseMask = channelMask & (startSubsample < pulseSubsample) & (pulseSubsample <= endSubsample);
 				insertMinBlep(pulseMask, pulseSubsample, -2.f * syncDirection, sqrMinBlep);
 			}
 
 			if (frame.sawEnabled) {
-				// Saw crossing at 0.5
+				// Insert minBLEP to saw when crossing 0.5
 				T sawSubsample = getCrossing(0.5f, startPhase, endPhase, startSubsample, endSubsample);
 				T sawMask = channelMask & (startSubsample < sawSubsample) & (sawSubsample <= endSubsample);
 				insertMinBlep(sawMask, sawSubsample, -2.f * syncDirection, sawMinBlep);

From ebfd4fc64a2264f56e208ef3872f86b24fa3fc6c Mon Sep 17 00:00:00 2001
From: Andrew Belt <andrewpbelt@gmail.com>
Date: Fri, 2 Jan 2026 04:30:33 -0500
Subject: [PATCH 07/18] VCO: Fix sine phase.

---
 src/VCO.cpp | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/src/VCO.cpp b/src/VCO.cpp
index e987b97..65d7e20 100644
--- a/src/VCO.cpp
+++ b/src/VCO.cpp
@@ -324,7 +324,7 @@ struct VCOProcessor {
 		if (frame.sqrEnabled || frame.triEnabled) {
 			T sqrState = simd::ifelse(prevPhase < pulseWidth, 1.f, -1.f);
 			T changed = (sqrState != lastSqrState);
-			T magnitude = (sqrState - lastSqrState) * syncDirection;
+			T magnitude = sqrState - lastSqrState;
 			insertMinBlep(changed, 1e-6f, magnitude, sqrMinBlep);
 		}
 
@@ -339,7 +339,8 @@ struct VCOProcessor {
 			T deltaSync = frame.sync - lastSync;
 			T syncSubsample = -lastSync / deltaSync;
 			lastSync = frame.sync;
-			T syncOccurred = (0.f < syncSubsample) & (syncSubsample <= 1.f) & (frame.sync >= 0.f);
+			// Check if sync rises through 0
+			T syncOccurred = (0.f < syncSubsample) & (syncSubsample <= 1.f) & (frame.sync >= lastSync);
 			T noSync = ~syncOccurred;
 
 			if (simd::movemask(noSync)) {
@@ -353,6 +354,8 @@ struct VCOProcessor {
 				// Process crossings before sync
 				T syncPhase = prevPhase + deltaPhase * syncSubsample;
 				processCrossings(prevPhase, syncPhase, 0.f, syncSubsample, syncOccurred);
+				// Wrap sync phase
+				syncPhase -= simd::floor(syncPhase);
 
 				if (frame.soft) {
 					// Soft sync: Reverse direction, continue from syncPhase
@@ -362,10 +365,10 @@ struct VCOProcessor {
 					phase = simd::ifelse(syncOccurred, endPhase, phase);
 				}
 				else {
-					// Hard sync: reset to 0, insert discontinuities
+					// Hard sync: Reset phase from syncPhase to 0 at syncSubsample, insert discontinuities
 					if (frame.sqrEnabled || frame.triEnabled) {
-						// Sqr jumps from current state to +1
-						T sqrJump = (syncPhase - simd::floor(syncPhase) < pulseWidth);
+						// Check if square jumps from -1 to +1
+						T sqrJump = (syncPhase >= pulseWidth);
 						insertMinBlep(syncOccurred & sqrJump, syncSubsample, 2.f, sqrMinBlep);
 					}
 					if (frame.triEnabled) {
@@ -439,7 +442,7 @@ struct VCOProcessor {
 		return 1 - 4 * simd::fmin(simd::fabs(phase - 0.25f), simd::fabs(phase - 1.25f));
 	}
 	static T sin(T phase) {
-		return sin_pi_9(2 * phase - 1);
+		return sin_pi_9(1 - 2 * phase);
 	}
 };
 

From c8ccf9162b1d06ca17fcca2cef2351ad527407c0 Mon Sep 17 00:00:00 2001
From: Andrew Belt <andrewpbelt@gmail.com>
Date: Fri, 2 Jan 2026 04:44:34 -0500
Subject: [PATCH 08/18] VCO: Fix sync triggering when falling through 0.

---
 src/VCO.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/VCO.cpp b/src/VCO.cpp
index 65d7e20..6a612bc 100644
--- a/src/VCO.cpp
+++ b/src/VCO.cpp
@@ -340,7 +340,7 @@ struct VCOProcessor {
 			T syncSubsample = -lastSync / deltaSync;
 			lastSync = frame.sync;
 			// Check if sync rises through 0
-			T syncOccurred = (0.f < syncSubsample) & (syncSubsample <= 1.f) & (frame.sync >= lastSync);
+			T syncOccurred = (0.f < syncSubsample) & (syncSubsample <= 1.f) & (deltaSync >= 0.f);
 			T noSync = ~syncOccurred;
 
 			if (simd::movemask(noSync)) {

From 6e908fd93b716f1249d2de4fee7d51677bdbe14b Mon Sep 17 00:00:00 2001
From: Andrew Belt <andrewpbelt@gmail.com>
Date: Sun, 4 Jan 2026 02:14:05 -0500
Subject: [PATCH 09/18] VCO: Use linear interpolation instead of Catmull-Rom.
 Generate minBLEP impulse with doubles instead of floats.

---
 src/VCO.cpp | 166 ++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 136 insertions(+), 30 deletions(-)

diff --git a/src/VCO.cpp b/src/VCO.cpp
index 6a612bc..71f6c0a 100644
--- a/src/VCO.cpp
+++ b/src/VCO.cpp
@@ -13,18 +13,6 @@ inline T sin_pi_9(T x) {
 	return x * (T(1) - x2) * (T(3.141521108990) + x2 * (T(-2.024773594411) + x2 * (T(0.517493161073) + x2 * T(-0.063691343423))));
 }
 
-/** Catmull-Rom cubic interpolation
-t is the fractional position between y1 and y2.
-*/
-template <typename T>
-T catmullRomInterpolate(T y0, T y1, T y2, T y3, T t) {
-	T t2 = t * t;
-	T t3 = t2 * t;
-	return y1 + T(0.5) * t * (y2 - y0)
-		+ t2 * (y0 - T(2.5)*y1 + T(2)*y2 - T(0.5)*y3)
-		+ t3 * (T(-0.5)*y0 + T(1.5)*y1 - T(1.5)*y2 + T(0.5)*y3);
-}
-
 /** One-pole low-pass filter, exponential moving average.
 -6 dB/octave slope.
 Useful for leaky integrators and smoothing control signals.
@@ -90,37 +78,157 @@ struct OnePoleHighpass : OnePoleLowpass {
 };
 
 
+/** Simple radix-2 Cooley-Tukey FFT. n must be a power of 2. */
+template <typename T>
+void fft(std::complex<T>* x, int n, bool inverse = false) {
+	// Bit reversal permutation
+	for (int i = 1, j = 0; i < n; i++) {
+		int bit = n >> 1;
+		for (; j & bit; bit >>= 1)
+			j ^= bit;
+		j ^= bit;
+		if (i < j)
+			std::swap(x[i], x[j]);
+	}
+
+	// Cooley-Tukey butterflies
+	for (int len = 2; len <= n; len <<= 1) {
+		T ang = (inverse ? 1 : -1) * 2 * M_PI / len;
+		std::complex<T> wn(std::cos(ang), std::sin(ang));
+		for (int i = 0; i < n; i += len) {
+			std::complex<T> w(1);
+			for (int j = 0; j < len / 2; j++) {
+				std::complex<T> u = x[i + j];
+				std::complex<T> v = x[i + j + len / 2] * w;
+				x[i + j] = u + v;
+				x[i + j + len / 2] = u - v;
+				w *= wn;
+			}
+		}
+	}
+
+	if (inverse) {
+		for (int i = 0; i < n; i++)
+			x[i] /= T(n);
+	}
+}
+
+
+template <typename T>
+inline T blackmanHarris(T p) {
+	return
+	  + T(0.35875)
+	  - T(0.48829) * simd::cos(T(2 * M_PI) * p)
+	  + T(0.14128) * simd::cos(T(4 * M_PI) * p)
+	  - T(0.01168) * simd::cos(T(6 * M_PI) * p);
+}
+
+
+/** Computes the minimum-phase bandlimited step (minBLEP), an impulse step response that rises from 0 to 1.
+Z: number of zero-crossings on each side of the original symmetric sync signal
+O: oversample factor
+output: must be length `(2 * Z) * O`.
+
+Algorithm from "Hard Sync Without Aliasing" by Eli Brandt (2001).
+https://www.cs.cmu.edu/~eli/papers/icmc01-hardsync.pdf
+*/
+inline void minBlepImpulse(int z, int o, float* output) {
+	// Symmetric sinc impulse with `z` zero-crossings on each side
+	int n = 2 * z * o;
+	std::complex<double>* x = new std::complex<double>[n];
+	for (int i = 0; i < n; i++) {
+		double p = (double) i / o - z;
+		x[i] = (p == 0.0) ? 1.0 : std::sin(M_PI * p) / (M_PI * p);
+	}
+
+	// Apply window
+	for (int i = 0; i < n; i++) {
+		x[i] *= blackmanHarris(i / (double) (n - 1));
+	}
+
+	// Reconstruct impulse response with minimum phase
+	fft(x, n);
+
+	// Take log of magnitude and set phase to zero.
+	// Limit frequency bin to e^-10
+	for (int i = 0; i < n; i++) {
+		x[i] = std::log(std::abs(x[i]));
+	}
+
+
+	// Transform to cepstral domain.
+	fft(x, n, true);
+
+	// Apply minimum-phase window in cepstral domain
+	// Double positive quefrencies, zero negative quefrencies
+	for (int i = 1; i < n / 2; i++) {
+		x[i] *= 2.0;
+	}
+	for (int i = n / 2; i < n; i++) {
+		x[i] = 0.0;
+	}
+
+	// Transform back to frequency domain
+	fft(x, n);
+
+	// Take complex exponential of each bin
+	for (int i = 0; i < n; i++) {
+		x[i] = std::exp(x[i]);
+	}
+
+	// Transform to time domain
+	fft(x, n, true);
+
+	// Integrate
+	double total = 0.0;
+	for (int i = 0; i < n; i++) {
+		total += x[i].real();
+		x[i] = total;
+	}
+
+	// Renormalize so last sample is 1
+	for (int i = 0; i < n; i++) {
+		output[i] = x[i].real() / total;
+	}
+
+	FILE* f = fopen("plugins/Fundamental/minblep.txt", "w");
+	DEFER({fclose(f);});
+	for (int i = 0; i < n; i++) {
+		fprintf(f, "%.12f\n", output[i]);
+	}
+
+	delete[] x;
+}
+
+
 /** Holds a precomputed minBLEP impulse response, reordered to efficiently insert into an audio buffer.
 */
 template <int Z, int O>
 struct MinBlep {
-	/** Reordered impulse response for cubic interpolation, minus 1.0.
-	Z dimension has +1 padding at start (for z-1 wrap) and +4 at end (for SIMD + z+1 wrap).
-	Access via getImpulse() which handles O wrapping and Z offset.
+	/** Reordered impulse response for linear interpolation, minus 1.0.
+	Z dimension has +4 padding at end for SIMD and o+1 wrap.
+	Access via getImpulse() which handles O wrapping.
 	*/
-	float impulseReordered[O][1 + 2 * Z + 4] = {};
+	float impulseReordered[O][2 * Z + 4] = {};
 
 	MinBlep() {
 		float impulse[2 * Z][O];
-		dsp::minBlepImpulse(Z, O, &impulse[0][0]);
+		minBlepImpulse(Z, O, &impulse[0][0]);
 
 		// Fill impulseReordered with transposed and pre-subtracted data
-		// Storage index = z + 1 (to allow z = -1 access at index 0)
 		for (int o = 0; o < O; o++) {
-			// z = -1: before impulse starts
-			impulseReordered[o][0] = -1.f;
 			// z = 0 to 2*Z-1: actual impulse data
 			for (int z = 0; z < 2 * Z; z++) {
-				impulseReordered[o][1 + z] = impulse[z][o] - 1.f;
+				impulseReordered[o][z] = impulse[z][o] - 1.f;
 			}
 			// z = 2*Z to 2*Z+3: after impulse ends (for SIMD padding)
 			for (int z = 2 * Z; z < 2 * Z + 4; z++) {
-				impulseReordered[o][1 + z] = 0.f;
+				impulseReordered[o][z] = 0.f;
 			}
 		}
 	}
 
-	/** Get pointer to impulse data, handling o wrapping and z offset. */
+	/** Get pointer to impulse data, handling o wrapping. */
 	const float* getImpulse(int o, int z) const {
 		z += o / O;
 		o = o % O;
@@ -129,8 +237,8 @@ struct MinBlep {
 			o += O;
 		}
 		// assert(0 <= o && o < O);
-		// assert(-1 <= z && z < 2 * Z + 4);
-		return &impulseReordered[o][z + 1];
+		// assert(0 <= z && z < 2 * Z + 4);
+		return &impulseReordered[o][z];
 	}
 
 	/** Places a discontinuity at 0 < p <= 1 relative to the current frame.
@@ -149,14 +257,12 @@ struct MinBlep {
 		int o = (int) subsample;
 		float t = subsample - o;
 
-		// For each zero crossing, interpolate impulse response from oversample points
-		using simd::float_4;
+		// For each zero crossing, linearly interpolate impulse response from oversample points
 		for (int z = 0; z < 2 * Z; z += 4) {
-			float_4 y0 = float_4::load(getImpulse(o - 1, z));
+			using simd::float_4;
 			float_4 y1 = float_4::load(getImpulse(o, z));
 			float_4 y2 = float_4::load(getImpulse(o + 1, z));
-			float_4 y3 = float_4::load(getImpulse(o + 2, z));
-			float_4 y = catmullRomInterpolate(y0, y1, y2, y3, float_4(t));
+			float_4 y = y1 + t * (y2 - y1);
 			y *= magnitude;
 
 			// Write all 4 samples to buffer

From 2f3892bc71f3662db642dd549af90ceab3c4f997 Mon Sep 17 00:00:00 2001
From: Andrew Belt <andrewpbelt@gmail.com>
Date: Mon, 5 Jan 2026 01:33:51 -0500
Subject: [PATCH 10/18] VCO: Make triangle output based on phase, not a square
 integrator. Add minBLAMP impulse to MinBlep class.

---
 src/VCO.cpp | 186 ++++++++++++++++++++++++++++++----------------------
 1 file changed, 106 insertions(+), 80 deletions(-)

diff --git a/src/VCO.cpp b/src/VCO.cpp
index 71f6c0a..fda2f17 100644
--- a/src/VCO.cpp
+++ b/src/VCO.cpp
@@ -179,22 +179,16 @@ inline void minBlepImpulse(int z, int o, float* output) {
 	// Transform to time domain
 	fft(x, n, true);
 
-	// Integrate
+	// Integrate. First sample x[0] should be 0.
 	double total = 0.0;
 	for (int i = 0; i < n; i++) {
-		total += x[i].real();
-		x[i] = total;
+		output[i] = total;
+		total += x[i].real() / o;
 	}
 
-	// Renormalize so last sample is 1
+	// Renormalize so last virtual sample x[n] should be exactly 1
 	for (int i = 0; i < n; i++) {
-		output[i] = x[i].real() / total;
-	}
-
-	FILE* f = fopen("plugins/Fundamental/minblep.txt", "w");
-	DEFER({fclose(f);});
-	for (int i = 0; i < n; i++) {
-		fprintf(f, "%.12f\n", output[i]);
+		output[i] /= total;
 	}
 
 	delete[] x;
@@ -207,61 +201,76 @@ template <int Z, int O>
 struct MinBlep {
 	/** Reordered impulse response for linear interpolation, minus 1.0.
 	Z dimension has +4 padding at end for SIMD and o+1 wrap.
-	Access via getImpulse() which handles O wrapping.
 	*/
 	float impulseReordered[O][2 * Z + 4] = {};
+	float rampReordered[O][2 * Z + 4] = {};
 
 	MinBlep() {
-		float impulse[2 * Z][O];
-		minBlepImpulse(Z, O, &impulse[0][0]);
+		float impulse[2 * Z * O];
+		minBlepImpulse(Z, O, impulse);
+
+		// Subtract 1 so our step impulse goes from -1 to 0.
+		for (int i = 0; i < 2*Z*O; i++) {
+			impulse[i] -= 1.f;
+		}
 
-		// Fill impulseReordered with transposed and pre-subtracted data
+		// Integrate minBLEP to obtain minBLAMP
+		float ramp[2 * Z * O];
+		double total = 0.0;
+		for (int i = 0; i < 2*Z*O; i++) {
+			ramp[i] = total;
+			total += impulse[i] / O;
+		}
+		// Subtract ideal ramp so ramp[0] and the virtual ramp[n] are 0
+		for (int i = 0; i < 2*Z*O; i++) {
+			ramp[i] -= (float) i / (2*Z*O) * total;
+		}
+
+		// Transpose samples by making z values contiguous for each o
 		for (int o = 0; o < O; o++) {
-			// z = 0 to 2*Z-1: actual impulse data
 			for (int z = 0; z < 2 * Z; z++) {
-				impulseReordered[o][z] = impulse[z][o] - 1.f;
-			}
-			// z = 2*Z to 2*Z+3: after impulse ends (for SIMD padding)
-			for (int z = 2 * Z; z < 2 * Z + 4; z++) {
-				impulseReordered[o][z] = 0.f;
+				impulseReordered[o][z] = impulse[z * O + o];
 			}
 		}
-	}
-
-	/** Get pointer to impulse data, handling o wrapping. */
-	const float* getImpulse(int o, int z) const {
-		z += o / O;
-		o = o % O;
-		if (o < 0) {
-			z -= 1;
-			o += O;
+		for (int o = 0; o < O; o++) {
+			for (int z = 0; z < 2 * Z; z++) {
+				rampReordered[o][z] = ramp[z * O + o];
+			}
 		}
-		// assert(0 <= o && o < O);
-		// assert(0 <= z && z < 2 * Z + 4);
-		return &impulseReordered[o][z];
 	}
 
-	/** Places a discontinuity at 0 < p <= 1 relative to the current frame.
+	/** Places a discontinuity at 0 < subsample <= 1 relative to the current frame.
 	`out` must have enough space to write 2*Z floats, spaced by `stride` floats.
-	`p` is the subsample position to insert a discontinuity of `magnitude`.
+	`subsample` is the subsample position to insert a discontinuity of `magnitude`.
 	For example if a square wave will jump from 1 to -1 in 0.1 frames, use insertDiscontinuity(out, 1, 0.1f, -2.f).
 
-	Note: In the deprecated MinBlepGenerator, `p` was in the range (-1, 0], so add 1 to p if updating to MinBlep.
+	Note: In the deprecated MinBlepGenerator, `subsample` was in the range (-1, 0], so add 1 to subsample if updating to MinBlep.
 	*/
-	void insertDiscontinuity(float* out, int stride, float p, float magnitude) const {
-		if (!(0 < p && p <= 1))
+	void insertDiscontinuity(float subsample, float magnitude, float* out, int stride = 1) const {
+		insert(impulseReordered, subsample, magnitude, out, stride);
+	}
+
+	void insertRampDiscontinuity(float subsample, float magnitude, float* out, int stride = 1) const {
+		insert(rampReordered, subsample, magnitude, out, stride);
+	}
+
+private:
+	void insert(const float table[O][2 * Z + 4], float subsample, float magnitude, float* out, int stride = 1) const {
+		if (!(0.f < subsample && subsample <= 1.f))
 			return;
 
 		// Calculate impulse array index and fractional part
-		float subsample = (1 - p) * O;
-		int o = (int) subsample;
-		float t = subsample - o;
+		float t = (1.f - subsample) * O;
+		int o = (int) t;
+		t -= o;
 
 		// For each zero crossing, linearly interpolate impulse response from oversample points
 		for (int z = 0; z < 2 * Z; z += 4) {
 			using simd::float_4;
-			float_4 y1 = float_4::load(getImpulse(o, z));
-			float_4 y2 = float_4::load(getImpulse(o + 1, z));
+			float_4 y1 = float_4::load(&table[o][z]);
+			int o2 = (o + 1) % O;
+			int z2 = z + (o + 1) / O;
+			float_4 y2 = float_4::load(&table[o2][z2]);
 			float_4 y = y1 + t * (y2 - y1);
 			y *= magnitude;
 
@@ -313,7 +322,10 @@ struct MinBlepBuffer {
 };
 
 
-static MinBlep<16, 16> minBlep;
+static const MinBlep<16, 16>& getMinBlep() {
+	static MinBlep<16, 16> minBlep;
+	return minBlep;
+}
 
 
 template <typename T>
@@ -324,8 +336,6 @@ struct VCOProcessor {
 	T lastSync = 0.f;
 	/** 1 or -1 */
 	T lastSqrState = 1.f;
-	/** Leaky integrator result. */
-	T triFilterState = 0.f;
 
 	OnePoleHighpass dcFilter;
 	OnePoleHighpass::State<T> dcFilterStateSqr;
@@ -335,6 +345,7 @@ struct VCOProcessor {
 
 	MinBlepBuffer<16*2, T> sqrMinBlep;
 	MinBlepBuffer<16*2, T> sawMinBlep;
+	MinBlepBuffer<16*2, T> triMinBlep;
 	MinBlepBuffer<16*2, T> sinMinBlep;
 
 	void setSampleTime(float sampleTime) {
@@ -379,14 +390,26 @@ struct VCOProcessor {
 
 		// Inserts minBLEP for each channel where mask is true.
 		// Serial but does nothing if mask is all zero.
-		auto insertMinBlep = [&](T mask, T p, T magnitude, MinBlepBuffer<16*2, T>& minBlepBuffer) {
+		auto insertDiscontinuity = [&](T mask, T subsample, T magnitude, MinBlepBuffer<16*2, T>& buffer) {
 			int m = simd::movemask(mask);
 			if (!m)
 				return;
-			float* buffer = (float*) minBlepBuffer.startData();
 			for (int i = 0; i < frame.channels; i++) {
 				if (m & (1 << i)) {
-					minBlep.insertDiscontinuity(&buffer[i], 4, p[i], magnitude[i]);
+					float* x = (float*) buffer.startData();
+					getMinBlep().insertDiscontinuity(subsample[i], magnitude[i], &x[i], 4);
+				}
+			}
+		};
+
+		auto insertRampDiscontinuity = [&](T mask, T subsample, T magnitude, MinBlepBuffer<16*2, T>& buffer) {
+			int m = simd::movemask(mask);
+			if (!m)
+				return;
+			for (int i = 0; i < frame.channels; i++) {
+				if (m & (1 << i)) {
+					float* x = (float*) buffer.startData();
+					getMinBlep().insertRampDiscontinuity(subsample[i], magnitude[i], &x[i], 4);
 				}
 			}
 		};
@@ -406,32 +429,43 @@ struct VCOProcessor {
 		// startSubsample and endSubsample define the time range within the frame.
 		// channelMask limits processing to specific channels.
 		auto processCrossings = [&](T startPhase, T endPhase, T startSubsample, T endSubsample, T channelMask) {
-			if (frame.sqrEnabled || frame.triEnabled) {
+			if (frame.sqrEnabled) {
 				// Insert minBLEP to square when phase crosses 0 (mod 1)
 				T wrapSubsample = getCrossing(1.f, startPhase, endPhase, startSubsample, endSubsample);
-				T wrapMask = channelMask & (startSubsample < wrapSubsample) & (wrapSubsample <= endSubsample);
-				insertMinBlep(wrapMask, wrapSubsample, 2.f * syncDirection, sqrMinBlep);
+				T mask = channelMask & (startSubsample < wrapSubsample) & (wrapSubsample <= endSubsample);
+				insertDiscontinuity(mask, wrapSubsample, 2.f * syncDirection, sqrMinBlep);
 
 				// Insert minBLEP to square when phase crosses pulse width
 				T pulseSubsample = getCrossing(pulseWidth, startPhase, endPhase, startSubsample, endSubsample);
-				T pulseMask = channelMask & (startSubsample < pulseSubsample) & (pulseSubsample <= endSubsample);
-				insertMinBlep(pulseMask, pulseSubsample, -2.f * syncDirection, sqrMinBlep);
+				mask = channelMask & (startSubsample < pulseSubsample) & (pulseSubsample <= endSubsample);
+				insertDiscontinuity(mask, pulseSubsample, -2.f * syncDirection, sqrMinBlep);
 			}
 
 			if (frame.sawEnabled) {
 				// Insert minBLEP to saw when crossing 0.5
 				T sawSubsample = getCrossing(0.5f, startPhase, endPhase, startSubsample, endSubsample);
-				T sawMask = channelMask & (startSubsample < sawSubsample) & (sawSubsample <= endSubsample);
-				insertMinBlep(sawMask, sawSubsample, -2.f * syncDirection, sawMinBlep);
+				T mask = channelMask & (startSubsample < sawSubsample) & (sawSubsample <= endSubsample);
+				insertDiscontinuity(mask, sawSubsample, -2.f * syncDirection, sawMinBlep);
+			}
+
+			if (frame.triEnabled) {
+				// Insert minBLAMP to tri when crossing 0.25, slope from +4 to -4
+				T triSubsample = getCrossing(0.25f, startPhase, endPhase, startSubsample, endSubsample);
+				T mask = channelMask & (startSubsample < triSubsample) & (triSubsample <= endSubsample);
+				insertRampDiscontinuity(mask, triSubsample, -8.f * deltaPhase, triMinBlep);
+				// Insert minBLAMP to tri when crossing 0.75, slope from -4 to +4
+				triSubsample = getCrossing(0.75f, startPhase, endPhase, startSubsample, endSubsample);
+				mask = channelMask & (startSubsample < triSubsample) & (triSubsample <= endSubsample);
+				insertRampDiscontinuity(mask, triSubsample, 8.f * deltaPhase, triMinBlep);
 			}
 		};
 
 		// Check if square value changed due to pulseWidth changing since last frame
-		if (frame.sqrEnabled || frame.triEnabled) {
+		if (frame.sqrEnabled) {
 			T sqrState = simd::ifelse(prevPhase < pulseWidth, 1.f, -1.f);
-			T changed = (sqrState != lastSqrState);
 			T magnitude = sqrState - lastSqrState;
-			insertMinBlep(changed, 1e-6f, magnitude, sqrMinBlep);
+			T changed = (magnitude != 0.f);
+			insertDiscontinuity(changed, 1e-6f, magnitude, sqrMinBlep);
 		}
 
 		if (!frame.syncEnabled) {
@@ -472,21 +506,21 @@ struct VCOProcessor {
 				}
 				else {
 					// Hard sync: Reset phase from syncPhase to 0 at syncSubsample, insert discontinuities
-					if (frame.sqrEnabled || frame.triEnabled) {
+					if (frame.sqrEnabled) {
 						// Check if square jumps from -1 to +1
 						T sqrJump = (syncPhase >= pulseWidth);
-						insertMinBlep(syncOccurred & sqrJump, syncSubsample, 2.f, sqrMinBlep);
-					}
-					if (frame.triEnabled) {
-						// TODO Insert minBLEP to Tri
+						insertDiscontinuity(syncOccurred & sqrJump, syncSubsample, 2.f, sqrMinBlep);
 					}
 					if (frame.sawEnabled) {
 						// Saw jumps from saw(syncPhase) to saw(0) = 0
-						insertMinBlep(syncOccurred, syncSubsample, -saw(syncPhase), sawMinBlep);
+						insertDiscontinuity(syncOccurred, syncSubsample, -saw(syncPhase), sawMinBlep);
+					}
+					if (frame.triEnabled) {
+						insertDiscontinuity(syncOccurred, syncSubsample, -tri(syncPhase), triMinBlep);
 					}
 					if (frame.sinEnabled) {
 						// sin jumps from sin(syncPhase) to sin(0) = 0
-						insertMinBlep(syncOccurred, syncSubsample, -sin(syncPhase), sinMinBlep);
+						insertDiscontinuity(syncOccurred, syncSubsample, -sin(syncPhase), sinMinBlep);
 					}
 
 					// Process crossings after sync (starting from phase 0)
@@ -507,25 +541,17 @@ struct VCOProcessor {
 			frame.saw = dcFilter.process(dcFilterStateSaw, frame.saw);
 		}
 
-		if (frame.sqrEnabled || frame.triEnabled) {
+		if (frame.sqrEnabled) {
 			frame.sqr = simd::ifelse(phase < pulseWidth, 1.f, -1.f);
 			lastSqrState = frame.sqr;
 			frame.sqr += sqrMinBlep.shift();
-			T triSqr = frame.sqr;
 			frame.sqr = dcFilter.process(dcFilterStateSqr, frame.sqr);
+		}
 
-			if (frame.triEnabled) {
-				// Integrate square wave
-				const float triShape = 0.2f;
-				T triFreq = sampleTime * triShape * frame.freq;
-				// Use bilinear transform to derive alpha
-				T alpha = 1 / (1 + 1 / (M_PI * triFreq));
-				// T alpha = 1.f - simd::exp(-2.f * M_PI * triFreq);
-				triFilterState += alpha * (triSqr - triFilterState);
-				// Apply gain to roughly have unit amplitude at 0.5 pulseWidth. Depends on triShape.
-				frame.tri = triFilterState * 6.6f;
-				frame.tri = dcFilter.process(dcFilterStateTri, frame.tri);
-			}
+		if (frame.triEnabled) {
+			frame.tri = tri(phase);
+			frame.tri += triMinBlep.shift();
+			frame.tri = dcFilter.process(dcFilterStateTri, frame.tri);
 		}
 
 		if (frame.sinEnabled) {

From 81b0db8a37181b567234141d21b5ae398d3250c7 Mon Sep 17 00:00:00 2001
From: Andrew Belt <andrewpbelt@gmail.com>
Date: Mon, 5 Jan 2026 03:23:18 -0500
Subject: [PATCH 11/18] VCO: When hard or soft syncing and the slope of tri/saw
 change, insert a minBLAMP.

---
 src/VCO.cpp | 64 ++++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 54 insertions(+), 10 deletions(-)

diff --git a/src/VCO.cpp b/src/VCO.cpp
index fda2f17..d858967 100644
--- a/src/VCO.cpp
+++ b/src/VCO.cpp
@@ -146,6 +146,7 @@ inline void minBlepImpulse(int z, int o, float* output) {
 		x[i] *= blackmanHarris(i / (double) (n - 1));
 	}
 
+#if 1
 	// Reconstruct impulse response with minimum phase
 	fft(x, n);
 
@@ -178,6 +179,7 @@ inline void minBlepImpulse(int z, int o, float* output) {
 
 	// Transform to time domain
 	fft(x, n, true);
+#endif
 
 	// Integrate. First sample x[0] should be 0.
 	double total = 0.0;
@@ -226,6 +228,12 @@ struct MinBlep {
 			ramp[i] -= (float) i / (2*Z*O) * total;
 		}
 
+		// FILE* f = fopen("plugins/Fundamental/minblep.txt", "w");
+		// for (int i = 0; i < 2*Z*O; i++) {
+		// 	fprintf(f, "%.12f\n", ramp[i]);
+		// }
+		// fclose(f);
+
 		// Transpose samples by making z values contiguous for each o
 		for (int o = 0; o < O; o++) {
 			for (int z = 0; z < 2 * Z; z++) {
@@ -250,7 +258,7 @@ struct MinBlep {
 		insert(impulseReordered, subsample, magnitude, out, stride);
 	}
 
-	void insertRampDiscontinuity(float subsample, float magnitude, float* out, int stride = 1) const {
+	void insertSlopeDiscontinuity(float subsample, float magnitude, float* out, int stride = 1) const {
 		insert(rampReordered, subsample, magnitude, out, stride);
 	}
 
@@ -402,14 +410,14 @@ struct VCOProcessor {
 			}
 		};
 
-		auto insertRampDiscontinuity = [&](T mask, T subsample, T magnitude, MinBlepBuffer<16*2, T>& buffer) {
+		auto insertSlopeDiscontinuity = [&](T mask, T subsample, T magnitude, MinBlepBuffer<16*2, T>& buffer) {
 			int m = simd::movemask(mask);
 			if (!m)
 				return;
 			for (int i = 0; i < frame.channels; i++) {
 				if (m & (1 << i)) {
 					float* x = (float*) buffer.startData();
-					getMinBlep().insertRampDiscontinuity(subsample[i], magnitude[i], &x[i], 4);
+					getMinBlep().insertSlopeDiscontinuity(subsample[i], magnitude[i], &x[i], 4);
 				}
 			}
 		};
@@ -449,20 +457,23 @@ struct VCOProcessor {
 			}
 
 			if (frame.triEnabled) {
-				// Insert minBLAMP to tri when crossing 0.25, slope from +4 to -4
+				// Insert minBLAMP to tri when crossing 0.25
 				T triSubsample = getCrossing(0.25f, startPhase, endPhase, startSubsample, endSubsample);
 				T mask = channelMask & (startSubsample < triSubsample) & (triSubsample <= endSubsample);
-				insertRampDiscontinuity(mask, triSubsample, -8.f * deltaPhase, triMinBlep);
+				// Slope goes from +4 to -4, so slope jump is -8 * abs(deltaPhase)
+				insertSlopeDiscontinuity(mask, triSubsample, -8.f * deltaPhase * syncDirection, triMinBlep);
+
 				// Insert minBLAMP to tri when crossing 0.75, slope from -4 to +4
 				triSubsample = getCrossing(0.75f, startPhase, endPhase, startSubsample, endSubsample);
 				mask = channelMask & (startSubsample < triSubsample) & (triSubsample <= endSubsample);
-				insertRampDiscontinuity(mask, triSubsample, 8.f * deltaPhase, triMinBlep);
+				// Slope goes from -4 to +4, so slope jump is 8 * abs(deltaPhase)
+				insertSlopeDiscontinuity(mask, triSubsample, 8.f * deltaPhase * syncDirection, triMinBlep);
 			}
 		};
 
 		// Check if square value changed due to pulseWidth changing since last frame
 		if (frame.sqrEnabled) {
-			T sqrState = simd::ifelse(prevPhase < pulseWidth, 1.f, -1.f);
+			T sqrState = sqr(prevPhase, pulseWidth);
 			T magnitude = sqrState - lastSqrState;
 			T changed = (magnitude != 0.f);
 			insertDiscontinuity(changed, 1e-6f, magnitude, sqrMinBlep);
@@ -499,13 +510,31 @@ struct VCOProcessor {
 
 				if (frame.soft) {
 					// Soft sync: Reverse direction, continue from syncPhase
+
+					if (frame.sawEnabled) {
+						// Saw slope reverses
+						// +2 slope becomes -2 if deltaPhase > 0
+						// -2 slope becomes +2 if deltaPhase < 0
+						insertSlopeDiscontinuity(syncOccurred, syncSubsample, -4.f * deltaPhase, sawMinBlep);
+					}
+					if (frame.triEnabled) {
+						// Tri slope reverses
+						// -4 slope becomes +4 if 0.25 < phase < 0.75
+						// -4 slope becomes +4 otherwise
+						T descending = (0.25f < syncPhase) & (syncPhase < 0.75f);
+						T slopeJump = simd::ifelse(descending, 8.f, -8.f);
+						insertSlopeDiscontinuity(syncOccurred, syncSubsample, slopeJump * deltaPhase, triMinBlep);
+					}
+
 					syncDirection = simd::ifelse(syncOccurred, -syncDirection, syncDirection);
-					T endPhase = syncPhase + (-deltaPhase) * (1.f - syncSubsample);
+					deltaPhase = simd::ifelse(syncOccurred, -deltaPhase, deltaPhase);
+					T endPhase = syncPhase + deltaPhase * (1.f - syncSubsample);
 					processCrossings(syncPhase, endPhase, syncSubsample, 1.f, syncOccurred);
 					phase = simd::ifelse(syncOccurred, endPhase, phase);
 				}
 				else {
 					// Hard sync: Reset phase from syncPhase to 0 at syncSubsample, insert discontinuities
+
 					if (frame.sqrEnabled) {
 						// Check if square jumps from -1 to +1
 						T sqrJump = (syncPhase >= pulseWidth);
@@ -516,11 +545,18 @@ struct VCOProcessor {
 						insertDiscontinuity(syncOccurred, syncSubsample, -saw(syncPhase), sawMinBlep);
 					}
 					if (frame.triEnabled) {
+						// Tri jumps from tri(syncPhase) to tri(0) = 0
 						insertDiscontinuity(syncOccurred, syncSubsample, -tri(syncPhase), triMinBlep);
+						// If descending slope (-4), reset to ascending slope (+4)
+						T wasDescending = (0.25f < syncPhase) & (syncPhase < 0.75f);
+						insertSlopeDiscontinuity(syncOccurred & wasDescending, syncSubsample, 8.f * deltaPhase, triMinBlep);
 					}
 					if (frame.sinEnabled) {
 						// sin jumps from sin(syncPhase) to sin(0) = 0
 						insertDiscontinuity(syncOccurred, syncSubsample, -sin(syncPhase), sinMinBlep);
+						// Slope changes from sinDerivative(syncPhase) to sinDerivative(0) = 2pi
+						// T slopeJump = T(2 * M_PI) - sinDerivative(syncPhase);
+						// insertSlopeDiscontinuity(syncOccurred, syncSubsample, slopeJump * deltaPhase, sinMinBlep);
 					}
 
 					// Process crossings after sync (starting from phase 0)
@@ -542,7 +578,7 @@ struct VCOProcessor {
 		}
 
 		if (frame.sqrEnabled) {
-			frame.sqr = simd::ifelse(phase < pulseWidth, 1.f, -1.f);
+			frame.sqr = sqr(phase, pulseWidth);
 			lastSqrState = frame.sqr;
 			frame.sqr += sqrMinBlep.shift();
 			frame.sqr = dcFilter.process(dcFilterStateSqr, frame.sqr);
@@ -565,17 +601,25 @@ struct VCOProcessor {
 		return sin(phase);
 	}
 
+	static T sqr(T phase, T pulseWidth) {
+		return simd::ifelse(phase < pulseWidth, 1.f, -1.f);
+	}
 	static T saw(T phase) {
 		T x = phase + 0.5f;
 		x -= simd::trunc(x);
 		return 2 * x - 1;
 	}
 	static T tri(T phase) {
-		return 1 - 4 * simd::fmin(simd::fabs(phase - 0.25f), simd::fabs(phase - 1.25f));
+		return 1 - 4 * simd::fmin(simd::fabs(phase - 0.25f), 1.25f - phase);
 	}
 	static T sin(T phase) {
 		return sin_pi_9(1 - 2 * phase);
 	}
+	static T sinDerivative(T phase) {
+		phase += 0.25f;
+		phase -= simd::floor(phase);
+		return T(2 * M_PI) * sin(phase);
+	}
 };
 
 

From bc4156d2183712e98ae6c6b8067be5ed517dd790 Mon Sep 17 00:00:00 2001
From: Andrew Belt <andrewpbelt@gmail.com>
Date: Mon, 5 Jan 2026 21:58:19 -0500
Subject: [PATCH 12/18] VCF: Use TPT/ZDF solver with first-order AADA.

---
 src/VCF.cpp | 250 +++++++++++++++++++++++++++++++++-------------------
 1 file changed, 160 insertions(+), 90 deletions(-)

diff --git a/src/VCF.cpp b/src/VCF.cpp
index 022a77d..5db0615 100644
--- a/src/VCF.cpp
+++ b/src/VCF.cpp
@@ -4,66 +4,164 @@
 using simd::float_4;
 
 
+/** Approximates tan(x) for x in [0, π*0.49].
+Max error: 1.8e-7
+*/
 template <typename T>
-static T clip(T x) {
-	// return std::tanh(x);
-	// Pade approximant of tanh
-	x = simd::clamp(x, -3.f, 3.f);
-	return x * (27 + x * x) / (27 + 9 * x * x);
+inline T tan_5_4(T x) {
+	T x2 = x * x;
+	return x * (T(1) + x2 * (T(-0.1122835153) + T(0.00114264086) * x2))
+		/ (T(1) + x2 * (T(-0.4456155807) + T(0.01634547741) * x2));
 }
 
 
+/** Approximates tanh(x) for x in [-4, 4].
+Max error: 1.1e-5
+*/
+template <typename T>
+inline T tanh_5_6(T x) {
+	x = simd::clamp(x, T(-4), T(4));
+	T x2 = x * x;
+	return x * (T(1450.73) + x2 * (T(152.494) + T(1.12357) * x2))
+		/ (T(1450.80) + x2 * (T(635.839) + x2 * (T(19.8913) + T(0.00188) * x2)));
+}
+
+
+/** Approximates ln(cosh(x)) for x >= 0.
+Max error: 6.2e-5 over [0, 10].
+*/
+template <typename T>
+inline T lncosh_5_3(T x) {
+	T x2 = x * x;
+	return x2 * T(0.5) * (T(1) + x * (T(0.5825995649809) + x * (T(0.29698351914621) + x * T(-0.0001879624507617))))
+		/ (T(1) + x * (T(0.5938203513329) + x * (T(0.4266725633273) + x * T(0.1456088792904))));
+}
+
+
+/** First antiderivative of tanh: F(x) = ln(cosh(x)).
+For numerical stability, uses asymptotic form for |x| > 10.
+*/
+template <typename T>
+inline T tanh_F1(T x) {
+	x = simd::abs(x);
+	// For |x| > 10, ln(cosh(x)) ≈ |x| - ln(2)
+	T asymptotic = x - T(0.6931471805599453);
+	T exact = lncosh_5_3(x);
+	return simd::ifelse(x < T(10), exact, asymptotic);
+}
+
+
+/** First-order ADAA (Antiderivative Anti-Aliasing) processor for tanh.
+Computes (F(x[n]) - F(x[n-1])) / (x[n] - x[n-1]) where F is the antiderivative.
+Falls back to tanh(midpoint) when x[n] ≈ x[n-1] (L'Hôpital's rule).
+*/
+template <typename T>
+struct TanhADAA1 {
+	T xPrev = T(0);
+
+	void reset() {
+		xPrev = T(0);
+	}
+
+	T process(T x) {
+		const T eps = T(1e-5);
+		T diff = x - xPrev;
+
+		// Normal case: finite difference of antiderivative
+		T Fx = tanh_F1(x);
+		T FxPrev = tanh_F1(xPrev);
+		T adaaResult = (Fx - FxPrev) / diff;
+
+		// Fallback: use tanh at midpoint when diff is small
+		T midpoint = (x + xPrev) * T(0.5);
+		T fallbackResult = tanh_5_6(midpoint);
+
+		T y = simd::ifelse(simd::abs(diff) > eps, adaaResult, fallbackResult);
+
+		xPrev = x;
+		return y;
+	}
+};
+
+
+/** 4-pole (24dB/oct) transistor ladder filter using TPT/ZDF (Zero-Delay Feedback).
+Uses first-order ADAA on tanh saturation.
+*/
 template <typename T>
 struct LadderFilter {
-	T omega0;
-	T resonance = 1;
 	T state[4];
-	T input;
+	TanhADAA1<T> adaa[5];
+
+	struct Frame {
+		T input;
+		/** Normalized cutoff frequency: 0 <= fc/fs < 0.5. */
+		T cutoff;
+		/** Resonance: 0-1, self-oscillation begins around 0.7. */
+		T resonance;
+
+		/** 4-pole (24dB/oct) lowpass output. */
+		T lowpass4;
+		/** 4-pole (24dB/oct) highpass output. */
+		T highpass4;
+	};
 
 	LadderFilter() {
 		reset();
-		setCutoff(0);
 	}
 
 	void reset() {
 		for (int i = 0; i < 4; i++) {
-			state[i] = 0;
+			state[i] = T(0);
+		}
+		for (int i = 0; i < 5; i++) {
+			adaa[i].reset();
 		}
 	}
 
-	void setCutoff(T cutoff) {
-		omega0 = 2 * T(M_PI) * cutoff;
-	}
-
-	void process(T input, T dt) {
-		dsp::stepRK4(T(0), dt, state, 4, [&](T t, const T x[], T dxdt[]) {
-			T inputt = crossfade(this->input, input, t / dt);
-			T inputc = clip(inputt - resonance * x[3]);
-			T yc0 = clip(x[0]);
-			T yc1 = clip(x[1]);
-			T yc2 = clip(x[2]);
-			T yc3 = clip(x[3]);
-
-			dxdt[0] = omega0 * (inputc - yc0);
-			dxdt[1] = omega0 * (yc0 - yc1);
-			dxdt[2] = omega0 * (yc1 - yc2);
-			dxdt[3] = omega0 * (yc2 - yc3);
-		});
-
-		this->input = input;
-	}
-
-	T lowpass() {
-		return state[3];
-	}
-	T highpass() {
-		return clip((input - resonance * state[3]) - 4 * state[0] + 6 * state[1] - 4 * state[2] + state[3]);
+	void process(Frame& frame) {
+		// Pre-warp cutoff frequency using bilinear transform
+		T g = tan_5_4(T(M_PI) * frame.cutoff);
+		// Compute the one-pole lowpass gain coefficient G = g/(1+g)
+		T G = g / (T(1) + g);
+
+		// Global feedback path
+		// Apply resonance scaling and soft-clip with tanh (with ADAA)
+		T feedback = adaa[4].process(frame.resonance * state[3]);
+		T u = frame.input - feedback;
+
+		// Stage 0
+		T sat0 = adaa[0].process(u);
+		T v0 = G * (sat0 - state[0]);
+		T y0 = v0 + state[0];
+		state[0] = y0 + v0;
+
+		// Stage 1
+		T sat1 = adaa[1].process(y0);
+		T v1 = G * (sat1 - state[1]);
+		T y1 = v1 + state[1];
+		state[1] = y1 + v1;
+
+		// Stage 2
+		T sat2 = adaa[2].process(y1);
+		T v2 = G * (sat2 - state[2]);
+		T y2 = v2 + state[2];
+		state[2] = y2 + v2;
+
+		// Stage 3
+		T sat3 = adaa[3].process(y2);
+		T v3 = G * (sat3 - state[3]);
+		T y3 = v3 + state[3];
+		state[3] = y3 + v3;
+
+		frame.lowpass4 = y3;
+
+		// Binomial expansion (1-H)^4 = 1 - 4H + 6H^2 - 4H^3 + H^4
+		// Using saturated input and stage outputs
+		frame.highpass4 = sat0 - T(4) * y0 + T(6) * y1 - T(4) * y2 + y3;
 	}
 };
 
 
-static const int UPSAMPLE = 2;
-
 struct VCF : Module {
 	enum ParamIds {
 		FREQ_PARAM,
@@ -90,9 +188,6 @@ struct VCF : Module {
 	};
 
 	LadderFilter<float_4> filters[4];
-	// Upsampler<UPSAMPLE, 8> inputUpsampler;
-	// Decimator<UPSAMPLE, 8> lowpassDecimator;
-	// Decimator<UPSAMPLE, 8> highpassDecimator;
 
 	VCF() {
 		config(NUM_PARAMS, NUM_INPUTS, NUM_OUTPUTS);
@@ -100,9 +195,9 @@ struct VCF : Module {
 		// freq = C4 * 2^(10 * param - 5)
 		// or
 		// param = (log2(freq / C4) + 5) / 10
-		const float minFreq = (std::log2(dsp::FREQ_C4 / 8000.f) + 5) / 10;
-		const float maxFreq = (std::log2(8000.f / dsp::FREQ_C4) + 5) / 10;
-		const float defaultFreq = (0.f + 5) / 10;
+		const float minFreq = (std::log2(8.f / dsp::FREQ_C4) + 5) / 10;
+		const float maxFreq = (std::log2(20000.f / dsp::FREQ_C4) + 5) / 10;
+		const float defaultFreq = (minFreq + maxFreq) / 2.f;
 		configParam(FREQ_PARAM, minFreq, maxFreq, defaultFreq, "Cutoff frequency", " Hz", std::pow(2, 10.f), dsp::FREQ_C4 / std::pow(2, 5.f));
 		configParam(RES_PARAM, 0.f, 1.f, 0.f, "Resonance", "%", 0.f, 100.f);
 		configParam(RES_CV_PARAM, -1.f, 1.f, 0.f, "Resonance CV", "%", 0.f, 100.f);
@@ -127,8 +222,9 @@ struct VCF : Module {
 	}
 
 	void onReset() override {
-		for (int i = 0; i < 4; i++)
+		for (int i = 0; i < 4; i++) {
 			filters[i].reset();
+		}
 	}
 
 	void process(const ProcessArgs& args) override {
@@ -148,63 +244,37 @@ struct VCF : Module {
 		int channels = std::max(1, inputs[IN_INPUT].getChannels());
 
 		for (int c = 0; c < channels; c += 4) {
-			auto& filter = filters[c / 4];
+			LadderFilter<float_4>::Frame frame;
 
+			// Input
 			float_4 input = inputs[IN_INPUT].getVoltageSimd<float_4>(c) / 5.f;
 
-			// Drive gain
+			// Drive
 			float_4 drive = driveParam + inputs[DRIVE_INPUT].getPolyVoltageSimd<float_4>(c) / 10.f * driveCvParam;
-			drive = clamp(drive, -1.f, 1.f);
+			drive = simd::clamp(drive, -1.f, 1.f);
 			float_4 gain = simd::pow(1.f + drive, 5);
 			input *= gain;
 
 			// Add -120dB noise to bootstrap self-oscillation
 			input += 1e-6f * (2.f * random::uniform() - 1.f);
+			frame.input = input;
 
-			// Set resonance
+			// Resonance
 			float_4 resonance = resParam + inputs[RES_INPUT].getPolyVoltageSimd<float_4>(c) / 10.f * resCvParam;
-			resonance = clamp(resonance, 0.f, 1.f);
-			filter.resonance = simd::pow(resonance, 2) * 10.f;
+			resonance = simd::clamp(resonance, 0.f, 1.f);
+			frame.resonance = resonance * resonance * 10.f;
 
-			// Get pitch
+			// Cutoff frequency
 			float_4 pitch = freqParam + inputs[FREQ_INPUT].getPolyVoltageSimd<float_4>(c) * freqCvParam;
-			// Set cutoff
 			float_4 cutoff = dsp::FREQ_C4 * dsp::exp2_taylor5(pitch);
-			// Without oversampling, we must limit to 8000 Hz or so @ 44100 Hz
-			cutoff = clamp(cutoff, 1.f, args.sampleRate * 0.18f);
-			filter.setCutoff(cutoff);
-
-			// Upsample input
-			// float dt = args.sampleTime / UPSAMPLE;
-			// float inputBuf[UPSAMPLE];
-			// float lowpassBuf[UPSAMPLE];
-			// float highpassBuf[UPSAMPLE];
-			// inputUpsampler.process(input, inputBuf);
-			// for (int i = 0; i < UPSAMPLE; i++) {
-			// 	// Step the filter
-			// 	filter.process(inputBuf[i], dt);
-			// 	if (outputs[LPF_OUTPUT].isConnected())
-			// 		lowpassBuf[i] = filter.lowpass();
-			// 	if (outputs[HPF_OUTPUT].isConnected())
-			// 		highpassBuf[i] = filter.highpass();
-			// }
-
-			// // Set outputs
-			// if (outputs[LPF_OUTPUT].isConnected()) {
-			// 	outputs[LPF_OUTPUT].setVoltage(5.f * lowpassDecimator.process(lowpassBuf));
-			// }
-			// if (outputs[HPF_OUTPUT].isConnected()) {
-			// 	outputs[HPF_OUTPUT].setVoltage(5.f * highpassDecimator.process(highpassBuf));
-			// }
-
-			// Set outputs
-			filter.process(input, args.sampleTime);
-			if (outputs[LPF_OUTPUT].isConnected()) {
-				outputs[LPF_OUTPUT].setVoltageSimd(5.f * filter.lowpass(), c);
-			}
-			if (outputs[HPF_OUTPUT].isConnected()) {
-				outputs[HPF_OUTPUT].setVoltageSimd(5.f * filter.highpass(), c);
-			}
+			frame.cutoff = simd::clamp(cutoff * args.sampleTime, 0.f, 0.49f);
+
+			// Process
+			filters[c / 4].process(frame);
+
+			// Outputs
+			outputs[LPF_OUTPUT].setVoltageSimd(frame.lowpass4 * 5.f, c);
+			outputs[HPF_OUTPUT].setVoltageSimd(frame.highpass4 * 5.f, c);
 		}
 
 		outputs[LPF_OUTPUT].setChannels(channels);
@@ -212,7 +282,7 @@ struct VCF : Module {
 	}
 
 	void paramsFromJson(json_t* rootJ) override {
-		// These attenuators didn't exist in version <2.0, so set to 1 in case they are not overwritten.
+		// These attenuators didn't exist in version <2, so set to 1 in case they are not overwritten.
 		params[RES_CV_PARAM].setValue(1.f);
 		params[DRIVE_CV_PARAM].setValue(1.f);
 

From ba328ae9cc2316d7716419e5217c619afebf58c2 Mon Sep 17 00:00:00 2001
From: Andrew Belt <andrewpbelt@gmail.com>
Date: Tue, 6 Jan 2026 01:13:08 -0500
Subject: [PATCH 13/18] VCF: Optimize function approximations. Increase maximum
 frequency to 22,000 Hz.

---
 src/VCF.cpp | 67 ++++++++++++++++++++++++++++-------------------------
 1 file changed, 36 insertions(+), 31 deletions(-)

diff --git a/src/VCF.cpp b/src/VCF.cpp
index 5db0615..932a282 100644
--- a/src/VCF.cpp
+++ b/src/VCF.cpp
@@ -1,53 +1,56 @@
 #include "plugin.hpp"
 
 
-using simd::float_4;
-
-
-/** Approximates tan(x) for x in [0, π*0.49].
-Max error: 1.8e-7
+/** Approximates tan(pi*x) for x in [0, 0.5).
+Optimized coefficients for max rel error: 1.18e-05.
 */
 template <typename T>
-inline T tan_5_4(T x) {
+inline T tan_pi_1_2(T x) {
 	T x2 = x * x;
-	return x * (T(1) + x2 * (T(-0.1122835153) + T(0.00114264086) * x2))
-		/ (T(1) + x2 * (T(-0.4456155807) + T(0.01634547741) * x2));
+	T num = T(1) + T(-0.9622845476351338) * x2;
+	T den = T(1) + x2 * (T(-4.252711329946427) + T(1.0108965067534537) * x2);
+	return T(M_PI) * x * num / den;
 }
 
 
 /** Approximates tanh(x) for x in [-4, 4].
-Max error: 1.1e-5
+Optimized coefficients for max rel error: 5.56e-07.
 */
 template <typename T>
-inline T tanh_5_6(T x) {
+inline T tanh_2_3(T x) {
 	x = simd::clamp(x, T(-4), T(4));
 	T x2 = x * x;
-	return x * (T(1450.73) + x2 * (T(152.494) + T(1.12357) * x2))
-		/ (T(1450.80) + x2 * (T(635.839) + x2 * (T(19.8913) + T(0.00188) * x2)));
+	T num = T(1) + x2 * (T(0.11823399425250458) + x2 * T(0.0017593473306865004));
+	T den = T(1) + x2 * (T(0.4515649138214517) + x2 * (T(0.01895237471013944) + x2 * T(7.340776670083926e-05)));
+	return x * num / den;
 }
 
 
-/** Approximates ln(cosh(x)) for x >= 0.
-Max error: 6.2e-5 over [0, 10].
+/** Approximates ln(cosh(x)) for all x.
+Optimized coefficients for max deriv rel error: 2.18e-04.
+Max func abs error: 2.33e-04.
 */
 template <typename T>
-inline T lncosh_5_3(T x) {
+inline T ln_cosh_3_3(T x) {
+	x = simd::abs(x);
 	T x2 = x * x;
-	return x2 * T(0.5) * (T(1) + x * (T(0.5825995649809) + x * (T(0.29698351914621) + x * T(-0.0001879624507617))))
-		/ (T(1) + x * (T(0.5938203513329) + x * (T(0.4266725633273) + x * T(0.1456088792904))));
+	T num = T(1) + x * (T(0.5536821172234367) + x * (T(0.25181887093756017) + x * T(-0.00022483065845568806)));
+	T den = T(1) + x * (T(0.5571167780242312) + x * (T(0.4011383699340152) + x * T(0.12250932588907415)));
+	return x2 * T(0.5) * num / den;
 }
 
 
-/** First antiderivative of tanh: F(x) = ln(cosh(x)).
-For numerical stability, uses asymptotic form for |x| > 10.
+/** Approximates ln(cosh(x)) for all x.
+Optimized coefficients for max deriv rel error: 1.73e-05.
+Max func abs error: 1.59e-05.
 */
 template <typename T>
-inline T tanh_F1(T x) {
+inline T ln_cosh_3_4(T x) {
 	x = simd::abs(x);
-	// For |x| > 10, ln(cosh(x)) ≈ |x| - ln(2)
-	T asymptotic = x - T(0.6931471805599453);
-	T exact = lncosh_5_3(x);
-	return simd::ifelse(x < T(10), exact, asymptotic);
+	T x2 = x * x;
+	T num = T(1) + x * (T(0.6818733052030992) + x * (T(0.3310714761981197) + x * T(0.07328250587472884)));
+	T den = T(1) + x * (T(0.6818667996836973) + x * (T(0.4970931090131887) + x * (T(0.18905704720922234) + x * T(0.0366985697841935))));
+	return x2 * T(0.5) * num / den;
 }
 
 
@@ -68,13 +71,13 @@ struct TanhADAA1 {
 		T diff = x - xPrev;
 
 		// Normal case: finite difference of antiderivative
-		T Fx = tanh_F1(x);
-		T FxPrev = tanh_F1(xPrev);
+		T Fx = ln_cosh_3_4(x);
+		T FxPrev = ln_cosh_3_4(xPrev);
 		T adaaResult = (Fx - FxPrev) / diff;
 
 		// Fallback: use tanh at midpoint when diff is small
 		T midpoint = (x + xPrev) * T(0.5);
-		T fallbackResult = tanh_5_6(midpoint);
+		T fallbackResult = tanh_2_3(midpoint);
 
 		T y = simd::ifelse(simd::abs(diff) > eps, adaaResult, fallbackResult);
 
@@ -120,7 +123,7 @@ struct LadderFilter {
 
 	void process(Frame& frame) {
 		// Pre-warp cutoff frequency using bilinear transform
-		T g = tan_5_4(T(M_PI) * frame.cutoff);
+		T g = tan_pi_1_2(frame.cutoff);
 		// Compute the one-pole lowpass gain coefficient G = g/(1+g)
 		T G = g / (T(1) + g);
 
@@ -156,12 +159,14 @@ struct LadderFilter {
 		frame.lowpass4 = y3;
 
 		// Binomial expansion (1-H)^4 = 1 - 4H + 6H^2 - 4H^3 + H^4
-		// Using saturated input and stage outputs
 		frame.highpass4 = sat0 - T(4) * y0 + T(6) * y1 - T(4) * y2 + y3;
 	}
 };
 
 
+using simd::float_4;
+
+
 struct VCF : Module {
 	enum ParamIds {
 		FREQ_PARAM,
@@ -196,7 +201,7 @@ struct VCF : Module {
 		// or
 		// param = (log2(freq / C4) + 5) / 10
 		const float minFreq = (std::log2(8.f / dsp::FREQ_C4) + 5) / 10;
-		const float maxFreq = (std::log2(20000.f / dsp::FREQ_C4) + 5) / 10;
+		const float maxFreq = (std::log2(22000.f / dsp::FREQ_C4) + 5) / 10;
 		const float defaultFreq = (minFreq + maxFreq) / 2.f;
 		configParam(FREQ_PARAM, minFreq, maxFreq, defaultFreq, "Cutoff frequency", " Hz", std::pow(2, 10.f), dsp::FREQ_C4 / std::pow(2, 5.f));
 		configParam(RES_PARAM, 0.f, 1.f, 0.f, "Resonance", "%", 0.f, 100.f);
@@ -267,7 +272,7 @@ struct VCF : Module {
 			// Cutoff frequency
 			float_4 pitch = freqParam + inputs[FREQ_INPUT].getPolyVoltageSimd<float_4>(c) * freqCvParam;
 			float_4 cutoff = dsp::FREQ_C4 * dsp::exp2_taylor5(pitch);
-			frame.cutoff = simd::clamp(cutoff * args.sampleTime, 0.f, 0.49f);
+			frame.cutoff = simd::clamp(cutoff * args.sampleTime, 0.f, 0.499f);
 
 			// Process
 			filters[c / 4].process(frame);

From 48c26c5198a842beb29006a03757972efc3428b5 Mon Sep 17 00:00:00 2001
From: Andrew Belt <andrewpbelt@gmail.com>
Date: Tue, 6 Jan 2026 03:21:36 -0500
Subject: [PATCH 14/18] VCF: Use faster ln(cosh(x)) approximation. Cache
 previous ADAA antiderivative value.

---
 src/VCF.cpp | 37 +++++++++++++++++++++++--------------
 1 file changed, 23 insertions(+), 14 deletions(-)

diff --git a/src/VCF.cpp b/src/VCF.cpp
index 932a282..6dd0e19 100644
--- a/src/VCF.cpp
+++ b/src/VCF.cpp
@@ -1,8 +1,16 @@
 #include "plugin.hpp"
 
 
+/** Approximates 1/x, using the rcp() instruction and 1 Newton-Raphson refinement */
+template <typename T>
+inline T rcp_newton1(T x) {
+	T r = simd::rcp(x);
+	return r * (T(2) - x * r);
+}
+
+
 /** Approximates tan(pi*x) for x in [0, 0.5).
-Optimized coefficients for max rel error: 1.18e-05.
+Optimized coefficients for max relative error: 1.18e-05.
 */
 template <typename T>
 inline T tan_pi_1_2(T x) {
@@ -14,7 +22,7 @@ inline T tan_pi_1_2(T x) {
 
 
 /** Approximates tanh(x) for x in [-4, 4].
-Optimized coefficients for max rel error: 5.56e-07.
+Optimized coefficients for max relative error: 5.56e-07.
 */
 template <typename T>
 inline T tanh_2_3(T x) {
@@ -27,8 +35,8 @@ inline T tanh_2_3(T x) {
 
 
 /** Approximates ln(cosh(x)) for all x.
-Optimized coefficients for max deriv rel error: 2.18e-04.
-Max func abs error: 2.33e-04.
+Optimized coefficients for max derivative relative error: 2.18e-04.
+Max absolute error: 2.33e-04.
 */
 template <typename T>
 inline T ln_cosh_3_3(T x) {
@@ -41,8 +49,8 @@ inline T ln_cosh_3_3(T x) {
 
 
 /** Approximates ln(cosh(x)) for all x.
-Optimized coefficients for max deriv rel error: 1.73e-05.
-Max func abs error: 1.59e-05.
+Optimized coefficients for max derivative relative error: 1.73e-05.
+Max absolute error: 1.59e-05.
 */
 template <typename T>
 inline T ln_cosh_3_4(T x) {
@@ -54,34 +62,31 @@ inline T ln_cosh_3_4(T x) {
 }
 
 
-/** First-order ADAA (Antiderivative Anti-Aliasing) processor for tanh.
-Computes (F(x[n]) - F(x[n-1])) / (x[n] - x[n-1]) where F is the antiderivative.
-Falls back to tanh(midpoint) when x[n] ≈ x[n-1] (L'Hôpital's rule).
-*/
+/** First-order ADAA for tanh, caching F(x) between samples. */
 template <typename T>
 struct TanhADAA1 {
 	T xPrev = T(0);
+	T FxPrev = T(0);
 
 	void reset() {
 		xPrev = T(0);
+		FxPrev = T(0);
 	}
 
 	T process(T x) {
 		const T eps = T(1e-5);
 		T diff = x - xPrev;
 
-		// Normal case: finite difference of antiderivative
-		T Fx = ln_cosh_3_4(x);
-		T FxPrev = ln_cosh_3_4(xPrev);
+		T Fx = ln_cosh_3_3(x);
 		T adaaResult = (Fx - FxPrev) / diff;
 
-		// Fallback: use tanh at midpoint when diff is small
 		T midpoint = (x + xPrev) * T(0.5);
 		T fallbackResult = tanh_2_3(midpoint);
 
 		T y = simd::ifelse(simd::abs(diff) > eps, adaaResult, fallbackResult);
 
 		xPrev = x;
+		FxPrev = Fx;
 		return y;
 	}
 };
@@ -134,24 +139,28 @@ struct LadderFilter {
 
 		// Stage 0
 		T sat0 = adaa[0].process(u);
+		// T sat0 = tanh_2_3(u);
 		T v0 = G * (sat0 - state[0]);
 		T y0 = v0 + state[0];
 		state[0] = y0 + v0;
 
 		// Stage 1
 		T sat1 = adaa[1].process(y0);
+		// T sat1 = tanh_2_3(y0);
 		T v1 = G * (sat1 - state[1]);
 		T y1 = v1 + state[1];
 		state[1] = y1 + v1;
 
 		// Stage 2
 		T sat2 = adaa[2].process(y1);
+		// T sat2 = tanh_2_3(y1);
 		T v2 = G * (sat2 - state[2]);
 		T y2 = v2 + state[2];
 		state[2] = y2 + v2;
 
 		// Stage 3
 		T sat3 = adaa[3].process(y2);
+		// T sat3 = tanh_2_3(y2);
 		T v3 = G * (sat3 - state[3]);
 		T y3 = v3 + state[3];
 		state[3] = y3 + v3;

From cb6b1b9d814625276b7dde752d3ba6f34ad5149d Mon Sep 17 00:00:00 2001
From: Andrew Belt <andrewpbelt@gmail.com>
Date: Tue, 6 Jan 2026 03:55:31 -0500
Subject: [PATCH 15/18] VCO: Increase maximum frequency to around 21,000 Hz.

---
 src/VCO.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/VCO.cpp b/src/VCO.cpp
index d858967..51811fa 100644
--- a/src/VCO.cpp
+++ b/src/VCO.cpp
@@ -667,7 +667,7 @@ struct VCO : Module {
 		config(NUM_PARAMS, NUM_INPUTS, NUM_OUTPUTS, NUM_LIGHTS);
 		configSwitch(LINEAR_PARAM, 0.f, 1.f, 0.f, "FM mode", {"1V/octave", "Linear"});
 		configSwitch(SYNC_PARAM, 0.f, 1.f, 1.f, "Sync mode", {"Soft", "Hard"});
-		configParam(FREQ_PARAM, -75.f, 75.f, 0.f, "Frequency", " Hz", dsp::FREQ_SEMITONE, dsp::FREQ_C4);
+		configParam(FREQ_PARAM, -76.f, 76.f, 0.f, "Frequency", " Hz", dsp::FREQ_SEMITONE, dsp::FREQ_C4);
 		configParam(FM_PARAM, -1.f, 1.f, 0.f, "Frequency modulation", "%", 0.f, 100.f);
 		getParamQuantity(FM_PARAM)->randomizeEnabled = false;
 		configParam(PW_PARAM, 0.01f, 0.99f, 0.5f, "Pulse width", "%", 0.f, 100.f);

From 40ae8916387609ac9a563e02f039177fac65ea15 Mon Sep 17 00:00:00 2001
From: Andrew Belt <andrewpbelt@gmail.com>
Date: Tue, 6 Jan 2026 04:51:19 -0500
Subject: [PATCH 16/18] VCF: Clean up comments.

---
 src/VCF.cpp | 24 ++++++++++--------------
 1 file changed, 10 insertions(+), 14 deletions(-)

diff --git a/src/VCF.cpp b/src/VCF.cpp
index 6dd0e19..daafb61 100644
--- a/src/VCF.cpp
+++ b/src/VCF.cpp
@@ -62,7 +62,7 @@ inline T ln_cosh_3_4(T x) {
 }
 
 
-/** First-order ADAA for tanh, caching F(x) between samples. */
+/** 1st-order antiderivative anti-aliasing (ADAA) for tanh. */
 template <typename T>
 struct TanhADAA1 {
 	T xPrev = T(0);
@@ -92,8 +92,7 @@ struct TanhADAA1 {
 };
 
 
-/** 4-pole (24dB/oct) transistor ladder filter using TPT/ZDF (Zero-Delay Feedback).
-Uses first-order ADAA on tanh saturation.
+/** 4-pole transistor ladder filter using TPT/ZDF (Zero-Delay Feedback).
 */
 template <typename T>
 struct LadderFilter {
@@ -102,14 +101,13 @@ struct LadderFilter {
 
 	struct Frame {
 		T input;
-		/** Normalized cutoff frequency: 0 <= fc/fs < 0.5. */
+		/** Normalized cutoff frequency fc/fs in [0, 0.5) */
 		T cutoff;
-		/** Resonance: 0-1, self-oscillation begins around 0.7. */
 		T resonance;
 
-		/** 4-pole (24dB/oct) lowpass output. */
+		/** 4-pole lowpass output. */
 		T lowpass4;
-		/** 4-pole (24dB/oct) highpass output. */
+		/** 4-pole highpass output. */
 		T highpass4;
 	};
 
@@ -127,13 +125,13 @@ struct LadderFilter {
 	}
 
 	void process(Frame& frame) {
-		// Pre-warp cutoff frequency using bilinear transform
+		// Pre-warped frequency
 		T g = tan_pi_1_2(frame.cutoff);
-		// Compute the one-pole lowpass gain coefficient G = g/(1+g)
+		// Integrator gain
 		T G = g / (T(1) + g);
 
-		// Global feedback path
-		// Apply resonance scaling and soft-clip with tanh (with ADAA)
+		// Feedback path
+		// Apply resonance scaling and soft-clip with tanh
 		T feedback = adaa[4].process(frame.resonance * state[3]);
 		T u = frame.input - feedback;
 
@@ -166,8 +164,6 @@ struct LadderFilter {
 		state[3] = y3 + v3;
 
 		frame.lowpass4 = y3;
-
-		// Binomial expansion (1-H)^4 = 1 - 4H + 6H^2 - 4H^3 + H^4
 		frame.highpass4 = sat0 - T(4) * y0 + T(6) * y1 - T(4) * y2 + y3;
 	}
 };
@@ -276,7 +272,7 @@ struct VCF : Module {
 			// Resonance
 			float_4 resonance = resParam + inputs[RES_INPUT].getPolyVoltageSimd<float_4>(c) / 10.f * resCvParam;
 			resonance = simd::clamp(resonance, 0.f, 1.f);
-			frame.resonance = resonance * resonance * 10.f;
+			frame.resonance = simd::pow(resonance, 2) * 10.f;
 
 			// Cutoff frequency
 			float_4 pitch = freqParam + inputs[FREQ_INPUT].getPolyVoltageSimd<float_4>(c) * freqCvParam;

From 5fe39fa5be340c1eed278d15246c82d0579e4c8f Mon Sep 17 00:00:00 2001
From: Andrew Belt <andrewpbelt@gmail.com>
Date: Tue, 6 Jan 2026 15:17:41 -0500
Subject: [PATCH 17/18] VCF: Add citations for TPT and ADAA.

---
 src/VCF.cpp | 31 ++++++++++++++++++++++---------
 1 file changed, 22 insertions(+), 9 deletions(-)

diff --git a/src/VCF.cpp b/src/VCF.cpp
index daafb61..2c343f5 100644
--- a/src/VCF.cpp
+++ b/src/VCF.cpp
@@ -9,15 +9,15 @@ inline T rcp_newton1(T x) {
 }
 
 
-/** Approximates tan(pi*x) for x in [0, 0.5).
-Optimized coefficients for max relative error: 1.18e-05.
+/** Approximates tan(x) for x in [0, pi*0.5).
+Optimized coefficients for max relative error: 2.78e-05.
 */
 template <typename T>
-inline T tan_pi_1_2(T x) {
+inline T tan_1_2(T x) {
 	T x2 = x * x;
-	T num = T(1) + T(-0.9622845476351338) * x2;
-	T den = T(1) + x2 * (T(-4.252711329946427) + T(1.0108965067534537) * x2);
-	return T(M_PI) * x * num / den;
+	T num = T(1) + x2 * T(-0.09776575533683811);
+	T den = T(1) + x2 * (T(-0.43119539396382) + x2 * T(0.0105011966117302));
+	return x * num / den;
 }
 
 
@@ -62,7 +62,7 @@ inline T ln_cosh_3_4(T x) {
 }
 
 
-/** 1st-order antiderivative anti-aliasing (ADAA) for tanh. */
+/** 1st-order ADAA for tanh. */
 template <typename T>
 struct TanhADAA1 {
 	T xPrev = T(0);
@@ -92,7 +92,19 @@ struct TanhADAA1 {
 };
 
 
-/** 4-pole transistor ladder filter using TPT/ZDF (Zero-Delay Feedback).
+/** 4-pole transistor ladder filter using TPT (Topology-Preserving Transform).
+Feedback uses previous sample (i.e. 1 sample delayed) to avoid iterative solving.
+
+For the equivalent state / trapezoidal integrator approach:
+Zavalishin, V. "The Art of VA Filter Design". 2018
+https://www.native-instruments.com/fileadmin/ni_media/downloads/pdf/VAFilterDesign_2.1.2.pdf
+
+For Cytomic's formulation:
+https://cytomic.com/files/dsp/SvfLinearTrapOptimised2.pdf
+
+For antiderivative anti-aliasing (ADAA):
+Parker, J., Zavalishin, V., Le Bihan, E. "Reducing the Aliasing of Nonlinear Waveshaping Using Continuous-Time Convolution". DAFx 2016
+https://dafx.de/paper-archive/2016/dafxpapers/20-DAFx-16_paper_41-PN.pdf
 */
 template <typename T>
 struct LadderFilter {
@@ -126,13 +138,14 @@ struct LadderFilter {
 
 	void process(Frame& frame) {
 		// Pre-warped frequency
-		T g = tan_pi_1_2(frame.cutoff);
+		T g = tan_1_2(T(M_PI) * frame.cutoff);
 		// Integrator gain
 		T G = g / (T(1) + g);
 
 		// Feedback path
 		// Apply resonance scaling and soft-clip with tanh
 		T feedback = adaa[4].process(frame.resonance * state[3]);
+		// T feedback = tanh_2_3(frame.resonance * state[3]);
 		T u = frame.input - feedback;
 
 		// Stage 0

From cca3480c14d89758a0e3a0e44387698761130a62 Mon Sep 17 00:00:00 2001
From: Andrew Belt <andrewpbelt@gmail.com>
Date: Tue, 6 Jan 2026 17:03:20 -0500
Subject: [PATCH 18/18] VCF: Use x/sqrt(x^2+1) soft clipping instead of
 tanh(x).

---
 src/VCF.cpp | 81 ++++++++++++++++-------------------------------------
 1 file changed, 24 insertions(+), 57 deletions(-)

diff --git a/src/VCF.cpp b/src/VCF.cpp
index 2c343f5..8f8f399 100644
--- a/src/VCF.cpp
+++ b/src/VCF.cpp
@@ -9,6 +9,14 @@ inline T rcp_newton1(T x) {
 }
 
 
+/** Approximates 1/sqrt(x), using the rsqrt() instruction and 1 Newton-Raphson refinement */
+template <typename T>
+inline T rsqrt_newton1(T x) {
+	T y = simd::rsqrt(x);
+	return y * (T(3) - x * y * y) * T(0.5);
+}
+
+
 /** Approximates tan(x) for x in [0, pi*0.5).
 Optimized coefficients for max relative error: 2.78e-05.
 */
@@ -21,72 +29,37 @@ inline T tan_1_2(T x) {
 }
 
 
-/** Approximates tanh(x) for x in [-4, 4].
-Optimized coefficients for max relative error: 5.56e-07.
-*/
-template <typename T>
-inline T tanh_2_3(T x) {
-	x = simd::clamp(x, T(-4), T(4));
-	T x2 = x * x;
-	T num = T(1) + x2 * (T(0.11823399425250458) + x2 * T(0.0017593473306865004));
-	T den = T(1) + x2 * (T(0.4515649138214517) + x2 * (T(0.01895237471013944) + x2 * T(7.340776670083926e-05)));
-	return x * num / den;
-}
-
-
-/** Approximates ln(cosh(x)) for all x.
-Optimized coefficients for max derivative relative error: 2.18e-04.
-Max absolute error: 2.33e-04.
-*/
-template <typename T>
-inline T ln_cosh_3_3(T x) {
-	x = simd::abs(x);
-	T x2 = x * x;
-	T num = T(1) + x * (T(0.5536821172234367) + x * (T(0.25181887093756017) + x * T(-0.00022483065845568806)));
-	T den = T(1) + x * (T(0.5571167780242312) + x * (T(0.4011383699340152) + x * T(0.12250932588907415)));
-	return x2 * T(0.5) * num / den;
-}
-
-
-/** Approximates ln(cosh(x)) for all x.
-Optimized coefficients for max derivative relative error: 1.73e-05.
-Max absolute error: 1.59e-05.
-*/
-template <typename T>
-inline T ln_cosh_3_4(T x) {
-	x = simd::abs(x);
-	T x2 = x * x;
-	T num = T(1) + x * (T(0.6818733052030992) + x * (T(0.3310714761981197) + x * T(0.07328250587472884)));
-	T den = T(1) + x * (T(0.6818667996836973) + x * (T(0.4970931090131887) + x * (T(0.18905704720922234) + x * T(0.0366985697841935))));
-	return x2 * T(0.5) * num / den;
-}
-
-
-/** 1st-order ADAA for tanh. */
+/** 1st-order ADAA for softclip: f(x) = x/sqrt(x²+1), F(x) = sqrt(x²+1). */
 template <typename T>
-struct TanhADAA1 {
+struct SoftclipADAA1 {
 	T xPrev = T(0);
-	T FxPrev = T(0);
+	T fPrev = T(0); // f(0) = 0
+	T FPrev = T(1); // F(0) = sqrt(0+1) = 1
 
 	void reset() {
 		xPrev = T(0);
-		FxPrev = T(0);
+		fPrev = T(0);
+		FPrev = T(1);
 	}
 
 	T process(T x) {
 		const T eps = T(1e-5);
 		T diff = x - xPrev;
 
-		T Fx = ln_cosh_3_3(x);
-		T adaaResult = (Fx - FxPrev) / diff;
+		// f = x/sqrt(x^2+1), F = sqrt(x^2+1)
+		T x2p1 = x * x + T(1);
+		T r = rsqrt_newton1(x2p1);
+		T f = x * r;
+		T F = x2p1 * r;
 
-		T midpoint = (x + xPrev) * T(0.5);
-		T fallbackResult = tanh_2_3(midpoint);
+		T adaaResult = (F - FPrev) * rcp_newton1(diff);
+		T fallbackResult = (f + fPrev) * T(0.5);
 
 		T y = simd::ifelse(simd::abs(diff) > eps, adaaResult, fallbackResult);
 
 		xPrev = x;
-		FxPrev = Fx;
+		fPrev = f;
+		FPrev = F;
 		return y;
 	}
 };
@@ -109,7 +82,7 @@ https://dafx.de/paper-archive/2016/dafxpapers/20-DAFx-16_paper_41-PN.pdf
 template <typename T>
 struct LadderFilter {
 	T state[4];
-	TanhADAA1<T> adaa[5];
+	SoftclipADAA1<T> adaa[5];
 
 	struct Frame {
 		T input;
@@ -143,35 +116,29 @@ struct LadderFilter {
 		T G = g / (T(1) + g);
 
 		// Feedback path
-		// Apply resonance scaling and soft-clip with tanh
 		T feedback = adaa[4].process(frame.resonance * state[3]);
-		// T feedback = tanh_2_3(frame.resonance * state[3]);
 		T u = frame.input - feedback;
 
 		// Stage 0
 		T sat0 = adaa[0].process(u);
-		// T sat0 = tanh_2_3(u);
 		T v0 = G * (sat0 - state[0]);
 		T y0 = v0 + state[0];
 		state[0] = y0 + v0;
 
 		// Stage 1
 		T sat1 = adaa[1].process(y0);
-		// T sat1 = tanh_2_3(y0);
 		T v1 = G * (sat1 - state[1]);
 		T y1 = v1 + state[1];
 		state[1] = y1 + v1;
 
 		// Stage 2
 		T sat2 = adaa[2].process(y1);
-		// T sat2 = tanh_2_3(y1);
 		T v2 = G * (sat2 - state[2]);
 		T y2 = v2 + state[2];
 		state[2] = y2 + v2;
 
 		// Stage 3
 		T sat3 = adaa[3].process(y2);
-		// T sat3 = tanh_2_3(y2);
 		T v3 = G * (sat3 - state[3]);
 		T y3 = v3 + state[3];
 		state[3] = y3 + v3;