From eae28b72c040b9ece5fb85f634f6c826c79f7399 Mon Sep 17 00:00:00 2001
From: Andrew Belt <andrewpbelt@gmail.com>
Date: Wed, 31 Dec 2025 07:35:00 -0500
Subject: [PATCH] VCO: Correctly handle events like phase crossing, pulse width
 crossing, 0.5 saw crossing, and sync in any order. WIP.

---
 src/VCO.cpp | 321 ++++++++++++++++++++++++++++------------------------
 1 file changed, 176 insertions(+), 145 deletions(-)
diff --git a/src/VCO.cpp b/src/VCO.cpp
index 5cb955e..bd5c7e1 100644
--- a/src/VCO.cpp
+++ b/src/VCO.cpp
@@ -17,7 +17,7 @@ inline T sin_pi_9(T x) {
 t is the fractional position between y1 and y2.
 */
 template <typename T>
-T cubicInterp(T y0, T y1, T y2, T y3, T t) {
+T catmullRomInterpolate(T y0, T y1, T y2, T y3, T t) {
 	T t2 = t * t;
 	T t3 = t2 * t;
 	return y1 + T(0.5) * t * (y2 - y0)
@@ -90,11 +90,13 @@ struct OnePoleHighpass : OnePoleLowpass {
 };
 
 
+/** Holds a precomputed minBLEP impulse response, reordered to efficiently insert into an audio buffer.
+*/
 template <int Z, int O>
 struct MinBlep {
 	/** Reordered impulse response for cubic interpolation, minus 1.0.
 	Z dimension has +1 padding at start (for z-1 wrap) and +4 at end (for SIMD + z+1 wrap).
-	Access via getImpulse() which handles o wrapping and z offset.
+	Access via getImpulse() which handles O wrapping and Z offset.
 	*/
 	float impulseReordered[O][1 + 2 * Z + 4] = {};
 
@@ -102,7 +104,7 @@ struct MinBlep {
 		float impulse[2 * Z][O];
 		dsp::minBlepImpulse(Z, O, &impulse[0][0]);
 
-		// Fill impulseReordered with transposed data, minus 1.
+		// Fill impulseReordered with transposed and pre-subtracted data
 		// Storage index = z + 1 (to allow z = -1 access at index 0)
 		for (int o = 0; o < O; o++) {
 			// z = -1: before impulse starts
@@ -120,30 +122,25 @@ struct MinBlep {
 
 	/** Get pointer to impulse data, handling o wrapping and z offset. */
 	const float* getImpulse(int o, int z) const {
-		int index = z * O + o;
-		z = index / O;
-		o = index % O;
+		z += o / O;
+		o = o % O;
 		if (o < 0) {
 			z -= 1;
 			o += O;
 		}
+		// assert(0 <= o && o < O);
+		// assert(-1 <= z && z < 2 * Z + 4);
 		return &impulseReordered[o][z + 1];
 	}
 
-	template <typename T = float>
-	struct State {
-		// Double buffer of length 2 * Z
-		T buffer[4 * Z] = {};
-		int32_t bufferIndex = 0;
-	};
-
-	/** Places a discontinuity with magnitude `x` at 0 < p <= 1 relative to the current frame.
-	For example if a square wave will jump from 1 to -1 in 0.1 frames, use insertDiscontinuity(0.1, -2.0).
+	/** Places a discontinuity at 0 < p <= 1 relative to the current frame.
+	`out` must have enough space to write 2*Z floats, spaced by `stride` floats.
+	`p` is the subsample position to insert a discontinuity of `magnitude`.
+	For example if a square wave will jump from 1 to -1 in 0.1 frames, use insertDiscontinuity(out, 1, 0.1f, -2.f).
 
 	Note: In the deprecated MinBlepGenerator, `p` was in the range (-1, 0], so add 1 to p if updating to MinBlep.
 	*/
-	template <typename T>
-	void insertDiscontinuity(State<T>& s, float p, T x) const {
+	void insertDiscontinuity(float* out, int stride, float p, float magnitude) const {
 		if (!(0 < p && p <= 1))
 			return;
 
@@ -152,35 +149,61 @@ struct MinBlep {
 		int o = (int) subsample;
 		float t = subsample - o;
 
-		// For each zero crossing, cubic interpolate impulse response
+		// For each zero crossing, interpolate impulse response from oversample points
+		using simd::float_4;
 		for (int z = 0; z < 2 * Z; z += 4) {
-			simd::float_4 y0 = simd::float_4::load(getImpulse(o - 1, z));
-			simd::float_4 y1 = simd::float_4::load(getImpulse(o, z));
-			simd::float_4 y2 = simd::float_4::load(getImpulse(o + 1, z));
-			simd::float_4 y3 = simd::float_4::load(getImpulse(o + 2, z));
-			simd::float_4 ir = cubicInterp(y0, y1, y2, y3, T(t));
-
-			// Write to double buffer
-			for (int zz = 0; zz < 4; zz++) {
-				s.buffer[s.bufferIndex + z + zz] += ir[zz] * x;
+			float_4 y0 = float_4::load(getImpulse(o - 1, z));
+			float_4 y1 = float_4::load(getImpulse(o, z));
+			float_4 y2 = float_4::load(getImpulse(o + 1, z));
+			float_4 y3 = float_4::load(getImpulse(o + 2, z));
+			float_4 y = catmullRomInterpolate(y0, y1, y2, y3, float_4(t));
+			y *= magnitude;
+
+			// Write all 4 samples to buffer
+			for (int zi = 0; zi < 4; zi++) {
+				out[(z + zi) * stride] += y[zi];
 			}
 		}
 	}
+};
 
-	/** Should be called every frame after inserting any discontinuities. */
-	template <typename T>
-	T process(State<T>& s) const {
-		T v = s.buffer[s.bufferIndex];
-		s.buffer[s.bufferIndex] = T(0);
-		s.bufferIndex++;
-		if (s.bufferIndex >= 2 * Z) {
+
+/** Buffer that allows reading/writing up to N future elements contiguously.
+*/
+template <int N, typename T>
+struct MinBlepBuffer {
+	T buffer[2 * N] = {};
+	int32_t bufferIndex = 0;
+
+	T* startData() {
+		return &buffer[bufferIndex];
+	}
+
+	/** Returns the current element and advances the buffer.
+	*/
+	T shift() {
+		T v = buffer[bufferIndex];
+		bufferIndex++;
+		if (bufferIndex >= N) {
 			// Move second half of buffer to beginning
-			std::memcpy(s.buffer, s.buffer + 2 * Z, 2 * Z * sizeof(T));
-			std::memset(s.buffer + 2 * Z, 0, 2 * Z * sizeof(T));
-			s.bufferIndex = 0;
+			std::memcpy(buffer, buffer + N, N * sizeof(T));
+			std::memset(buffer + N, 0, N * sizeof(T));
+			bufferIndex = 0;
 		}
 		return v;
 	}
+
+	/** Copies `n` elements to `out` and advances the buffer.
+	*/
+	void shiftBuffer(T* out, size_t n) {
+		std::memcpy(out, buffer + bufferIndex, n * sizeof(T));
+		bufferIndex += n;
+		if (bufferIndex >= N) {
+			std::memcpy(buffer, buffer + N, N * sizeof(T));
+			std::memset(buffer + N, 0, N * sizeof(T));
+			bufferIndex = 0;
+		}
+	}
 };
 
 
@@ -190,9 +213,8 @@ static MinBlep<16, 16> minBlep;
 template <typename T>
 struct VCOProcessor {
 	T phase = 0.f;
-	T lastSyncValue = 0.f;
+	T lastSync = 0.f;
 	T syncDirection = 1.f;
-	T sqrState = 1.f;
 	T triFilterState = 0.f;
 
 	OnePoleHighpass dcFilter;
@@ -201,9 +223,9 @@ struct VCOProcessor {
 	OnePoleHighpass::State<T> dcFilterStateTri;
 	OnePoleHighpass::State<T> dcFilterStateSin;
 
-	MinBlep<16, 16>::State<T> sqrMinBlep;
-	MinBlep<16, 16>::State<T> sawMinBlep;
-	MinBlep<16, 16>::State<T> sinMinBlep;
+	MinBlepBuffer<16*2, T> sqrMinBlep;
+	MinBlepBuffer<16*2, T> sawMinBlep;
+	MinBlepBuffer<16*2, T> sinMinBlep;
 
 	void setSampleTime(float sampleTime) {
 		dcFilter.setCutoff(std::min(0.4f, 20.f * sampleTime));
@@ -231,143 +253,153 @@ struct VCOProcessor {
 		T sin = 0.f;
 	};
 
-	void process(Frame& frame, float deltaTime) {
-		// Advance phase
-		T deltaPhase = simd::clamp(frame.freq * deltaTime, 0.f, 0.49f);
+	void process(Frame& frame, float sampleTime) {
+		// Compute deltaPhase for full frame
+		T deltaPhase = simd::clamp(frame.freq * sampleTime, 0.f, 0.49f);
 		if (frame.soft) {
-			// Reverse direction
 			deltaPhase *= syncDirection;
 		}
 		else {
-			// Reset back to forward
 			syncDirection = 1.f;
 		}
-		phase += deltaPhase;
-
-		// Wrap phase
-		T phaseFloor = simd::floor(phase);
-		phase -= phaseFloor;
 
-		if (frame.sqrEnabled || frame.triEnabled) {
-			// Jump sqr when phase crosses 1, or crosses 0 if running backwards
-			T wrapMask = (phaseFloor != 0.f);
-			int wrapM = simd::movemask(wrapMask);
-			if (wrapM) {
-				T wrapPhase = (syncDirection == -1.f) & 1.f;
-				T wrapCrossing = (wrapPhase - (phase - deltaPhase)) / deltaPhase;
-				for (int i = 0; i < frame.channels; i++) {
-					if (wrapM & (1 << i)) {
-						T mask = simd::movemaskInverse<T>(1 << i);
-						// TODO: MinBlepGenerator::insertDiscontinuity() should handle subframes outside the range -1 < p <= 0 instead of failing silently.
-						float p = clamp(wrapCrossing[i], 0.f, 1.f);
-						T x = mask & (2.f * syncDirection);
-						minBlep.insertDiscontinuity(sqrMinBlep, p, x);
-					}
+		T prevPhase = phase;
+		const float pwMin = 0.01f;
+		T pulseWidth = simd::clamp(frame.pulseWidth, pwMin, 1.f - pwMin);
+
+		// Inserts minBLEP for each channel where mask is true.
+		// Serial but does nothing if mask is all zero.
+		auto insertMinBlep = [&](T mask, T p, T magnitude, MinBlepBuffer<16*2, T>& minBlepBuffer) {
+			int m = simd::movemask(mask);
+			if (!m)
+				return;
+			float* buffer = (float*) minBlepBuffer.startData();
+			for (int i = 0; i < frame.channels; i++) {
+				if (m & (1 << i)) {
+					minBlep.insertDiscontinuity(&buffer[i], 4, p[i], magnitude[i]);
 				}
 			}
-			sqrState = simd::ifelse(wrapMask, syncDirection, sqrState);
-
-			// Pulse width
-			const float pwMin = 0.01f;
-			T pulseWidth = simd::clamp(frame.pulseWidth, pwMin, 1.f - pwMin);
-
-			// Jump sqr when crossing `pulseWidth`
-			T pwMask = (syncDirection == sqrState) & ((syncDirection == 1.f) ^ (phase < pulseWidth));
-			int pw = simd::movemask(pwMask);
-			if (pw) {
-				T pulseCrossing = (pulseWidth - (phase - deltaPhase)) / deltaPhase;
-				for (int i = 0; i < frame.channels; i++) {
-					if (pw & (1 << i)) {
-						T mask = simd::movemaskInverse<T>(1 << i);
-						float p = clamp(pulseCrossing[i], 0.f, 1.f);
-						T x = mask & (-2.f * syncDirection);
-						minBlep.insertDiscontinuity(sqrMinBlep, p, x);
-					}
-				}
+		};
+
+		// Computes subsample time where phase crosses threshold.
+		auto getCrossing = [](T threshold, T startPhase, T endPhase, T startSubsample, T endSubsample) -> T {
+			// Wrap threshold mod 1 to be in (startPhase, startPhase + 1]
+			threshold -= simd::floor(threshold - startPhase);
+			// Map to (0, 1]
+			T p = (threshold - startPhase) / (endPhase - startPhase);
+			// Map to (startSubsample, endSubsample]
+			return startSubsample + p * (endSubsample - startSubsample);
+		};
+
+		// Processes wrap/pulse/saw crossings between startPhase and endPhase.
+		// startSubsample and endSubsample define the time range within the frame.
+		// channelMask limits processing to specific channels.
+		auto processCrossings = [&](T startPhase, T endPhase, T startSubsample, T endSubsample, T channelMask) {
+			if (frame.sqrEnabled || frame.triEnabled) {
+				// Wrap crossing (phase crosses 0 mod 1)
+				T wrapSubsample = getCrossing(1.f, startPhase, endPhase, startSubsample, endSubsample);
+				T wrapMask = channelMask & (startSubsample < wrapSubsample) & (wrapSubsample <= endSubsample);
+				insertMinBlep(wrapMask, wrapSubsample, 2.f * syncDirection, sqrMinBlep);
+
+				// Pulse width crossing
+				T pulseSubsample = getCrossing(pulseWidth, startPhase, endPhase, startSubsample, endSubsample);
+				T pulseMask = channelMask & (startSubsample < pulseSubsample) & (pulseSubsample <= endSubsample);
+				insertMinBlep(pulseMask, pulseSubsample, -2.f * syncDirection, sqrMinBlep);
 			}
-			sqrState = simd::ifelse(pwMask, -syncDirection, sqrState);
-		}
 
-		if (frame.sawEnabled) {
-			// Jump saw when crossing 0.5
-			T halfCrossing = (0.5f - (phase - deltaPhase)) / deltaPhase;
-			int halfMask = simd::movemask((0 < halfCrossing) & (halfCrossing <= 1.f));
-			if (halfMask) {
-				for (int i = 0; i < frame.channels; i++) {
-					if (halfMask & (1 << i)) {
-						T mask = simd::movemaskInverse<T>(1 << i);
-						float p = halfCrossing[i];
-						T x = mask & (-2.f * syncDirection);
-						minBlep.insertDiscontinuity(sawMinBlep, p, x);
-					}
-				}
+			if (frame.sawEnabled) {
+				// Saw crossing at 0.5
+				T sawSubsample = getCrossing(0.5f, startPhase, endPhase, startSubsample, endSubsample);
+				T sawMask = channelMask & (startSubsample < sawSubsample) & (sawSubsample <= endSubsample);
+				insertMinBlep(sawMask, sawSubsample, -2.f * syncDirection, sawMinBlep);
 			}
+		};
+
+		// Main logic
+		if (!frame.syncEnabled) {
+			// No sync. Process full frame
+			T endPhase = prevPhase + deltaPhase;
+			processCrossings(prevPhase, endPhase, 0.f, 1.f, T::mask());
+			phase = endPhase;
 		}
+		else {
+			// Compute sync subsample position
+			T deltaSync = frame.sync - lastSync;
+			T syncSubsample = -lastSync / deltaSync;
+			lastSync = frame.sync;
+			T syncOccurred = (0.f < syncSubsample) & (syncSubsample <= 1.f) & (frame.sync >= 0.f);
+			T noSync = ~syncOccurred;
+
+			if (simd::movemask(noSync)) {
+				// No sync for these channels. Process full frame
+				T endPhase = prevPhase + deltaPhase;
+				processCrossings(prevPhase, endPhase, 0.f, 1.f, noSync);
+				phase = simd::ifelse(noSync, endPhase, phase);
+			}
+
+			if (simd::movemask(syncOccurred)) {
+				// Process crossings before sync
+				T syncPhase = prevPhase + deltaPhase * syncSubsample;
+				processCrossings(prevPhase, syncPhase, 0.f, syncSubsample, syncOccurred);
 
-		// Detect sync
-		if (frame.syncEnabled) {
-			T deltaSync = frame.sync - lastSyncValue;
-			// Sample position where sync crosses 0.
-			// Might be NAN or outside of (0, 1] range
-			T syncCrossing = -lastSyncValue / deltaSync;
-			lastSyncValue = frame.sync;
-			T sync = (0.f < syncCrossing) & (syncCrossing <= 1.f) & (frame.sync >= 0.f);
-			int syncMask = simd::movemask(sync);
-			if (syncMask) {
 				if (frame.soft) {
-					syncDirection = simd::ifelse(sync, -syncDirection, syncDirection);
+					// Soft sync: Reverse direction, continue from syncPhase
+					syncDirection = simd::ifelse(syncOccurred, -syncDirection, syncDirection);
+					T endPhase = syncPhase + (-deltaPhase) * (1.f - syncSubsample);
+					processCrossings(syncPhase, endPhase, syncSubsample, 1.f, syncOccurred);
+					phase = simd::ifelse(syncOccurred, endPhase, phase);
 				}
 				else {
-					T newPhase = simd::ifelse(sync, (1.f - syncCrossing) * deltaPhase, phase);
-					// Insert minBLEP for sync
-					for (int i = 0; i < frame.channels; i++) {
-						if (syncMask & (1 << i)) {
-							T mask = simd::movemaskInverse<T>(1 << i);
-							float p = syncCrossing[i];
-							if (frame.sqrEnabled || frame.triEnabled) {
-								// Assume that hard-syncing a square always resets it to HIGH
-								T x = mask & (1.f - sqrState);
-								sqrState = simd::ifelse(mask, 1.f, sqrState);
-								minBlep.insertDiscontinuity(sqrMinBlep, p, x);
-								DEBUG("%f %f %f", p, x[i], sqrState[i]);
-							}
-							if (frame.sawEnabled) {
-								T x = mask & (saw(newPhase) - saw(phase));
-								minBlep.insertDiscontinuity(sawMinBlep, p, x);
-							}
-							if (frame.sinEnabled) {
-								T x = mask & (sin(newPhase) - sin(phase));
-								minBlep.insertDiscontinuity(sinMinBlep, p, x);
-							}
-						}
+					// Hard sync: reset to 0, insert discontinuities
+					if (frame.sqrEnabled || frame.triEnabled) {
+						// Sqr jumps from current state to +1
+						T sqrJump = (syncPhase - simd::floor(syncPhase) < pulseWidth);
+						insertMinBlep(syncOccurred & sqrJump, syncSubsample, 2.f, sqrMinBlep);
+					}
+					if (frame.triEnabled) {
+						// TODO Insert minBLEP to Tri
+					}
+					if (frame.sawEnabled) {
+						// Saw jumps from saw(syncPhase) to saw(0) = 0
+						insertMinBlep(syncOccurred, syncSubsample, -saw(syncPhase), sawMinBlep);
+					}
+					if (frame.sinEnabled) {
+						// sin jumps from sin(syncPhase) to sin(0) = 0
+						insertMinBlep(syncOccurred, syncSubsample, -sin(syncPhase), sinMinBlep);
 					}
-					phase = newPhase;
+
+					// Process crossings after sync (starting from phase 0)
+					T endPhase = deltaPhase * (1.f - syncSubsample);
+					processCrossings(0.f, endPhase, syncSubsample, 1.f, syncOccurred);
+					phase = simd::ifelse(syncOccurred, endPhase, phase);
 				}
 			}
 		}
 
-		// Saw
+		// Wrap phase to [0, 1)
+		phase -= simd::floor(phase);
+
+		// Generate outputs
 		if (frame.sawEnabled) {
 			frame.saw = saw(phase);
-			frame.saw += minBlep.process(sawMinBlep);
+			frame.saw += sawMinBlep.shift();
 			frame.saw = dcFilter.process(dcFilterStateSaw, frame.saw);
 		}
 
-		// Square
 		if (frame.sqrEnabled || frame.triEnabled) {
-			frame.sqr = sqrState;
-			frame.sqr += minBlep.process(sqrMinBlep);
+			T sqrHigh = (phase < pulseWidth) ^ syncDirection;
+			frame.sqr = simd::ifelse(sqrHigh, 1.f, -1.f);
+			frame.sqr += sqrMinBlep.shift();
 			T triSqr = frame.sqr;
 			frame.sqr = dcFilter.process(dcFilterStateSqr, frame.sqr);
 
-			// Tri
 			if (frame.triEnabled) {
 				// Integrate square wave
 				const float triShape = 0.2f;
-				T triFreq = deltaTime * triShape * frame.freq;
-				// T alpha = 1.f - simd::exp(-2.f * M_PI * triFreq);
+				T triFreq = sampleTime * triShape * frame.freq;
 				// Use bilinear transform to derive alpha
 				T alpha = 1 / (1 + 1 / (M_PI * triFreq));
+				// T alpha = 1.f - simd::exp(-2.f * M_PI * triFreq);
 				triFilterState += alpha * (triSqr - triFilterState);
 				// Apply gain to roughly have unit amplitude at 0.5 pulseWidth. Depends on triShape.
 				frame.tri = triFilterState * 6.6f;
@@ -375,10 +407,9 @@ struct VCOProcessor {
 			}
 		}
 
-		// Sin
 		if (frame.sinEnabled) {
 			frame.sin = sin(phase);
-			frame.sin += minBlep.process(sinMinBlep);
+			frame.sin += sinMinBlep.shift();
 			frame.sin = dcFilter.process(dcFilterStateSin, frame.sin);
 		}
 	}