Add simd to various dsp functions.

6 years ago · ea8eca4cd3
--- a/include/dsp/common.hpp
+++ b/include/dsp/common.hpp
@@ -87,9 +87,9 @@ T exponentialBipolar(T b, T x) {


 /** Useful for storing arrays of samples in ring buffers and casting them to `float*` to be used by interleaved processors, like SampleRateConverter */
 template <size_t CHANNELS>
 template <size_t CHANNELS, typename T = float>
 struct Frame {
 	float samples[CHANNELS];
 	T samples[CHANNELS];
 };


--- a/include/dsp/filter.hpp
+++ b/include/dsp/filter.hpp
@@ -6,128 +6,168 @@ namespace rack {
 namespace dsp {


 struct RCFilter {
 	float c = 0.f;
 	float xstate[1] = {};
 	float ystate[1] = {};
 /** The simplest possible analog filter using an Euler solver.
 https://en.wikipedia.org/wiki/RC_circuit
 Use two RC filters in series for a bandpass filter.
 */
 template <typename T = float>
 struct TRCFilter {
 	T c = 0.f;
 	T xstate[1];
 	T ystate[1];

 	TRCFilter() {
 		reset();
 	}

 	void reset() {
 		xstate[0] = 0.f;
 		ystate[0] = 0.f;
 	}

 	// `r` is the ratio between the cutoff frequency and sample rate, i.e. r = f_c / f_s
 	void setCutoff(float r) {
 	/** Sets the cutoff frequency.
 	`r` is the ratio between the cutoff frequency and sample rate, i.e. r = f_c / f_s
 	*/
 	void setCutoff(T r) {
 		c = 2.f / r;
 	}
 	void process(float x) {
 		float y = (x + xstate[0] - ystate[0] * (1 - c)) / (1 + c);
 	void process(T x) {
 		T y = (x + xstate[0] - ystate[0] * (1 - c)) / (1 + c);
 		xstate[0] = x;
 		ystate[0] = y;
 	}
 	float lowpass() {
 	T lowpass() {
 		return ystate[0];
 	}
 	float highpass() {
 	T highpass() {
 		return xstate[0] - ystate[0];
 	}
 };

 typedef TRCFilter<> RCFilter;


 struct PeakFilter {
 	float state = 0.f;
 	float c = 0.f;
 /** Applies exponential smoothing to a signal with the ODE
 \f$ \frac{dy}{dt} = x \lambda \f$.
 */
 template <typename T = float>
 struct TExponentialFilter {
 	T out = 0.f;
 	T lambda = 0.f;

 	void reset() {
 		out = 0.f;
 	}

 	/** Rate is lambda / sampleRate */
 	void setRate(float r) {
 		c = 1.f - r;
 	void setLambda(T lambda) {
 		this->lambda = lambda;
 	}
 	void process(float x) {
 		if (x > state)
 			state = x;
 		state *= c;

 	T process(T deltaTime, T in) {
 		T y = out + (in - out) * lambda * deltaTime;
 		// If no change was made between the old and new output, assume T granularity is too small and snap output to input
 		out = simd::ifelse(out == y, in, y);
 		return out;
 	}
 	float peak() {
 		return state;

 	DEPRECATED T process(T in) {
 		return process(1.f, in);
 	}
 };

 typedef TExponentialFilter<> ExponentialFilter;


 /** Like ExponentialFilter but jumps immediately to higher values.
 */
 template <typename T = float>
 struct TPeakFilter {
 	T out = 0.f;
 	T lambda = 0.f;

 	void reset() {
 		out = 0.f;
 	}

 struct SlewLimiter {
 	float rise = 0.f;
 	float fall = 0.f;
 	float out = NAN;

 	float process(float deltaTime, float in) {
 		if (std::isnan(out)) {
 			out = in;
 		}
 		else if (out < in) {
 			float y = out + rise * deltaTime;
 			out = std::fmin(y, in);
 		}
 		else if (out > in) {
 			float y = out - fall * deltaTime;
 			out = std::fmax(y, in);
 		}
 	void setLambda(T lambda) {
 		this->lambda = lambda;
 	}

 	T process(T deltaTime, T in) {
 		T y = out + (in - out) * lambda * deltaTime;
 		out = simd::fmax(y, in);
 		return out;
 	}
 	DEPRECATED float process(float in) {
 		return process(1.f, in);
 	/** Use the return value of process() instead. */
 	DEPRECATED T peak() {
 		return out;
 	}
 	DEPRECATED void setRiseFall(float rise, float fall) {
 		this->rise = rise;
 		this->fall = fall;
 	/** Use setLambda() instead. */
 	DEPRECATED void setRate(T r) {
 		lambda = 1.f - r;
 	}
 	DEPRECATED T process(T x) {
 		return process(1.f, x);
 	}
 };

 typedef TPeakFilter<> PeakFilter;


 template <typename T = float>
 struct TSlewLimiter {
 	T out = 0.f;
 	T rise = 0.f;
 	T fall = 0.f;

 struct ExponentialSlewLimiter {
 	float riseLambda = 0.f;
 	float fallLambda = 0.f;
 	float out = NAN;

 	float process(float deltaTime, float in) {
 		if (std::isnan(out)) {
 			out = in;
 		}
 		else if (out < in) {
 			float y = out + (in - out) * riseLambda * deltaTime;
 			out = (out == y) ? in : y;
 		}
 		else if (out > in) {
 			float y = out + (in - out) * fallLambda * deltaTime;
 			out = (out == y) ? in : y;
 		}
 	void reset() {
 		out = 0.f;
 	}

 	void setRiseFall(T rise, T fall) {
 		this->rise = rise;
 		this->fall = fall;
 	}
 	T process(T deltaTime, T in) {
 		out = simd::clamp(in, out - fall * deltaTime, out + rise * deltaTime);
 		return out;
 	}
 	DEPRECATED float process(float in) {
 	DEPRECATED T process(T in) {
 		return process(1.f, in);
 	}
 };

 typedef TSlewLimiter<> SlewLimiter;

 /** Applies exponential smoothing to a signal with the ODE
 \f$ \frac{dy}{dt} = x \lambda \f$.
 */
 struct ExponentialFilter {
 	float out = 0.f;
 	float lambda = 0.f;

 template <typename T = float>
 struct TExponentialSlewLimiter {
 	T out = 0.f;
 	T riseLambda = 0.f;
 	T fallLambda = 0.f;

 	void reset() {
 		out = 0.f;
 	}

 	float process(float deltaTime, float in) {
 		float y = out + (in - out) * lambda * deltaTime;
 		// If no change was made between the old and new output, assume float granularity is too small and snap output to input
 		if (out == y)
 			out = in;
 		else
 			out = y;
 	void setRiseFall(T riseLambda, T fallLambda) {
 		this->riseLambda = riseLambda;
 		this->fallLambda = fallLambda;
 	}
 	T process(T deltaTime, T in) {
 		T lambda = simd::ifelse(in > out, riseLambda, fallLambda);
 		T y = out + (in - out) * lambda * deltaTime;
 		// If the change from the old out to the new out is too small for floats, set `in` directly.
 		out = simd::ifelse(out == y, in, y);
 		return out;
 	}

 	DEPRECATED float process(float in) {
 	DEPRECATED T process(T in) {
 		return process(1.f, in);
 	}
 };

 typedef TExponentialSlewLimiter<> ExponentialSlewLimiter;


 } // namespace dsp
 } // namespace rack
--- a/include/dsp/ode.hpp
+++ b/include/dsp/ode.hpp
@@ -26,9 +26,9 @@ For example, the following solves the system x''(t) = -x(t) using a fixed timest
 */

 /** Solves an ODE system using the 1st order Euler method */
 template <typename F>
 void stepEuler(float t, float dt, float x[], int len, F f) {
 	float k[len];
 template <typename T, typename F>
 void stepEuler(T t, T dt, T x[], int len, F f) {
 	T k[len];

 	f(t, x, k);
 	for (int i = 0; i < len; i++) {
@@ -37,11 +37,11 @@ void stepEuler(float t, float dt, float x[], int len, F f) {
 }

 /** Solves an ODE system using the 2nd order Runge-Kutta method */
 template <typename F>
 void stepRK2(float t, float dt, float x[], int len, F f) {
 	float k1[len];
 	float k2[len];
 	float yi[len];
 template <typename T, typename F>
 void stepRK2(T t, T dt, T x[], int len, F f) {
 	T k1[len];
 	T k2[len];
 	T yi[len];

 	f(t, x, k1);

@@ -56,13 +56,13 @@ void stepRK2(float t, float dt, float x[], int len, F f) {
 }

 /** Solves an ODE system using the 4th order Runge-Kutta method */
 template <typename F>
 void stepRK4(float t, float dt, float x[], int len, F f) {
 	float k1[len];
 	float k2[len];
 	float k3[len];
 	float k4[len];
 	float yi[len];
 template <typename T, typename F>
 void stepRK4(T t, T dt, T x[], int len, F f) {
 	T k1[len];
 	T k2[len];
 	T k3[len];
 	T k4[len];
 	T yi[len];

 	f(t, x, k1);

--- a/include/dsp/window.hpp
+++ b/include/dsp/window.hpp
@@ -10,8 +10,9 @@ namespace dsp {
 p: proportion from [0, 1], usually `i / (len - 1)`
 https://en.wikipedia.org/wiki/Window_function#Hann_and_Hamming_windows
 */
 inline float hann(float p) {
 	return 0.5f * (1.f - std::cos(2*M_PI * p));
 template <typename T>
 inline T hann(T p) {
 	return 0.5f * (1.f - simd::cos(2*M_PI * p));
 }

 /** Multiplies the Hann window by a signal `x` of length `len` in-place. */
@@ -25,11 +26,12 @@ inline void hannWindow(float *x, int len) {
 https://en.wikipedia.org/wiki/Window_function#Blackman_window
 A typical alpha value is 0.16.
 */
 inline float blackman(float alpha, float p) {
 template <typename T>
 inline T blackman(T alpha, T p) {
 	return
 		+ (1 - alpha) / 2.f
 		- 1 / 2.f * std::cos(2*M_PI * p)
 		+ alpha / 2.f * std::cos(4*M_PI * p);
 		- 1 / 2.f * simd::cos(2*M_PI * p)
 		+ alpha / 2.f * simd::cos(4*M_PI * p);
 }

 inline void blackmanWindow(float alpha, float *x, int len) {
@@ -42,12 +44,13 @@ inline void blackmanWindow(float alpha, float *x, int len) {
 /** Blackman-Nuttall window function.
 https://en.wikipedia.org/wiki/Window_function#Blackman%E2%80%93Nuttall_window
 */
 inline float blackmanNuttall(float p) {
 template <typename T>
 inline T blackmanNuttall(T p) {
 	return
 		+ 0.3635819f
 		- 0.4891775f * std::cos(2*M_PI * p)
 		+ 0.1365995f * std::cos(4*M_PI * p)
 		- 0.0106411f * std::cos(6*M_PI * p);
 		- 0.4891775f * simd::cos(2*M_PI * p)
 		+ 0.1365995f * simd::cos(4*M_PI * p)
 		- 0.0106411f * simd::cos(6*M_PI * p);
 }

 inline void blackmanNuttallWindow(float *x, int len) {
@@ -59,12 +62,13 @@ inline void blackmanNuttallWindow(float *x, int len) {
 /** Blackman-Harris window function.
 https://en.wikipedia.org/wiki/Window_function#Blackman%E2%80%93Harris_window
 */
 inline float blackmanHarris(float p) {
 template <typename T>
 inline T blackmanHarris(T p) {
 	return
 		+ 0.35875f
 		- 0.48829f * std::cos(2*M_PI * p)
 		+ 0.14128f * std::cos(4*M_PI * p)
 		- 0.01168f * std::cos(6*M_PI * p);
 		- 0.48829f * simd::cos(2*M_PI * p)
 		+ 0.14128f * simd::cos(4*M_PI * p)
 		- 0.01168f * simd::cos(6*M_PI * p);
 }

 inline void blackmanHarrisWindow(float *x, int len) {
--- a/include/simd/functions.hpp
+++ b/include/simd/functions.hpp
@@ -52,6 +52,12 @@ inline f32_4 log10(f32_4 x) {
 	return f32_4(sse_mathfun_log_ps(x.v)) / std::log(10.f);
 }

 using std::log2;

 inline f32_4 log2(f32_4 x) {
 	return f32_4(sse_mathfun_log_ps(x.v)) / std::log(2.f);
 }

 using std::exp;

 inline f32_4 exp(f32_4 x) {
@@ -118,6 +124,16 @@ inline f32_4 pow(float a, f32_4 b) {

 // Nonstandard functions

 inline float ifelse(bool cond, float a, float b) {
 	return cond ? a : b;
 }

 /** Given a mask, returns a if mask is 0xffffffff per element, b if mask is 0x00000000 */
 inline f32_4 ifelse(f32_4 mask, f32_4 a, f32_4 b) {
 	return (a & mask) | andnot(mask, b);
 }


 /** Returns the approximate reciprocal square root.
 Much faster than `1/sqrt(x)`.
 */
--- a/include/simd/vector.hpp
+++ b/include/simd/vector.hpp
@@ -187,11 +187,6 @@ inline f32_4 andnot(const f32_4 &a, const f32_4 &b) {
 	return f32_4(_mm_andnot_ps(a.v, b.v));
 }

 /** Given a mask, returns a if mask is 0xffffffff per element, b if mask is 0x00000000 */
 inline f32_4 ifelse(const f32_4 &mask, const f32_4 &a, const f32_4 &b) {
 	return (a & mask) | andnot(mask, b);
 }


 } // namespace simd
 } // namespace rack