Browse Source

Tweak buffer ops for GCC auto vectorization.

tags/non-daw-v1.2.0
Jonathan Moore Liles 12 years ago
parent
commit
19487a72e4
8 changed files with 88 additions and 57 deletions
  1. +4
    -4
      mixer/src/Chain.C
  2. +2
    -18
      mixer/src/Meter_Module.C
  3. +1
    -1
      mixer/src/Module.H
  4. +64
    -19
      nonlib/dsp.C
  5. +4
    -1
      nonlib/dsp.h
  6. +5
    -4
      timeline/src/Engine/Playback_DS.C
  7. +6
    -8
      timeline/src/Engine/Record_DS.C
  8. +2
    -2
      wscript

+ 4
- 4
mixer/src/Chain.C View File

@@ -176,7 +176,7 @@ Chain::~Chain ( )
client()->lock(); client()->lock();


for ( unsigned int i = scratch_port.size(); i--; ) for ( unsigned int i = scratch_port.size(); i--; )
delete[] (sample_t*)scratch_port[i].buffer();
free( (sample_t*)scratch_port[i].buffer() );
/* if we leave this up to FLTK, it will happen after we've /* if we leave this up to FLTK, it will happen after we've
already destroyed the client */ already destroyed the client */
@@ -376,13 +376,13 @@ Chain::configure_ports ( void )
if ( scratch_port.size() < req_buffers ) if ( scratch_port.size() < req_buffers )
{ {
for ( unsigned int i = scratch_port.size(); i--; ) for ( unsigned int i = scratch_port.size(); i--; )
delete[] (sample_t*)scratch_port[i].buffer();
free(scratch_port[i].buffer());
scratch_port.clear(); scratch_port.clear();


for ( unsigned int i = 0; i < req_buffers; ++i ) for ( unsigned int i = 0; i < req_buffers; ++i )
{ {
Module::Port p( NULL, Module::Port::OUTPUT, Module::Port::AUDIO ); Module::Port p( NULL, Module::Port::OUTPUT, Module::Port::AUDIO );
p.connect_to( new sample_t[client()->nframes()] );
p.connect_to( buffer_alloc( client()->nframes() ) );
buffer_fill_with_silence( (sample_t*)p.buffer(), client()->nframes() ); buffer_fill_with_silence( (sample_t*)p.buffer(), client()->nframes() );
scratch_port.push_back( p ); scratch_port.push_back( p );
} }
@@ -812,7 +812,7 @@ void
Chain::buffer_size ( nframes_t nframes ) Chain::buffer_size ( nframes_t nframes )
{ {
for ( unsigned int i = scratch_port.size(); i--; ) for ( unsigned int i = scratch_port.size(); i--; )
delete[] (sample_t*)scratch_port[i].buffer();
free(scratch_port[i].buffer());
scratch_port.clear(); scratch_port.clear();


configure_ports(); configure_ports();


+ 2
- 18
mixer/src/Meter_Module.C View File

@@ -29,6 +29,7 @@
#include "Meter_Module.H" #include "Meter_Module.H"
#include "DPM.H" #include "DPM.H"
#include "JACK/Port.H" #include "JACK/Port.H"
#include "dsp.h"




@@ -170,23 +171,6 @@ Meter_Module::handle ( int m )
/* Engine */ /* Engine */
/**********/ /**********/


static float
get_peak_sample ( const sample_t* buf, nframes_t nframes )
{
float p = 0.0f;

const sample_t *f = buf;

for ( int j = nframes; j--; ++f )
{
const float s = fabs( *f );

if ( s > p )
p = s;
}

return p;
}


void void
Meter_Module::process ( nframes_t nframes ) Meter_Module::process ( nframes_t nframes )
@@ -196,7 +180,7 @@ Meter_Module::process ( nframes_t nframes )
if ( audio_input[i].connected() ) if ( audio_input[i].connected() )
{ {
// float dB = 20 * log10( get_peak_sample( (float*)audio_input[i].buffer(), nframes ) / 2.0f ); // float dB = 20 * log10( get_peak_sample( (float*)audio_input[i].buffer(), nframes ) / 2.0f );
float dB = 20 * log10( get_peak_sample( (float*)audio_input[i].buffer(), nframes ) );
float dB = 20 * log10( buffer_get_peak( (sample_t*) audio_input[i].buffer(), nframes ) );


((float*)control_output[0].buffer())[i] = dB; ((float*)control_output[0].buffer())[i] = dB;
if (dB > control_value[i]) if (dB > control_value[i])


+ 1
- 1
mixer/src/Module.H View File

@@ -316,7 +316,7 @@ public:
LOG_NAME_FUNC( Module ); LOG_NAME_FUNC( Module );


nframes_t nframes ( void ) const { return _nframes; } nframes_t nframes ( void ) const { return _nframes; }
void resize_buffers ( nframes_t v ) { _nframes = v; }
virtual void resize_buffers ( nframes_t v ) { _nframes = v; }




int instances ( void ) const { return _instances; } int instances ( void ) const { return _instances; }


+ 64
- 19
nonlib/dsp.C View File

@@ -21,47 +21,73 @@


#include "dsp.h" #include "dsp.h"
#include "string.h" // for memset. #include "string.h" // for memset.
#include <stdlib.h>


/* TODO: these functions are all targets for optimization (SSE?) */
static const int ALIGNMENT = 16;

sample_t *
buffer_alloc ( nframes_t size )
{
void *p;
posix_memalign( &p, ALIGNMENT, size * sizeof( sample_t ) );

return (sample_t*)p;
}


void void
buffer_apply_gain ( sample_t *buf, nframes_t nframes, float g )
buffer_apply_gain ( sample_t * __restrict__ buf, nframes_t nframes, float g )
{ {
sample_t * buf_ = (sample_t*) __builtin_assume_aligned(buf,ALIGNMENT);
if ( g != 1.0f ) if ( g != 1.0f )
while ( nframes-- ) while ( nframes-- )
*(buf++) *= g;
*(buf_++) *= g;
} }


void void
buffer_apply_gain_buffer ( sample_t *buf, const sample_t *gainbuf, nframes_t nframes )
buffer_apply_gain_buffer ( sample_t * __restrict__ buf, const sample_t * __restrict__ gainbuf, nframes_t nframes )
{ {
sample_t * buf_ = (sample_t*) __builtin_assume_aligned(buf,ALIGNMENT);
const sample_t * gainbuf_ = (const sample_t*) __builtin_assume_aligned(gainbuf,ALIGNMENT);

while ( nframes-- ) while ( nframes-- )
*(buf++) *= *(gainbuf++);
*(buf_++) *= *(gainbuf_++);
} }


void void
buffer_copy_and_apply_gain_buffer ( sample_t *dst, const sample_t *src, const sample_t *gainbuf, nframes_t nframes )
buffer_copy_and_apply_gain_buffer ( sample_t * __restrict__ dst, const sample_t * __restrict__ src, const sample_t * __restrict__ gainbuf, nframes_t nframes )
{ {
while ( nframes-- )
*(dst++) = *(src++) * *(gainbuf++);
sample_t * dst_ = (sample_t*) __builtin_assume_aligned(dst,ALIGNMENT);
const sample_t * src_ = (const sample_t*) __builtin_assume_aligned(src,ALIGNMENT);
const sample_t * gainbuf_ = (const sample_t*) __builtin_assume_aligned(gainbuf,ALIGNMENT);
while ( nframes-- )
*(dst_++) = *(src_++) * *(gainbuf_++);
} }


void void
buffer_mix ( sample_t *dst, const sample_t *src, nframes_t nframes )
buffer_mix ( sample_t * __restrict__ dst, const sample_t * __restrict__ src, nframes_t nframes )
{ {
sample_t * dst_ = (sample_t*) __builtin_assume_aligned(dst,ALIGNMENT);
const sample_t * src_ = (const sample_t*) __builtin_assume_aligned(src,ALIGNMENT);

while ( nframes-- ) while ( nframes-- )
*(dst++) += *(src++);
*(dst_++) += *(src_++);
} }


void void
buffer_mix_with_gain ( sample_t *dst, const sample_t *src, nframes_t nframes, float g )
buffer_mix_with_gain ( sample_t * __restrict__ dst, const sample_t * __restrict__ src, nframes_t nframes, float g )
{ {
sample_t * dst_ = (sample_t*) __builtin_assume_aligned(dst,ALIGNMENT);
const sample_t * src_ = (const sample_t*) __builtin_assume_aligned(src,ALIGNMENT);
while ( nframes-- ) while ( nframes-- )
*(dst++) += *(src++) * g;
*(dst_++) += *(src_++) * g;
} }


void void
buffer_interleave_one_channel ( sample_t *dst, const sample_t *src, int channel, int channels, nframes_t nframes )
buffer_interleave_one_channel ( sample_t * __restrict__ dst, const sample_t * __restrict__ src, int channel, int channels, nframes_t nframes )
{ {
dst += channel; dst += channel;


@@ -73,7 +99,7 @@ buffer_interleave_one_channel ( sample_t *dst, const sample_t *src, int channel,
} }


void void
buffer_interleave_one_channel_and_mix ( sample_t *dst, const sample_t *src, int channel, int channels, nframes_t nframes )
buffer_interleave_one_channel_and_mix ( sample_t *__restrict__ dst, const sample_t * __restrict__ src, int channel, int channels, nframes_t nframes )
{ {
dst += channel; dst += channel;


@@ -85,7 +111,7 @@ buffer_interleave_one_channel_and_mix ( sample_t *dst, const sample_t *src, int
} }


void void
buffer_deinterleave_one_channel ( sample_t *dst, const sample_t *src, int channel, int channels, nframes_t nframes )
buffer_deinterleave_one_channel ( sample_t * __restrict__ dst, const sample_t * __restrict__ src, int channel, int channels, nframes_t nframes )
{ {
src += channel; src += channel;


@@ -115,19 +141,36 @@ buffer_is_digital_black ( sample_t *buf, nframes_t nframes )
return true; return true;
} }


float
buffer_get_peak ( const sample_t * __restrict__ buf, nframes_t nframes )
{
const sample_t * buf_ = (const sample_t*) __builtin_assume_aligned(buf,ALIGNMENT);

float p = 0.0f;
while ( nframes-- )
{
const float s = fabs(*(buf_++));
p = s > p ? s : p;
}

return p;
}

void void
buffer_copy ( sample_t *dst, const sample_t *src, nframes_t nframes )
buffer_copy ( sample_t * __restrict__ dst, const sample_t * __restrict__ src, nframes_t nframes )
{ {
memcpy( dst, src, nframes * sizeof( sample_t ) ); memcpy( dst, src, nframes * sizeof( sample_t ) );
} }


void void
buffer_copy_and_apply_gain ( sample_t *dst, const sample_t *src, nframes_t nframes, float gain )
buffer_copy_and_apply_gain ( sample_t * __restrict__ dst, const sample_t * __restrict__ src, nframes_t nframes, float gain )
{ {
memcpy( dst, src, nframes * sizeof( sample_t ) ); memcpy( dst, src, nframes * sizeof( sample_t ) );
buffer_apply_gain( dst, nframes, gain ); buffer_apply_gain( dst, nframes, gain );
} }



void void
Value_Smoothing_Filter::sample_rate ( nframes_t n ) Value_Smoothing_Filter::sample_rate ( nframes_t n )
{ {
@@ -138,8 +181,10 @@ Value_Smoothing_Filter::sample_rate ( nframes_t n )
} }


bool bool
Value_Smoothing_Filter::apply( sample_t *dst, nframes_t nframes, float gt )
Value_Smoothing_Filter::apply( sample_t * __restrict__ dst, nframes_t nframes, float gt )
{ {
sample_t * dst_ = (sample_t*) __builtin_assume_aligned(dst,ALIGNMENT);
const float a = 0.07f; const float a = 0.07f;
const float b = 1 + a; const float b = 1 + a;
@@ -155,7 +200,7 @@ Value_Smoothing_Filter::apply( sample_t *dst, nframes_t nframes, float gt )
{ {
g1 += w * (gm - g1 - a * g2); g1 += w * (gm - g1 - a * g2);
g2 += w * (g1 - g2); g2 += w * (g1 - g2);
dst[i] = g2;
dst_[i] = g2;
} }


if ( fabsf( gt - g2 ) < 0.0001f ) if ( fabsf( gt - g2 ) < 0.0001f )


+ 4
- 1
nonlib/dsp.h View File

@@ -22,6 +22,8 @@
#include "JACK/Client.H" #include "JACK/Client.H"
#include <math.h> #include <math.h>



sample_t *buffer_alloc ( nframes_t size );
void buffer_apply_gain ( sample_t *buf, nframes_t nframes, float g ); void buffer_apply_gain ( sample_t *buf, nframes_t nframes, float g );
void buffer_apply_gain_buffer ( sample_t *buf, const sample_t *gainbuf, nframes_t nframes ); void buffer_apply_gain_buffer ( sample_t *buf, const sample_t *gainbuf, nframes_t nframes );
void buffer_copy_and_apply_gain_buffer ( sample_t *dst, const sample_t *src, const sample_t *gainbuf, nframes_t nframes ); void buffer_copy_and_apply_gain_buffer ( sample_t *dst, const sample_t *src, const sample_t *gainbuf, nframes_t nframes );
@@ -31,7 +33,8 @@ void buffer_interleave_one_channel ( sample_t *dst, const sample_t *src, int cha
void buffer_interleave_one_channel_and_mix ( sample_t *dst, const sample_t *src, int channel, int channels, nframes_t nframes ); void buffer_interleave_one_channel_and_mix ( sample_t *dst, const sample_t *src, int channel, int channels, nframes_t nframes );
void buffer_deinterleave_one_channel ( sample_t *dst, const sample_t *src, int channel, int channels, nframes_t nframes ); void buffer_deinterleave_one_channel ( sample_t *dst, const sample_t *src, int channel, int channels, nframes_t nframes );
void buffer_fill_with_silence ( sample_t *buf, nframes_t nframes ); void buffer_fill_with_silence ( sample_t *buf, nframes_t nframes );
bool buffer_is_digital_black ( sample_t *buf, nframes_t nframes );
bool buffer_is_digital_black ( const sample_t *buf, nframes_t nframes );
float buffer_get_peak ( const sample_t *buf, nframes_t nframes );
void buffer_copy ( sample_t *dst, const sample_t *src, nframes_t nframes ); void buffer_copy ( sample_t *dst, const sample_t *src, nframes_t nframes );
void buffer_copy_and_apply_gain ( sample_t *dst, const sample_t *src, nframes_t nframes, float gain ); void buffer_copy_and_apply_gain ( sample_t *dst, const sample_t *src, nframes_t nframes, float gain );




+ 5
- 4
timeline/src/Engine/Playback_DS.C View File

@@ -119,9 +119,9 @@ Playback_DS::disk_thread ( void )
DMESSAGE( "playback thread running" ); DMESSAGE( "playback thread running" );


/* buffer to hold the interleaved data returned by the track reader */ /* buffer to hold the interleaved data returned by the track reader */
sample_t *buf = new sample_t[ _nframes * channels() * _disk_io_blocks ];
sample_t *buf = buffer_alloc( _nframes * channels() * _disk_io_blocks );
#ifndef AVOID_UNNECESSARY_COPYING #ifndef AVOID_UNNECESSARY_COPYING
sample_t *cbuf = new sample_t[ _nframes * _disk_io_blocks ];
sample_t *cbuf = buffer_alloc( _nframes * _disk_io_blocks );
#endif #endif


int blocks_ready = 0; int blocks_ready = 0;
@@ -168,6 +168,7 @@ Playback_DS::disk_thread ( void )
{ {


#ifdef AVOID_UNNECESSARY_COPYING #ifdef AVOID_UNNECESSARY_COPYING

/* deinterleave direcectly into the ringbuffer to avoid /* deinterleave direcectly into the ringbuffer to avoid
* unnecessary copying */ * unnecessary copying */


@@ -217,9 +218,9 @@ done:


DMESSAGE( "playback thread terminating" ); DMESSAGE( "playback thread terminating" );


delete[] buf;
free(buf);
#ifndef AVOID_UNNECESSARY_COPYING #ifndef AVOID_UNNECESSARY_COPYING
delete[] cbuf;
free(cbuf);
#endif #endif


_terminate = false; _terminate = false;


+ 6
- 8
timeline/src/Engine/Record_DS.C View File

@@ -77,9 +77,9 @@ Record_DS::disk_thread ( void )
const nframes_t nframes = _nframes * _disk_io_blocks; const nframes_t nframes = _nframes * _disk_io_blocks;


/* buffer to hold the interleaved data returned by the track reader */ /* buffer to hold the interleaved data returned by the track reader */
sample_t *buf = new sample_t[ nframes * channels() ];
sample_t *buf = buffer_alloc( nframes * channels() );
#ifndef AVOID_UNNECESSARY_COPYING #ifndef AVOID_UNNECESSARY_COPYING
sample_t *cbuf = new sample_t[ nframes ];
sample_t *cbuf = buffer_alloc( nframes );
#endif #endif


const size_t block_size = nframes * sizeof( sample_t ); const size_t block_size = nframes * sizeof( sample_t );
@@ -98,7 +98,6 @@ Record_DS::disk_thread ( void )
{ {


#ifdef AVOID_UNNECESSARY_COPYING #ifdef AVOID_UNNECESSARY_COPYING

/* interleave direcectly from the ringbuffer to avoid /* interleave direcectly from the ringbuffer to avoid
* unnecessary copying */ * unnecessary copying */


@@ -122,7 +121,6 @@ Record_DS::disk_thread ( void )
const nframes_t f = rbd[ 0 ].len / sizeof( sample_t ); const nframes_t f = rbd[ 0 ].len / sizeof( sample_t );


/* do the first half */ /* do the first half */
buffer_deinterleave_one_channel( (sample_t*)rbd[ 0 ].buf, buf, i, channels(), f );
buffer_interleave_one_channel( buf, (sample_t*)rbd[ 0 ].buf, i, channels(), f ); buffer_interleave_one_channel( buf, (sample_t*)rbd[ 0 ].buf, i, channels(), f );


assert( rbd[ 1 ].len >= ( nframes - f ) * sizeof( sample_t ) ); assert( rbd[ 1 ].len >= ( nframes - f ) * sizeof( sample_t ) );
@@ -158,7 +156,7 @@ Record_DS::disk_thread ( void )
const size_t block_size = _nframes * sizeof( sample_t ); const size_t block_size = _nframes * sizeof( sample_t );


#ifdef AVOID_UNNECESSARY_COPYING #ifdef AVOID_UNNECESSARY_COPYING
sample_t *cbuf = new sample_t[ nframes ];
sample_t *cbuf = buffer_alloc( nframes );
#endif #endif


while ( blocks_ready-- > 0 || ( ! sem_trywait( &_blocks ) && errno != EAGAIN ) ) while ( blocks_ready-- > 0 || ( ! sem_trywait( &_blocks ) && errno != EAGAIN ) )
@@ -184,14 +182,14 @@ Record_DS::disk_thread ( void )
} }


#ifdef AVOID_UNNECESSARY_COPYING #ifdef AVOID_UNNECESSARY_COPYING
delete[] cbuf;
free(cbuf);
#endif #endif


} }


delete[] buf;
free(buf);
#ifndef AVOID_UNNECESSARY_COPYING #ifndef AVOID_UNNECESSARY_COPYING
delete[] cbuf;
free(cbuf);
#endif #endif


DMESSAGE( "finalzing capture" ); DMESSAGE( "finalzing capture" );


+ 2
- 2
wscript View File

@@ -76,8 +76,8 @@ def configure(conf):
print('Using SSE optimization') print('Using SSE optimization')
optimization_flags.extend( [ optimization_flags.extend( [
"-msse2", "-msse2",
"-mfpmath=sse",
"-ftree-vectorize" ] )
"-mfpmath=sse" ] );
conf.define( 'USE_SSE', 1 ) conf.define( 'USE_SSE', 1 )


debug_flags = [ '-O0', '-g3' ] debug_flags = [ '-O0', '-g3' ]


Loading…
Cancel
Save