Browse Source

fixed a bug in the tmp buffer

fixed the color range for yuv
fixed the width %8!=0 bug (another 1% speed loss)

Originally committed as revision 2286 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc
tags/v0.5
Michael Niedermayer 24 years ago
parent
commit
658a85f26f
3 changed files with 103 additions and 8 deletions
  1. +50
    -4
      postproc/postprocess.c
  2. +3
    -0
      postproc/postprocess.h
  3. +50
    -4
      postproc/postprocess_template.c

+ 50
- 4
postproc/postprocess.c View File

@@ -122,7 +122,7 @@ static uint64_t temp3=0;
static uint64_t temp4=0;
static uint64_t temp5=0;
static uint64_t pQPb=0;
static uint8_t tempBlock[16*16];
static uint8_t tempBlock[16*16]; //used so the horizontal code gets aligned data

int hFlatnessThreshold= 56 - 16;
int vFlatnessThreshold= 56 - 16;
@@ -132,7 +132,7 @@ double maxClippedThreshold= 0.01;

int maxAllowedY=255;
//FIXME can never make a movie´s black brighter (anyone needs that?)
int minAllowedY=0;
int minAllowedY=16;

#ifdef TIMING
static inline long long rdtsc()
@@ -2398,6 +2398,13 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
static uint8_t *tempDst= NULL;
static uint8_t *tempSrc= NULL;

/* Temporary buffers for handling the last block */
static uint8_t *tempDstBlock= NULL;
static uint8_t *tempSrcBlock= NULL;

uint8_t *dstBlockPtrBackup;
uint8_t *srcBlockPtrBackup;

#ifdef TIMING
long long T0, T1, memcpyTime=0, vertTime=0, horizTime=0, sumTime, diffTime=0;
sumTime= rdtsc();
@@ -2407,6 +2414,8 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
{
tempDst= (uint8_t*)memalign(8, 1024*24);
tempSrc= (uint8_t*)memalign(8, 1024*24);
tempDstBlock= (uint8_t*)memalign(8, 1024*24);
tempSrcBlock= (uint8_t*)memalign(8, 1024*24);
}

if(!yHistogram)
@@ -2414,6 +2423,12 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
int i;
yHistogram= (uint64_t*)malloc(8*256);
for(i=0; i<256; i++) yHistogram[i]= width*height/64*15/256;

if(mode & FULL_Y_RANGE)
{
maxAllowedY=255;
minAllowedY=0;
}
}

if(!isColor)
@@ -2505,6 +2520,7 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
srcBlock= tempSrc;
}

// From this point on it is guranteed that we can read and write 16 lines downward
// finish 1 block before the next otherwise we´ll might have a problem
// with the L1 Cache of the P4 ... or only a few blocks at a time or soemthing
for(x=0; x<width; x+=BLOCK_SIZE)
@@ -2545,6 +2561,23 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri

if(!isColor) yHistogram[ srcBlock[srcStride*5] ]++;

//can we mess with a 8x16 block, if not use a temp buffer, yes again
if(x+7 >= width)
{
int i;
dstBlockPtrBackup= dstBlock;
srcBlockPtrBackup= srcBlock;

for(i=0;i<BLOCK_SIZE*2; i++)
{
memcpy(tempSrcBlock+i*srcStride, srcBlock+i*srcStride, width-x);
memcpy(tempDstBlock+i*dstStride, dstBlock+i*dstStride, width-x);
}

dstBlock= tempDstBlock;
srcBlock= tempSrcBlock;
}

blockCopy(dstBlock + dstStride*5, dstStride,
srcBlock + srcStride*5, srcStride, 8, mode & LEVEL_FIX);

@@ -2593,7 +2626,7 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
}

/* check if we have a previous block to deblock it with dstBlock */
if(x - 8 >= 0 && x<width)
if(x - 8 >= 0)
{
#ifdef MORE_TIMING
T0= rdtsc();
@@ -2624,12 +2657,25 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
dering(dstBlock - stride*9 + width-9, stride, QP);
//FIXME dering filter will not be applied to last block (bottom right)

/* did we use a tmp-block buffer */
if(x+7 >= width)
{
int i;
dstBlock= dstBlockPtrBackup;
srcBlock= srcBlockPtrBackup;

for(i=0;i<BLOCK_SIZE*2; i++)
{
memcpy(dstBlock+i*dstStride, tempDstBlock+i*dstStride, width-x);
}
}

dstBlock+=8;
srcBlock+=8;
}

/* did we use a tmp buffer */
if(y+15 > height)
if(y+15 >= height)
{
uint8_t *dstBlock= &(dst[y*dstStride]);
memcpy(dstBlock, tempDst, dstStride*(height-y) );


+ 3
- 0
postproc/postprocess.h View File

@@ -46,6 +46,9 @@
#define H_RK1_FILTER 0x1000 // 4096 (not implemented yet)
#define H_X1_FILTER 0x2000 // 8192

// select between full y range (255-0) or standart one (
#define FULL_Y_RANGE 0x8000 // 32768

//Deinterlacing Filters
#define LINEAR_IPOL_DEINT_FILTER 0x10000 // 65536
#define LINEAR_BLEND_DEINT_FILTER 0x20000 // 131072


+ 50
- 4
postproc/postprocess_template.c View File

@@ -122,7 +122,7 @@ static uint64_t temp3=0;
static uint64_t temp4=0;
static uint64_t temp5=0;
static uint64_t pQPb=0;
static uint8_t tempBlock[16*16];
static uint8_t tempBlock[16*16]; //used so the horizontal code gets aligned data

int hFlatnessThreshold= 56 - 16;
int vFlatnessThreshold= 56 - 16;
@@ -132,7 +132,7 @@ double maxClippedThreshold= 0.01;

int maxAllowedY=255;
//FIXME can never make a movie´s black brighter (anyone needs that?)
int minAllowedY=0;
int minAllowedY=16;

#ifdef TIMING
static inline long long rdtsc()
@@ -2398,6 +2398,13 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
static uint8_t *tempDst= NULL;
static uint8_t *tempSrc= NULL;

/* Temporary buffers for handling the last block */
static uint8_t *tempDstBlock= NULL;
static uint8_t *tempSrcBlock= NULL;

uint8_t *dstBlockPtrBackup;
uint8_t *srcBlockPtrBackup;

#ifdef TIMING
long long T0, T1, memcpyTime=0, vertTime=0, horizTime=0, sumTime, diffTime=0;
sumTime= rdtsc();
@@ -2407,6 +2414,8 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
{
tempDst= (uint8_t*)memalign(8, 1024*24);
tempSrc= (uint8_t*)memalign(8, 1024*24);
tempDstBlock= (uint8_t*)memalign(8, 1024*24);
tempSrcBlock= (uint8_t*)memalign(8, 1024*24);
}

if(!yHistogram)
@@ -2414,6 +2423,12 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
int i;
yHistogram= (uint64_t*)malloc(8*256);
for(i=0; i<256; i++) yHistogram[i]= width*height/64*15/256;

if(mode & FULL_Y_RANGE)
{
maxAllowedY=255;
minAllowedY=0;
}
}

if(!isColor)
@@ -2505,6 +2520,7 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
srcBlock= tempSrc;
}

// From this point on it is guranteed that we can read and write 16 lines downward
// finish 1 block before the next otherwise we´ll might have a problem
// with the L1 Cache of the P4 ... or only a few blocks at a time or soemthing
for(x=0; x<width; x+=BLOCK_SIZE)
@@ -2545,6 +2561,23 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri

if(!isColor) yHistogram[ srcBlock[srcStride*5] ]++;

//can we mess with a 8x16 block, if not use a temp buffer, yes again
if(x+7 >= width)
{
int i;
dstBlockPtrBackup= dstBlock;
srcBlockPtrBackup= srcBlock;

for(i=0;i<BLOCK_SIZE*2; i++)
{
memcpy(tempSrcBlock+i*srcStride, srcBlock+i*srcStride, width-x);
memcpy(tempDstBlock+i*dstStride, dstBlock+i*dstStride, width-x);
}

dstBlock= tempDstBlock;
srcBlock= tempSrcBlock;
}

blockCopy(dstBlock + dstStride*5, dstStride,
srcBlock + srcStride*5, srcStride, 8, mode & LEVEL_FIX);

@@ -2593,7 +2626,7 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
}

/* check if we have a previous block to deblock it with dstBlock */
if(x - 8 >= 0 && x<width)
if(x - 8 >= 0)
{
#ifdef MORE_TIMING
T0= rdtsc();
@@ -2624,12 +2657,25 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
dering(dstBlock - stride*9 + width-9, stride, QP);
//FIXME dering filter will not be applied to last block (bottom right)

/* did we use a tmp-block buffer */
if(x+7 >= width)
{
int i;
dstBlock= dstBlockPtrBackup;
srcBlock= srcBlockPtrBackup;

for(i=0;i<BLOCK_SIZE*2; i++)
{
memcpy(dstBlock+i*dstStride, tempDstBlock+i*dstStride, width-x);
}
}

dstBlock+=8;
srcBlock+=8;
}

/* did we use a tmp buffer */
if(y+15 > height)
if(y+15 >= height)
{
uint8_t *dstBlock= &(dst[y*dstStride]);
memcpy(dstBlock, tempDst, dstStride*(height-y) );


Loading…
Cancel
Save