Originally committed as revision 12557 to svn://svn.ffmpeg.org/ffmpeg/trunk (tags/v0.5)
@@ -926,8 +926,8 @@ static void reallocBuffers(PPContext *c, int width, int height, int stride, int | |||||
for(i=0; i<3; i++){ | for(i=0; i<3; i++){ | ||||
//Note: The +17*1024 is just there so i do not have to worry about r/w over the end. | //Note: The +17*1024 is just there so i do not have to worry about r/w over the end. | ||||
reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024); | |||||
reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size | |||||
reallocAlign((void **)&c->tempBlurred[i], 8, stride*mbHeight*16 + 17*1024); | |||||
reallocAlign((void **)&c->tempBlurredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size | |||||
} | } | ||||
reallocAlign((void **)&c->deintTemp, 8, 2*width+32); | reallocAlign((void **)&c->deintTemp, 8, 2*width+32); | ||||
@@ -969,8 +969,8 @@ void pp_free_context(void *vc){ | |||||
PPContext *c = (PPContext*)vc; | PPContext *c = (PPContext*)vc; | ||||
int i; | int i; | ||||
for(i=0; i<3; i++) av_free(c->tempBlured[i]); | |||||
for(i=0; i<3; i++) av_free(c->tempBluredPast[i]); | |||||
for(i=0; i<3; i++) av_free(c->tempBlurred[i]); | |||||
for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]); | |||||
av_free(c->tempBlocks); | av_free(c->tempBlocks); | ||||
av_free(c->yHistogram); | av_free(c->yHistogram); | ||||
@@ -800,7 +800,7 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) { | |||||
#define do_a_deblock_altivec(a...) do_a_deblock_C(a) | #define do_a_deblock_altivec(a...) do_a_deblock_C(a) | ||||
static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, | static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, | ||||
uint8_t *tempBlured, uint32_t *tempBluredPast, int *maxNoise) | |||||
uint8_t *tempBlurred, uint32_t *tempBlurredPast, int *maxNoise) | |||||
{ | { | ||||
const vector signed int zero = vec_splat_s32(0); | const vector signed int zero = vec_splat_s32(0); | ||||
const vector signed short vsint16_1 = vec_splat_s16(1); | const vector signed short vsint16_1 = vec_splat_s16(1); | ||||
@@ -808,9 +808,9 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, | |||||
vector signed int v_sysdp = zero; | vector signed int v_sysdp = zero; | ||||
int d, sysd, i; | int d, sysd, i; | ||||
tempBluredPast[127]= maxNoise[0]; | |||||
tempBluredPast[128]= maxNoise[1]; | |||||
tempBluredPast[129]= maxNoise[2]; | |||||
tempBlurredPast[127]= maxNoise[0]; | |||||
tempBlurredPast[128]= maxNoise[1]; | |||||
tempBlurredPast[129]= maxNoise[2]; | |||||
#define LOAD_LINE(src, i) \ | #define LOAD_LINE(src, i) \ | ||||
register int j##src##i = i * stride; \ | register int j##src##i = i * stride; \ | ||||
@@ -832,18 +832,18 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, | |||||
LOAD_LINE(src, 6); | LOAD_LINE(src, 6); | ||||
LOAD_LINE(src, 7); | LOAD_LINE(src, 7); | ||||
LOAD_LINE(tempBlured, 0); | |||||
LOAD_LINE(tempBlured, 1); | |||||
LOAD_LINE(tempBlured, 2); | |||||
LOAD_LINE(tempBlured, 3); | |||||
LOAD_LINE(tempBlured, 4); | |||||
LOAD_LINE(tempBlured, 5); | |||||
LOAD_LINE(tempBlured, 6); | |||||
LOAD_LINE(tempBlured, 7); | |||||
LOAD_LINE(tempBlurred, 0); | |||||
LOAD_LINE(tempBlurred, 1); | |||||
LOAD_LINE(tempBlurred, 2); | |||||
LOAD_LINE(tempBlurred, 3); | |||||
LOAD_LINE(tempBlurred, 4); | |||||
LOAD_LINE(tempBlurred, 5); | |||||
LOAD_LINE(tempBlurred, 6); | |||||
LOAD_LINE(tempBlurred, 7); | |||||
#undef LOAD_LINE | #undef LOAD_LINE | ||||
#define ACCUMULATE_DIFFS(i) \ | #define ACCUMULATE_DIFFS(i) \ | ||||
vector signed short v_d##i = vec_sub(v_tempBluredAss##i, \ | |||||
vector signed short v_d##i = vec_sub(v_tempBlurredAss##i, \ | |||||
v_srcAss##i); \ | v_srcAss##i); \ | ||||
v_dp = vec_msums(v_d##i, v_d##i, v_dp); \ | v_dp = vec_msums(v_d##i, v_d##i, v_dp); \ | ||||
v_sysdp = vec_msums(v_d##i, vsint16_1, v_sysdp) | v_sysdp = vec_msums(v_d##i, vsint16_1, v_sysdp) | ||||
@@ -869,16 +869,16 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, | |||||
i = d; | i = d; | ||||
d = (4*d | d = (4*d | ||||
+(*(tempBluredPast-256)) | |||||
+(*(tempBluredPast-1))+ (*(tempBluredPast+1)) | |||||
+(*(tempBluredPast+256)) | |||||
+(*(tempBlurredPast-256)) | |||||
+(*(tempBlurredPast-1))+ (*(tempBlurredPast+1)) | |||||
+(*(tempBlurredPast+256)) | |||||
+4)>>3; | +4)>>3; | ||||
*tempBluredPast=i; | |||||
*tempBlurredPast=i; | |||||
if (d > maxNoise[1]) { | if (d > maxNoise[1]) { | ||||
if (d < maxNoise[2]) { | if (d < maxNoise[2]) { | ||||
#define OP(i) v_tempBluredAss##i = vec_avg(v_tempBluredAss##i, v_srcAss##i); | |||||
#define OP(i) v_tempBlurredAss##i = vec_avg(v_tempBlurredAss##i, v_srcAss##i); | |||||
OP(0); | OP(0); | ||||
OP(1); | OP(1); | ||||
@@ -890,7 +890,7 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, | |||||
OP(7); | OP(7); | ||||
#undef OP | #undef OP | ||||
} else { | } else { | ||||
#define OP(i) v_tempBluredAss##i = v_srcAss##i; | |||||
#define OP(i) v_tempBlurredAss##i = v_srcAss##i; | |||||
OP(0); | OP(0); | ||||
OP(1); | OP(1); | ||||
@@ -910,11 +910,11 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, | |||||
#define OP(i) \ | #define OP(i) \ | ||||
const vector signed short v_temp##i = \ | const vector signed short v_temp##i = \ | ||||
vec_mladd(v_tempBluredAss##i, \ | |||||
vec_mladd(v_tempBlurredAss##i, \ | |||||
vsint16_7, v_srcAss##i); \ | vsint16_7, v_srcAss##i); \ | ||||
const vector signed short v_temp2##i = \ | const vector signed short v_temp2##i = \ | ||||
vec_add(v_temp##i, vsint16_4); \ | vec_add(v_temp##i, vsint16_4); \ | ||||
v_tempBluredAss##i = vec_sr(v_temp2##i, vuint16_3) | |||||
v_tempBlurredAss##i = vec_sr(v_temp2##i, vuint16_3) | |||||
OP(0); | OP(0); | ||||
OP(1); | OP(1); | ||||
@@ -931,11 +931,11 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, | |||||
#define OP(i) \ | #define OP(i) \ | ||||
const vector signed short v_temp##i = \ | const vector signed short v_temp##i = \ | ||||
vec_mladd(v_tempBluredAss##i, \ | |||||
vec_mladd(v_tempBlurredAss##i, \ | |||||
vsint16_3, v_srcAss##i); \ | vsint16_3, v_srcAss##i); \ | ||||
const vector signed short v_temp2##i = \ | const vector signed short v_temp2##i = \ | ||||
vec_add(v_temp##i, vsint16_2); \ | vec_add(v_temp##i, vsint16_2); \ | ||||
v_tempBluredAss##i = vec_sr(v_temp2##i, (vector unsigned short)vsint16_2) | |||||
v_tempBlurredAss##i = vec_sr(v_temp2##i, (vector unsigned short)vsint16_2) | |||||
OP(0); | OP(0); | ||||
OP(1); | OP(1); | ||||
@@ -957,7 +957,7 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, | |||||
const vector unsigned char perms##src##i = \ | const vector unsigned char perms##src##i = \ | ||||
vec_lvsr(i * stride, src); \ | vec_lvsr(i * stride, src); \ | ||||
const vector unsigned char vf##src##i = \ | const vector unsigned char vf##src##i = \ | ||||
vec_packsu(v_tempBluredAss##i, (vector signed short)zero); \ | |||||
vec_packsu(v_tempBlurredAss##i, (vector signed short)zero); \ | |||||
const vector unsigned char vg##src##i = \ | const vector unsigned char vg##src##i = \ | ||||
vec_perm(vf##src##i, v_##src##A##i, permHH); \ | vec_perm(vf##src##i, v_##src##A##i, permHH); \ | ||||
const vector unsigned char mask##src##i = \ | const vector unsigned char mask##src##i = \ | ||||
@@ -979,14 +979,14 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, | |||||
PACK_AND_STORE(src, 5); | PACK_AND_STORE(src, 5); | ||||
PACK_AND_STORE(src, 6); | PACK_AND_STORE(src, 6); | ||||
PACK_AND_STORE(src, 7); | PACK_AND_STORE(src, 7); | ||||
PACK_AND_STORE(tempBlured, 0); | |||||
PACK_AND_STORE(tempBlured, 1); | |||||
PACK_AND_STORE(tempBlured, 2); | |||||
PACK_AND_STORE(tempBlured, 3); | |||||
PACK_AND_STORE(tempBlured, 4); | |||||
PACK_AND_STORE(tempBlured, 5); | |||||
PACK_AND_STORE(tempBlured, 6); | |||||
PACK_AND_STORE(tempBlured, 7); | |||||
PACK_AND_STORE(tempBlurred, 0); | |||||
PACK_AND_STORE(tempBlurred, 1); | |||||
PACK_AND_STORE(tempBlurred, 2); | |||||
PACK_AND_STORE(tempBlurred, 3); | |||||
PACK_AND_STORE(tempBlurred, 4); | |||||
PACK_AND_STORE(tempBlurred, 5); | |||||
PACK_AND_STORE(tempBlurred, 6); | |||||
PACK_AND_STORE(tempBlurred, 7); | |||||
#undef PACK_AND_STORE | #undef PACK_AND_STORE | ||||
} | } | ||||
@@ -130,8 +130,8 @@ typedef struct PPContext{ | |||||
DECLARE_ALIGNED(8, uint64_t, packedYScale); | DECLARE_ALIGNED(8, uint64_t, packedYScale); | ||||
/** Temporal noise reducing buffers */ | /** Temporal noise reducing buffers */ | ||||
uint8_t *tempBlured[3]; | |||||
int32_t *tempBluredPast[3]; | |||||
uint8_t *tempBlurred[3]; | |||||
int32_t *tempBlurredPast[3]; | |||||
/** Temporary buffers for handling the last row(s) */ | /** Temporary buffers for handling the last row(s) */ | ||||
uint8_t *tempDst; | uint8_t *tempDst; | ||||
@@ -2182,12 +2182,12 @@ static inline void RENAME(transpose2)(uint8_t *dst, int dstStride, uint8_t *src) | |||||
#ifndef HAVE_ALTIVEC | #ifndef HAVE_ALTIVEC | ||||
static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, | static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, | ||||
uint8_t *tempBlured, uint32_t *tempBluredPast, int *maxNoise) | |||||
uint8_t *tempBlurred, uint32_t *tempBlurredPast, int *maxNoise) | |||||
{ | { | ||||
// to save a register (FIXME do this outside of the loops) | // to save a register (FIXME do this outside of the loops) | ||||
tempBluredPast[127]= maxNoise[0]; | |||||
tempBluredPast[128]= maxNoise[1]; | |||||
tempBluredPast[129]= maxNoise[2]; | |||||
tempBlurredPast[127]= maxNoise[0]; | |||||
tempBlurredPast[128]= maxNoise[1]; | |||||
tempBlurredPast[129]= maxNoise[2]; | |||||
#define FAST_L2_DIFF | #define FAST_L2_DIFF | ||||
//#define L1_DIFF //u should change the thresholds too if u try that one | //#define L1_DIFF //u should change the thresholds too if u try that one | ||||
@@ -2476,7 +2476,7 @@ L2_DIFF_CORE((%0, %%REGc) , (%1, %%REGc)) | |||||
"4: \n\t" | "4: \n\t" | ||||
:: "r" (src), "r" (tempBlured), "r"((long)stride), "m" (tempBluredPast) | |||||
:: "r" (src), "r" (tempBlurred), "r"((long)stride), "m" (tempBlurredPast) | |||||
: "%"REG_a, "%"REG_d, "%"REG_c, "memory" | : "%"REG_a, "%"REG_d, "%"REG_c, "memory" | ||||
); | ); | ||||
#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) | #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) | ||||
@@ -2489,7 +2489,7 @@ L2_DIFF_CORE((%0, %%REGc) , (%1, %%REGc)) | |||||
for(y=0; y<8; y++){ | for(y=0; y<8; y++){ | ||||
int x; | int x; | ||||
for(x=0; x<8; x++){ | for(x=0; x<8; x++){ | ||||
int ref= tempBlured[ x + y*stride ]; | |||||
int ref= tempBlurred[ x + y*stride ]; | |||||
int cur= src[ x + y*stride ]; | int cur= src[ x + y*stride ]; | ||||
int d1=ref - cur; | int d1=ref - cur; | ||||
// if(x==0 || x==7) d1+= d1>>1; | // if(x==0 || x==7) d1+= d1>>1; | ||||
@@ -2502,12 +2502,12 @@ L2_DIFF_CORE((%0, %%REGc) , (%1, %%REGc)) | |||||
i=d; | i=d; | ||||
d= ( | d= ( | ||||
4*d | 4*d | ||||
+(*(tempBluredPast-256)) | |||||
+(*(tempBluredPast-1))+ (*(tempBluredPast+1)) | |||||
+(*(tempBluredPast+256)) | |||||
+(*(tempBlurredPast-256)) | |||||
+(*(tempBlurredPast-1))+ (*(tempBlurredPast+1)) | |||||
+(*(tempBlurredPast+256)) | |||||
+4)>>3; | +4)>>3; | ||||
*tempBluredPast=i; | |||||
// ((*tempBluredPast)*3 + d + 2)>>2; | |||||
*tempBlurredPast=i; | |||||
// ((*tempBlurredPast)*3 + d + 2)>>2; | |||||
/* | /* | ||||
Switch between | Switch between | ||||
@@ -2521,9 +2521,9 @@ Switch between | |||||
for(y=0; y<8; y++){ | for(y=0; y<8; y++){ | ||||
int x; | int x; | ||||
for(x=0; x<8; x++){ | for(x=0; x<8; x++){ | ||||
int ref= tempBlured[ x + y*stride ]; | |||||
int ref= tempBlurred[ x + y*stride ]; | |||||
int cur= src[ x + y*stride ]; | int cur= src[ x + y*stride ]; | ||||
tempBlured[ x + y*stride ]= | |||||
tempBlurred[ x + y*stride ]= | |||||
src[ x + y*stride ]= | src[ x + y*stride ]= | ||||
(ref + cur + 1)>>1; | (ref + cur + 1)>>1; | ||||
} | } | ||||
@@ -2532,7 +2532,7 @@ Switch between | |||||
for(y=0; y<8; y++){ | for(y=0; y<8; y++){ | ||||
int x; | int x; | ||||
for(x=0; x<8; x++){ | for(x=0; x<8; x++){ | ||||
tempBlured[ x + y*stride ]= src[ x + y*stride ]; | |||||
tempBlurred[ x + y*stride ]= src[ x + y*stride ]; | |||||
} | } | ||||
} | } | ||||
} | } | ||||
@@ -2541,9 +2541,9 @@ Switch between | |||||
for(y=0; y<8; y++){ | for(y=0; y<8; y++){ | ||||
int x; | int x; | ||||
for(x=0; x<8; x++){ | for(x=0; x<8; x++){ | ||||
int ref= tempBlured[ x + y*stride ]; | |||||
int ref= tempBlurred[ x + y*stride ]; | |||||
int cur= src[ x + y*stride ]; | int cur= src[ x + y*stride ]; | ||||
tempBlured[ x + y*stride ]= | |||||
tempBlurred[ x + y*stride ]= | |||||
src[ x + y*stride ]= | src[ x + y*stride ]= | ||||
(ref*7 + cur + 4)>>3; | (ref*7 + cur + 4)>>3; | ||||
} | } | ||||
@@ -2552,9 +2552,9 @@ Switch between | |||||
for(y=0; y<8; y++){ | for(y=0; y<8; y++){ | ||||
int x; | int x; | ||||
for(x=0; x<8; x++){ | for(x=0; x<8; x++){ | ||||
int ref= tempBlured[ x + y*stride ]; | |||||
int ref= tempBlurred[ x + y*stride ]; | |||||
int cur= src[ x + y*stride ]; | int cur= src[ x + y*stride ]; | ||||
tempBlured[ x + y*stride ]= | |||||
tempBlurred[ x + y*stride ]= | |||||
src[ x + y*stride ]= | src[ x + y*stride ]= | ||||
(ref*3 + cur + 2)>>2; | (ref*3 + cur + 2)>>2; | ||||
} | } | ||||
@@ -3650,8 +3650,8 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ | |||||
if(mode & TEMP_NOISE_FILTER) | if(mode & TEMP_NOISE_FILTER) | ||||
{ | { | ||||
RENAME(tempNoiseReducer)(dstBlock-8, stride, | RENAME(tempNoiseReducer)(dstBlock-8, stride, | ||||
c.tempBlured[isColor] + y*dstStride + x, | |||||
c.tempBluredPast[isColor] + (y>>3)*256 + (x>>3), | |||||
c.tempBlurred[isColor] + y*dstStride + x, | |||||
c.tempBlurredPast[isColor] + (y>>3)*256 + (x>>3), | |||||
c.ppMode.maxTmpNoise); | c.ppMode.maxTmpNoise); | ||||
} | } | ||||
} | } | ||||
@@ -3672,8 +3672,8 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ | |||||
if((mode & TEMP_NOISE_FILTER)){ | if((mode & TEMP_NOISE_FILTER)){ | ||||
RENAME(tempNoiseReducer)(dstBlock-8, dstStride, | RENAME(tempNoiseReducer)(dstBlock-8, dstStride, | ||||
c.tempBlured[isColor] + y*dstStride + x, | |||||
c.tempBluredPast[isColor] + (y>>3)*256 + (x>>3), | |||||
c.tempBlurred[isColor] + y*dstStride + x, | |||||
c.tempBlurredPast[isColor] + (y>>3)*256 + (x>>3), | |||||
c.ppMode.maxTmpNoise); | c.ppMode.maxTmpNoise); | ||||
} | } | ||||