Originally committed as revision 2743 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc (tags/v0.5)
| @@ -115,45 +115,50 @@ Notes: | |||
/*
 * File-scope constants and variables, declared twice behind HAVE_MMX:
 * the first branch forces 8-byte alignment with __attribute__((aligned(8))),
 * the #else branch declares the same names with the same initial values but
 * without the alignment attribute.
 *
 * Naming scheme, as can be read off the initializers:
 *   wNN        - the hex value NN repeated in each of the four 16-bit words
 *   bNN        - the byte 0xNN repeated in all eight bytes
 *   bmXXXXXXXX - byte mask; one digit per byte, most significant byte first,
 *                a 1 means that byte is 0xFF and a 0 means it is 0x00
 * temp0..temp5, pQPb and pQPb2 start at zero; their users are not visible in
 * this excerpt.
 */
| #define GET_MODE_BUFFER_SIZE 500 | |||
| #define OPTIONS_ARRAY_SIZE 10 | |||
| #ifdef HAVE_MMX | |||
| static uint64_t __attribute__((aligned(8))) packedYOffset= 0x0000000000000000LL; | |||
| static uint64_t __attribute__((aligned(8))) packedYScale= 0x0100010001000100LL; | |||
| static uint64_t __attribute__((aligned(8))) w05= 0x0005000500050005LL; | |||
| static uint64_t __attribute__((aligned(8))) w20= 0x0020002000200020LL; | |||
| static uint64_t __attribute__((aligned(8))) w1400= 0x1400140014001400LL; | |||
| static uint64_t __attribute__((aligned(8))) bm00000001= 0x00000000000000FFLL; | |||
| static uint64_t __attribute__((aligned(8))) bm00010000= 0x000000FF00000000LL; | |||
| static uint64_t __attribute__((aligned(8))) bm00001000= 0x00000000FF000000LL; | |||
| static uint64_t __attribute__((aligned(8))) bm10000000= 0xFF00000000000000LL; | |||
| static uint64_t __attribute__((aligned(8))) bm10000001= 0xFF000000000000FFLL; | |||
| static uint64_t __attribute__((aligned(8))) bm11000011= 0xFFFF00000000FFFFLL; | |||
| static uint64_t __attribute__((aligned(8))) bm00000011= 0x000000000000FFFFLL; | |||
| static uint64_t __attribute__((aligned(8))) bm11111110= 0xFFFFFFFFFFFFFF00LL; | |||
| static uint64_t __attribute__((aligned(8))) bm11000000= 0xFFFF000000000000LL; | |||
| static uint64_t __attribute__((aligned(8))) bm00011000= 0x000000FFFF000000LL; | |||
| static uint64_t __attribute__((aligned(8))) bm00110011= 0x0000FFFF0000FFFFLL; | |||
| static uint64_t __attribute__((aligned(8))) bm11001100= 0xFFFF0000FFFF0000LL; | |||
| static uint64_t __attribute__((aligned(8))) b00= 0x0000000000000000LL; | |||
| static uint64_t __attribute__((aligned(8))) b01= 0x0101010101010101LL; | |||
| static uint64_t __attribute__((aligned(8))) b02= 0x0202020202020202LL; | |||
| static uint64_t __attribute__((aligned(8))) b0F= 0x0F0F0F0F0F0F0F0FLL; | |||
| static uint64_t __attribute__((aligned(8))) b04= 0x0404040404040404LL; | |||
| static uint64_t __attribute__((aligned(8))) b08= 0x0808080808080808LL; | |||
| static uint64_t __attribute__((aligned(8))) bFF= 0xFFFFFFFFFFFFFFFFLL; | |||
| static uint64_t __attribute__((aligned(8))) b20= 0x2020202020202020LL; | |||
| static uint64_t __attribute__((aligned(8))) b80= 0x8080808080808080LL; | |||
| static uint64_t __attribute__((aligned(8))) b7E= 0x7E7E7E7E7E7E7E7ELL; | |||
| static uint64_t __attribute__((aligned(8))) b7C= 0x7C7C7C7C7C7C7C7CLL; | |||
| static uint64_t __attribute__((aligned(8))) b3F= 0x3F3F3F3F3F3F3F3FLL; | |||
| static uint64_t __attribute__((aligned(8))) temp0=0; | |||
| static uint64_t __attribute__((aligned(8))) temp1=0; | |||
| static uint64_t __attribute__((aligned(8))) temp2=0; | |||
| static uint64_t __attribute__((aligned(8))) temp3=0; | |||
| static uint64_t __attribute__((aligned(8))) temp4=0; | |||
| static uint64_t __attribute__((aligned(8))) temp5=0; | |||
| static uint64_t __attribute__((aligned(8))) pQPb=0; | |||
| static uint64_t __attribute__((aligned(8))) pQPb2=0; | |||
| static uint8_t __attribute__((aligned(8))) tempBlocks[8*16*2]; //used for the horizontal code | |||
| #else | |||
/* Same names and initial values as the HAVE_MMX branch, without aligned(8). */
| static uint64_t packedYOffset= 0x0000000000000000LL; | |||
| static uint64_t packedYScale= 0x0100010001000100LL; | |||
| static uint64_t w05= 0x0005000500050005LL; | |||
| static uint64_t w20= 0x0020002000200020LL; | |||
| static uint64_t w1400= 0x1400140014001400LL; | |||
| static uint64_t bm00000001= 0x00000000000000FFLL; | |||
| static uint64_t bm00010000= 0x000000FF00000000LL; | |||
| static uint64_t bm00001000= 0x00000000FF000000LL; | |||
| static uint64_t bm10000000= 0xFF00000000000000LL; | |||
| static uint64_t bm10000001= 0xFF000000000000FFLL; | |||
| static uint64_t bm11000011= 0xFFFF00000000FFFFLL; | |||
| static uint64_t bm00000011= 0x000000000000FFFFLL; | |||
| static uint64_t bm11111110= 0xFFFFFFFFFFFFFF00LL; | |||
| static uint64_t bm11000000= 0xFFFF000000000000LL; | |||
| static uint64_t bm00011000= 0x000000FFFF000000LL; | |||
| static uint64_t bm00110011= 0x0000FFFF0000FFFFLL; | |||
| static uint64_t bm11001100= 0xFFFF0000FFFF0000LL; | |||
| static uint64_t b00= 0x0000000000000000LL; | |||
| static uint64_t b01= 0x0101010101010101LL; | |||
| static uint64_t b02= 0x0202020202020202LL; | |||
| static uint64_t b0F= 0x0F0F0F0F0F0F0F0FLL; | |||
| static uint64_t b04= 0x0404040404040404LL; | |||
| static uint64_t b08= 0x0808080808080808LL; | |||
| static uint64_t bFF= 0xFFFFFFFFFFFFFFFFLL; | |||
| static uint64_t b20= 0x2020202020202020LL; | |||
| static uint64_t b80= 0x8080808080808080LL; | |||
| static uint64_t b7E= 0x7E7E7E7E7E7E7E7ELL; | |||
| static uint64_t b7C= 0x7C7C7C7C7C7C7C7CLL; | |||
| static uint64_t b3F= 0x3F3F3F3F3F3F3F3FLL; | |||
| static uint64_t temp0=0; | |||
| static uint64_t temp1=0; | |||
| static uint64_t temp2=0; | |||
| static uint64_t temp3=0; | |||
| static uint64_t temp4=0; | |||
| static uint64_t temp5=0; | |||
| static uint64_t pQPb=0; | |||
| static uint64_t pQPb2=0; | |||
| static uint8_t tempBlocks[8*16*2]; //used for the horizontal code | |||
| #endif | |||
/*
 * Both flatness thresholds start at 56 - 16 (= 40). They are not declared
 * static, so they have external linkage.
 */
| int hFlatnessThreshold= 56 - 16; | |||
| int vFlatnessThreshold= 56 - 16; | |||
| @@ -189,15 +194,17 @@ static char *replaceTable[]= | |||
| NULL //End Marker | |||
| }; | |||
/*
 * unusedVariableWarningFixer: reads the file-scope constants and temporaries
 * in a single sum so that each of them is referenced at least once; going by
 * the function's name, the purpose is to silence "unused variable" warnings.
 * The only side effect is that b00 is set to 0 when the sum equals 0.
 * Takes no arguments, returns nothing, and is compiled only when HAVE_MMX is
 * defined.
 *
 * NOTE(review): this text is a diff rendering whose +/- markers were lost.
 * The two consecutive operand lines that begin with "+ bFF + b20" look like
 * the old and the new version of the same source line (the second one adds
 * b04, b08 and pQPb2) - confirm against the real file before relying on it.
 */
| #ifdef HAVE_MMX | |||
| static inline void unusedVariableWarningFixer() | |||
| { | |||
| if( | |||
| packedYOffset + packedYScale + w05 + w20 + w1400 + bm00000001 + bm00010000 | |||
| + bm00001000 + bm10000000 + bm10000001 + bm11000011 + bm00000011 + bm11111110 | |||
| + bm11000000 + bm00011000 + bm00110011 + bm11001100 + b00 + b01 + b02 + b0F | |||
| + bFF + b20 + b80 + b7E + b7C + b3F + temp0 + temp1 + temp2 + temp3 + temp4 | |||
| + bFF + b20 + b04+ b08 + pQPb2 + b80 + b7E + b7C + b3F + temp0 + temp1 + temp2 + temp3 + temp4 | |||
| + temp5 + pQPb== 0) b00=0; | |||
| } | |||
| #endif | |||
| #ifdef TIMING | |||
| static inline long long rdtsc() | |||
| @@ -3108,7 +3115,7 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri | |||
| if(mode & LEVEL_FIX) QPCorrecture= packedYScale &0xFFFF; | |||
| else QPCorrecture= 256; | |||
| /* line before the first one */ | |||
| /* copy & deinterlace first row of blocks */ | |||
| y=-BLOCK_SIZE; | |||
| { | |||
| //1% speedup if these are here instead of the inner loop | |||
| @@ -3247,7 +3254,7 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri | |||
| if(!isColor) | |||
| { | |||
| QP= (QP* QPCorrecture)>>8; | |||
| yHistogram[ srcBlock[srcStride*4 + 4] ]++; | |||
| yHistogram[ srcBlock[srcStride*12 + 4] ]++; | |||
| } | |||
| #ifdef HAVE_MMX | |||
| asm volatile( | |||
| @@ -115,45 +115,50 @@ Notes: | |||
/*
 * File-scope constants and variables, declared twice behind HAVE_MMX:
 * the first branch forces 8-byte alignment with __attribute__((aligned(8))),
 * the #else branch declares the same names with the same initial values but
 * without the alignment attribute.
 *
 * Naming scheme, as can be read off the initializers:
 *   wNN        - the hex value NN repeated in each of the four 16-bit words
 *   bNN        - the byte 0xNN repeated in all eight bytes
 *   bmXXXXXXXX - byte mask; one digit per byte, most significant byte first,
 *                a 1 means that byte is 0xFF and a 0 means it is 0x00
 * temp0..temp5, pQPb and pQPb2 start at zero; their users are not visible in
 * this excerpt.
 */
| #define GET_MODE_BUFFER_SIZE 500 | |||
| #define OPTIONS_ARRAY_SIZE 10 | |||
| #ifdef HAVE_MMX | |||
| static uint64_t __attribute__((aligned(8))) packedYOffset= 0x0000000000000000LL; | |||
| static uint64_t __attribute__((aligned(8))) packedYScale= 0x0100010001000100LL; | |||
| static uint64_t __attribute__((aligned(8))) w05= 0x0005000500050005LL; | |||
| static uint64_t __attribute__((aligned(8))) w20= 0x0020002000200020LL; | |||
| static uint64_t __attribute__((aligned(8))) w1400= 0x1400140014001400LL; | |||
| static uint64_t __attribute__((aligned(8))) bm00000001= 0x00000000000000FFLL; | |||
| static uint64_t __attribute__((aligned(8))) bm00010000= 0x000000FF00000000LL; | |||
| static uint64_t __attribute__((aligned(8))) bm00001000= 0x00000000FF000000LL; | |||
| static uint64_t __attribute__((aligned(8))) bm10000000= 0xFF00000000000000LL; | |||
| static uint64_t __attribute__((aligned(8))) bm10000001= 0xFF000000000000FFLL; | |||
| static uint64_t __attribute__((aligned(8))) bm11000011= 0xFFFF00000000FFFFLL; | |||
| static uint64_t __attribute__((aligned(8))) bm00000011= 0x000000000000FFFFLL; | |||
| static uint64_t __attribute__((aligned(8))) bm11111110= 0xFFFFFFFFFFFFFF00LL; | |||
| static uint64_t __attribute__((aligned(8))) bm11000000= 0xFFFF000000000000LL; | |||
| static uint64_t __attribute__((aligned(8))) bm00011000= 0x000000FFFF000000LL; | |||
| static uint64_t __attribute__((aligned(8))) bm00110011= 0x0000FFFF0000FFFFLL; | |||
| static uint64_t __attribute__((aligned(8))) bm11001100= 0xFFFF0000FFFF0000LL; | |||
| static uint64_t __attribute__((aligned(8))) b00= 0x0000000000000000LL; | |||
| static uint64_t __attribute__((aligned(8))) b01= 0x0101010101010101LL; | |||
| static uint64_t __attribute__((aligned(8))) b02= 0x0202020202020202LL; | |||
| static uint64_t __attribute__((aligned(8))) b0F= 0x0F0F0F0F0F0F0F0FLL; | |||
| static uint64_t __attribute__((aligned(8))) b04= 0x0404040404040404LL; | |||
| static uint64_t __attribute__((aligned(8))) b08= 0x0808080808080808LL; | |||
| static uint64_t __attribute__((aligned(8))) bFF= 0xFFFFFFFFFFFFFFFFLL; | |||
| static uint64_t __attribute__((aligned(8))) b20= 0x2020202020202020LL; | |||
| static uint64_t __attribute__((aligned(8))) b80= 0x8080808080808080LL; | |||
| static uint64_t __attribute__((aligned(8))) b7E= 0x7E7E7E7E7E7E7E7ELL; | |||
| static uint64_t __attribute__((aligned(8))) b7C= 0x7C7C7C7C7C7C7C7CLL; | |||
| static uint64_t __attribute__((aligned(8))) b3F= 0x3F3F3F3F3F3F3F3FLL; | |||
| static uint64_t __attribute__((aligned(8))) temp0=0; | |||
| static uint64_t __attribute__((aligned(8))) temp1=0; | |||
| static uint64_t __attribute__((aligned(8))) temp2=0; | |||
| static uint64_t __attribute__((aligned(8))) temp3=0; | |||
| static uint64_t __attribute__((aligned(8))) temp4=0; | |||
| static uint64_t __attribute__((aligned(8))) temp5=0; | |||
| static uint64_t __attribute__((aligned(8))) pQPb=0; | |||
| static uint64_t __attribute__((aligned(8))) pQPb2=0; | |||
| static uint8_t __attribute__((aligned(8))) tempBlocks[8*16*2]; //used for the horizontal code | |||
| #else | |||
/* Same names and initial values as the HAVE_MMX branch, without aligned(8). */
| static uint64_t packedYOffset= 0x0000000000000000LL; | |||
| static uint64_t packedYScale= 0x0100010001000100LL; | |||
| static uint64_t w05= 0x0005000500050005LL; | |||
| static uint64_t w20= 0x0020002000200020LL; | |||
| static uint64_t w1400= 0x1400140014001400LL; | |||
| static uint64_t bm00000001= 0x00000000000000FFLL; | |||
| static uint64_t bm00010000= 0x000000FF00000000LL; | |||
| static uint64_t bm00001000= 0x00000000FF000000LL; | |||
| static uint64_t bm10000000= 0xFF00000000000000LL; | |||
| static uint64_t bm10000001= 0xFF000000000000FFLL; | |||
| static uint64_t bm11000011= 0xFFFF00000000FFFFLL; | |||
| static uint64_t bm00000011= 0x000000000000FFFFLL; | |||
| static uint64_t bm11111110= 0xFFFFFFFFFFFFFF00LL; | |||
| static uint64_t bm11000000= 0xFFFF000000000000LL; | |||
| static uint64_t bm00011000= 0x000000FFFF000000LL; | |||
| static uint64_t bm00110011= 0x0000FFFF0000FFFFLL; | |||
| static uint64_t bm11001100= 0xFFFF0000FFFF0000LL; | |||
| static uint64_t b00= 0x0000000000000000LL; | |||
| static uint64_t b01= 0x0101010101010101LL; | |||
| static uint64_t b02= 0x0202020202020202LL; | |||
| static uint64_t b0F= 0x0F0F0F0F0F0F0F0FLL; | |||
| static uint64_t b04= 0x0404040404040404LL; | |||
| static uint64_t b08= 0x0808080808080808LL; | |||
| static uint64_t bFF= 0xFFFFFFFFFFFFFFFFLL; | |||
| static uint64_t b20= 0x2020202020202020LL; | |||
| static uint64_t b80= 0x8080808080808080LL; | |||
| static uint64_t b7E= 0x7E7E7E7E7E7E7E7ELL; | |||
| static uint64_t b7C= 0x7C7C7C7C7C7C7C7CLL; | |||
| static uint64_t b3F= 0x3F3F3F3F3F3F3F3FLL; | |||
| static uint64_t temp0=0; | |||
| static uint64_t temp1=0; | |||
| static uint64_t temp2=0; | |||
| static uint64_t temp3=0; | |||
| static uint64_t temp4=0; | |||
| static uint64_t temp5=0; | |||
| static uint64_t pQPb=0; | |||
| static uint64_t pQPb2=0; | |||
| static uint8_t tempBlocks[8*16*2]; //used for the horizontal code | |||
| #endif | |||
/*
 * Both flatness thresholds start at 56 - 16 (= 40). They are not declared
 * static, so they have external linkage.
 */
| int hFlatnessThreshold= 56 - 16; | |||
| int vFlatnessThreshold= 56 - 16; | |||
| @@ -189,15 +194,17 @@ static char *replaceTable[]= | |||
| NULL //End Marker | |||
| }; | |||
/*
 * unusedVariableWarningFixer: reads the file-scope constants and temporaries
 * in a single sum so that each of them is referenced at least once; going by
 * the function's name, the purpose is to silence "unused variable" warnings.
 * The only side effect is that b00 is set to 0 when the sum equals 0.
 * Takes no arguments, returns nothing, and is compiled only when HAVE_MMX is
 * defined.
 *
 * NOTE(review): this text is a diff rendering whose +/- markers were lost.
 * The two consecutive operand lines that begin with "+ bFF + b20" look like
 * the old and the new version of the same source line (the second one adds
 * b04, b08 and pQPb2) - confirm against the real file before relying on it.
 */
| #ifdef HAVE_MMX | |||
| static inline void unusedVariableWarningFixer() | |||
| { | |||
| if( | |||
| packedYOffset + packedYScale + w05 + w20 + w1400 + bm00000001 + bm00010000 | |||
| + bm00001000 + bm10000000 + bm10000001 + bm11000011 + bm00000011 + bm11111110 | |||
| + bm11000000 + bm00011000 + bm00110011 + bm11001100 + b00 + b01 + b02 + b0F | |||
| + bFF + b20 + b80 + b7E + b7C + b3F + temp0 + temp1 + temp2 + temp3 + temp4 | |||
| + bFF + b20 + b04+ b08 + pQPb2 + b80 + b7E + b7C + b3F + temp0 + temp1 + temp2 + temp3 + temp4 | |||
| + temp5 + pQPb== 0) b00=0; | |||
| } | |||
| #endif | |||
| #ifdef TIMING | |||
| static inline long long rdtsc() | |||
| @@ -3108,7 +3115,7 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri | |||
| if(mode & LEVEL_FIX) QPCorrecture= packedYScale &0xFFFF; | |||
| else QPCorrecture= 256; | |||
| /* line before the first one */ | |||
| /* copy & deinterlace first row of blocks */ | |||
| y=-BLOCK_SIZE; | |||
| { | |||
| //1% speedup if these are here instead of the inner loop | |||
| @@ -3247,7 +3254,7 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri | |||
| if(!isColor) | |||
| { | |||
| QP= (QP* QPCorrecture)>>8; | |||
| yHistogram[ srcBlock[srcStride*4 + 4] ]++; | |||
| yHistogram[ srcBlock[srcStride*12 + 4] ]++; | |||
| } | |||
| #ifdef HAVE_MMX | |||
| asm volatile( | |||