Originally committed as revision 2743 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc (tags/v0.5)
| @@ -115,45 +115,50 @@ Notes: | |||||
| #define GET_MODE_BUFFER_SIZE 500 | #define GET_MODE_BUFFER_SIZE 500 | ||||
| #define OPTIONS_ARRAY_SIZE 10 | #define OPTIONS_ARRAY_SIZE 10 | ||||
| #ifdef HAVE_MMX | |||||
| static uint64_t __attribute__((aligned(8))) packedYOffset= 0x0000000000000000LL; | |||||
| static uint64_t __attribute__((aligned(8))) packedYScale= 0x0100010001000100LL; | |||||
| static uint64_t __attribute__((aligned(8))) w05= 0x0005000500050005LL; | |||||
| static uint64_t __attribute__((aligned(8))) w20= 0x0020002000200020LL; | |||||
| static uint64_t __attribute__((aligned(8))) w1400= 0x1400140014001400LL; | |||||
| static uint64_t __attribute__((aligned(8))) bm00000001= 0x00000000000000FFLL; | |||||
| static uint64_t __attribute__((aligned(8))) bm00010000= 0x000000FF00000000LL; | |||||
| static uint64_t __attribute__((aligned(8))) bm00001000= 0x00000000FF000000LL; | |||||
| static uint64_t __attribute__((aligned(8))) bm10000000= 0xFF00000000000000LL; | |||||
| static uint64_t __attribute__((aligned(8))) bm10000001= 0xFF000000000000FFLL; | |||||
| static uint64_t __attribute__((aligned(8))) bm11000011= 0xFFFF00000000FFFFLL; | |||||
| static uint64_t __attribute__((aligned(8))) bm00000011= 0x000000000000FFFFLL; | |||||
| static uint64_t __attribute__((aligned(8))) bm11111110= 0xFFFFFFFFFFFFFF00LL; | |||||
| static uint64_t __attribute__((aligned(8))) bm11000000= 0xFFFF000000000000LL; | |||||
| static uint64_t __attribute__((aligned(8))) bm00011000= 0x000000FFFF000000LL; | |||||
| static uint64_t __attribute__((aligned(8))) bm00110011= 0x0000FFFF0000FFFFLL; | |||||
| static uint64_t __attribute__((aligned(8))) bm11001100= 0xFFFF0000FFFF0000LL; | |||||
| static uint64_t __attribute__((aligned(8))) b00= 0x0000000000000000LL; | |||||
| static uint64_t __attribute__((aligned(8))) b01= 0x0101010101010101LL; | |||||
| static uint64_t __attribute__((aligned(8))) b02= 0x0202020202020202LL; | |||||
| static uint64_t __attribute__((aligned(8))) b0F= 0x0F0F0F0F0F0F0F0FLL; | |||||
| static uint64_t __attribute__((aligned(8))) b04= 0x0404040404040404LL; | |||||
| static uint64_t __attribute__((aligned(8))) b08= 0x0808080808080808LL; | |||||
| static uint64_t __attribute__((aligned(8))) bFF= 0xFFFFFFFFFFFFFFFFLL; | |||||
| static uint64_t __attribute__((aligned(8))) b20= 0x2020202020202020LL; | |||||
| static uint64_t __attribute__((aligned(8))) b80= 0x8080808080808080LL; | |||||
| static uint64_t __attribute__((aligned(8))) b7E= 0x7E7E7E7E7E7E7E7ELL; | |||||
| static uint64_t __attribute__((aligned(8))) b7C= 0x7C7C7C7C7C7C7C7CLL; | |||||
| static uint64_t __attribute__((aligned(8))) b3F= 0x3F3F3F3F3F3F3F3FLL; | |||||
| static uint64_t __attribute__((aligned(8))) temp0=0; | |||||
| static uint64_t __attribute__((aligned(8))) temp1=0; | |||||
| static uint64_t __attribute__((aligned(8))) temp2=0; | |||||
| static uint64_t __attribute__((aligned(8))) temp3=0; | |||||
| static uint64_t __attribute__((aligned(8))) temp4=0; | |||||
| static uint64_t __attribute__((aligned(8))) temp5=0; | |||||
| static uint64_t __attribute__((aligned(8))) pQPb=0; | |||||
| static uint64_t __attribute__((aligned(8))) pQPb2=0; | |||||
| static uint8_t __attribute__((aligned(8))) tempBlocks[8*16*2]; //used for the horizontal code | |||||
| #else | |||||
| static uint64_t packedYOffset= 0x0000000000000000LL; | static uint64_t packedYOffset= 0x0000000000000000LL; | ||||
| static uint64_t packedYScale= 0x0100010001000100LL; | static uint64_t packedYScale= 0x0100010001000100LL; | ||||
| static uint64_t w05= 0x0005000500050005LL; | |||||
| static uint64_t w20= 0x0020002000200020LL; | |||||
| static uint64_t w1400= 0x1400140014001400LL; | |||||
| static uint64_t bm00000001= 0x00000000000000FFLL; | |||||
| static uint64_t bm00010000= 0x000000FF00000000LL; | |||||
| static uint64_t bm00001000= 0x00000000FF000000LL; | |||||
| static uint64_t bm10000000= 0xFF00000000000000LL; | |||||
| static uint64_t bm10000001= 0xFF000000000000FFLL; | |||||
| static uint64_t bm11000011= 0xFFFF00000000FFFFLL; | |||||
| static uint64_t bm00000011= 0x000000000000FFFFLL; | |||||
| static uint64_t bm11111110= 0xFFFFFFFFFFFFFF00LL; | |||||
| static uint64_t bm11000000= 0xFFFF000000000000LL; | |||||
| static uint64_t bm00011000= 0x000000FFFF000000LL; | |||||
| static uint64_t bm00110011= 0x0000FFFF0000FFFFLL; | |||||
| static uint64_t bm11001100= 0xFFFF0000FFFF0000LL; | |||||
| static uint64_t b00= 0x0000000000000000LL; | |||||
| static uint64_t b01= 0x0101010101010101LL; | |||||
| static uint64_t b02= 0x0202020202020202LL; | |||||
| static uint64_t b0F= 0x0F0F0F0F0F0F0F0FLL; | |||||
| static uint64_t b04= 0x0404040404040404LL; | |||||
| static uint64_t b08= 0x0808080808080808LL; | |||||
| static uint64_t bFF= 0xFFFFFFFFFFFFFFFFLL; | |||||
| static uint64_t b20= 0x2020202020202020LL; | |||||
| static uint64_t b80= 0x8080808080808080LL; | |||||
| static uint64_t b7E= 0x7E7E7E7E7E7E7E7ELL; | |||||
| static uint64_t b7C= 0x7C7C7C7C7C7C7C7CLL; | |||||
| static uint64_t b3F= 0x3F3F3F3F3F3F3F3FLL; | |||||
| static uint64_t temp0=0; | |||||
| static uint64_t temp1=0; | |||||
| static uint64_t temp2=0; | |||||
| static uint64_t temp3=0; | |||||
| static uint64_t temp4=0; | |||||
| static uint64_t temp5=0; | |||||
| static uint64_t pQPb=0; | |||||
| static uint64_t pQPb2=0; | |||||
| static uint8_t tempBlocks[8*16*2]; //used for the horizontal code | static uint8_t tempBlocks[8*16*2]; //used for the horizontal code | ||||
| #endif | |||||
| int hFlatnessThreshold= 56 - 16; | int hFlatnessThreshold= 56 - 16; | ||||
| int vFlatnessThreshold= 56 - 16; | int vFlatnessThreshold= 56 - 16; | ||||
| @@ -189,15 +194,17 @@ static char *replaceTable[]= | |||||
| NULL //End Marker | NULL //End Marker | ||||
| }; | }; | ||||
| #ifdef HAVE_MMX | |||||
| static inline void unusedVariableWarningFixer() | static inline void unusedVariableWarningFixer() | ||||
| { | { | ||||
| if( | if( | ||||
| packedYOffset + packedYScale + w05 + w20 + w1400 + bm00000001 + bm00010000 | packedYOffset + packedYScale + w05 + w20 + w1400 + bm00000001 + bm00010000 | ||||
| + bm00001000 + bm10000000 + bm10000001 + bm11000011 + bm00000011 + bm11111110 | + bm00001000 + bm10000000 + bm10000001 + bm11000011 + bm00000011 + bm11111110 | ||||
| + bm11000000 + bm00011000 + bm00110011 + bm11001100 + b00 + b01 + b02 + b0F | + bm11000000 + bm00011000 + bm00110011 + bm11001100 + b00 + b01 + b02 + b0F | ||||
| + bFF + b20 + b80 + b7E + b7C + b3F + temp0 + temp1 + temp2 + temp3 + temp4 | |||||
| + bFF + b20 + b04+ b08 + pQPb2 + b80 + b7E + b7C + b3F + temp0 + temp1 + temp2 + temp3 + temp4 | |||||
| + temp5 + pQPb== 0) b00=0; | + temp5 + pQPb== 0) b00=0; | ||||
| } | } | ||||
| #endif | |||||
| #ifdef TIMING | #ifdef TIMING | ||||
| static inline long long rdtsc() | static inline long long rdtsc() | ||||
| @@ -3108,7 +3115,7 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri | |||||
| if(mode & LEVEL_FIX) QPCorrecture= packedYScale &0xFFFF; | if(mode & LEVEL_FIX) QPCorrecture= packedYScale &0xFFFF; | ||||
| else QPCorrecture= 256; | else QPCorrecture= 256; | ||||
| /* line before the first one */ | |||||
| /* copy & deinterlace first row of blocks */ | |||||
| y=-BLOCK_SIZE; | y=-BLOCK_SIZE; | ||||
| { | { | ||||
| //1% speedup if these are here instead of the inner loop | //1% speedup if these are here instead of the inner loop | ||||
| @@ -3247,7 +3254,7 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri | |||||
| if(!isColor) | if(!isColor) | ||||
| { | { | ||||
| QP= (QP* QPCorrecture)>>8; | QP= (QP* QPCorrecture)>>8; | ||||
| yHistogram[ srcBlock[srcStride*4 + 4] ]++; | |||||
| yHistogram[ srcBlock[srcStride*12 + 4] ]++; | |||||
| } | } | ||||
| #ifdef HAVE_MMX | #ifdef HAVE_MMX | ||||
| asm volatile( | asm volatile( | ||||
| @@ -115,45 +115,50 @@ Notes: | |||||
| #define GET_MODE_BUFFER_SIZE 500 | #define GET_MODE_BUFFER_SIZE 500 | ||||
| #define OPTIONS_ARRAY_SIZE 10 | #define OPTIONS_ARRAY_SIZE 10 | ||||
| #ifdef HAVE_MMX | |||||
| static uint64_t __attribute__((aligned(8))) packedYOffset= 0x0000000000000000LL; | |||||
| static uint64_t __attribute__((aligned(8))) packedYScale= 0x0100010001000100LL; | |||||
| static uint64_t __attribute__((aligned(8))) w05= 0x0005000500050005LL; | |||||
| static uint64_t __attribute__((aligned(8))) w20= 0x0020002000200020LL; | |||||
| static uint64_t __attribute__((aligned(8))) w1400= 0x1400140014001400LL; | |||||
| static uint64_t __attribute__((aligned(8))) bm00000001= 0x00000000000000FFLL; | |||||
| static uint64_t __attribute__((aligned(8))) bm00010000= 0x000000FF00000000LL; | |||||
| static uint64_t __attribute__((aligned(8))) bm00001000= 0x00000000FF000000LL; | |||||
| static uint64_t __attribute__((aligned(8))) bm10000000= 0xFF00000000000000LL; | |||||
| static uint64_t __attribute__((aligned(8))) bm10000001= 0xFF000000000000FFLL; | |||||
| static uint64_t __attribute__((aligned(8))) bm11000011= 0xFFFF00000000FFFFLL; | |||||
| static uint64_t __attribute__((aligned(8))) bm00000011= 0x000000000000FFFFLL; | |||||
| static uint64_t __attribute__((aligned(8))) bm11111110= 0xFFFFFFFFFFFFFF00LL; | |||||
| static uint64_t __attribute__((aligned(8))) bm11000000= 0xFFFF000000000000LL; | |||||
| static uint64_t __attribute__((aligned(8))) bm00011000= 0x000000FFFF000000LL; | |||||
| static uint64_t __attribute__((aligned(8))) bm00110011= 0x0000FFFF0000FFFFLL; | |||||
| static uint64_t __attribute__((aligned(8))) bm11001100= 0xFFFF0000FFFF0000LL; | |||||
| static uint64_t __attribute__((aligned(8))) b00= 0x0000000000000000LL; | |||||
| static uint64_t __attribute__((aligned(8))) b01= 0x0101010101010101LL; | |||||
| static uint64_t __attribute__((aligned(8))) b02= 0x0202020202020202LL; | |||||
| static uint64_t __attribute__((aligned(8))) b0F= 0x0F0F0F0F0F0F0F0FLL; | |||||
| static uint64_t __attribute__((aligned(8))) b04= 0x0404040404040404LL; | |||||
| static uint64_t __attribute__((aligned(8))) b08= 0x0808080808080808LL; | |||||
| static uint64_t __attribute__((aligned(8))) bFF= 0xFFFFFFFFFFFFFFFFLL; | |||||
| static uint64_t __attribute__((aligned(8))) b20= 0x2020202020202020LL; | |||||
| static uint64_t __attribute__((aligned(8))) b80= 0x8080808080808080LL; | |||||
| static uint64_t __attribute__((aligned(8))) b7E= 0x7E7E7E7E7E7E7E7ELL; | |||||
| static uint64_t __attribute__((aligned(8))) b7C= 0x7C7C7C7C7C7C7C7CLL; | |||||
| static uint64_t __attribute__((aligned(8))) b3F= 0x3F3F3F3F3F3F3F3FLL; | |||||
| static uint64_t __attribute__((aligned(8))) temp0=0; | |||||
| static uint64_t __attribute__((aligned(8))) temp1=0; | |||||
| static uint64_t __attribute__((aligned(8))) temp2=0; | |||||
| static uint64_t __attribute__((aligned(8))) temp3=0; | |||||
| static uint64_t __attribute__((aligned(8))) temp4=0; | |||||
| static uint64_t __attribute__((aligned(8))) temp5=0; | |||||
| static uint64_t __attribute__((aligned(8))) pQPb=0; | |||||
| static uint64_t __attribute__((aligned(8))) pQPb2=0; | |||||
| static uint8_t __attribute__((aligned(8))) tempBlocks[8*16*2]; //used for the horizontal code | |||||
| #else | |||||
| static uint64_t packedYOffset= 0x0000000000000000LL; | static uint64_t packedYOffset= 0x0000000000000000LL; | ||||
| static uint64_t packedYScale= 0x0100010001000100LL; | static uint64_t packedYScale= 0x0100010001000100LL; | ||||
| static uint64_t w05= 0x0005000500050005LL; | |||||
| static uint64_t w20= 0x0020002000200020LL; | |||||
| static uint64_t w1400= 0x1400140014001400LL; | |||||
| static uint64_t bm00000001= 0x00000000000000FFLL; | |||||
| static uint64_t bm00010000= 0x000000FF00000000LL; | |||||
| static uint64_t bm00001000= 0x00000000FF000000LL; | |||||
| static uint64_t bm10000000= 0xFF00000000000000LL; | |||||
| static uint64_t bm10000001= 0xFF000000000000FFLL; | |||||
| static uint64_t bm11000011= 0xFFFF00000000FFFFLL; | |||||
| static uint64_t bm00000011= 0x000000000000FFFFLL; | |||||
| static uint64_t bm11111110= 0xFFFFFFFFFFFFFF00LL; | |||||
| static uint64_t bm11000000= 0xFFFF000000000000LL; | |||||
| static uint64_t bm00011000= 0x000000FFFF000000LL; | |||||
| static uint64_t bm00110011= 0x0000FFFF0000FFFFLL; | |||||
| static uint64_t bm11001100= 0xFFFF0000FFFF0000LL; | |||||
| static uint64_t b00= 0x0000000000000000LL; | |||||
| static uint64_t b01= 0x0101010101010101LL; | |||||
| static uint64_t b02= 0x0202020202020202LL; | |||||
| static uint64_t b0F= 0x0F0F0F0F0F0F0F0FLL; | |||||
| static uint64_t b04= 0x0404040404040404LL; | |||||
| static uint64_t b08= 0x0808080808080808LL; | |||||
| static uint64_t bFF= 0xFFFFFFFFFFFFFFFFLL; | |||||
| static uint64_t b20= 0x2020202020202020LL; | |||||
| static uint64_t b80= 0x8080808080808080LL; | |||||
| static uint64_t b7E= 0x7E7E7E7E7E7E7E7ELL; | |||||
| static uint64_t b7C= 0x7C7C7C7C7C7C7C7CLL; | |||||
| static uint64_t b3F= 0x3F3F3F3F3F3F3F3FLL; | |||||
| static uint64_t temp0=0; | |||||
| static uint64_t temp1=0; | |||||
| static uint64_t temp2=0; | |||||
| static uint64_t temp3=0; | |||||
| static uint64_t temp4=0; | |||||
| static uint64_t temp5=0; | |||||
| static uint64_t pQPb=0; | |||||
| static uint64_t pQPb2=0; | |||||
| static uint8_t tempBlocks[8*16*2]; //used for the horizontal code | static uint8_t tempBlocks[8*16*2]; //used for the horizontal code | ||||
| #endif | |||||
| int hFlatnessThreshold= 56 - 16; | int hFlatnessThreshold= 56 - 16; | ||||
| int vFlatnessThreshold= 56 - 16; | int vFlatnessThreshold= 56 - 16; | ||||
| @@ -189,15 +194,17 @@ static char *replaceTable[]= | |||||
| NULL //End Marker | NULL //End Marker | ||||
| }; | }; | ||||
| #ifdef HAVE_MMX | |||||
| static inline void unusedVariableWarningFixer() | static inline void unusedVariableWarningFixer() | ||||
| { | { | ||||
| if( | if( | ||||
| packedYOffset + packedYScale + w05 + w20 + w1400 + bm00000001 + bm00010000 | packedYOffset + packedYScale + w05 + w20 + w1400 + bm00000001 + bm00010000 | ||||
| + bm00001000 + bm10000000 + bm10000001 + bm11000011 + bm00000011 + bm11111110 | + bm00001000 + bm10000000 + bm10000001 + bm11000011 + bm00000011 + bm11111110 | ||||
| + bm11000000 + bm00011000 + bm00110011 + bm11001100 + b00 + b01 + b02 + b0F | + bm11000000 + bm00011000 + bm00110011 + bm11001100 + b00 + b01 + b02 + b0F | ||||
| + bFF + b20 + b80 + b7E + b7C + b3F + temp0 + temp1 + temp2 + temp3 + temp4 | |||||
| + bFF + b20 + b04+ b08 + pQPb2 + b80 + b7E + b7C + b3F + temp0 + temp1 + temp2 + temp3 + temp4 | |||||
| + temp5 + pQPb== 0) b00=0; | + temp5 + pQPb== 0) b00=0; | ||||
| } | } | ||||
| #endif | |||||
| #ifdef TIMING | #ifdef TIMING | ||||
| static inline long long rdtsc() | static inline long long rdtsc() | ||||
| @@ -3108,7 +3115,7 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri | |||||
| if(mode & LEVEL_FIX) QPCorrecture= packedYScale &0xFFFF; | if(mode & LEVEL_FIX) QPCorrecture= packedYScale &0xFFFF; | ||||
| else QPCorrecture= 256; | else QPCorrecture= 256; | ||||
| /* line before the first one */ | |||||
| /* copy & deinterlace first row of blocks */ | |||||
| y=-BLOCK_SIZE; | y=-BLOCK_SIZE; | ||||
| { | { | ||||
| //1% speedup if these are here instead of the inner loop | //1% speedup if these are here instead of the inner loop | ||||
| @@ -3247,7 +3254,7 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri | |||||
| if(!isColor) | if(!isColor) | ||||
| { | { | ||||
| QP= (QP* QPCorrecture)>>8; | QP= (QP* QPCorrecture)>>8; | ||||
| yHistogram[ srcBlock[srcStride*4 + 4] ]++; | |||||
| yHistogram[ srcBlock[srcStride*12 + 4] ]++; | |||||
| } | } | ||||
| #ifdef HAVE_MMX | #ifdef HAVE_MMX | ||||
| asm volatile( | asm volatile( | ||||