added deinterlace filters (linear interpolate, linear blend, median) minor cleanups (removed some outcommented stuff) Originally committed as revision 2204 to svn://svn.mplayerhq.hu/mplayer/trunk/postproctags/v0.5
| @@ -17,19 +17,22 @@ | |||||
| */ | */ | ||||
| /* | /* | ||||
| C MMX MMX2 3DNow* | |||||
| C MMX MMX2 3DNow | |||||
| isVertDC Ec Ec | isVertDC Ec Ec | ||||
| isVertMinMaxOk Ec Ec | isVertMinMaxOk Ec Ec | ||||
| doVertLowPass E e e* | |||||
| doVertLowPass E e e | |||||
| doVertDefFilter Ec Ec Ec | doVertDefFilter Ec Ec Ec | ||||
| isHorizDC Ec Ec | isHorizDC Ec Ec | ||||
| isHorizMinMaxOk a | isHorizMinMaxOk a | ||||
| doHorizLowPass E a a* | |||||
| doHorizLowPass E a a | |||||
| doHorizDefFilter E ac ac | doHorizDefFilter E ac ac | ||||
| deRing | deRing | ||||
| Vertical RKAlgo1 E a a* | |||||
| Vertical X1 a E E* | |||||
| Horizontal X1 a E E* | |||||
| Vertical RKAlgo1 E a a | |||||
| Vertical X1 a E E | |||||
| Horizontal X1 a E E | |||||
| LinIpolDeinterlace a E E* | |||||
| LinBlendDeinterlace a E E* | |||||
| MedianDeinterlace a E | |||||
| * i dont have a 3dnow CPU -> its untested | * i dont have a 3dnow CPU -> its untested | ||||
| @@ -55,6 +58,7 @@ make the mainloop more flexible (variable number of blocks at once | |||||
| compare the quality & speed of all filters | compare the quality & speed of all filters | ||||
| implement a few simple deinterlacing filters | implement a few simple deinterlacing filters | ||||
| split this huge file | split this huge file | ||||
| fix warnings (unused vars, ...) | |||||
| ... | ... | ||||
| Notes: | Notes: | ||||
| @@ -63,6 +67,9 @@ Notes: | |||||
| /* | /* | ||||
| Changelog: use the CVS log | Changelog: use the CVS log | ||||
| rewrote the horizontal lowpass filter to fix a bug which caused a blocky look | |||||
| added deinterlace filters (linear interpolate, linear blend, median) | |||||
| minor cleanups (removed some outcommented stuff) | |||||
| 0.1.3 | 0.1.3 | ||||
| bugfixes: last 3 lines not brightness/contrast corrected | bugfixes: last 3 lines not brightness/contrast corrected | ||||
| brightness statistics messed up with initial black pic | brightness statistics messed up with initial black pic | ||||
| @@ -194,13 +201,11 @@ static inline void prefetcht2(void *p) | |||||
| * Check if the middle 8x8 Block in the given 8x10 block is flat | * Check if the middle 8x8 Block in the given 8x10 block is flat | ||||
| */ | */ | ||||
| static inline int isVertDC(uint8_t src[], int stride){ | static inline int isVertDC(uint8_t src[], int stride){ | ||||
| // return true; | |||||
| int numEq= 0; | int numEq= 0; | ||||
| int y; | int y; | ||||
| src+= stride; // src points to begin of the 8x8 Block | src+= stride; // src points to begin of the 8x8 Block | ||||
| #ifdef HAVE_MMX | #ifdef HAVE_MMX | ||||
| asm volatile( | asm volatile( | ||||
| // "int $3 \n\t" | |||||
| "pushl %1\n\t" | "pushl %1\n\t" | ||||
| "movq b7E, %%mm7 \n\t" // mm7 = 0x7F | "movq b7E, %%mm7 \n\t" // mm7 = 0x7F | ||||
| "movq b7C, %%mm6 \n\t" // mm6 = 0x7D | "movq b7C, %%mm6 \n\t" // mm6 = 0x7D | ||||
| @@ -1577,9 +1582,9 @@ static inline void doHorizDefFilterAndCopyBack(uint8_t dst[], int stride, int QP | |||||
| } | } | ||||
| /** | /** | ||||
| * Do a horizontal low pass filter on the 8x8 block | |||||
| * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block) | |||||
| * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version) | * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version) | ||||
| * using approximately the 7-Tap Filter (1,2,3,4,3,2,1)/16 (MMX2/3DNOW version) | |||||
| * using the 7-Tap Filter (2,2,2,4,2,2,2)/16 (MMX2/3DNOW version) | |||||
| */ | */ | ||||
| static inline void doHorizLowPassAndCopyBack(uint8_t dst[], int stride, int QP) | static inline void doHorizLowPassAndCopyBack(uint8_t dst[], int stride, int QP) | ||||
| { | { | ||||
| @@ -1635,14 +1640,6 @@ static inline void doHorizLowPassAndCopyBack(uint8_t dst[], int stride, int QP) | |||||
| */ | */ | ||||
| // approximately a 7-Tap Filter with Vector (1,2,3,4,3,2,1)/16 | // approximately a 7-Tap Filter with Vector (1,2,3,4,3,2,1)/16 | ||||
| /* | /* | ||||
| 31 | |||||
| 121 | |||||
| 121 | |||||
| 121 | |||||
| 121 | |||||
| 121 | |||||
| 121 | |||||
| 13 | |||||
| Implemented Exact 7-Tap | Implemented Exact 7-Tap | ||||
| 9421 A321 | 9421 A321 | ||||
| 36421 64321 | 36421 64321 | ||||
| @@ -1654,6 +1651,7 @@ Implemented Exact 7-Tap | |||||
| 1249 123A | 1249 123A | ||||
| */ | */ | ||||
| #ifdef HAVE_MMX2 | #ifdef HAVE_MMX2 | ||||
| #define HLP3(i) "movq " #i "(%%eax), %%mm0 \n\t"\ | #define HLP3(i) "movq " #i "(%%eax), %%mm0 \n\t"\ | ||||
| "movq %%mm0, %%mm1 \n\t"\ | "movq %%mm0, %%mm1 \n\t"\ | ||||
| @@ -1680,12 +1678,12 @@ Implemented Exact 7-Tap | |||||
| #define HLP3(i) "movq " #i "(%%eax), %%mm0 \n\t"\ | #define HLP3(i) "movq " #i "(%%eax), %%mm0 \n\t"\ | ||||
| "movq %%mm0, %%mm1 \n\t"\ | "movq %%mm0, %%mm1 \n\t"\ | ||||
| "movq %%mm0, %%mm2 \n\t"\ | "movq %%mm0, %%mm2 \n\t"\ | ||||
| "movq %%mm0, %%mm3 \n\t"\ | |||||
| "movq %%mm0, %%mm4 \n\t"\ | |||||
| "movd -4(%0), %%mm3 \n\t" /*0001000*/\ | |||||
| "movd 8(%0), %%mm4 \n\t" /*0001000*/\ | |||||
| "psllq $8, %%mm1 \n\t"\ | "psllq $8, %%mm1 \n\t"\ | ||||
| "psrlq $8, %%mm2 \n\t"\ | "psrlq $8, %%mm2 \n\t"\ | ||||
| "pand bm00000001, %%mm3 \n\t"\ | |||||
| "pand bm10000000, %%mm4 \n\t"\ | |||||
| "psrlq $24, %%mm3 \n\t"\ | |||||
| "psllq $56, %%mm4 \n\t"\ | |||||
| "por %%mm3, %%mm1 \n\t"\ | "por %%mm3, %%mm1 \n\t"\ | ||||
| "por %%mm4, %%mm2 \n\t"\ | "por %%mm4, %%mm2 \n\t"\ | ||||
| PAVGB(%%mm2, %%mm1)\ | PAVGB(%%mm2, %%mm1)\ | ||||
| @@ -1708,7 +1706,80 @@ Implemented Exact 7-Tap | |||||
| "movd %%mm0, 4(%0) \n\t" | "movd %%mm0, 4(%0) \n\t" | ||||
| #endif | #endif | ||||
| #define HLP(i) HLP3(i) | |||||
| /* uses the 7-Tap Filter: 1112111 */ | |||||
| #define NEW_HLP(i)\ | |||||
| "movq " #i "(%%eax), %%mm0 \n\t"\ | |||||
| "movq %%mm0, %%mm1 \n\t"\ | |||||
| "movq %%mm0, %%mm2 \n\t"\ | |||||
| "movd -4(%0), %%mm3 \n\t" /*0001000*/\ | |||||
| "movd 8(%0), %%mm4 \n\t" /*0001000*/\ | |||||
| "psllq $8, %%mm1 \n\t"\ | |||||
| "psrlq $8, %%mm2 \n\t"\ | |||||
| "psrlq $24, %%mm3 \n\t"\ | |||||
| "psllq $56, %%mm4 \n\t"\ | |||||
| "por %%mm3, %%mm1 \n\t"\ | |||||
| "por %%mm4, %%mm2 \n\t"\ | |||||
| "movq %%mm1, %%mm5 \n\t"\ | |||||
| PAVGB(%%mm2, %%mm1)\ | |||||
| PAVGB(%%mm1, %%mm0)\ | |||||
| "psllq $8, %%mm5 \n\t"\ | |||||
| "psrlq $8, %%mm2 \n\t"\ | |||||
| "por %%mm3, %%mm5 \n\t"\ | |||||
| "por %%mm4, %%mm2 \n\t"\ | |||||
| "movq %%mm5, %%mm1 \n\t"\ | |||||
| PAVGB(%%mm2, %%mm5)\ | |||||
| "psllq $8, %%mm1 \n\t"\ | |||||
| "psrlq $8, %%mm2 \n\t"\ | |||||
| "por %%mm3, %%mm1 \n\t"\ | |||||
| "por %%mm4, %%mm2 \n\t"\ | |||||
| PAVGB(%%mm2, %%mm1)\ | |||||
| PAVGB(%%mm1, %%mm5)\ | |||||
| PAVGB(%%mm5, %%mm0)\ | |||||
| "movd %%mm0, (%0) \n\t"\ | |||||
| "psrlq $32, %%mm0 \n\t"\ | |||||
| "movd %%mm0, 4(%0) \n\t" | |||||
| /* uses the 9-Tap Filter: 112242211 */ | |||||
| #define NEW_HLP2(i)\ | |||||
| "movq " #i "(%%eax), %%mm0 \n\t" /*0001000*/\ | |||||
| "movq %%mm0, %%mm1 \n\t" /*0001000*/\ | |||||
| "movq %%mm0, %%mm2 \n\t" /*0001000*/\ | |||||
| "movd -4(%0), %%mm3 \n\t" /*0001000*/\ | |||||
| "movd 8(%0), %%mm4 \n\t" /*0001000*/\ | |||||
| "psllq $8, %%mm1 \n\t"\ | |||||
| "psrlq $8, %%mm2 \n\t"\ | |||||
| "psrlq $24, %%mm3 \n\t"\ | |||||
| "psllq $56, %%mm4 \n\t"\ | |||||
| "por %%mm3, %%mm1 \n\t" /*0010000*/\ | |||||
| "por %%mm4, %%mm2 \n\t" /*0000100*/\ | |||||
| "movq %%mm1, %%mm5 \n\t" /*0010000*/\ | |||||
| PAVGB(%%mm2, %%mm1) /*0010100*/\ | |||||
| PAVGB(%%mm1, %%mm0) /*0012100*/\ | |||||
| "psllq $8, %%mm5 \n\t"\ | |||||
| "psrlq $8, %%mm2 \n\t"\ | |||||
| "por %%mm3, %%mm5 \n\t" /*0100000*/\ | |||||
| "por %%mm4, %%mm2 \n\t" /*0000010*/\ | |||||
| "movq %%mm5, %%mm1 \n\t" /*0100000*/\ | |||||
| PAVGB(%%mm2, %%mm5) /*0100010*/\ | |||||
| "psllq $8, %%mm1 \n\t"\ | |||||
| "psrlq $8, %%mm2 \n\t"\ | |||||
| "por %%mm3, %%mm1 \n\t" /*1000000*/\ | |||||
| "por %%mm4, %%mm2 \n\t" /*0000001*/\ | |||||
| "movq %%mm1, %%mm6 \n\t" /*1000000*/\ | |||||
| PAVGB(%%mm2, %%mm1) /*1000001*/\ | |||||
| "psllq $8, %%mm6 \n\t"\ | |||||
| "psrlq $8, %%mm2 \n\t"\ | |||||
| "por %%mm3, %%mm6 \n\t"/*100000000*/\ | |||||
| "por %%mm4, %%mm2 \n\t"/*000000001*/\ | |||||
| PAVGB(%%mm2, %%mm6) /*100000001*/\ | |||||
| PAVGB(%%mm6, %%mm1) /*110000011*/\ | |||||
| PAVGB(%%mm1, %%mm5) /*112000211*/\ | |||||
| PAVGB(%%mm5, %%mm0) /*112242211*/\ | |||||
| "movd %%mm0, (%0) \n\t"\ | |||||
| "psrlq $32, %%mm0 \n\t"\ | |||||
| "movd %%mm0, 4(%0) \n\t" | |||||
| #define HLP(i) NEW_HLP(i) | |||||
| HLP(0) | HLP(0) | ||||
| "addl %1, %0 \n\t" | "addl %1, %0 \n\t" | ||||
| @@ -1828,6 +1899,363 @@ FIND_MIN_MAX(%%ebx, %1, 2) | |||||
| #endif | #endif | ||||
| } | } | ||||
/**
 * Deinterlaces the given block by linear interpolation.
 * Lines 1, 3, 5 and 7 are each replaced by the average of the line directly
 * above and the line directly below; the even lines are left untouched.
 * will be called for every 8x8 block, except the last row, and can read & write into an 8x16 block
 * (line 8, the first line below the 8x8 block, is read but not written)
 *
 * src    points to the top left pixel of the 8x8 block
 * stride is the distance in bytes between two vertically adjacent pixels
 */
static inline void deInterlaceInterpolateLinear(uint8_t src[], int stride)
{
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
	asm volatile(
		"leal (%0, %1), %%eax				\n\t"
		"leal (%%eax, %1, 4), %%ebx			\n\t"
//	0	1	2	3	4	5	6	7	8	9
//	%0	eax	eax+%1	eax+2%1	%0+4%1	ebx	ebx+%1	ebx+2%1	%0+8%1	ebx+4%1

		"movq (%0), %%mm0				\n\t" // L0
		"movq (%%eax, %1), %%mm1			\n\t" // L2
		PAVGB(%%mm1, %%mm0)				      // L0+L2
		"movq %%mm0, (%%eax)				\n\t" // -> L1
		"movq (%0, %1, 4), %%mm0			\n\t" // L4
		PAVGB(%%mm0, %%mm1)				      // L2+L4
		"movq %%mm1, (%%eax, %1, 2)			\n\t" // -> L3
		"movq (%%ebx, %1), %%mm1			\n\t" // L6
		PAVGB(%%mm1, %%mm0)				      // L4+L6
		"movq %%mm0, (%%ebx)				\n\t" // -> L5
		"movq (%0, %1, 8), %%mm0			\n\t" // L8
		PAVGB(%%mm0, %%mm1)				      // L6+L8
		"movq %%mm1, (%%ebx, %1, 2)			\n\t" // -> L7

		: : "r" (src), "r" (stride)
		// "memory": the asm reads and writes the pixel block, which is not an operand
		: "%eax", "%ebx", "memory"
	);
#else
	int col;
	for(col=0; col<8; col++)
	{
		uint8_t *p= src + col;
		// every odd line becomes the (truncated) mean of its two even neighbours
		p[stride  ] = (p[0       ] + p[stride*2])>>1;
		p[stride*3] = (p[stride*2] + p[stride*4])>>1;
		p[stride*5] = (p[stride*4] + p[stride*6])>>1;
		p[stride*7] = (p[stride*6] + p[stride*8])>>1;
	}
#endif
}
/**
 * Deinterlaces the given block by linear interpolation (variant for the last block row).
 * Lines 1, 3 and 5 are replaced by the average of their upper and lower neighbour;
 * line 7 has no line below it inside the block and is replaced by a copy of line 6.
 * will be called for every 8x8 block, in the last row, and can read & write into an 8x8 block
 *
 * src    points to the top left pixel of the 8x8 block
 * stride is the distance in bytes between two vertically adjacent pixels
 */
static inline void deInterlaceInterpolateLinearLastRow(uint8_t src[], int stride)
{
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
	asm volatile(
		"leal (%0, %1), %%eax				\n\t"
		"leal (%%eax, %1, 4), %%ebx			\n\t"
//	0	1	2	3	4	5	6	7	8	9
//	%0	eax	eax+%1	eax+2%1	%0+4%1	ebx	ebx+%1	ebx+2%1	%0+8%1	ebx+4%1

		"movq (%0), %%mm0				\n\t" // L0
		"movq (%%eax, %1), %%mm1			\n\t" // L2
		PAVGB(%%mm1, %%mm0)				      // L0+L2
		"movq %%mm0, (%%eax)				\n\t" // -> L1
		"movq (%0, %1, 4), %%mm0			\n\t" // L4
		PAVGB(%%mm0, %%mm1)				      // L2+L4
		"movq %%mm1, (%%eax, %1, 2)			\n\t" // -> L3
		"movq (%%ebx, %1), %%mm1			\n\t" // L6
		PAVGB(%%mm1, %%mm0)				      // L4+L6
		"movq %%mm0, (%%ebx)				\n\t" // -> L5
		"movq %%mm1, (%%ebx, %1, 2)			\n\t" // L6 -> L7

		: : "r" (src), "r" (stride)
		// "memory": the asm reads and writes the pixel block, which is not an operand
		: "%eax", "%ebx", "memory"
	);
#else
	int col;
	for(col=0; col<8; col++)
	{
		uint8_t *p= src + col;
		p[stride  ] = (p[0       ] + p[stride*2])>>1;
		p[stride*3] = (p[stride*2] + p[stride*4])>>1;
		p[stride*5] = (p[stride*4] + p[stride*6])>>1;
		// no line 8 available in the last row: duplicate line 6
		p[stride*7] = p[stride*6];
	}
#endif
}
/**
 * Deinterlaces the given block with a vertical (1,2,1)/4 blend.
 * Output line n is computed from input lines n, n+1 and n+2, so lines 8 and 9
 * (below the 8x8 block) are read but not written.
 * will be called for every 8x8 block, except the last row, and can read & write into an 8x16 block
 * will shift the image up by 1 line (FIXME if this is a problem)
 *
 * src    points to the top left pixel of the 8x8 block
 * stride is the distance in bytes between two vertically adjacent pixels
 */
static inline void deInterlaceBlendLinear(uint8_t src[], int stride)
{
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
	asm volatile(
		"leal (%0, %1), %%eax				\n\t"
		"leal (%%eax, %1, 4), %%ebx			\n\t"
//	0	1	2	3	4	5	6	7	8	9
//	%0	eax	eax+%1	eax+2%1	%0+4%1	ebx	ebx+%1	ebx+2%1	%0+8%1	ebx+4%1

		"movq (%0), %%mm0				\n\t" // L0
		"movq (%%eax, %1), %%mm1			\n\t" // L2
		PAVGB(%%mm1, %%mm0)				      // L0+L2
		"movq (%%eax), %%mm2				\n\t" // L1
		PAVGB(%%mm2, %%mm0)				      // 2L1 + L0 + L2
		"movq %%mm0, (%0)				\n\t"
		"movq (%%eax, %1, 2), %%mm0			\n\t" // L3
		PAVGB(%%mm0, %%mm2)				      // L1+L3
		PAVGB(%%mm1, %%mm2)				      // 2L2 + L1 + L3
		"movq %%mm2, (%%eax)				\n\t"
		"movq (%0, %1, 4), %%mm2			\n\t" // L4
		PAVGB(%%mm2, %%mm1)				      // L2+L4
		PAVGB(%%mm0, %%mm1)				      // 2L3 + L2 + L4
		"movq %%mm1, (%%eax, %1)			\n\t"
		"movq (%%ebx), %%mm1				\n\t" // L5
		PAVGB(%%mm1, %%mm0)				      // L3+L5
		PAVGB(%%mm2, %%mm0)				      // 2L4 + L3 + L5
		"movq %%mm0, (%%eax, %1, 2)			\n\t"
		"movq (%%ebx, %1), %%mm0			\n\t" // L6
		PAVGB(%%mm0, %%mm2)				      // L4+L6
		PAVGB(%%mm1, %%mm2)				      // 2L5 + L4 + L6
		"movq %%mm2, (%0, %1, 4)			\n\t"
		"movq (%%ebx, %1, 2), %%mm2			\n\t" // L7
		PAVGB(%%mm2, %%mm1)				      // L5+L7
		PAVGB(%%mm0, %%mm1)				      // 2L6 + L5 + L7
		"movq %%mm1, (%%ebx)				\n\t"
		"movq (%0, %1, 8), %%mm1			\n\t" // L8
		PAVGB(%%mm1, %%mm0)				      // L6+L8
		PAVGB(%%mm2, %%mm0)				      // 2L7 + L6 + L8
		"movq %%mm0, (%%ebx, %1)			\n\t"
		"movq (%%ebx, %1, 4), %%mm0			\n\t" // L9
		PAVGB(%%mm0, %%mm2)				      // L7+L9
		PAVGB(%%mm1, %%mm2)				      // 2L8 + L7 + L9
		"movq %%mm2, (%%ebx, %1, 2)			\n\t"

		: : "r" (src), "r" (stride)
		// "memory": the asm reads and writes the pixel block, which is not an operand
		: "%eax", "%ebx", "memory"
	);
#else
	int col;
	for(col=0; col<8; col++)
	{
		uint8_t *p= src + col;
		int line;
		// in-place is safe: output line n only reads lines n..n+2, none of which
		// has been overwritten yet when line n is produced
		for(line=0; line<8; line++)
		{
			p[0] = (p[0] + 2*p[stride] + p[stride*2])>>2;
			p+= stride;
		}
	}
#endif
}
/**
 * Deinterlaces the given block with a vertical (1,2,1)/4 blend (variant for the last block row).
 * Output lines 0..5 are computed from input lines n, n+1 and n+2; as there are no
 * lines below line 7, lines 6 and 7 both become the average of input lines 6 and 7.
 * will be called for every 8x8 block, in the last row, and can read & write into an 8x8 block
 * will shift the image up by 1 line (FIXME if this is a problem)
 *
 * src    points to the top left pixel of the 8x8 block
 * stride is the distance in bytes between two vertically adjacent pixels
 */
static inline void deInterlaceBlendLinearLastRow(uint8_t src[], int stride)
{
// was "HAVE_MMSX2" (typo), which made MMX2-only builds fall back to the C code
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
	asm volatile(
		"leal (%0, %1), %%eax				\n\t"
		"leal (%%eax, %1, 4), %%ebx			\n\t"
//	0	1	2	3	4	5	6	7	8	9
//	%0	eax	eax+%1	eax+2%1	%0+4%1	ebx	ebx+%1	ebx+2%1	%0+8%1	ebx+4%1

		"movq (%0), %%mm0				\n\t" // L0
		"movq (%%eax, %1), %%mm1			\n\t" // L2
		PAVGB(%%mm1, %%mm0)				      // L0+L2
		"movq (%%eax), %%mm2				\n\t" // L1
		PAVGB(%%mm2, %%mm0)				      // 2L1 + L0 + L2
		"movq %%mm0, (%0)				\n\t"
		"movq (%%eax, %1, 2), %%mm0			\n\t" // L3
		PAVGB(%%mm0, %%mm2)				      // L1+L3
		PAVGB(%%mm1, %%mm2)				      // 2L2 + L1 + L3
		"movq %%mm2, (%%eax)				\n\t"
		"movq (%0, %1, 4), %%mm2			\n\t" // L4
		PAVGB(%%mm2, %%mm1)				      // L2+L4
		PAVGB(%%mm0, %%mm1)				      // 2L3 + L2 + L4
		"movq %%mm1, (%%eax, %1)			\n\t"
		"movq (%%ebx), %%mm1				\n\t" // L5
		PAVGB(%%mm1, %%mm0)				      // L3+L5
		PAVGB(%%mm2, %%mm0)				      // 2L4 + L3 + L5
		"movq %%mm0, (%%eax, %1, 2)			\n\t"
		"movq (%%ebx, %1), %%mm0			\n\t" // L6
		PAVGB(%%mm0, %%mm2)				      // L4+L6
		PAVGB(%%mm1, %%mm2)				      // 2L5 + L4 + L6
		"movq %%mm2, (%0, %1, 4)			\n\t"
		"movq (%%ebx, %1, 2), %%mm2			\n\t" // L7
		PAVGB(%%mm2, %%mm1)				      // L5+L7
		PAVGB(%%mm0, %%mm1)				      // 2L6 + L5 + L7
		"movq %%mm1, (%%ebx)				\n\t"
		PAVGB(%%mm2, %%mm0)				      // L6 + L7
		"movq %%mm0, (%%ebx, %1)			\n\t" // -> L6
		"movq %%mm0, (%%ebx, %1, 2)			\n\t" // -> L7

		: : "r" (src), "r" (stride)
		// "memory": the asm reads and writes the pixel block, which is not an operand
		: "%eax", "%ebx", "memory"
	);
#else
	int col;
	for(col=0; col<8; col++)
	{
		uint8_t *p= src + col;
		int line;
		// lines 0..5: regular (1,2,1)/4 blend of lines n, n+1, n+2
		for(line=0; line<6; line++)
		{
			p[0] = (p[0] + 2*p[stride] + p[stride*2])>>2;
			p+= stride;
		}
		// p now points at line 6: average it with line 7 and duplicate the result
		p[0     ] = (p[0] + p[stride])>>1;
		p[stride] = p[0];
	}
#endif
}
/**
 * Deinterlaces the given block with a vertical 3-tap median filter.
 * Lines 1, 3, 5 and 7 are each replaced by the per-pixel median of the line above,
 * the line itself and the line below; the even lines are left untouched.
 * will be called for every 8x8 block, except the last row, and can read & write into an 8x16 block
 * (line 8, the first line below the 8x8 block, is read but not written)
 *
 * src    points to the top left pixel of the 8x8 block
 * stride is the distance in bytes between two vertically adjacent pixels
 */
static inline void deInterlaceMedian(uint8_t src[], int stride)
{
#if defined (HAVE_MMX2)
	asm volatile(
		"leal (%0, %1), %%eax				\n\t"
		"leal (%%eax, %1, 4), %%ebx			\n\t"
//	0	1	2	3	4	5	6	7	8	9
//	%0	eax	eax+%1	eax+2%1	%0+4%1	ebx	ebx+%1	ebx+2%1	%0+8%1	ebx+4%1

		// median(a,b,c) = min( max(a,b), max(c, min(a,b)) )
		"movq (%0), %%mm0				\n\t" // L0
		"movq (%%eax, %1), %%mm2			\n\t" // L2
		"movq (%%eax), %%mm1				\n\t" // L1
		"movq %%mm0, %%mm3				\n\t"
		"pmaxub %%mm1, %%mm0				\n\t" // max(L0,L1)
		"pminub %%mm3, %%mm1				\n\t" // min(L0,L1)
		"pmaxub %%mm2, %%mm1				\n\t" // max(L2, min(L0,L1))
		"pminub %%mm1, %%mm0				\n\t" // median(L0,L1,L2)
		"movq %%mm0, (%%eax)				\n\t" // -> L1

		"movq (%0, %1, 4), %%mm0			\n\t" // L4
		"movq (%%eax, %1, 2), %%mm1			\n\t" // L3
		"movq %%mm2, %%mm3				\n\t"
		"pmaxub %%mm1, %%mm2				\n\t" // max(L2,L3)
		"pminub %%mm3, %%mm1				\n\t" // min(L2,L3)
		"pmaxub %%mm0, %%mm1				\n\t" // max(L4, min(L2,L3))
		"pminub %%mm1, %%mm2				\n\t" // median(L2,L3,L4)
		"movq %%mm2, (%%eax, %1, 2)			\n\t" // -> L3

		"movq (%%ebx), %%mm2				\n\t" // L5
		"movq (%%ebx, %1), %%mm1			\n\t" // L6
		"movq %%mm2, %%mm3				\n\t"
		"pmaxub %%mm0, %%mm2				\n\t" // max(L4,L5)
		"pminub %%mm3, %%mm0				\n\t" // min(L4,L5)
		"pmaxub %%mm1, %%mm0				\n\t" // max(L6, min(L4,L5))
		"pminub %%mm0, %%mm2				\n\t" // median(L4,L5,L6)
		"movq %%mm2, (%%ebx)				\n\t" // -> L5

		"movq (%%ebx, %1, 2), %%mm2			\n\t" // L7
		"movq (%0, %1, 8), %%mm0			\n\t" // L8
		"movq %%mm2, %%mm3				\n\t"
		"pmaxub %%mm0, %%mm2				\n\t" // max(L7,L8)
		"pminub %%mm3, %%mm0				\n\t" // min(L7,L8)
		"pmaxub %%mm1, %%mm0				\n\t" // max(L6, min(L7,L8))
		"pminub %%mm0, %%mm2				\n\t" // median(L6,L7,L8)
		"movq %%mm2, (%%ebx, %1, 2)			\n\t" // -> L7

		: : "r" (src), "r" (stride)
		// "memory": the asm reads and writes the pixel block, which is not an operand
		: "%eax", "%ebx", "memory"
	);
#else
	// C version of the same filter as the MMX2 code above
	// (the previous fallback was a copy of the linear blend, not a median)
	int col;
	for(col=0; col<8; col++)
	{
		uint8_t *p= src + col;
		int pair;
		for(pair=0; pair<4; pair++)
		{
			const int above= p[0];
			const int cur  = p[stride];
			const int below= p[stride*2];
			const int lo= above < cur ? above : cur;
			const int hi= above < cur ? cur : above;
			// median of three == third value clamped to [lo, hi] of the other two
			p[stride]= below < lo ? lo : (below > hi ? hi : below);
			p+= stride*2;
		}
	}
#endif
}
/**
 * Deinterlaces the given block with a vertical 3-tap median filter (variant for the last block row).
 * Lines 1, 3 and 5 are each replaced by the per-pixel median of the line above,
 * the line itself and the line below; line 7 has no line below it inside the block
 * and is replaced by a copy of line 6. The even lines are left untouched.
 * will be called for every 8x8 block, in the last row, and can read & write into an 8x8 block
 *
 * src    points to the top left pixel of the 8x8 block
 * stride is the distance in bytes between two vertically adjacent pixels
 */
static inline void deInterlaceMedianLastRow(uint8_t src[], int stride)
{
#if defined (HAVE_MMX2)
	asm volatile(
		"leal (%0, %1), %%eax				\n\t"
		"leal (%%eax, %1, 4), %%ebx			\n\t"
//	0	1	2	3	4	5	6	7	8	9
//	%0	eax	eax+%1	eax+2%1	%0+4%1	ebx	ebx+%1	ebx+2%1	%0+8%1	ebx+4%1

		// median(a,b,c) = min( max(a,b), max(c, min(a,b)) )
		"movq (%0), %%mm0				\n\t" // L0
		"movq (%%eax, %1), %%mm2			\n\t" // L2
		"movq (%%eax), %%mm1				\n\t" // L1
		"movq %%mm0, %%mm3				\n\t"
		"pmaxub %%mm1, %%mm0				\n\t" // max(L0,L1)
		"pminub %%mm3, %%mm1				\n\t" // min(L0,L1)
		"pmaxub %%mm2, %%mm1				\n\t" // max(L2, min(L0,L1))
		"pminub %%mm1, %%mm0				\n\t" // median(L0,L1,L2)
		"movq %%mm0, (%%eax)				\n\t" // -> L1

		"movq (%0, %1, 4), %%mm0			\n\t" // L4
		"movq (%%eax, %1, 2), %%mm1			\n\t" // L3
		"movq %%mm2, %%mm3				\n\t"
		"pmaxub %%mm1, %%mm2				\n\t" // max(L2,L3)
		"pminub %%mm3, %%mm1				\n\t" // min(L2,L3)
		"pmaxub %%mm0, %%mm1				\n\t" // max(L4, min(L2,L3))
		"pminub %%mm1, %%mm2				\n\t" // median(L2,L3,L4)
		"movq %%mm2, (%%eax, %1, 2)			\n\t" // -> L3

		"movq (%%ebx), %%mm2				\n\t" // L5
		"movq (%%ebx, %1), %%mm1			\n\t" // L6
		"movq %%mm2, %%mm3				\n\t"
		"pmaxub %%mm0, %%mm2				\n\t" // max(L4,L5)
		"pminub %%mm3, %%mm0				\n\t" // min(L4,L5)
		"pmaxub %%mm1, %%mm0				\n\t" // max(L6, min(L4,L5))
		"pminub %%mm0, %%mm2				\n\t" // median(L4,L5,L6)
		"movq %%mm2, (%%ebx)				\n\t" // -> L5

		"movq %%mm1, (%%ebx, %1, 2)			\n\t" // L6 -> L7

		: : "r" (src), "r" (stride)
		// "memory": the asm reads and writes the pixel block, which is not an operand
		: "%eax", "%ebx", "memory"
	);
#else
	// C version of the same filter as the MMX2 code above
	// (the previous fallback was a copy of the linear blend, not a median)
	int col;
	for(col=0; col<8; col++)
	{
		uint8_t *p= src + col;
		int pair;
		for(pair=0; pair<3; pair++)
		{
			const int above= p[0];
			const int cur  = p[stride];
			const int below= p[stride*2];
			const int lo= above < cur ? above : cur;
			const int hi= above < cur ? cur : above;
			// median of three == third value clamped to [lo, hi] of the other two
			p[stride]= below < lo ? lo : (below > hi ? hi : below);
			p+= stride*2;
		}
		// p now points at line 6; no line 8 in the last row, so duplicate line 6
		p[stride]= p[0];
	}
#endif
}
| #ifdef HAVE_ODIVX_POSTPROCESS | #ifdef HAVE_ODIVX_POSTPROCESS | ||||
| #include "../opendivx/postprocess.h" | #include "../opendivx/postprocess.h" | ||||
| int use_old_pp=0; | int use_old_pp=0; | ||||
| @@ -1841,7 +2269,6 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri | |||||
| * the mode value is interpreted as a quality value if its negative, its range is then (-1 ... -63) | * the mode value is interpreted as a quality value if its negative, its range is then (-1 ... -63) | ||||
| * -63 is best quality -1 is worst | * -63 is best quality -1 is worst | ||||
| */ | */ | ||||
| //extern "C"{ | |||||
| void postprocess(unsigned char * src[], int src_stride, | void postprocess(unsigned char * src[], int src_stride, | ||||
| unsigned char * dst[], int dst_stride, | unsigned char * dst[], int dst_stride, | ||||
| int horizontal_size, int vertical_size, | int horizontal_size, int vertical_size, | ||||
| @@ -2196,6 +2623,17 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri | |||||
| blockCopy(vertBlock + dstStride*2, dstStride, | blockCopy(vertBlock + dstStride*2, dstStride, | ||||
| vertSrcBlock + srcStride*2, srcStride, 8, mode & LEVEL_FIX); | vertSrcBlock + srcStride*2, srcStride, 8, mode & LEVEL_FIX); | ||||
| if(mode & LINEAR_IPOL_DEINT_FILTER) | |||||
| deInterlaceInterpolateLinear(dstBlock, dstStride); | |||||
| else if(mode & LINEAR_BLEND_DEINT_FILTER) | |||||
| deInterlaceBlendLinear(dstBlock, dstStride); | |||||
| else if(mode & MEDIAN_DEINT_FILTER) | |||||
| deInterlaceMedian(dstBlock, dstStride); | |||||
| /* else if(mode & CUBIC_IPOL_DEINT_FILTER) | |||||
| deInterlaceInterpolateCubic(dstBlock, dstStride); | |||||
| else if(mode & CUBIC_BLEND_DEINT_FILTER) | |||||
| deInterlaceBlendCubic(dstBlock, dstStride); | |||||
| */ | |||||
| #ifdef MORE_TIMEING | #ifdef MORE_TIMEING | ||||
| T1= rdtsc(); | T1= rdtsc(); | ||||
| @@ -2226,9 +2664,22 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri | |||||
| #endif | #endif | ||||
| } | } | ||||
| else | else | ||||
| { | |||||
| blockCopy(vertBlock + dstStride*1, dstStride, | blockCopy(vertBlock + dstStride*1, dstStride, | ||||
| vertSrcBlock + srcStride*1, srcStride, 4, mode & LEVEL_FIX); | vertSrcBlock + srcStride*1, srcStride, 4, mode & LEVEL_FIX); | ||||
| if(mode & LINEAR_IPOL_DEINT_FILTER) | |||||
| deInterlaceInterpolateLinearLastRow(dstBlock, dstStride); | |||||
| else if(mode & LINEAR_BLEND_DEINT_FILTER) | |||||
| deInterlaceBlendLinearLastRow(dstBlock, dstStride); | |||||
| else if(mode & MEDIAN_DEINT_FILTER) | |||||
| deInterlaceMedianLastRow(dstBlock, dstStride); | |||||
| /* else if(mode & CUBIC_IPOL_DEINT_FILTER) | |||||
| deInterlaceInterpolateCubicLastRow(dstBlock, dstStride); | |||||
| else if(mode & CUBIC_BLEND_DEINT_FILTER) | |||||
| deInterlaceBlendCubicLastRow(dstBlock, dstStride); | |||||
| */ | |||||
| } | |||||
| if(x - 8 >= 0 && x<width) | if(x - 8 >= 0 && x<width) | ||||
| { | { | ||||
| @@ -22,6 +22,7 @@ | |||||
| #define BLOCK_SIZE 8 | #define BLOCK_SIZE 8 | ||||
| #define TEMP_STRIDE 8 | #define TEMP_STRIDE 8 | ||||
| //#define NUM_BLOCKS_AT_ONCE 16 //not used yet | |||||
| #define V_DEBLOCK 0x01 | #define V_DEBLOCK 0x01 | ||||
| #define H_DEBLOCK 0x02 | #define H_DEBLOCK 0x02 | ||||
| @@ -32,20 +33,28 @@ | |||||
| #define LUM_H_DEBLOCK H_DEBLOCK // 2 | #define LUM_H_DEBLOCK H_DEBLOCK // 2 | ||||
| #define CHROM_V_DEBLOCK (V_DEBLOCK<<4) // 16 | #define CHROM_V_DEBLOCK (V_DEBLOCK<<4) // 16 | ||||
| #define CHROM_H_DEBLOCK (H_DEBLOCK<<4) // 32 | #define CHROM_H_DEBLOCK (H_DEBLOCK<<4) // 32 | ||||
| #define LUM_DERING DERING // 4 | |||||
| #define CHROM_DERING (DERING<<4) // 64 | |||||
| #define LUM_DERING DERING // 4 (not implemented yet) | |||||
| #define CHROM_DERING (DERING<<4) // 64 (not implemented yet) | |||||
| #define LUM_LEVEL_FIX LEVEL_FIX // 8 | #define LUM_LEVEL_FIX LEVEL_FIX // 8 | ||||
| //not supported currently | |||||
| #define CHROM_LEVEL_FIX (LEVEL_FIX<<4) // 128 | |||||
| #define CHROM_LEVEL_FIX (LEVEL_FIX<<4) // 128 (not implemented yet) | |||||
| // Experimental vertical filters | // Experimental vertical filters | ||||
| #define V_RK1_FILTER 0x0100 // 256 | #define V_RK1_FILTER 0x0100 // 256 | ||||
| #define V_X1_FILTER 0x0200 // 512 | #define V_X1_FILTER 0x0200 // 512 | ||||
| // Experimental horizontal filters | // Experimental horizontal filters | ||||
| #define H_RK1_FILTER 0x1000 // 4096 | |||||
| #define H_RK1_FILTER 0x1000 // 4096 (not implemented yet) | |||||
| #define H_X1_FILTER 0x2000 // 8192 | #define H_X1_FILTER 0x2000 // 8192 | ||||
| //Deinterlacing Filters: selector values inside DEINTERLACE_FILTER_MASK, NOT independent bit flags (0x30000 == 0x10000 + 0x20000) | |||||
| #define DEINTERLACE_FILTER_MASK 0xF0000 | |||||
| #define LINEAR_IPOL_DEINT_FILTER 0x10000 // 65536 | |||||
| #define LINEAR_BLEND_DEINT_FILTER 0x20000 // 131072 | |||||
| #define CUBIC_BLEND_DEINT_FILTER 0x30000 // 196608 (not implemented yet); shares bits with both linear filters, so a plain "mode & LINEAR_..." test also matches this value | |||||
| #define CUBIC_IPOL_DEINT_FILTER 0x40000 // 262144 (not implemented yet) | |||||
| #define MEDIAN_DEINT_FILTER 0x80000 // 524288 | |||||
| #define GET_PP_QUALITY_MAX 6 | #define GET_PP_QUALITY_MAX 6 | ||||
| //#define TIMEING | //#define TIMEING | ||||
| @@ -53,18 +62,6 @@ | |||||
| #define QP_STORE_T int | #define QP_STORE_T int | ||||
| //#ifdef __cplusplus | |||||
| //#include <inttypes.h> | |||||
| //void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, | |||||
| // QP_STORE_T QPs[], int QPStride, int isColor, int mode); | |||||
| //#endif | |||||
| //#ifdef __cplusplus | |||||
| //extern "C" | |||||
| //{ | |||||
| //#endif | |||||
| void postprocess(unsigned char * src[], int src_stride, | void postprocess(unsigned char * src[], int src_stride, | ||||
| unsigned char * dst[], int dst_stride, | unsigned char * dst[], int dst_stride, | ||||
| int horizontal_size, int vertical_size, | int horizontal_size, int vertical_size, | ||||
| @@ -72,8 +69,4 @@ void postprocess(unsigned char * src[], int src_stride, | |||||
| int getPpModeForQuality(int quality); | int getPpModeForQuality(int quality); | ||||
| //#ifdef __cplusplus | |||||
| //} | |||||
| //#endif | |||||
| #endif | #endif | ||||
| @@ -17,19 +17,22 @@ | |||||
| */ | */ | ||||
| /* | /* | ||||
| C MMX MMX2 3DNow* | |||||
| C MMX MMX2 3DNow | |||||
| isVertDC Ec Ec | isVertDC Ec Ec | ||||
| isVertMinMaxOk Ec Ec | isVertMinMaxOk Ec Ec | ||||
| doVertLowPass E e e* | |||||
| doVertLowPass E e e | |||||
| doVertDefFilter Ec Ec Ec | doVertDefFilter Ec Ec Ec | ||||
| isHorizDC Ec Ec | isHorizDC Ec Ec | ||||
| isHorizMinMaxOk a | isHorizMinMaxOk a | ||||
| doHorizLowPass E a a* | |||||
| doHorizLowPass E a a | |||||
| doHorizDefFilter E ac ac | doHorizDefFilter E ac ac | ||||
| deRing | deRing | ||||
| Vertical RKAlgo1 E a a* | |||||
| Vertical X1 a E E* | |||||
| Horizontal X1 a E E* | |||||
| Vertical RKAlgo1 E a a | |||||
| Vertical X1 a E E | |||||
| Horizontal X1 a E E | |||||
| LinIpolDeinterlace a E E* | |||||
| LinBlendDeinterlace a E E* | |||||
| MedianDeinterlace a E | |||||
| * i dont have a 3dnow CPU -> its untested | * i dont have a 3dnow CPU -> its untested | ||||
| @@ -55,6 +58,7 @@ make the mainloop more flexible (variable number of blocks at once | |||||
| compare the quality & speed of all filters | compare the quality & speed of all filters | ||||
| implement a few simple deinterlacing filters | implement a few simple deinterlacing filters | ||||
| split this huge file | split this huge file | ||||
| fix warnings (unused vars, ...) | |||||
| ... | ... | ||||
| Notes: | Notes: | ||||
| @@ -63,6 +67,9 @@ Notes: | |||||
| /* | /* | ||||
| Changelog: use the CVS log | Changelog: use the CVS log | ||||
| rewrote the horizontal lowpass filter to fix a bug which caused a blocky look | |||||
| added deinterlace filters (linear interpolate, linear blend, median) | |||||
| minor cleanups (removed some outcommented stuff) | |||||
| 0.1.3 | 0.1.3 | ||||
| bugfixes: last 3 lines not brightness/contrast corrected | bugfixes: last 3 lines not brightness/contrast corrected | ||||
| brightness statistics messed up with initial black pic | brightness statistics messed up with initial black pic | ||||
| @@ -194,13 +201,11 @@ static inline void prefetcht2(void *p) | |||||
| * Check if the middle 8x8 Block in the given 8x10 block is flat | * Check if the middle 8x8 Block in the given 8x10 block is flat | ||||
| */ | */ | ||||
| static inline int isVertDC(uint8_t src[], int stride){ | static inline int isVertDC(uint8_t src[], int stride){ | ||||
| // return true; | |||||
| int numEq= 0; | int numEq= 0; | ||||
| int y; | int y; | ||||
| src+= stride; // src points to begin of the 8x8 Block | src+= stride; // src points to begin of the 8x8 Block | ||||
| #ifdef HAVE_MMX | #ifdef HAVE_MMX | ||||
| asm volatile( | asm volatile( | ||||
| // "int $3 \n\t" | |||||
| "pushl %1\n\t" | "pushl %1\n\t" | ||||
| "movq b7E, %%mm7 \n\t" // mm7 = 0x7F | "movq b7E, %%mm7 \n\t" // mm7 = 0x7F | ||||
| "movq b7C, %%mm6 \n\t" // mm6 = 0x7D | "movq b7C, %%mm6 \n\t" // mm6 = 0x7D | ||||
| @@ -1577,9 +1582,9 @@ static inline void doHorizDefFilterAndCopyBack(uint8_t dst[], int stride, int QP | |||||
| } | } | ||||
| /** | /** | ||||
| * Do a horizontal low pass filter on the 8x8 block | |||||
| * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block) | |||||
| * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version) | * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version) | ||||
| * using approximately the 7-Tap Filter (1,2,3,4,3,2,1)/16 (MMX2/3DNOW version) | |||||
| * using the 7-Tap Filter (2,2,2,4,2,2,2)/16 (MMX2/3DNOW version) | |||||
| */ | */ | ||||
| static inline void doHorizLowPassAndCopyBack(uint8_t dst[], int stride, int QP) | static inline void doHorizLowPassAndCopyBack(uint8_t dst[], int stride, int QP) | ||||
| { | { | ||||
| @@ -1635,14 +1640,6 @@ static inline void doHorizLowPassAndCopyBack(uint8_t dst[], int stride, int QP) | |||||
| */ | */ | ||||
| // approximately a 7-Tap Filter with Vector (1,2,3,4,3,2,1)/16 | // approximately a 7-Tap Filter with Vector (1,2,3,4,3,2,1)/16 | ||||
| /* | /* | ||||
| 31 | |||||
| 121 | |||||
| 121 | |||||
| 121 | |||||
| 121 | |||||
| 121 | |||||
| 121 | |||||
| 13 | |||||
| Implemented Exact 7-Tap | Implemented Exact 7-Tap | ||||
| 9421 A321 | 9421 A321 | ||||
| 36421 64321 | 36421 64321 | ||||
| @@ -1654,6 +1651,7 @@ Implemented Exact 7-Tap | |||||
| 1249 123A | 1249 123A | ||||
| */ | */ | ||||
| #ifdef HAVE_MMX2 | #ifdef HAVE_MMX2 | ||||
| #define HLP3(i) "movq " #i "(%%eax), %%mm0 \n\t"\ | #define HLP3(i) "movq " #i "(%%eax), %%mm0 \n\t"\ | ||||
| "movq %%mm0, %%mm1 \n\t"\ | "movq %%mm0, %%mm1 \n\t"\ | ||||
| @@ -1680,12 +1678,12 @@ Implemented Exact 7-Tap | |||||
| #define HLP3(i) "movq " #i "(%%eax), %%mm0 \n\t"\ | #define HLP3(i) "movq " #i "(%%eax), %%mm0 \n\t"\ | ||||
| "movq %%mm0, %%mm1 \n\t"\ | "movq %%mm0, %%mm1 \n\t"\ | ||||
| "movq %%mm0, %%mm2 \n\t"\ | "movq %%mm0, %%mm2 \n\t"\ | ||||
| "movq %%mm0, %%mm3 \n\t"\ | |||||
| "movq %%mm0, %%mm4 \n\t"\ | |||||
| "movd -4(%0), %%mm3 \n\t" /*0001000*/\ | |||||
| "movd 8(%0), %%mm4 \n\t" /*0001000*/\ | |||||
| "psllq $8, %%mm1 \n\t"\ | "psllq $8, %%mm1 \n\t"\ | ||||
| "psrlq $8, %%mm2 \n\t"\ | "psrlq $8, %%mm2 \n\t"\ | ||||
| "pand bm00000001, %%mm3 \n\t"\ | |||||
| "pand bm10000000, %%mm4 \n\t"\ | |||||
| "psrlq $24, %%mm3 \n\t"\ | |||||
| "psllq $56, %%mm4 \n\t"\ | |||||
| "por %%mm3, %%mm1 \n\t"\ | "por %%mm3, %%mm1 \n\t"\ | ||||
| "por %%mm4, %%mm2 \n\t"\ | "por %%mm4, %%mm2 \n\t"\ | ||||
| PAVGB(%%mm2, %%mm1)\ | PAVGB(%%mm2, %%mm1)\ | ||||
| @@ -1708,7 +1706,80 @@ Implemented Exact 7-Tap | |||||
| "movd %%mm0, 4(%0) \n\t" | "movd %%mm0, 4(%0) \n\t" | ||||
| #endif | #endif | ||||
| #define HLP(i) HLP3(i) | |||||
| /* uses the 7-Tap Filter: 1112111 */ | |||||
| #define NEW_HLP(i)\ | |||||
| "movq " #i "(%%eax), %%mm0 \n\t"\ | |||||
| "movq %%mm0, %%mm1 \n\t"\ | |||||
| "movq %%mm0, %%mm2 \n\t"\ | |||||
| "movd -4(%0), %%mm3 \n\t" /*0001000*/\ | |||||
| "movd 8(%0), %%mm4 \n\t" /*0001000*/\ | |||||
| "psllq $8, %%mm1 \n\t"\ | |||||
| "psrlq $8, %%mm2 \n\t"\ | |||||
| "psrlq $24, %%mm3 \n\t"\ | |||||
| "psllq $56, %%mm4 \n\t"\ | |||||
| "por %%mm3, %%mm1 \n\t"\ | |||||
| "por %%mm4, %%mm2 \n\t"\ | |||||
| "movq %%mm1, %%mm5 \n\t"\ | |||||
| PAVGB(%%mm2, %%mm1)\ | |||||
| PAVGB(%%mm1, %%mm0)\ | |||||
| "psllq $8, %%mm5 \n\t"\ | |||||
| "psrlq $8, %%mm2 \n\t"\ | |||||
| "por %%mm3, %%mm5 \n\t"\ | |||||
| "por %%mm4, %%mm2 \n\t"\ | |||||
| "movq %%mm5, %%mm1 \n\t"\ | |||||
| PAVGB(%%mm2, %%mm5)\ | |||||
| "psllq $8, %%mm1 \n\t"\ | |||||
| "psrlq $8, %%mm2 \n\t"\ | |||||
| "por %%mm3, %%mm1 \n\t"\ | |||||
| "por %%mm4, %%mm2 \n\t"\ | |||||
| PAVGB(%%mm2, %%mm1)\ | |||||
| PAVGB(%%mm1, %%mm5)\ | |||||
| PAVGB(%%mm5, %%mm0)\ | |||||
| "movd %%mm0, (%0) \n\t"\ | |||||
| "psrlq $32, %%mm0 \n\t"\ | |||||
| "movd %%mm0, 4(%0) \n\t" | |||||
| /* uses the 9-Tap Filter: 112242211 */ | |||||
| #define NEW_HLP2(i)\ | |||||
| "movq " #i "(%%eax), %%mm0 \n\t" /*0001000*/\ | |||||
| "movq %%mm0, %%mm1 \n\t" /*0001000*/\ | |||||
| "movq %%mm0, %%mm2 \n\t" /*0001000*/\ | |||||
| "movd -4(%0), %%mm3 \n\t" /*0001000*/\ | |||||
| "movd 8(%0), %%mm4 \n\t" /*0001000*/\ | |||||
| "psllq $8, %%mm1 \n\t"\ | |||||
| "psrlq $8, %%mm2 \n\t"\ | |||||
| "psrlq $24, %%mm3 \n\t"\ | |||||
| "psllq $56, %%mm4 \n\t"\ | |||||
| "por %%mm3, %%mm1 \n\t" /*0010000*/\ | |||||
| "por %%mm4, %%mm2 \n\t" /*0000100*/\ | |||||
| "movq %%mm1, %%mm5 \n\t" /*0010000*/\ | |||||
| PAVGB(%%mm2, %%mm1) /*0010100*/\ | |||||
| PAVGB(%%mm1, %%mm0) /*0012100*/\ | |||||
| "psllq $8, %%mm5 \n\t"\ | |||||
| "psrlq $8, %%mm2 \n\t"\ | |||||
| "por %%mm3, %%mm5 \n\t" /*0100000*/\ | |||||
| "por %%mm4, %%mm2 \n\t" /*0000010*/\ | |||||
| "movq %%mm5, %%mm1 \n\t" /*0100000*/\ | |||||
| PAVGB(%%mm2, %%mm5) /*0100010*/\ | |||||
| "psllq $8, %%mm1 \n\t"\ | |||||
| "psrlq $8, %%mm2 \n\t"\ | |||||
| "por %%mm3, %%mm1 \n\t" /*1000000*/\ | |||||
| "por %%mm4, %%mm2 \n\t" /*0000001*/\ | |||||
| "movq %%mm1, %%mm6 \n\t" /*1000000*/\ | |||||
| PAVGB(%%mm2, %%mm1) /*1000001*/\ | |||||
| "psllq $8, %%mm6 \n\t"\ | |||||
| "psrlq $8, %%mm2 \n\t"\ | |||||
| "por %%mm3, %%mm6 \n\t"/*100000000*/\ | |||||
| "por %%mm4, %%mm2 \n\t"/*000000001*/\ | |||||
| PAVGB(%%mm2, %%mm6) /*100000001*/\ | |||||
| PAVGB(%%mm6, %%mm1) /*110000011*/\ | |||||
| PAVGB(%%mm1, %%mm5) /*112000211*/\ | |||||
| PAVGB(%%mm5, %%mm0) /*112242211*/\ | |||||
| "movd %%mm0, (%0) \n\t"\ | |||||
| "psrlq $32, %%mm0 \n\t"\ | |||||
| "movd %%mm0, 4(%0) \n\t" | |||||
| #define HLP(i) NEW_HLP(i) | |||||
| HLP(0) | HLP(0) | ||||
| "addl %1, %0 \n\t" | "addl %1, %0 \n\t" | ||||
| @@ -1828,6 +1899,363 @@ FIND_MIN_MAX(%%ebx, %1, 2) | |||||
| #endif | #endif | ||||
| } | } | ||||
/**
 * Deinterlaces the given block by linear interpolation.
 * Every odd line (1, 3, 5, 7) is overwritten with the average of the even
 * lines directly above and below it; the even lines are left untouched.
 * Will be called for every 8x8 block, except the last row, and can read & write
 * into an 8x16 block (line 8, the first line of the block below, is read).
 *
 * @param src    top-left pixel of the 8x8 block, modified in place
 * @param stride distance in bytes between two vertically adjacent pixels
 */
static inline void deInterlaceInterpolateLinear(uint8_t src[], int stride)
{
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
	// PAVGB is defined outside this block; the comments below assume that
	// PAVGB(a, b) leaves the byte-wise average of a and b in b
	// NOTE(review): confirm against its definition
	asm volatile(
		"leal (%0, %1), %%eax \n\t"
		"leal (%%eax, %1, 4), %%ebx \n\t"
//	line	0	1	2	3	4	5	6	7	8	9
//	addr	%0	eax	eax+%1	eax+2%1	%0+4%1	ebx	ebx+%1	ebx+2%1	%0+8%1	ebx+4%1

		"movq (%0), %%mm0 \n\t" // L0
		"movq (%%eax, %1), %%mm1 \n\t" // L2
		PAVGB(%%mm1, %%mm0) // L0+L2
		"movq %%mm0, (%%eax) \n\t" // -> line 1
		"movq (%0, %1, 4), %%mm0 \n\t" // L4
		PAVGB(%%mm0, %%mm1) // L2+L4
		"movq %%mm1, (%%eax, %1, 2) \n\t" // -> line 3
		"movq (%%ebx, %1), %%mm1 \n\t" // L6
		PAVGB(%%mm1, %%mm0) // L4+L6
		"movq %%mm0, (%%ebx) \n\t" // -> line 5
		"movq (%0, %1, 8), %%mm0 \n\t" // L8
		PAVGB(%%mm0, %%mm1) // L6+L8
		"movq %%mm1, (%%ebx, %1, 2) \n\t" // -> line 7

		: : "r" (src), "r" (stride)
		// "memory": the asm reads and writes the pixel buffer behind src
		: "%eax", "%ebx", "memory"
	);
#else
	int x;
	for(x=0; x<8; x++)
	{
		// the sums are truncated (no rounding) by the shift
		src[stride]   = (src[0]        + src[stride*2])>>1;
		src[stride*3] = (src[stride*2] + src[stride*4])>>1;
		src[stride*5] = (src[stride*4] + src[stride*6])>>1;
		src[stride*7] = (src[stride*6] + src[stride*8])>>1;
		src++;
	}
#endif
}
/**
 * Deinterlaces the given block by linear interpolation (last block row).
 * Lines 1, 3 and 5 are overwritten with the average of the even lines above
 * and below them; line 7 has no line below it inside the block and is
 * overwritten with a copy of line 6. The even lines are left untouched.
 * Will be called for every 8x8 block, in the last row, and can read & write
 * into an 8x8 block.
 *
 * @param src    top-left pixel of the 8x8 block, modified in place
 * @param stride distance in bytes between two vertically adjacent pixels
 */
static inline void deInterlaceInterpolateLinearLastRow(uint8_t src[], int stride)
{
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
	// PAVGB is defined outside this block; the comments below assume that
	// PAVGB(a, b) leaves the byte-wise average of a and b in b
	// NOTE(review): confirm against its definition
	asm volatile(
		"leal (%0, %1), %%eax \n\t"
		"leal (%%eax, %1, 4), %%ebx \n\t"
//	line	0	1	2	3	4	5	6	7	8	9
//	addr	%0	eax	eax+%1	eax+2%1	%0+4%1	ebx	ebx+%1	ebx+2%1	%0+8%1	ebx+4%1

		"movq (%0), %%mm0 \n\t" // L0
		"movq (%%eax, %1), %%mm1 \n\t" // L2
		PAVGB(%%mm1, %%mm0) // L0+L2
		"movq %%mm0, (%%eax) \n\t" // -> line 1
		"movq (%0, %1, 4), %%mm0 \n\t" // L4
		PAVGB(%%mm0, %%mm1) // L2+L4
		"movq %%mm1, (%%eax, %1, 2) \n\t" // -> line 3
		"movq (%%ebx, %1), %%mm1 \n\t" // L6
		PAVGB(%%mm1, %%mm0) // L4+L6
		"movq %%mm0, (%%ebx) \n\t" // -> line 5
		"movq %%mm1, (%%ebx, %1, 2) \n\t" // L6 -> line 7

		: : "r" (src), "r" (stride)
		// "memory": the asm reads and writes the pixel buffer behind src
		: "%eax", "%ebx", "memory"
	);
#else
	int x;
	for(x=0; x<8; x++)
	{
		// the sums are truncated (no rounding) by the shift
		src[stride]   = (src[0]        + src[stride*2])>>1;
		src[stride*3] = (src[stride*2] + src[stride*4])>>1;
		src[stride*5] = (src[stride*4] + src[stride*6])>>1;
		src[stride*7] = src[stride*6]; // no line 8 available, repeat line 6
		src++;
	}
#endif
}
/**
 * Deinterlaces the given block with a vertical (1,2,1)/4 blend.
 * Output line n is built from input lines n, n+1 and n+2 with weights 1, 2, 1,
 * for n = 0..7, so lines 8 and 9 of the block below are read.
 * Will be called for every 8x8 block, except the last row, and can read & write
 * into an 8x16 block.
 * Will shift the image up by 1 line (FIXME if this is a problem).
 *
 * @param src    top-left pixel of the 8x8 block, modified in place
 * @param stride distance in bytes between two vertically adjacent pixels
 */
static inline void deInterlaceBlendLinear(uint8_t src[], int stride)
{
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
	// Each output line is formed by two chained PAVGB steps: first the two
	// outer lines are averaged, then that result is averaged with the middle line.
	asm volatile(
		"leal (%0, %1), %%eax \n\t"
		"leal (%%eax, %1, 4), %%ebx \n\t"
//	line	0	1	2	3	4	5	6	7	8	9
//	addr	%0	eax	eax+%1	eax+2%1	%0+4%1	ebx	ebx+%1	ebx+2%1	%0+8%1	ebx+4%1

		"movq (%0), %%mm0 \n\t" // L0
		"movq (%%eax, %1), %%mm1 \n\t" // L2
		PAVGB(%%mm1, %%mm0) // L0+L2
		"movq (%%eax), %%mm2 \n\t" // L1
		PAVGB(%%mm2, %%mm0) // 2L1 + L0 + L2
		"movq %%mm0, (%0) \n\t"
		"movq (%%eax, %1, 2), %%mm0 \n\t" // L3
		PAVGB(%%mm0, %%mm2) // L1+L3
		PAVGB(%%mm1, %%mm2) // 2L2 + L1 + L3
		"movq %%mm2, (%%eax) \n\t"
		"movq (%0, %1, 4), %%mm2 \n\t" // L4
		PAVGB(%%mm2, %%mm1) // L2+L4
		PAVGB(%%mm0, %%mm1) // 2L3 + L2 + L4
		"movq %%mm1, (%%eax, %1) \n\t"
		"movq (%%ebx), %%mm1 \n\t" // L5
		PAVGB(%%mm1, %%mm0) // L3+L5
		PAVGB(%%mm2, %%mm0) // 2L4 + L3 + L5
		"movq %%mm0, (%%eax, %1, 2) \n\t"
		"movq (%%ebx, %1), %%mm0 \n\t" // L6
		PAVGB(%%mm0, %%mm2) // L4+L6
		PAVGB(%%mm1, %%mm2) // 2L5 + L4 + L6
		"movq %%mm2, (%0, %1, 4) \n\t"
		"movq (%%ebx, %1, 2), %%mm2 \n\t" // L7
		PAVGB(%%mm2, %%mm1) // L5+L7
		PAVGB(%%mm0, %%mm1) // 2L6 + L5 + L7
		"movq %%mm1, (%%ebx) \n\t"
		"movq (%0, %1, 8), %%mm1 \n\t" // L8
		PAVGB(%%mm1, %%mm0) // L6+L8
		PAVGB(%%mm2, %%mm0) // 2L7 + L6 + L8
		"movq %%mm0, (%%ebx, %1) \n\t"
		"movq (%%ebx, %1, 4), %%mm0 \n\t" // L9
		PAVGB(%%mm0, %%mm2) // L7+L9
		PAVGB(%%mm1, %%mm2) // 2L8 + L7 + L9
		"movq %%mm2, (%%ebx, %1, 2) \n\t"

		: : "r" (src), "r" (stride)
		// "memory": the asm reads and writes the pixel buffer behind src
		: "%eax", "%ebx", "memory"
	);
#else
	int x;
	for(x=0; x<8; x++)
	{
		// Each line only reads lines at or below itself, so processing top
		// to bottom in place always sees unmodified input.
		src[0       ] = (src[0       ] + 2*src[stride  ] + src[stride*2])>>2;
		src[stride  ] = (src[stride  ] + 2*src[stride*2] + src[stride*3])>>2;
		src[stride*2] = (src[stride*2] + 2*src[stride*3] + src[stride*4])>>2;
		src[stride*3] = (src[stride*3] + 2*src[stride*4] + src[stride*5])>>2;
		src[stride*4] = (src[stride*4] + 2*src[stride*5] + src[stride*6])>>2;
		src[stride*5] = (src[stride*5] + 2*src[stride*6] + src[stride*7])>>2;
		src[stride*6] = (src[stride*6] + 2*src[stride*7] + src[stride*8])>>2;
		src[stride*7] = (src[stride*7] + 2*src[stride*8] + src[stride*9])>>2;
		src++;
	}
#endif
}
/**
 * Deinterlaces the given block with a vertical (1,2,1)/4 blend (last block row).
 * Output lines 0..5 are built from input lines n, n+1 and n+2 with weights
 * 1, 2, 1. No lines exist below line 7, so lines 6 and 7 are both set to the
 * average of input lines 6 and 7.
 * Will be called for every 8x8 block, in the last row, and can read & write
 * into an 8x8 block.
 * Will shift the image up by 1 line (FIXME if this is a problem).
 *
 * @param src    top-left pixel of the 8x8 block, modified in place
 * @param stride distance in bytes between two vertically adjacent pixels
 */
static inline void deInterlaceBlendLinearLastRow(uint8_t src[], int stride)
{
// was misspelled HAVE_MMSX2, which disabled this path on MMX2-only builds
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
	// Each blended line is formed by two chained PAVGB steps: first the two
	// outer lines are averaged, then that result is averaged with the middle line.
	asm volatile(
		"leal (%0, %1), %%eax \n\t"
		"leal (%%eax, %1, 4), %%ebx \n\t"
//	line	0	1	2	3	4	5	6	7	8	9
//	addr	%0	eax	eax+%1	eax+2%1	%0+4%1	ebx	ebx+%1	ebx+2%1	%0+8%1	ebx+4%1

		"movq (%0), %%mm0 \n\t" // L0
		"movq (%%eax, %1), %%mm1 \n\t" // L2
		PAVGB(%%mm1, %%mm0) // L0+L2
		"movq (%%eax), %%mm2 \n\t" // L1
		PAVGB(%%mm2, %%mm0) // 2L1 + L0 + L2
		"movq %%mm0, (%0) \n\t"
		"movq (%%eax, %1, 2), %%mm0 \n\t" // L3
		PAVGB(%%mm0, %%mm2) // L1+L3
		PAVGB(%%mm1, %%mm2) // 2L2 + L1 + L3
		"movq %%mm2, (%%eax) \n\t"
		"movq (%0, %1, 4), %%mm2 \n\t" // L4
		PAVGB(%%mm2, %%mm1) // L2+L4
		PAVGB(%%mm0, %%mm1) // 2L3 + L2 + L4
		"movq %%mm1, (%%eax, %1) \n\t"
		"movq (%%ebx), %%mm1 \n\t" // L5
		PAVGB(%%mm1, %%mm0) // L3+L5
		PAVGB(%%mm2, %%mm0) // 2L4 + L3 + L5
		"movq %%mm0, (%%eax, %1, 2) \n\t"
		"movq (%%ebx, %1), %%mm0 \n\t" // L6
		PAVGB(%%mm0, %%mm2) // L4+L6
		PAVGB(%%mm1, %%mm2) // 2L5 + L4 + L6
		"movq %%mm2, (%0, %1, 4) \n\t"
		"movq (%%ebx, %1, 2), %%mm2 \n\t" // L7
		PAVGB(%%mm2, %%mm1) // L5+L7
		PAVGB(%%mm0, %%mm1) // 2L6 + L5 + L7
		"movq %%mm1, (%%ebx) \n\t"
		PAVGB(%%mm2, %%mm0) // L6+L7
		"movq %%mm0, (%%ebx, %1) \n\t" // -> line 6
		"movq %%mm0, (%%ebx, %1, 2) \n\t" // -> line 7

		: : "r" (src), "r" (stride)
		// "memory": the asm reads and writes the pixel buffer behind src
		: "%eax", "%ebx", "memory"
	);
#else
	int x;
	for(x=0; x<8; x++)
	{
		// Each line only reads lines at or below itself, so processing top
		// to bottom in place always sees unmodified input.
		src[0       ] = (src[0       ] + 2*src[stride  ] + src[stride*2])>>2;
		src[stride  ] = (src[stride  ] + 2*src[stride*2] + src[stride*3])>>2;
		src[stride*2] = (src[stride*2] + 2*src[stride*3] + src[stride*4])>>2;
		src[stride*3] = (src[stride*3] + 2*src[stride*4] + src[stride*5])>>2;
		src[stride*4] = (src[stride*4] + 2*src[stride*5] + src[stride*6])>>2;
		src[stride*5] = (src[stride*5] + 2*src[stride*6] + src[stride*7])>>2;
		src[stride*6] = (src[stride*6] +   src[stride*7])>>1;
		src[stride*7] = src[stride*6]; // repeats the freshly blended line 6
		src++;
	}
#endif
}
/**
 * Deinterlaces the given block with a vertical 3-tap median filter.
 * Every odd line (1, 3, 5, 7) is overwritten with the per-pixel median of
 * itself and the even lines directly above and below it; the even lines are
 * left untouched.
 * Will be called for every 8x8 block, except the last row, and can read & write
 * into an 8x16 block (line 8, the first line of the block below, is read).
 *
 * The C fallback used to be a copy of the linear blend filter (marked FIXME);
 * it now computes the same median as the MMX2 code.
 *
 * @param src    top-left pixel of the 8x8 block, modified in place
 * @param stride distance in bytes between two vertically adjacent pixels
 */
static inline void deInterlaceMedian(uint8_t src[], int stride)
{
#if defined (HAVE_MMX2)
	// median(a,b,c) is computed as min( max(a,b), max( min(a,b), c ) )
	asm volatile(
		"leal (%0, %1), %%eax \n\t"
		"leal (%%eax, %1, 4), %%ebx \n\t"
//	line	0	1	2	3	4	5	6	7	8	9
//	addr	%0	eax	eax+%1	eax+2%1	%0+4%1	ebx	ebx+%1	ebx+2%1	%0+8%1	ebx+4%1

		"movq (%0), %%mm0 \n\t" // L0
		"movq (%%eax, %1), %%mm2 \n\t" // L2
		"movq (%%eax), %%mm1 \n\t" // L1
		"movq %%mm0, %%mm3 \n\t"
		"pmaxub %%mm1, %%mm0 \n\t" // max(L0, L1)
		"pminub %%mm3, %%mm1 \n\t" // min(L0, L1)
		"pmaxub %%mm2, %%mm1 \n\t" // max(min(L0, L1), L2)
		"pminub %%mm1, %%mm0 \n\t" // median(L0, L1, L2)
		"movq %%mm0, (%%eax) \n\t" // -> line 1

		"movq (%0, %1, 4), %%mm0 \n\t" // L4
		"movq (%%eax, %1, 2), %%mm1 \n\t" // L3
		"movq %%mm2, %%mm3 \n\t"
		"pmaxub %%mm1, %%mm2 \n\t" // max(L2, L3)
		"pminub %%mm3, %%mm1 \n\t" // min(L2, L3)
		"pmaxub %%mm0, %%mm1 \n\t" // max(min(L2, L3), L4)
		"pminub %%mm1, %%mm2 \n\t" // median(L2, L3, L4)
		"movq %%mm2, (%%eax, %1, 2) \n\t" // -> line 3

		"movq (%%ebx), %%mm2 \n\t" // L5
		"movq (%%ebx, %1), %%mm1 \n\t" // L6
		"movq %%mm2, %%mm3 \n\t"
		"pmaxub %%mm0, %%mm2 \n\t" // max(L4, L5)
		"pminub %%mm3, %%mm0 \n\t" // min(L4, L5)
		"pmaxub %%mm1, %%mm0 \n\t" // max(min(L4, L5), L6)
		"pminub %%mm0, %%mm2 \n\t" // median(L4, L5, L6)
		"movq %%mm2, (%%ebx) \n\t" // -> line 5

		"movq (%%ebx, %1, 2), %%mm2 \n\t" // L7
		"movq (%0, %1, 8), %%mm0 \n\t" // L8
		"movq %%mm2, %%mm3 \n\t"
		"pmaxub %%mm0, %%mm2 \n\t" // max(L7, L8)
		"pminub %%mm3, %%mm0 \n\t" // min(L7, L8)
		"pmaxub %%mm1, %%mm0 \n\t" // max(min(L7, L8), L6)
		"pminub %%mm0, %%mm2 \n\t" // median(L6, L7, L8)
		"movq %%mm2, (%%ebx, %1, 2) \n\t" // -> line 7

		: : "r" (src), "r" (stride)
		// "memory": the asm reads and writes the pixel buffer behind src
		: "%eax", "%ebx", "memory"
	);
#else
	int x;
	for(x=0; x<8; x++)
	{
		int y;
		// only odd lines are written and only even neighbours are read,
		// so the order in which the lines are processed does not matter
		for(y=1; y<8; y+=2)
		{
			const int above= src[stride*(y-1)];
			const int cur  = src[stride* y   ];
			const int below= src[stride*(y+1)];
			const int lo= above < cur ? above : cur;
			const int hi= above < cur ? cur   : above;
			const int mid= lo > below ? lo : below; // max(min(above, cur), below)
			src[stride*y]= mid < hi ? mid : hi;     // median(above, cur, below)
		}
		src++;
	}
#endif
}
/**
 * Deinterlaces the given block with a vertical 3-tap median filter (last block row).
 * Lines 1, 3 and 5 are overwritten with the per-pixel median of themselves and
 * the even lines directly above and below; line 7 has no line below it inside
 * the block and is overwritten with a copy of line 6. The even lines are left
 * untouched.
 * Will be called for every 8x8 block, in the last row, and can read & write
 * into an 8x8 block.
 *
 * The C fallback used to be a copy of the linear blend filter (marked FIXME);
 * it now computes the same result as the MMX2 code.
 *
 * @param src    top-left pixel of the 8x8 block, modified in place
 * @param stride distance in bytes between two vertically adjacent pixels
 */
static inline void deInterlaceMedianLastRow(uint8_t src[], int stride)
{
#if defined (HAVE_MMX2)
	// median(a,b,c) is computed as min( max(a,b), max( min(a,b), c ) )
	asm volatile(
		"leal (%0, %1), %%eax \n\t"
		"leal (%%eax, %1, 4), %%ebx \n\t"
//	line	0	1	2	3	4	5	6	7	8	9
//	addr	%0	eax	eax+%1	eax+2%1	%0+4%1	ebx	ebx+%1	ebx+2%1	%0+8%1	ebx+4%1

		"movq (%0), %%mm0 \n\t" // L0
		"movq (%%eax, %1), %%mm2 \n\t" // L2
		"movq (%%eax), %%mm1 \n\t" // L1
		"movq %%mm0, %%mm3 \n\t"
		"pmaxub %%mm1, %%mm0 \n\t" // max(L0, L1)
		"pminub %%mm3, %%mm1 \n\t" // min(L0, L1)
		"pmaxub %%mm2, %%mm1 \n\t" // max(min(L0, L1), L2)
		"pminub %%mm1, %%mm0 \n\t" // median(L0, L1, L2)
		"movq %%mm0, (%%eax) \n\t" // -> line 1

		"movq (%0, %1, 4), %%mm0 \n\t" // L4
		"movq (%%eax, %1, 2), %%mm1 \n\t" // L3
		"movq %%mm2, %%mm3 \n\t"
		"pmaxub %%mm1, %%mm2 \n\t" // max(L2, L3)
		"pminub %%mm3, %%mm1 \n\t" // min(L2, L3)
		"pmaxub %%mm0, %%mm1 \n\t" // max(min(L2, L3), L4)
		"pminub %%mm1, %%mm2 \n\t" // median(L2, L3, L4)
		"movq %%mm2, (%%eax, %1, 2) \n\t" // -> line 3

		"movq (%%ebx), %%mm2 \n\t" // L5
		"movq (%%ebx, %1), %%mm1 \n\t" // L6
		"movq %%mm2, %%mm3 \n\t"
		"pmaxub %%mm0, %%mm2 \n\t" // max(L4, L5)
		"pminub %%mm3, %%mm0 \n\t" // min(L4, L5)
		"pmaxub %%mm1, %%mm0 \n\t" // max(min(L4, L5), L6)
		"pminub %%mm0, %%mm2 \n\t" // median(L4, L5, L6)
		"movq %%mm2, (%%ebx) \n\t" // -> line 5

		"movq %%mm1, (%%ebx, %1, 2) \n\t" // L6 -> line 7

		: : "r" (src), "r" (stride)
		// "memory": the asm reads and writes the pixel buffer behind src
		: "%eax", "%ebx", "memory"
	);
#else
	int x;
	for(x=0; x<8; x++)
	{
		int y;
		// only odd lines are written and only even neighbours are read,
		// so the order in which the lines are processed does not matter
		for(y=1; y<6; y+=2)
		{
			const int above= src[stride*(y-1)];
			const int cur  = src[stride* y   ];
			const int below= src[stride*(y+1)];
			const int lo= above < cur ? above : cur;
			const int hi= above < cur ? cur   : above;
			const int mid= lo > below ? lo : below; // max(min(above, cur), below)
			src[stride*y]= mid < hi ? mid : hi;     // median(above, cur, below)
		}
		src[stride*7] = src[stride*6]; // no line 8 available, repeat line 6
		src++;
	}
#endif
}
| #ifdef HAVE_ODIVX_POSTPROCESS | #ifdef HAVE_ODIVX_POSTPROCESS | ||||
| #include "../opendivx/postprocess.h" | #include "../opendivx/postprocess.h" | ||||
| int use_old_pp=0; | int use_old_pp=0; | ||||
| @@ -1841,7 +2269,6 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri | |||||
| * the mode value is interpreted as a quality value if its negative, its range is then (-1 ... -63) | * the mode value is interpreted as a quality value if its negative, its range is then (-1 ... -63) | ||||
| * -63 is best quality -1 is worst | * -63 is best quality -1 is worst | ||||
| */ | */ | ||||
| //extern "C"{ | |||||
| void postprocess(unsigned char * src[], int src_stride, | void postprocess(unsigned char * src[], int src_stride, | ||||
| unsigned char * dst[], int dst_stride, | unsigned char * dst[], int dst_stride, | ||||
| int horizontal_size, int vertical_size, | int horizontal_size, int vertical_size, | ||||
| @@ -2196,6 +2623,17 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri | |||||
| blockCopy(vertBlock + dstStride*2, dstStride, | blockCopy(vertBlock + dstStride*2, dstStride, | ||||
| vertSrcBlock + srcStride*2, srcStride, 8, mode & LEVEL_FIX); | vertSrcBlock + srcStride*2, srcStride, 8, mode & LEVEL_FIX); | ||||
| if(mode & LINEAR_IPOL_DEINT_FILTER) | |||||
| deInterlaceInterpolateLinear(dstBlock, dstStride); | |||||
| else if(mode & LINEAR_BLEND_DEINT_FILTER) | |||||
| deInterlaceBlendLinear(dstBlock, dstStride); | |||||
| else if(mode & MEDIAN_DEINT_FILTER) | |||||
| deInterlaceMedian(dstBlock, dstStride); | |||||
| /* else if(mode & CUBIC_IPOL_DEINT_FILTER) | |||||
| deInterlaceInterpolateCubic(dstBlock, dstStride); | |||||
| else if(mode & CUBIC_BLEND_DEINT_FILTER) | |||||
| deInterlaceBlendCubic(dstBlock, dstStride); | |||||
| */ | |||||
| #ifdef MORE_TIMEING | #ifdef MORE_TIMEING | ||||
| T1= rdtsc(); | T1= rdtsc(); | ||||
| @@ -2226,9 +2664,22 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri | |||||
| #endif | #endif | ||||
| } | } | ||||
| else | else | ||||
| { | |||||
| blockCopy(vertBlock + dstStride*1, dstStride, | blockCopy(vertBlock + dstStride*1, dstStride, | ||||
| vertSrcBlock + srcStride*1, srcStride, 4, mode & LEVEL_FIX); | vertSrcBlock + srcStride*1, srcStride, 4, mode & LEVEL_FIX); | ||||
| if(mode & LINEAR_IPOL_DEINT_FILTER) | |||||
| deInterlaceInterpolateLinearLastRow(dstBlock, dstStride); | |||||
| else if(mode & LINEAR_BLEND_DEINT_FILTER) | |||||
| deInterlaceBlendLinearLastRow(dstBlock, dstStride); | |||||
| else if(mode & MEDIAN_DEINT_FILTER) | |||||
| deInterlaceMedianLastRow(dstBlock, dstStride); | |||||
| /* else if(mode & CUBIC_IPOL_DEINT_FILTER) | |||||
| deInterlaceInterpolateCubicLastRow(dstBlock, dstStride); | |||||
| else if(mode & CUBIC_BLEND_DEINT_FILTER) | |||||
| deInterlaceBlendCubicLastRow(dstBlock, dstStride); | |||||
| */ | |||||
| } | |||||
| if(x - 8 >= 0 && x<width) | if(x - 8 >= 0 && x<width) | ||||
| { | { | ||||