brightness_debug (draws luminance histogram & autodetected white/black level) Originally committed as revision 3014 to svn://svn.mplayerhq.hu/mplayer/trunk/postproctags/v0.5
@@ -21,11 +21,11 @@ | |||
isVertDC Ec Ec | |||
isVertMinMaxOk Ec Ec | |||
doVertLowPass E e e | |||
doVertDefFilter Ec Ec Ec | |||
doVertDefFilter Ec Ec e e | |||
isHorizDC Ec Ec | |||
isHorizMinMaxOk a E | |||
doHorizLowPass E e e | |||
doHorizDefFilter Ec Ec Ec | |||
doHorizDefFilter Ec Ec e e | |||
deRing E e e* | |||
Vertical RKAlgo1 E a a | |||
Horizontal RKAlgo1 a a | |||
@@ -63,8 +63,6 @@ optimize c versions | |||
try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks | |||
smart blur | |||
... | |||
Notes: | |||
*/ | |||
//Changelog: use the CVS log | |||
@@ -80,6 +78,7 @@ Notes: | |||
//#undef HAVE_MMX2 | |||
//#define HAVE_3DNOW | |||
//#undef HAVE_MMX | |||
//#define DEBUG_BRIGHTNESS | |||
#include "postprocess.h" | |||
#define MIN(a,b) ((a) > (b) ? (b) : (a)) | |||
@@ -1067,10 +1066,299 @@ HX1b((%0, %1, 4),(%%ebx),(%%ebx, %1),(%%ebx, %1, 2)) | |||
static inline void doVertDefFilter(uint8_t src[], int stride, int QP) | |||
{ | |||
#ifdef HAVE_MMX | |||
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | |||
/* | |||
uint8_t tmp[16]; | |||
const int l1= stride; | |||
const int l2= stride + l1; | |||
const int l3= stride + l2; | |||
const int l4= (int)tmp - (int)src - stride*3; | |||
const int l5= (int)tmp - (int)src - stride*3 + 8; | |||
const int l6= stride*3 + l3; | |||
const int l7= stride + l6; | |||
const int l8= stride + l7; | |||
memcpy(tmp, src+stride*7, 8); | |||
memcpy(tmp+8, src+stride*8, 8); | |||
*/ | |||
src+= stride*4; | |||
asm volatile( | |||
#if 0 //sligtly more accurate and slightly slower | |||
"pxor %%mm7, %%mm7 \n\t" // 0 | |||
"leal (%0, %1), %%eax \n\t" | |||
"leal (%%eax, %1, 4), %%ebx \n\t" | |||
// 0 1 2 3 4 5 6 7 | |||
// %0 %0+%1 %0+2%1 eax+2%1 %0+4%1 eax+4%1 ebx+%1 ebx+2%1 | |||
// %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 | |||
"movq (%0, %1, 2), %%mm0 \n\t" // l2 | |||
"movq (%0), %%mm1 \n\t" // l0 | |||
"movq %%mm0, %%mm2 \n\t" // l2 | |||
PAVGB(%%mm7, %%mm0) // ~l2/2 | |||
PAVGB(%%mm1, %%mm0) // ~(l2 + 2l0)/4 | |||
PAVGB(%%mm2, %%mm0) // ~(5l2 + 2l0)/8 | |||
"movq (%%eax), %%mm1 \n\t" // l1 | |||
"movq (%%eax, %1, 2), %%mm3 \n\t" // l3 | |||
"movq %%mm1, %%mm4 \n\t" // l1 | |||
PAVGB(%%mm7, %%mm1) // ~l1/2 | |||
PAVGB(%%mm3, %%mm1) // ~(l1 + 2l3)/4 | |||
PAVGB(%%mm4, %%mm1) // ~(5l1 + 2l3)/8 | |||
"movq %%mm0, %%mm4 \n\t" // ~(5l2 + 2l0)/8 | |||
"psubusb %%mm1, %%mm0 \n\t" | |||
"psubusb %%mm4, %%mm1 \n\t" | |||
"por %%mm0, %%mm1 \n\t" // ~|2l0 - 5l1 + 5l2 - 2l3|/8 | |||
// mm1= |lenergy|, mm2= l2, mm3= l3, mm7=0 | |||
"movq (%0, %1, 4), %%mm0 \n\t" // l4 | |||
"movq %%mm0, %%mm4 \n\t" // l4 | |||
PAVGB(%%mm7, %%mm0) // ~l4/2 | |||
PAVGB(%%mm2, %%mm0) // ~(l4 + 2l2)/4 | |||
PAVGB(%%mm4, %%mm0) // ~(5l4 + 2l2)/8 | |||
"movq (%%ebx), %%mm2 \n\t" // l5 | |||
"movq %%mm3, %%mm5 \n\t" // l3 | |||
PAVGB(%%mm7, %%mm3) // ~l3/2 | |||
PAVGB(%%mm2, %%mm3) // ~(l3 + 2l5)/4 | |||
PAVGB(%%mm5, %%mm3) // ~(5l3 + 2l5)/8 | |||
"movq %%mm0, %%mm6 \n\t" // ~(5l4 + 2l2)/8 | |||
"psubusb %%mm3, %%mm0 \n\t" | |||
"psubusb %%mm6, %%mm3 \n\t" | |||
"por %%mm0, %%mm3 \n\t" // ~|2l2 - 5l3 + 5l4 - 2l5|/8 | |||
"pcmpeqb %%mm7, %%mm0 \n\t" // SIGN(2l2 - 5l3 + 5l4 - 2l5) | |||
// mm0= SIGN(menergy), mm1= |lenergy|, mm2= l5, mm3= |menergy|, mm4=l4, mm5= l3, mm7=0 | |||
"movq (%%ebx, %1), %%mm6 \n\t" // l6 | |||
"movq %%mm6, %%mm5 \n\t" // l6 | |||
PAVGB(%%mm7, %%mm6) // ~l6/2 | |||
PAVGB(%%mm4, %%mm6) // ~(l6 + 2l4)/4 | |||
PAVGB(%%mm5, %%mm6) // ~(5l6 + 2l4)/8 | |||
"movq (%%ebx, %1, 2), %%mm5 \n\t" // l7 | |||
"movq %%mm2, %%mm4 \n\t" // l5 | |||
PAVGB(%%mm7, %%mm2) // ~l5/2 | |||
PAVGB(%%mm5, %%mm2) // ~(l5 + 2l7)/4 | |||
PAVGB(%%mm4, %%mm2) // ~(5l5 + 2l7)/8 | |||
"movq %%mm6, %%mm4 \n\t" // ~(5l6 + 2l4)/8 | |||
"psubusb %%mm2, %%mm6 \n\t" | |||
"psubusb %%mm4, %%mm2 \n\t" | |||
"por %%mm6, %%mm2 \n\t" // ~|2l4 - 5l5 + 5l6 - 2l7|/8 | |||
// mm0= SIGN(menergy), mm1= |lenergy|/8, mm2= |renergy|/8, mm3= |menergy|/8, mm7=0 | |||
PMINUB(%%mm2, %%mm1, %%mm4) // MIN(|lenergy|,|renergy|)/8 | |||
"movq pQPb, %%mm4 \n\t" // QP //FIXME QP+1 ? | |||
"paddusb b01, %%mm4 \n\t" | |||
"pcmpgtb %%mm3, %%mm4 \n\t" // |menergy|/8 < QP | |||
"psubusb %%mm1, %%mm3 \n\t" // d=|menergy|/8-MIN(|lenergy|,|renergy|)/8 | |||
"pand %%mm4, %%mm3 \n\t" | |||
"movq %%mm3, %%mm1 \n\t" | |||
// "psubusb b01, %%mm3 \n\t" | |||
PAVGB(%%mm7, %%mm3) | |||
PAVGB(%%mm7, %%mm3) | |||
"paddusb %%mm1, %%mm3 \n\t" | |||
// "paddusb b01, %%mm3 \n\t" | |||
"movq (%%eax, %1, 2), %%mm6 \n\t" //l3 | |||
"movq (%0, %1, 4), %%mm5 \n\t" //l4 | |||
"movq (%0, %1, 4), %%mm4 \n\t" //l4 | |||
"psubusb %%mm6, %%mm5 \n\t" | |||
"psubusb %%mm4, %%mm6 \n\t" | |||
"por %%mm6, %%mm5 \n\t" // |l3-l4| | |||
"pcmpeqb %%mm7, %%mm6 \n\t" // SIGN(l3-l4) | |||
"pxor %%mm6, %%mm0 \n\t" | |||
"pand %%mm0, %%mm3 \n\t" | |||
PMINUB(%%mm5, %%mm3, %%mm0) | |||
"psubusb b01, %%mm3 \n\t" | |||
PAVGB(%%mm7, %%mm3) | |||
"movq (%%eax, %1, 2), %%mm0 \n\t" | |||
"movq (%0, %1, 4), %%mm2 \n\t" | |||
"pxor %%mm6, %%mm0 \n\t" | |||
"pxor %%mm6, %%mm2 \n\t" | |||
"psubb %%mm3, %%mm0 \n\t" | |||
"paddb %%mm3, %%mm2 \n\t" | |||
"pxor %%mm6, %%mm0 \n\t" | |||
"pxor %%mm6, %%mm2 \n\t" | |||
"movq %%mm0, (%%eax, %1, 2) \n\t" | |||
"movq %%mm2, (%0, %1, 4) \n\t" | |||
#endif | |||
"leal (%0, %1), %%eax \n\t" | |||
"pcmpeqb %%mm6, %%mm6 \n\t" // -1 | |||
// 0 1 2 3 4 5 6 7 | |||
// %0 %0+%1 %0+2%1 eax+2%1 %0+4%1 eax+4%1 ebx+%1 ebx+2%1 | |||
// %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 | |||
"movq (%%eax, %1, 2), %%mm1 \n\t" // l3 | |||
"movq (%0, %1, 4), %%mm0 \n\t" // l4 | |||
"pxor %%mm6, %%mm1 \n\t" // -l3-1 | |||
PAVGB(%%mm1, %%mm0) // -q+128 = (l4-l3+256)/2 | |||
// mm1=-l3-1, mm0=128-q | |||
"movq (%%eax, %1, 4), %%mm2 \n\t" // l5 | |||
"movq (%%eax, %1), %%mm3 \n\t" // l2 | |||
"pxor %%mm6, %%mm2 \n\t" // -l5-1 | |||
"movq %%mm2, %%mm5 \n\t" // -l5-1 | |||
"movq b80, %%mm4 \n\t" // 128 | |||
"leal (%%eax, %1, 4), %%ebx \n\t" | |||
PAVGB(%%mm3, %%mm2) // (l2-l5+256)/2 | |||
PAVGB(%%mm0, %%mm4) // ~(l4-l3)/4 + 128 | |||
PAVGB(%%mm2, %%mm4) // ~(l2-l5)/4 +(l4-l3)/8 + 128 | |||
PAVGB(%%mm0, %%mm4) // ~(l2-l5)/8 +5(l4-l3)/16 + 128 | |||
// mm1=-l3-1, mm0=128-q, mm3=l2, mm4=menergy/16 + 128, mm5= -l5-1 | |||
"movq (%%eax), %%mm2 \n\t" // l1 | |||
"pxor %%mm6, %%mm2 \n\t" // -l1-1 | |||
PAVGB(%%mm3, %%mm2) // (l2-l1+256)/2 | |||
PAVGB((%0), %%mm1) // (l0-l3+256)/2 | |||
"movq b80, %%mm3 \n\t" // 128 | |||
PAVGB(%%mm2, %%mm3) // ~(l2-l1)/4 + 128 | |||
PAVGB(%%mm1, %%mm3) // ~(l0-l3)/4 +(l2-l1)/8 + 128 | |||
PAVGB(%%mm2, %%mm3) // ~(l0-l3)/8 +5(l2-l1)/16 + 128 | |||
// mm0=128-q, mm3=lenergy/16 + 128, mm4= menergy/16 + 128, mm5= -l5-1 | |||
PAVGB((%%ebx, %1), %%mm5) // (l6-l5+256)/2 | |||
"movq (%%ebx, %1, 2), %%mm1 \n\t" // l7 | |||
"pxor %%mm6, %%mm1 \n\t" // -l7-1 | |||
PAVGB((%0, %1, 4), %%mm1) // (l4-l7+256)/2 | |||
"movq b80, %%mm2 \n\t" // 128 | |||
PAVGB(%%mm5, %%mm2) // ~(l6-l5)/4 + 128 | |||
PAVGB(%%mm1, %%mm2) // ~(l4-l7)/4 +(l6-l5)/8 + 128 | |||
PAVGB(%%mm5, %%mm2) // ~(l4-l7)/8 +5(l6-l5)/16 + 128 | |||
// mm0=128-q, mm2=renergy/16 + 128, mm3=lenergy/16 + 128, mm4= menergy/16 + 128 | |||
"movq b00, %%mm1 \n\t" // 0 | |||
"movq b00, %%mm5 \n\t" // 0 | |||
"psubb %%mm2, %%mm1 \n\t" // 128 - renergy/16 | |||
"psubb %%mm3, %%mm5 \n\t" // 128 - lenergy/16 | |||
PMAXUB(%%mm1, %%mm2) // 128 + |renergy/16| | |||
PMAXUB(%%mm5, %%mm3) // 128 + |lenergy/16| | |||
PMINUB(%%mm2, %%mm3, %%mm1) // 128 + MIN(|lenergy|,|renergy|)/16 | |||
// mm0=128-q, mm3=128 + MIN(|lenergy|,|renergy|)/16, mm4= menergy/16 + 128 | |||
"movq b00, %%mm7 \n\t" // 0 | |||
"movq pQPb, %%mm2 \n\t" // QP | |||
PAVGB(%%mm6, %%mm2) // 128 + QP/2 | |||
"psubb %%mm6, %%mm2 \n\t" | |||
"movq %%mm4, %%mm1 \n\t" | |||
"pcmpgtb %%mm7, %%mm1 \n\t" // SIGN(menergy) | |||
"pxor %%mm1, %%mm4 \n\t" | |||
"psubb %%mm1, %%mm4 \n\t" // 128 + |menergy|/16 | |||
"pcmpgtb %%mm4, %%mm2 \n\t" // |menergy|/16 < QP/2 | |||
"psubusb %%mm3, %%mm4 \n\t" //d=|menergy|/16 - MIN(|lenergy|,|renergy|)/16 | |||
// mm0=128-q, mm1= SIGN(menergy), mm2= |menergy|/16 < QP/2, mm4= d/16 | |||
"movq %%mm4, %%mm3 \n\t" // d | |||
"psubusb b01, %%mm4 \n\t" | |||
PAVGB(%%mm7, %%mm4) // d/32 | |||
PAVGB(%%mm7, %%mm4) // (d + 32)/64 | |||
"paddb %%mm3, %%mm4 \n\t" // 5d/64 | |||
"pand %%mm2, %%mm4 \n\t" | |||
"movq b80, %%mm5 \n\t" // 128 | |||
"psubb %%mm0, %%mm5 \n\t" // q | |||
"paddsb %%mm6, %%mm5 \n\t" // fix bad rounding | |||
"pcmpgtb %%mm5, %%mm7 \n\t" // SIGN(q) | |||
"pxor %%mm7, %%mm5 \n\t" | |||
PMINUB(%%mm5, %%mm4, %%mm3) // MIN(|q|, 5d/64) | |||
"pxor %%mm1, %%mm7 \n\t" // SIGN(d*q) | |||
"pand %%mm7, %%mm4 \n\t" | |||
"movq (%%eax, %1, 2), %%mm0 \n\t" | |||
"movq (%0, %1, 4), %%mm2 \n\t" | |||
"pxor %%mm1, %%mm0 \n\t" | |||
"pxor %%mm1, %%mm2 \n\t" | |||
"paddb %%mm4, %%mm0 \n\t" | |||
"psubb %%mm4, %%mm2 \n\t" | |||
"pxor %%mm1, %%mm0 \n\t" | |||
"pxor %%mm1, %%mm2 \n\t" | |||
"movq %%mm0, (%%eax, %1, 2) \n\t" | |||
"movq %%mm2, (%0, %1, 4) \n\t" | |||
: | |||
: "r" (src), "r" (stride) | |||
: "%eax", "%ebx" | |||
); | |||
/* | |||
{ | |||
int x; | |||
src-= stride; | |||
for(x=0; x<BLOCK_SIZE; x++) | |||
{ | |||
const int middleEnergy= 5*(src[l5] - src[l4]) + 2*(src[l3] - src[l6]); | |||
if(ABS(middleEnergy)< 8*QP) | |||
{ | |||
const int q=(src[l4] - src[l5])/2; | |||
const int leftEnergy= 5*(src[l3] - src[l2]) + 2*(src[l1] - src[l4]); | |||
const int rightEnergy= 5*(src[l7] - src[l6]) + 2*(src[l5] - src[l8]); | |||
int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) ); | |||
d= MAX(d, 0); | |||
d= (5*d + 32) >> 6; | |||
d*= SIGN(-middleEnergy); | |||
if(q>0) | |||
{ | |||
d= d<0 ? 0 : d; | |||
d= d>q ? q : d; | |||
} | |||
else | |||
{ | |||
d= d>0 ? 0 : d; | |||
d= d<q ? q : d; | |||
} | |||
src[l4]-= d; | |||
src[l5]+= d; | |||
} | |||
src++; | |||
} | |||
src-=8; | |||
for(x=0; x<8; x++) | |||
{ | |||
int y; | |||
for(y=4; y<6; y++) | |||
{ | |||
int d= src[x+y*stride] - tmp[x+(y-4)*8]; | |||
int ad= ABS(d); | |||
static int max=0; | |||
static int sum=0; | |||
static int num=0; | |||
static int bias=0; | |||
if(max<ad) max=ad; | |||
sum+= ad>3 ? 1 : 0; | |||
if(ad>3) | |||
{ | |||
src[0] = src[7] = src[stride*7] = src[(stride+1)*7]=255; | |||
} | |||
if(y==4) bias+=d; | |||
num++; | |||
if(num%1000000 == 0) | |||
{ | |||
printf(" %d %d %d %d\n", num, sum, max, bias); | |||
} | |||
} | |||
} | |||
} | |||
*/ | |||
#elif defined (HAVE_MMX) | |||
src+= stride*4; | |||
//FIXME try pmul for *5 stuff | |||
// src[0]=0; | |||
asm volatile( | |||
"pxor %%mm7, %%mm7 \n\t" | |||
"leal (%0, %1), %%eax \n\t" | |||
@@ -3961,7 +4249,17 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri | |||
uint8_t *dstBlock= &(dst[y*dstStride]); | |||
memcpy(dstBlock, tempDst + dstStride, dstStride*(height-y) ); | |||
} | |||
} | |||
/* | |||
for(x=0; x<width; x+=32) | |||
{ | |||
int i; | |||
i+= + dstBlock[x + 7*dstStride] + dstBlock[x + 8*dstStride] | |||
+ dstBlock[x + 9*dstStride] + dstBlock[x +10*dstStride] | |||
+ dstBlock[x +11*dstStride] + dstBlock[x +12*dstStride] | |||
+ dstBlock[x +13*dstStride] + dstBlock[x +14*dstStride] | |||
+ dstBlock[x +15*dstStride]; | |||
} | |||
*/ } | |||
#ifdef HAVE_3DNOW | |||
asm volatile("femms"); | |||
#elif defined (HAVE_MMX) | |||
@@ -3977,4 +4275,31 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri | |||
(int)(sumTime/1000), (int)((sumTime-memcpyTime-vertTime-horizTime)/1000) | |||
, black, white); | |||
#endif | |||
#ifdef DEBUG_BRIGHTNESS | |||
if(!isColor) | |||
{ | |||
int max=1; | |||
int i; | |||
for(i=0; i<256; i++) | |||
if(yHistogram[i] > max) max=yHistogram[i]; | |||
for(i=1; i<256; i++) | |||
{ | |||
int x; | |||
int start=yHistogram[i-1]/(max/256+1); | |||
int end=yHistogram[i]/(max/256+1); | |||
int inc= end > start ? 1 : -1; | |||
for(x=start; x!=end+inc; x+=inc) | |||
dst[ i*dstStride + x]+=128; | |||
} | |||
for(i=0; i<100; i+=2) | |||
{ | |||
dst[ (white)*dstStride + i]+=128; | |||
dst[ (black)*dstStride + i]+=128; | |||
} | |||
} | |||
#endif | |||
} |
@@ -21,11 +21,11 @@ | |||
isVertDC Ec Ec | |||
isVertMinMaxOk Ec Ec | |||
doVertLowPass E e e | |||
doVertDefFilter Ec Ec Ec | |||
doVertDefFilter Ec Ec e e | |||
isHorizDC Ec Ec | |||
isHorizMinMaxOk a E | |||
doHorizLowPass E e e | |||
doHorizDefFilter Ec Ec Ec | |||
doHorizDefFilter Ec Ec e e | |||
deRing E e e* | |||
Vertical RKAlgo1 E a a | |||
Horizontal RKAlgo1 a a | |||
@@ -63,8 +63,6 @@ optimize c versions | |||
try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks | |||
smart blur | |||
... | |||
Notes: | |||
*/ | |||
//Changelog: use the CVS log | |||
@@ -80,6 +78,7 @@ Notes: | |||
//#undef HAVE_MMX2 | |||
//#define HAVE_3DNOW | |||
//#undef HAVE_MMX | |||
//#define DEBUG_BRIGHTNESS | |||
#include "postprocess.h" | |||
#define MIN(a,b) ((a) > (b) ? (b) : (a)) | |||
@@ -1067,10 +1066,299 @@ HX1b((%0, %1, 4),(%%ebx),(%%ebx, %1),(%%ebx, %1, 2)) | |||
static inline void doVertDefFilter(uint8_t src[], int stride, int QP) | |||
{ | |||
#ifdef HAVE_MMX | |||
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | |||
/* | |||
uint8_t tmp[16]; | |||
const int l1= stride; | |||
const int l2= stride + l1; | |||
const int l3= stride + l2; | |||
const int l4= (int)tmp - (int)src - stride*3; | |||
const int l5= (int)tmp - (int)src - stride*3 + 8; | |||
const int l6= stride*3 + l3; | |||
const int l7= stride + l6; | |||
const int l8= stride + l7; | |||
memcpy(tmp, src+stride*7, 8); | |||
memcpy(tmp+8, src+stride*8, 8); | |||
*/ | |||
src+= stride*4; | |||
asm volatile( | |||
#if 0 //sligtly more accurate and slightly slower | |||
"pxor %%mm7, %%mm7 \n\t" // 0 | |||
"leal (%0, %1), %%eax \n\t" | |||
"leal (%%eax, %1, 4), %%ebx \n\t" | |||
// 0 1 2 3 4 5 6 7 | |||
// %0 %0+%1 %0+2%1 eax+2%1 %0+4%1 eax+4%1 ebx+%1 ebx+2%1 | |||
// %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 | |||
"movq (%0, %1, 2), %%mm0 \n\t" // l2 | |||
"movq (%0), %%mm1 \n\t" // l0 | |||
"movq %%mm0, %%mm2 \n\t" // l2 | |||
PAVGB(%%mm7, %%mm0) // ~l2/2 | |||
PAVGB(%%mm1, %%mm0) // ~(l2 + 2l0)/4 | |||
PAVGB(%%mm2, %%mm0) // ~(5l2 + 2l0)/8 | |||
"movq (%%eax), %%mm1 \n\t" // l1 | |||
"movq (%%eax, %1, 2), %%mm3 \n\t" // l3 | |||
"movq %%mm1, %%mm4 \n\t" // l1 | |||
PAVGB(%%mm7, %%mm1) // ~l1/2 | |||
PAVGB(%%mm3, %%mm1) // ~(l1 + 2l3)/4 | |||
PAVGB(%%mm4, %%mm1) // ~(5l1 + 2l3)/8 | |||
"movq %%mm0, %%mm4 \n\t" // ~(5l2 + 2l0)/8 | |||
"psubusb %%mm1, %%mm0 \n\t" | |||
"psubusb %%mm4, %%mm1 \n\t" | |||
"por %%mm0, %%mm1 \n\t" // ~|2l0 - 5l1 + 5l2 - 2l3|/8 | |||
// mm1= |lenergy|, mm2= l2, mm3= l3, mm7=0 | |||
"movq (%0, %1, 4), %%mm0 \n\t" // l4 | |||
"movq %%mm0, %%mm4 \n\t" // l4 | |||
PAVGB(%%mm7, %%mm0) // ~l4/2 | |||
PAVGB(%%mm2, %%mm0) // ~(l4 + 2l2)/4 | |||
PAVGB(%%mm4, %%mm0) // ~(5l4 + 2l2)/8 | |||
"movq (%%ebx), %%mm2 \n\t" // l5 | |||
"movq %%mm3, %%mm5 \n\t" // l3 | |||
PAVGB(%%mm7, %%mm3) // ~l3/2 | |||
PAVGB(%%mm2, %%mm3) // ~(l3 + 2l5)/4 | |||
PAVGB(%%mm5, %%mm3) // ~(5l3 + 2l5)/8 | |||
"movq %%mm0, %%mm6 \n\t" // ~(5l4 + 2l2)/8 | |||
"psubusb %%mm3, %%mm0 \n\t" | |||
"psubusb %%mm6, %%mm3 \n\t" | |||
"por %%mm0, %%mm3 \n\t" // ~|2l2 - 5l3 + 5l4 - 2l5|/8 | |||
"pcmpeqb %%mm7, %%mm0 \n\t" // SIGN(2l2 - 5l3 + 5l4 - 2l5) | |||
// mm0= SIGN(menergy), mm1= |lenergy|, mm2= l5, mm3= |menergy|, mm4=l4, mm5= l3, mm7=0 | |||
"movq (%%ebx, %1), %%mm6 \n\t" // l6 | |||
"movq %%mm6, %%mm5 \n\t" // l6 | |||
PAVGB(%%mm7, %%mm6) // ~l6/2 | |||
PAVGB(%%mm4, %%mm6) // ~(l6 + 2l4)/4 | |||
PAVGB(%%mm5, %%mm6) // ~(5l6 + 2l4)/8 | |||
"movq (%%ebx, %1, 2), %%mm5 \n\t" // l7 | |||
"movq %%mm2, %%mm4 \n\t" // l5 | |||
PAVGB(%%mm7, %%mm2) // ~l5/2 | |||
PAVGB(%%mm5, %%mm2) // ~(l5 + 2l7)/4 | |||
PAVGB(%%mm4, %%mm2) // ~(5l5 + 2l7)/8 | |||
"movq %%mm6, %%mm4 \n\t" // ~(5l6 + 2l4)/8 | |||
"psubusb %%mm2, %%mm6 \n\t" | |||
"psubusb %%mm4, %%mm2 \n\t" | |||
"por %%mm6, %%mm2 \n\t" // ~|2l4 - 5l5 + 5l6 - 2l7|/8 | |||
// mm0= SIGN(menergy), mm1= |lenergy|/8, mm2= |renergy|/8, mm3= |menergy|/8, mm7=0 | |||
PMINUB(%%mm2, %%mm1, %%mm4) // MIN(|lenergy|,|renergy|)/8 | |||
"movq pQPb, %%mm4 \n\t" // QP //FIXME QP+1 ? | |||
"paddusb b01, %%mm4 \n\t" | |||
"pcmpgtb %%mm3, %%mm4 \n\t" // |menergy|/8 < QP | |||
"psubusb %%mm1, %%mm3 \n\t" // d=|menergy|/8-MIN(|lenergy|,|renergy|)/8 | |||
"pand %%mm4, %%mm3 \n\t" | |||
"movq %%mm3, %%mm1 \n\t" | |||
// "psubusb b01, %%mm3 \n\t" | |||
PAVGB(%%mm7, %%mm3) | |||
PAVGB(%%mm7, %%mm3) | |||
"paddusb %%mm1, %%mm3 \n\t" | |||
// "paddusb b01, %%mm3 \n\t" | |||
"movq (%%eax, %1, 2), %%mm6 \n\t" //l3 | |||
"movq (%0, %1, 4), %%mm5 \n\t" //l4 | |||
"movq (%0, %1, 4), %%mm4 \n\t" //l4 | |||
"psubusb %%mm6, %%mm5 \n\t" | |||
"psubusb %%mm4, %%mm6 \n\t" | |||
"por %%mm6, %%mm5 \n\t" // |l3-l4| | |||
"pcmpeqb %%mm7, %%mm6 \n\t" // SIGN(l3-l4) | |||
"pxor %%mm6, %%mm0 \n\t" | |||
"pand %%mm0, %%mm3 \n\t" | |||
PMINUB(%%mm5, %%mm3, %%mm0) | |||
"psubusb b01, %%mm3 \n\t" | |||
PAVGB(%%mm7, %%mm3) | |||
"movq (%%eax, %1, 2), %%mm0 \n\t" | |||
"movq (%0, %1, 4), %%mm2 \n\t" | |||
"pxor %%mm6, %%mm0 \n\t" | |||
"pxor %%mm6, %%mm2 \n\t" | |||
"psubb %%mm3, %%mm0 \n\t" | |||
"paddb %%mm3, %%mm2 \n\t" | |||
"pxor %%mm6, %%mm0 \n\t" | |||
"pxor %%mm6, %%mm2 \n\t" | |||
"movq %%mm0, (%%eax, %1, 2) \n\t" | |||
"movq %%mm2, (%0, %1, 4) \n\t" | |||
#endif | |||
"leal (%0, %1), %%eax \n\t" | |||
"pcmpeqb %%mm6, %%mm6 \n\t" // -1 | |||
// 0 1 2 3 4 5 6 7 | |||
// %0 %0+%1 %0+2%1 eax+2%1 %0+4%1 eax+4%1 ebx+%1 ebx+2%1 | |||
// %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 | |||
"movq (%%eax, %1, 2), %%mm1 \n\t" // l3 | |||
"movq (%0, %1, 4), %%mm0 \n\t" // l4 | |||
"pxor %%mm6, %%mm1 \n\t" // -l3-1 | |||
PAVGB(%%mm1, %%mm0) // -q+128 = (l4-l3+256)/2 | |||
// mm1=-l3-1, mm0=128-q | |||
"movq (%%eax, %1, 4), %%mm2 \n\t" // l5 | |||
"movq (%%eax, %1), %%mm3 \n\t" // l2 | |||
"pxor %%mm6, %%mm2 \n\t" // -l5-1 | |||
"movq %%mm2, %%mm5 \n\t" // -l5-1 | |||
"movq b80, %%mm4 \n\t" // 128 | |||
"leal (%%eax, %1, 4), %%ebx \n\t" | |||
PAVGB(%%mm3, %%mm2) // (l2-l5+256)/2 | |||
PAVGB(%%mm0, %%mm4) // ~(l4-l3)/4 + 128 | |||
PAVGB(%%mm2, %%mm4) // ~(l2-l5)/4 +(l4-l3)/8 + 128 | |||
PAVGB(%%mm0, %%mm4) // ~(l2-l5)/8 +5(l4-l3)/16 + 128 | |||
// mm1=-l3-1, mm0=128-q, mm3=l2, mm4=menergy/16 + 128, mm5= -l5-1 | |||
"movq (%%eax), %%mm2 \n\t" // l1 | |||
"pxor %%mm6, %%mm2 \n\t" // -l1-1 | |||
PAVGB(%%mm3, %%mm2) // (l2-l1+256)/2 | |||
PAVGB((%0), %%mm1) // (l0-l3+256)/2 | |||
"movq b80, %%mm3 \n\t" // 128 | |||
PAVGB(%%mm2, %%mm3) // ~(l2-l1)/4 + 128 | |||
PAVGB(%%mm1, %%mm3) // ~(l0-l3)/4 +(l2-l1)/8 + 128 | |||
PAVGB(%%mm2, %%mm3) // ~(l0-l3)/8 +5(l2-l1)/16 + 128 | |||
// mm0=128-q, mm3=lenergy/16 + 128, mm4= menergy/16 + 128, mm5= -l5-1 | |||
PAVGB((%%ebx, %1), %%mm5) // (l6-l5+256)/2 | |||
"movq (%%ebx, %1, 2), %%mm1 \n\t" // l7 | |||
"pxor %%mm6, %%mm1 \n\t" // -l7-1 | |||
PAVGB((%0, %1, 4), %%mm1) // (l4-l7+256)/2 | |||
"movq b80, %%mm2 \n\t" // 128 | |||
PAVGB(%%mm5, %%mm2) // ~(l6-l5)/4 + 128 | |||
PAVGB(%%mm1, %%mm2) // ~(l4-l7)/4 +(l6-l5)/8 + 128 | |||
PAVGB(%%mm5, %%mm2) // ~(l4-l7)/8 +5(l6-l5)/16 + 128 | |||
// mm0=128-q, mm2=renergy/16 + 128, mm3=lenergy/16 + 128, mm4= menergy/16 + 128 | |||
"movq b00, %%mm1 \n\t" // 0 | |||
"movq b00, %%mm5 \n\t" // 0 | |||
"psubb %%mm2, %%mm1 \n\t" // 128 - renergy/16 | |||
"psubb %%mm3, %%mm5 \n\t" // 128 - lenergy/16 | |||
PMAXUB(%%mm1, %%mm2) // 128 + |renergy/16| | |||
PMAXUB(%%mm5, %%mm3) // 128 + |lenergy/16| | |||
PMINUB(%%mm2, %%mm3, %%mm1) // 128 + MIN(|lenergy|,|renergy|)/16 | |||
// mm0=128-q, mm3=128 + MIN(|lenergy|,|renergy|)/16, mm4= menergy/16 + 128 | |||
"movq b00, %%mm7 \n\t" // 0 | |||
"movq pQPb, %%mm2 \n\t" // QP | |||
PAVGB(%%mm6, %%mm2) // 128 + QP/2 | |||
"psubb %%mm6, %%mm2 \n\t" | |||
"movq %%mm4, %%mm1 \n\t" | |||
"pcmpgtb %%mm7, %%mm1 \n\t" // SIGN(menergy) | |||
"pxor %%mm1, %%mm4 \n\t" | |||
"psubb %%mm1, %%mm4 \n\t" // 128 + |menergy|/16 | |||
"pcmpgtb %%mm4, %%mm2 \n\t" // |menergy|/16 < QP/2 | |||
"psubusb %%mm3, %%mm4 \n\t" //d=|menergy|/16 - MIN(|lenergy|,|renergy|)/16 | |||
// mm0=128-q, mm1= SIGN(menergy), mm2= |menergy|/16 < QP/2, mm4= d/16 | |||
"movq %%mm4, %%mm3 \n\t" // d | |||
"psubusb b01, %%mm4 \n\t" | |||
PAVGB(%%mm7, %%mm4) // d/32 | |||
PAVGB(%%mm7, %%mm4) // (d + 32)/64 | |||
"paddb %%mm3, %%mm4 \n\t" // 5d/64 | |||
"pand %%mm2, %%mm4 \n\t" | |||
"movq b80, %%mm5 \n\t" // 128 | |||
"psubb %%mm0, %%mm5 \n\t" // q | |||
"paddsb %%mm6, %%mm5 \n\t" // fix bad rounding | |||
"pcmpgtb %%mm5, %%mm7 \n\t" // SIGN(q) | |||
"pxor %%mm7, %%mm5 \n\t" | |||
PMINUB(%%mm5, %%mm4, %%mm3) // MIN(|q|, 5d/64) | |||
"pxor %%mm1, %%mm7 \n\t" // SIGN(d*q) | |||
"pand %%mm7, %%mm4 \n\t" | |||
"movq (%%eax, %1, 2), %%mm0 \n\t" | |||
"movq (%0, %1, 4), %%mm2 \n\t" | |||
"pxor %%mm1, %%mm0 \n\t" | |||
"pxor %%mm1, %%mm2 \n\t" | |||
"paddb %%mm4, %%mm0 \n\t" | |||
"psubb %%mm4, %%mm2 \n\t" | |||
"pxor %%mm1, %%mm0 \n\t" | |||
"pxor %%mm1, %%mm2 \n\t" | |||
"movq %%mm0, (%%eax, %1, 2) \n\t" | |||
"movq %%mm2, (%0, %1, 4) \n\t" | |||
: | |||
: "r" (src), "r" (stride) | |||
: "%eax", "%ebx" | |||
); | |||
/* | |||
{ | |||
int x; | |||
src-= stride; | |||
for(x=0; x<BLOCK_SIZE; x++) | |||
{ | |||
const int middleEnergy= 5*(src[l5] - src[l4]) + 2*(src[l3] - src[l6]); | |||
if(ABS(middleEnergy)< 8*QP) | |||
{ | |||
const int q=(src[l4] - src[l5])/2; | |||
const int leftEnergy= 5*(src[l3] - src[l2]) + 2*(src[l1] - src[l4]); | |||
const int rightEnergy= 5*(src[l7] - src[l6]) + 2*(src[l5] - src[l8]); | |||
int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) ); | |||
d= MAX(d, 0); | |||
d= (5*d + 32) >> 6; | |||
d*= SIGN(-middleEnergy); | |||
if(q>0) | |||
{ | |||
d= d<0 ? 0 : d; | |||
d= d>q ? q : d; | |||
} | |||
else | |||
{ | |||
d= d>0 ? 0 : d; | |||
d= d<q ? q : d; | |||
} | |||
src[l4]-= d; | |||
src[l5]+= d; | |||
} | |||
src++; | |||
} | |||
src-=8; | |||
for(x=0; x<8; x++) | |||
{ | |||
int y; | |||
for(y=4; y<6; y++) | |||
{ | |||
int d= src[x+y*stride] - tmp[x+(y-4)*8]; | |||
int ad= ABS(d); | |||
static int max=0; | |||
static int sum=0; | |||
static int num=0; | |||
static int bias=0; | |||
if(max<ad) max=ad; | |||
sum+= ad>3 ? 1 : 0; | |||
if(ad>3) | |||
{ | |||
src[0] = src[7] = src[stride*7] = src[(stride+1)*7]=255; | |||
} | |||
if(y==4) bias+=d; | |||
num++; | |||
if(num%1000000 == 0) | |||
{ | |||
printf(" %d %d %d %d\n", num, sum, max, bias); | |||
} | |||
} | |||
} | |||
} | |||
*/ | |||
#elif defined (HAVE_MMX) | |||
src+= stride*4; | |||
//FIXME try pmul for *5 stuff | |||
// src[0]=0; | |||
asm volatile( | |||
"pxor %%mm7, %%mm7 \n\t" | |||
"leal (%0, %1), %%eax \n\t" | |||
@@ -3961,7 +4249,17 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri | |||
uint8_t *dstBlock= &(dst[y*dstStride]); | |||
memcpy(dstBlock, tempDst + dstStride, dstStride*(height-y) ); | |||
} | |||
} | |||
/* | |||
for(x=0; x<width; x+=32) | |||
{ | |||
int i; | |||
i+= + dstBlock[x + 7*dstStride] + dstBlock[x + 8*dstStride] | |||
+ dstBlock[x + 9*dstStride] + dstBlock[x +10*dstStride] | |||
+ dstBlock[x +11*dstStride] + dstBlock[x +12*dstStride] | |||
+ dstBlock[x +13*dstStride] + dstBlock[x +14*dstStride] | |||
+ dstBlock[x +15*dstStride]; | |||
} | |||
*/ } | |||
#ifdef HAVE_3DNOW | |||
asm volatile("femms"); | |||
#elif defined (HAVE_MMX) | |||
@@ -3977,4 +4275,31 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri | |||
(int)(sumTime/1000), (int)((sumTime-memcpyTime-vertTime-horizTime)/1000) | |||
, black, white); | |||
#endif | |||
#ifdef DEBUG_BRIGHTNESS | |||
if(!isColor) | |||
{ | |||
int max=1; | |||
int i; | |||
for(i=0; i<256; i++) | |||
if(yHistogram[i] > max) max=yHistogram[i]; | |||
for(i=1; i<256; i++) | |||
{ | |||
int x; | |||
int start=yHistogram[i-1]/(max/256+1); | |||
int end=yHistogram[i]/(max/256+1); | |||
int inc= end > start ? 1 : -1; | |||
for(x=start; x!=end+inc; x+=inc) | |||
dst[ i*dstStride + x]+=128; | |||
} | |||
for(i=0; i<100; i+=2) | |||
{ | |||
dst[ (white)*dstStride + i]+=128; | |||
dst[ (black)*dstStride + i]+=128; | |||
} | |||
} | |||
#endif | |||
} |