Originally committed as revision 2430 to svn://svn.ffmpeg.org/ffmpeg/trunk (tags/v0.5)
| @@ -2884,11 +2884,11 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx) | |||
| #ifdef CONFIG_ENCODERS | |||
| if(avctx->dct_algo==FF_DCT_FASTINT) { | |||
| c->fdct = fdct_ifast; | |||
| c->fdct248 = ff_fdct248_islow; // FIXME: need an optimized version | |||
| c->fdct248 = fdct_ifast248; | |||
| } | |||
| else if(avctx->dct_algo==FF_DCT_FAAN) { | |||
| c->fdct = ff_faandct; | |||
| c->fdct248 = ff_fdct248_islow; // FIXME: need an optimized version | |||
| c->fdct248 = ff_faandct248; | |||
| } | |||
| else { | |||
| c->fdct = ff_jpeg_fdct_islow; //slow/accurate/default | |||
| @@ -36,6 +36,7 @@ | |||
| typedef short DCTELEM; /* element type of the blocks handed to the DCT routines declared below */ | |||
| void fdct_ifast (DCTELEM *data); | |||
| void fdct_ifast248 (DCTELEM *data); /* forward 2-4-8 DCT; data is read and overwritten in place */ | |||
| void ff_jpeg_fdct_islow (DCTELEM *data); | |||
| void ff_fdct248_islow (DCTELEM *data); | |||
| @@ -160,3 +160,89 @@ void ff_faandct(DCTELEM * data) | |||
| data[8*7 + i]= lrintf(SCALE(8*7 + i) * (z11 - z4)); | |||
| } | |||
| } | |||
| void ff_faandct248(DCTELEM * data) /* 2-4-8 counterpart of ff_faandct computed in FLOAT arithmetic; the 64 entries of data are overwritten with lrintf-rounded, SCALE-weighted results */ | |||
| { | |||
| FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; | |||
| FLOAT tmp10, tmp11, tmp12, tmp13; | |||
| FLOAT z1, z2, z3, z4, z5, z11, z13; | |||
| FLOAT temp[64]; /* pass-1 output, kept unrounded and unscaled for pass 2 */ | |||
| int i; | |||
| emms_c(); /* NOTE(review): defined outside this view; presumably resets MMX state before the FLOAT math - confirm */ | |||
| for (i=0; i<8*8; i+=8) { /* pass 1: one group of 8 consecutive samples (a row) per iteration, data -> temp */ | |||
| tmp0= data[0 + i] + data[7 + i]; /* butterfly: sums and differences of mirrored samples k and 7-k */ | |||
| tmp7= data[0 + i] - data[7 + i]; | |||
| tmp1= data[1 + i] + data[6 + i]; | |||
| tmp6= data[1 + i] - data[6 + i]; | |||
| tmp2= data[2 + i] + data[5 + i]; | |||
| tmp5= data[2 + i] - data[5 + i]; | |||
| tmp3= data[3 + i] + data[4 + i]; | |||
| tmp4= data[3 + i] - data[4 + i]; | |||
| tmp10= tmp0 + tmp3; /* even part: built from the sums only, fills temp[0,4,2,6] */ | |||
| tmp13= tmp0 - tmp3; | |||
| tmp11= tmp1 + tmp2; | |||
| tmp12= tmp1 - tmp2; | |||
| temp[0 + i]= tmp10 + tmp11; | |||
| temp[4 + i]= tmp10 - tmp11; | |||
| z1= (tmp12 + tmp13)*A1; | |||
| temp[2 + i]= tmp13 + z1; | |||
| temp[6 + i]= tmp13 - z1; | |||
| tmp10= tmp4 + tmp5; /* odd part: built from the differences only, fills temp[5,3,1,7]; tmp10..tmp12 are reused */ | |||
| tmp11= tmp5 + tmp6; | |||
| tmp12= tmp6 + tmp7; | |||
| z5= (tmp10 - tmp12) * A5; | |||
| z2= tmp10*A2 + z5; | |||
| z4= tmp12*A4 + z5; | |||
| z3= tmp11*A1; | |||
| z11= tmp7 + z3; | |||
| z13= tmp7 - z3; | |||
| temp[5 + i]= z13 + z2; | |||
| temp[3 + i]= z13 - z2; | |||
| temp[1 + i]= z11 + z4; | |||
| temp[7 + i]= z11 - z4; | |||
| } | |||
| for (i=0; i<8; i++) { /* pass 2: one column per iteration, temp -> data */ | |||
| tmp0 = temp[8*0 + i] + temp[8*1 + i]; /* tmp0..tmp3: sums of adjacent row pairs (0,1) (2,3) (4,5) (6,7) */ | |||
| tmp1 = temp[8*2 + i] + temp[8*3 + i]; | |||
| tmp2 = temp[8*4 + i] + temp[8*5 + i]; | |||
| tmp3 = temp[8*6 + i] + temp[8*7 + i]; | |||
| tmp4 = temp[8*0 + i] - temp[8*1 + i]; /* tmp4..tmp7: differences of the same row pairs */ | |||
| tmp5 = temp[8*2 + i] - temp[8*3 + i]; | |||
| tmp6 = temp[8*4 + i] - temp[8*5 + i]; | |||
| tmp7 = temp[8*6 + i] - temp[8*7 + i]; | |||
| tmp10 = tmp0 + tmp3; /* 4-input butterfly on the pair sums, stored to output rows 0, 4, 2, 6 */ | |||
| tmp11 = tmp1 + tmp2; | |||
| tmp12 = tmp1 - tmp2; | |||
| tmp13 = tmp0 - tmp3; | |||
| data[8*0 + i] = lrintf(SCALE(8*0 + i) * (tmp10 + tmp11)); /* each output is weighted by SCALE(index) and rounded with lrintf */ | |||
| data[8*4 + i] = lrintf(SCALE(8*4 + i) * (tmp10 - tmp11)); | |||
| z1 = (tmp12 + tmp13)* A1; | |||
| data[8*2 + i] = lrintf(SCALE(8*2 + i) * (tmp13 + z1)); | |||
| data[8*6 + i] = lrintf(SCALE(8*6 + i) * (tmp13 - z1)); | |||
| tmp10 = tmp4 + tmp7; /* same butterfly on the pair differences, stored to output rows 1, 5, 3, 7 */ | |||
| tmp11 = tmp5 + tmp6; | |||
| tmp12 = tmp5 - tmp6; | |||
| tmp13 = tmp4 - tmp7; | |||
| data[8*1 + i] = lrintf(SCALE(8*0 + i) * (tmp10 + tmp11)); /* note: SCALE is indexed with rows 0, 4, 2, 6 here, the same indices as the sum half above */ | |||
| data[8*5 + i] = lrintf(SCALE(8*4 + i) * (tmp10 - tmp11)); | |||
| z1 = (tmp12 + tmp13)* A1; | |||
| data[8*3 + i] = lrintf(SCALE(8*2 + i) * (tmp13 + z1)); | |||
| data[8*7 + i] = lrintf(SCALE(8*6 + i) * (tmp13 - z1)); | |||
| } | |||
| } | |||
| @@ -28,3 +28,4 @@ | |||
| #define FAAN_POSTSCALE | |||
| void ff_faandct(DCTELEM * data); | |||
| void ff_faandct248(DCTELEM * data); /* 2-4-8 counterpart of ff_faandct; data is overwritten with the lrintf-rounded results */ | |||
| @@ -228,6 +228,113 @@ fdct_ifast (DCTELEM * data) | |||
| } | |||
| } | |||
| /* | |||
| * Perform the forward 2-4-8 DCT on one block of samples. | |||
| * | |||
| * data is read and overwritten in place. Pass 1 transforms each of the | |||
| * DCTSIZE rows (8 consecutive elements) with an 8-input butterfly network. | |||
| * Pass 2 walks the columns: adjacent row pairs are combined into sums and | |||
| * differences, and each group of four goes through a 4-input butterfly. | |||
| * The sums land in output rows 0, 2, 4, 6 and the differences in rows | |||
| * 1, 3, 5, 7. All intermediates are held in DCTELEM variables. | |||
| */ | |||
| GLOBAL(void) | |||
| fdct_ifast248 (DCTELEM * data) | |||
| { | |||
| DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; | |||
| DCTELEM tmp10, tmp11, tmp12, tmp13; | |||
| DCTELEM z1, z2, z3, z4, z5, z11, z13; | |||
| DCTELEM *dataptr; | |||
| int ctr; | |||
| SHIFT_TEMPS /* macro defined outside this view; NOTE(review): presumably declares scratch state for MULTIPLY's shift - confirm */ | |||
| /* Pass 1: process rows. */ | |||
| dataptr = data; | |||
| for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { | |||
| tmp0 = dataptr[0] + dataptr[7]; /* sums and differences of mirrored samples k and 7-k; all eight are read before the row is overwritten */ | |||
| tmp7 = dataptr[0] - dataptr[7]; | |||
| tmp1 = dataptr[1] + dataptr[6]; | |||
| tmp6 = dataptr[1] - dataptr[6]; | |||
| tmp2 = dataptr[2] + dataptr[5]; | |||
| tmp5 = dataptr[2] - dataptr[5]; | |||
| tmp3 = dataptr[3] + dataptr[4]; | |||
| tmp4 = dataptr[3] - dataptr[4]; | |||
| /* Even part: uses the sums only, writes elements 0, 4, 2, 6 */ | |||
| tmp10 = tmp0 + tmp3; /* phase 2 */ | |||
| tmp13 = tmp0 - tmp3; | |||
| tmp11 = tmp1 + tmp2; | |||
| tmp12 = tmp1 - tmp2; | |||
| dataptr[0] = tmp10 + tmp11; /* phase 3 */ | |||
| dataptr[4] = tmp10 - tmp11; | |||
| z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */ | |||
| dataptr[2] = tmp13 + z1; /* phase 5 */ | |||
| dataptr[6] = tmp13 - z1; | |||
| /* Odd part: uses the differences only, writes elements 5, 3, 1, 7 */ | |||
| tmp10 = tmp4 + tmp5; /* phase 2 */ | |||
| tmp11 = tmp5 + tmp6; | |||
| tmp12 = tmp6 + tmp7; | |||
| /* The rotator is modified from fig 4-8 to avoid extra negations. */ | |||
| z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */ | |||
| z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */ | |||
| z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */ | |||
| z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */ | |||
| z11 = tmp7 + z3; /* phase 5 */ | |||
| z13 = tmp7 - z3; | |||
| dataptr[5] = z13 + z2; /* phase 6 */ | |||
| dataptr[3] = z13 - z2; | |||
| dataptr[1] = z11 + z4; | |||
| dataptr[7] = z11 - z4; | |||
| dataptr += DCTSIZE; /* advance pointer to next row */ | |||
| } | |||
| /* Pass 2: process columns. */ | |||
| dataptr = data; | |||
| for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { | |||
| tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*1]; /* tmp0..tmp3: sums of adjacent row pairs (0,1) (2,3) (4,5) (6,7) */ | |||
| tmp1 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3]; | |||
| tmp2 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5]; | |||
| tmp3 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7]; | |||
| tmp4 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*1]; /* tmp4..tmp7: differences of the same pairs; every input is loaded before any store below */ | |||
| tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3]; | |||
| tmp6 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5]; | |||
| tmp7 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7]; | |||
| /* Even part: 4-input butterfly on the pair sums, writes rows 0, 4, 2, 6 */ | |||
| tmp10 = tmp0 + tmp3; | |||
| tmp11 = tmp1 + tmp2; | |||
| tmp12 = tmp1 - tmp2; | |||
| tmp13 = tmp0 - tmp3; | |||
| dataptr[DCTSIZE*0] = tmp10 + tmp11; | |||
| dataptr[DCTSIZE*4] = tmp10 - tmp11; | |||
| z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); | |||
| dataptr[DCTSIZE*2] = tmp13 + z1; | |||
| dataptr[DCTSIZE*6] = tmp13 - z1; | |||
| tmp10 = tmp4 + tmp7; /* same butterfly on the pair differences, writes rows 1, 5, 3, 7 */ | |||
| tmp11 = tmp5 + tmp6; | |||
| tmp12 = tmp5 - tmp6; | |||
| tmp13 = tmp4 - tmp7; | |||
| dataptr[DCTSIZE*1] = tmp10 + tmp11; | |||
| dataptr[DCTSIZE*5] = tmp10 - tmp11; | |||
| z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); | |||
| dataptr[DCTSIZE*3] = tmp13 + z1; | |||
| dataptr[DCTSIZE*7] = tmp13 - z1; | |||
| dataptr++; /* advance pointer to next column */ | |||
| } | |||
| } | |||
| #undef GLOBAL | |||
| #undef CONST_BITS | |||