Fixing some thread-unsafe code. Originally committed as revision 980 to svn://svn.ffmpeg.org/ffmpeg/trunk (tags/v0.5)
| @@ -117,6 +117,7 @@ static int do_deinterlace = 0; | |||
| static int workaround_bugs = 0; | |||
| static int error_resilience = 0; | |||
| static int dct_algo = 0; | |||
| static int idct_algo = 0; | |||
| static int use_part = 0; | |||
| static int packet_size = 0; | |||
| @@ -1392,6 +1393,12 @@ void opt_dct_algo(const char *arg) | |||
| dct_algo = atoi(arg); | |||
| } | |||
| void opt_idct_algo(const char *arg) | |||
| { | |||
| idct_algo = atoi(arg); | |||
| } | |||
| void opt_error_resilience(const char *arg) | |||
| { | |||
| error_resilience = atoi(arg); | |||
| @@ -1750,6 +1757,7 @@ void opt_input_file(const char *filename) | |||
| rfps = ic->streams[i]->r_frame_rate; | |||
| enc->workaround_bugs = workaround_bugs; | |||
| enc->error_resilience = error_resilience; | |||
| enc->idct_algo= idct_algo; | |||
| if (enc->frame_rate != rfps) { | |||
| fprintf(stderr,"\nSeems that stream %d comes from film source: %2.2f->%2.2f\n", | |||
| i, (float)enc->frame_rate / FRAME_RATE_BASE, | |||
| @@ -1922,6 +1930,7 @@ void opt_output_file(const char *filename) | |||
| video_enc->i_quant_offset = video_i_qoffset; | |||
| video_enc->b_quant_offset = video_b_qoffset; | |||
| video_enc->dct_algo = dct_algo; | |||
| video_enc->idct_algo = idct_algo; | |||
| if(packet_size){ | |||
| video_enc->rtp_mode= 1; | |||
| video_enc->rtp_payload_size= packet_size; | |||
| @@ -2287,6 +2296,7 @@ const OptionDef options[] = { | |||
| { "me", HAS_ARG | OPT_EXPERT, {(void*)opt_motion_estimation}, "set motion estimation method", | |||
| "method" }, | |||
| { "dct_algo", HAS_ARG | OPT_EXPERT, {(void*)opt_dct_algo}, "set dct algo", "algo" }, | |||
| { "idct_algo", HAS_ARG | OPT_EXPERT, {(void*)opt_idct_algo}, "set idct algo", "algo" }, | |||
| { "er", HAS_ARG | OPT_EXPERT, {(void*)opt_error_resilience}, "set error resilience", "" }, | |||
| { "bf", HAS_ARG | OPT_EXPERT, {(void*)opt_b_frames}, "use 'frames' B frames (only MPEG-4)", "frames" }, | |||
| { "hq", OPT_BOOL | OPT_EXPERT, {(void*)&use_hq}, "activate high quality settings" }, | |||
| @@ -5,8 +5,8 @@ | |||
| #define LIBAVCODEC_VERSION_INT 0x000406 | |||
| #define LIBAVCODEC_VERSION "0.4.6" | |||
| #define LIBAVCODEC_BUILD 4628 | |||
| #define LIBAVCODEC_BUILD_STR "4628" | |||
| #define LIBAVCODEC_BUILD 4629 | |||
| #define LIBAVCODEC_BUILD_STR "4629" | |||
| enum CodecID { | |||
| CODEC_ID_NONE, | |||
| @@ -684,6 +684,21 @@ typedef struct AVCodecContext { | |||
| */ | |||
| int fourcc; | |||
| /** | |||
| * idct algorithm, see FF_IDCT_* below | |||
| * encoding: set by user | |||
| * decoding: set by user | |||
| */ | |||
| int idct_algo; | |||
| #define FF_IDCT_AUTO 0 | |||
| #define FF_IDCT_INT 1 | |||
| #define FF_IDCT_SIMPLE 2 | |||
| #define FF_IDCT_SIMPLEMMX 3 | |||
| #define FF_IDCT_LIBMPEG2MMX 4 | |||
| #define FF_IDCT_PS2 5 | |||
| #define FF_IDCT_MLIB 6 | |||
| #define FF_IDCT_ARM 7 | |||
| //FIXME this should be reordered after kabis API is finished ... | |||
| //TODO kill kabi | |||
| /* | |||
| @@ -932,6 +932,22 @@ static inline int ff_get_fourcc(char *s){ | |||
| return (s[0]) + (s[1]<<8) + (s[2]<<16) + (s[3]<<24); | |||
| } | |||
/*
 * MASK_ABS(mask, level)
 *
 * Replaces level by its absolute value and sets mask to the sign mask of the
 * original level: -1 (all bits set) if level was negative, 0 otherwise.
 *
 * Both arguments must be plain int lvalues; they are evaluated more than
 * once. The expansion already ends with ';' so the macro can be used as a
 * complete statement with or without a trailing semicolon.
 * As with any two's-complement abs, the most negative int has no
 * representable result.
 */
#ifdef ARCH_X86
/* cdq sign-extends EAX (level) into EDX (mask); xor/sub then negate level
   when mask is -1 and leave it unchanged when mask is 0. */
#define MASK_ABS(mask, level)\
            asm volatile(\
                "cdq                    \n\t"\
                "xorl %1, %0            \n\t"\
                "subl %1, %0            \n\t"\
                : "+a" (level), "=&d" (mask)\
            );
#else
/* Build the mask from a comparison instead of right-shifting a negative
   value, whose result is implementation-defined in C. */
#define MASK_ABS(mask, level)\
            mask  = -((level) < 0);\
            level = ((level) ^ (mask)) - (mask);
#endif
| #if __CPU__ >= 686 && !defined(RUNTIME_CPUDETECT) | |||
| #define COPY3_IF_LT(x,y,a,b,c,d)\ | |||
| asm volatile (\ | |||
| @@ -20,11 +20,7 @@ | |||
| */ | |||
| #include "avcodec.h" | |||
| #include "dsputil.h" | |||
| #include "simple_idct.h" | |||
| void (*ff_idct)(DCTELEM *block); | |||
| void (*ff_idct_put)(UINT8 *dest, int line_size, DCTELEM *block); | |||
| void (*ff_idct_add)(UINT8 *dest, int line_size, DCTELEM *block); | |||
| void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size); | |||
| void (*diff_pixels)(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride); | |||
| void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); | |||
| @@ -49,16 +45,11 @@ int ff_bit_exact=0; | |||
| UINT8 cropTbl[256 + 2 * MAX_NEG_CROP]; | |||
| UINT32 squareTbl[512]; | |||
| extern INT16 ff_mpeg1_default_intra_matrix[64]; | |||
| extern INT16 ff_mpeg1_default_non_intra_matrix[64]; | |||
| extern INT16 ff_mpeg4_default_intra_matrix[64]; | |||
| extern INT16 ff_mpeg4_default_non_intra_matrix[64]; | |||
| UINT8 zigzag_direct[64] = { | |||
| 0, 1, 8, 16, 9, 2, 3, 10, | |||
| 17, 24, 32, 25, 18, 11, 4, 5, | |||
| const UINT8 ff_zigzag_direct[64] = { | |||
| 0, 1, 8, 16, 9, 2, 3, 10, | |||
| 17, 24, 32, 25, 18, 11, 4, 5, | |||
| 12, 19, 26, 33, 40, 48, 41, 34, | |||
| 27, 20, 13, 6, 7, 14, 21, 28, | |||
| 27, 20, 13, 6, 7, 14, 21, 28, | |||
| 35, 42, 49, 56, 57, 50, 43, 36, | |||
| 29, 22, 15, 23, 30, 37, 44, 51, | |||
| 58, 59, 52, 45, 38, 31, 39, 46, | |||
| @@ -68,11 +59,8 @@ UINT8 zigzag_direct[64] = { | |||
| /* non-permuted inverse of zigzag_direct, plus 1, for the MMX quantizer */ | |||
| UINT16 __align8 inv_zigzag_direct16[64]; | |||
| /* not permutated zigzag_direct for MMX quantizer */ | |||
| UINT8 zigzag_direct_noperm[64]; | |||
| UINT8 ff_alternate_horizontal_scan[64] = { | |||
| 0, 1, 2, 3, 8, 9, 16, 17, | |||
| const UINT8 ff_alternate_horizontal_scan[64] = { | |||
| 0, 1, 2, 3, 8, 9, 16, 17, | |||
| 10, 11, 4, 5, 6, 7, 15, 14, | |||
| 13, 12, 19, 18, 24, 25, 32, 33, | |||
| 26, 27, 20, 21, 22, 23, 28, 29, | |||
| @@ -82,8 +70,8 @@ UINT8 ff_alternate_horizontal_scan[64] = { | |||
| 52, 53, 54, 55, 60, 61, 62, 63, | |||
| }; | |||
| UINT8 ff_alternate_vertical_scan[64] = { | |||
| 0, 8, 16, 24, 1, 9, 2, 10, | |||
| const UINT8 ff_alternate_vertical_scan[64] = { | |||
| 0, 8, 16, 24, 1, 9, 2, 10, | |||
| 17, 25, 32, 40, 48, 56, 57, 49, | |||
| 41, 33, 26, 18, 3, 11, 4, 12, | |||
| 19, 27, 34, 42, 50, 58, 35, 43, | |||
| @@ -93,21 +81,6 @@ UINT8 ff_alternate_vertical_scan[64] = { | |||
| 38, 46, 54, 62, 39, 47, 55, 63, | |||
| }; | |||
| #ifdef SIMPLE_IDCT | |||
| /* Input permutation for the simple_idct_mmx */ | |||
| static UINT8 simple_mmx_permutation[64]={ | |||
| 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, | |||
| 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, | |||
| 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, | |||
| 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, | |||
| 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, | |||
| 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, | |||
| 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, | |||
| 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F, | |||
| }; | |||
| #endif | |||
| /* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */ | |||
| UINT32 inverse[256]={ | |||
| 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757, | |||
| @@ -144,24 +117,6 @@ UINT32 inverse[256]={ | |||
| 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010, | |||
| }; | |||
| /* used to skip zeros at the end */ | |||
| UINT8 zigzag_end[64]; | |||
| UINT8 permutation[64]; | |||
| //UINT8 invPermutation[64]; | |||
| static void build_zigzag_end(void) | |||
| { | |||
| int lastIndex; | |||
| int lastIndexAfterPerm=0; | |||
| for(lastIndex=0; lastIndex<64; lastIndex++) | |||
| { | |||
| if(zigzag_direct[lastIndex] > lastIndexAfterPerm) | |||
| lastIndexAfterPerm= zigzag_direct[lastIndex]; | |||
| zigzag_end[lastIndex]= lastIndexAfterPerm + 1; | |||
| } | |||
| } | |||
| int pix_sum_c(UINT8 * pix, int line_size) | |||
| { | |||
| int s, i, j; | |||
| @@ -1540,65 +1495,24 @@ int pix_abs8x8_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size) | |||
| /* permute the block so that its coefficients are in the order expected | |||
| by the MMX idct */ | |||
| #ifdef SIMPLE_IDCT | |||
| /* general permutation, but perhaps slightly slower */ | |||
| void block_permute(INT16 *block) | |||
| void block_permute(INT16 *block, UINT8 *permutation) | |||
| { | |||
| int i; | |||
| INT16 temp[64]; | |||
| for(i=0; i<64; i++) temp[ block_permute_op(i) ] = block[i]; | |||
| for(i=0; i<64; i++) temp[ permutation[i] ] = block[i]; | |||
| for(i=0; i<64; i++) block[i] = temp[i]; | |||
| } | |||
| #else | |||
| void block_permute(INT16 *block) | |||
| { | |||
| int tmp1, tmp2, tmp3, tmp4, tmp5, tmp6; | |||
| int i; | |||
| for(i=0;i<8;i++) { | |||
| tmp1 = block[1]; | |||
| tmp2 = block[2]; | |||
| tmp3 = block[3]; | |||
| tmp4 = block[4]; | |||
| tmp5 = block[5]; | |||
| tmp6 = block[6]; | |||
| block[1] = tmp2; | |||
| block[2] = tmp4; | |||
| block[3] = tmp6; | |||
| block[4] = tmp1; | |||
| block[5] = tmp3; | |||
| block[6] = tmp5; | |||
| block += 8; | |||
| } | |||
| } | |||
| #endif | |||
| void clear_blocks_c(DCTELEM *blocks) | |||
| { | |||
| memset(blocks, 0, sizeof(DCTELEM)*6*64); | |||
| } | |||
| /* XXX: those functions should be suppressed ASAP when all IDCTs are | |||
| converted */ | |||
| void gen_idct_put(UINT8 *dest, int line_size, DCTELEM *block) | |||
| { | |||
| ff_idct (block); | |||
| put_pixels_clamped(block, dest, line_size); | |||
| } | |||
| void gen_idct_add(UINT8 *dest, int line_size, DCTELEM *block) | |||
| { | |||
| ff_idct (block); | |||
| add_pixels_clamped(block, dest, line_size); | |||
| } | |||
| void dsputil_init(void) | |||
| { | |||
| int i, j; | |||
| int use_permuted_idct; | |||
| for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i; | |||
| for(i=0;i<MAX_NEG_CROP;i++) { | |||
| @@ -1610,11 +1524,6 @@ void dsputil_init(void) | |||
| squareTbl[i] = (i - 256) * (i - 256); | |||
| } | |||
| #ifdef SIMPLE_IDCT | |||
| ff_idct = NULL; | |||
| #else | |||
| ff_idct = j_rev_dct; | |||
| #endif | |||
| get_pixels = get_pixels_c; | |||
| diff_pixels = diff_pixels_c; | |||
| put_pixels_clamped = put_pixels_clamped_c; | |||
| @@ -1633,8 +1542,6 @@ void dsputil_init(void) | |||
| pix_abs8x8_y2 = pix_abs8x8_y2_c; | |||
| pix_abs8x8_xy2 = pix_abs8x8_xy2_c; | |||
| use_permuted_idct = 1; | |||
| #ifdef HAVE_MMX | |||
| dsputil_init_mmx(); | |||
| #endif | |||
| @@ -1643,61 +1550,18 @@ void dsputil_init(void) | |||
| #endif | |||
| #ifdef HAVE_MLIB | |||
| dsputil_init_mlib(); | |||
| use_permuted_idct = 0; | |||
| #endif | |||
| #ifdef ARCH_ALPHA | |||
| dsputil_init_alpha(); | |||
| use_permuted_idct = 0; | |||
| #endif | |||
| #ifdef ARCH_POWERPC | |||
| dsputil_init_ppc(); | |||
| #endif | |||
| #ifdef HAVE_MMI | |||
| dsputil_init_mmi(); | |||
| use_permuted_idct = 0; | |||
| #endif | |||
| #ifdef SIMPLE_IDCT | |||
| if (ff_idct == NULL) { | |||
| ff_idct_put = simple_idct_put; | |||
| ff_idct_add = simple_idct_add; | |||
| use_permuted_idct=0; | |||
| } | |||
| #endif | |||
| if(ff_idct != NULL) { | |||
| ff_idct_put = gen_idct_put; | |||
| ff_idct_add = gen_idct_add; | |||
| } | |||
| if(use_permuted_idct) | |||
| #ifdef SIMPLE_IDCT | |||
| for(i=0; i<64; i++) permutation[i]= simple_mmx_permutation[i]; | |||
| #else | |||
| for(i=0; i<64; i++) permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2); | |||
| #endif | |||
| else | |||
| for(i=0; i<64; i++) permutation[i]=i; | |||
| for(i=0; i<64; i++) inv_zigzag_direct16[zigzag_direct[i]]= i+1; | |||
| for(i=0; i<64; i++) zigzag_direct_noperm[i]= zigzag_direct[i]; | |||
| if (use_permuted_idct) { | |||
| /* permute for IDCT */ | |||
| for(i=0;i<64;i++) { | |||
| j = zigzag_direct[i]; | |||
| zigzag_direct[i] = block_permute_op(j); | |||
| j = ff_alternate_horizontal_scan[i]; | |||
| ff_alternate_horizontal_scan[i] = block_permute_op(j); | |||
| j = ff_alternate_vertical_scan[i]; | |||
| ff_alternate_vertical_scan[i] = block_permute_op(j); | |||
| } | |||
| block_permute(ff_mpeg1_default_intra_matrix); | |||
| block_permute(ff_mpeg1_default_non_intra_matrix); | |||
| block_permute(ff_mpeg4_default_intra_matrix); | |||
| block_permute(ff_mpeg4_default_non_intra_matrix); | |||
| } | |||
| build_zigzag_end(); | |||
| for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1; | |||
| } | |||
| /* remove any non bit exact operation (testing purpose) */ | |||
| @@ -34,12 +34,9 @@ void j_rev_dct (DCTELEM *data); | |||
| void ff_fdct_mmx(DCTELEM *block); | |||
| /* encoding scans */ | |||
| extern UINT8 ff_alternate_horizontal_scan[64]; | |||
| extern UINT8 ff_alternate_vertical_scan[64]; | |||
| extern UINT8 zigzag_direct[64]; | |||
| /* permutation table */ | |||
| extern UINT8 permutation[64]; | |||
| extern const UINT8 ff_alternate_horizontal_scan[64]; | |||
| extern const UINT8 ff_alternate_vertical_scan[64]; | |||
| extern const UINT8 ff_zigzag_direct[64]; | |||
| /* pixel operations */ | |||
| #define MAX_NEG_CROP 384 | |||
| @@ -61,9 +58,6 @@ i (michael) didnt check them, these are just the alignents which i think could b | |||
| */ | |||
| /* pixel ops : interface with DCT */ | |||
| extern void (*ff_idct)(DCTELEM *block/*align 16*/); | |||
| extern void (*ff_idct_put)(UINT8 *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); | |||
| extern void (*ff_idct_add)(UINT8 *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); | |||
| extern void (*get_pixels)(DCTELEM *block/*align 16*/, const UINT8 *pixels/*align 8*/, int line_size); | |||
| extern void (*diff_pixels)(DCTELEM *block/*align 16*/, const UINT8 *s1/*align 8*/, const UINT8 *s2/*align 8*/, int stride); | |||
| extern void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, UINT8 *pixels/*align 8*/, int line_size); | |||
| @@ -119,12 +113,7 @@ int pix_abs16x16_x2_c(UINT8 *blk1, UINT8 *blk2, int lx); | |||
| int pix_abs16x16_y2_c(UINT8 *blk1, UINT8 *blk2, int lx); | |||
| int pix_abs16x16_xy2_c(UINT8 *blk1, UINT8 *blk2, int lx); | |||
| static inline int block_permute_op(int j) | |||
| { | |||
| return permutation[j]; | |||
| } | |||
| void block_permute(INT16 *block); | |||
| void block_permute(INT16 *block, UINT8 *permutation); | |||
| #if defined(HAVE_MMX) | |||
| @@ -287,19 +287,19 @@ static inline int decide_ac_pred(MpegEncContext * s, DCTELEM block[6][64], int d | |||
| if(s->mb_y==0 || s->qscale == s->qscale_table[xy] || n==2 || n==3){ | |||
| /* same qscale */ | |||
| for(i=1; i<8; i++){ | |||
| const int level= block[n][block_permute_op(i )]; | |||
| const int level= block[n][s->idct_permutation[i ]]; | |||
| score0+= ABS(level); | |||
| score1+= ABS(level - ac_val[i+8]); | |||
| ac_val1[i ]= block[n][block_permute_op(i<<3)]; | |||
| ac_val1[i ]= block[n][s->idct_permutation[i<<3]]; | |||
| ac_val1[i+8]= level; | |||
| } | |||
| }else{ | |||
| /* different qscale, we must rescale */ | |||
| for(i=1; i<8; i++){ | |||
| const int level= block[n][block_permute_op(i )]; | |||
| const int level= block[n][s->idct_permutation[i ]]; | |||
| score0+= ABS(level); | |||
| score1+= ABS(level - ROUNDED_DIV(ac_val[i + 8]*s->qscale_table[xy], s->qscale)); | |||
| ac_val1[i ]= block[n][block_permute_op(i<<3)]; | |||
| ac_val1[i ]= block[n][s->idct_permutation[i<<3]]; | |||
| ac_val1[i+8]= level; | |||
| } | |||
| } | |||
| @@ -310,20 +310,20 @@ static inline int decide_ac_pred(MpegEncContext * s, DCTELEM block[6][64], int d | |||
| if(s->mb_x==0 || s->qscale == s->qscale_table[xy] || n==1 || n==3){ | |||
| /* same qscale */ | |||
| for(i=1; i<8; i++){ | |||
| const int level= block[n][block_permute_op(i<<3)]; | |||
| const int level= block[n][s->idct_permutation[i<<3]]; | |||
| score0+= ABS(level); | |||
| score1+= ABS(level - ac_val[i]); | |||
| ac_val1[i ]= level; | |||
| ac_val1[i+8]= block[n][block_permute_op(i )]; | |||
| ac_val1[i+8]= block[n][s->idct_permutation[i ]]; | |||
| } | |||
| }else{ | |||
| /* different qscale, we must rescale */ | |||
| for(i=1; i<8; i++){ | |||
| const int level= block[n][block_permute_op(i<<3)]; | |||
| const int level= block[n][s->idct_permutation[i<<3]]; | |||
| score0+= ABS(level); | |||
| score1+= ABS(level - ROUNDED_DIV(ac_val[i]*s->qscale_table[xy], s->qscale)); | |||
| ac_val1[i ]= level; | |||
| ac_val1[i+8]= block[n][block_permute_op(i )]; | |||
| ac_val1[i+8]= block[n][s->idct_permutation[i ]]; | |||
| } | |||
| } | |||
| } | |||
| @@ -519,7 +519,7 @@ void mpeg4_encode_mb(MpegEncContext * s, | |||
| /* encode each block */ | |||
| for (i = 0; i < 6; i++) { | |||
| mpeg4_encode_block(s, block[i], i, 0, zigzag_direct, NULL, &s->pb); | |||
| mpeg4_encode_block(s, block[i], i, 0, s->intra_scantable.permutated, NULL, &s->pb); | |||
| } | |||
| if(interleaved_stats){ | |||
| @@ -637,7 +637,7 @@ void mpeg4_encode_mb(MpegEncContext * s, | |||
| /* encode each block */ | |||
| for (i = 0; i < 6; i++) { | |||
| mpeg4_encode_block(s, block[i], i, 0, zigzag_direct, NULL, tex_pb); | |||
| mpeg4_encode_block(s, block[i], i, 0, s->intra_scantable.permutated, NULL, tex_pb); | |||
| } | |||
| if(interleaved_stats){ | |||
| @@ -674,8 +674,8 @@ void mpeg4_encode_mb(MpegEncContext * s, | |||
| int last_index; | |||
| mpeg4_inv_pred_ac(s, block[i], i, dir[i]); | |||
| if (dir[i]==0) st = ff_alternate_vertical_scan; /* left */ | |||
| else st = ff_alternate_horizontal_scan; /* top */ | |||
| if (dir[i]==0) st = s->intra_v_scantable.permutated; /* left */ | |||
| else st = s->intra_h_scantable.permutated; /* top */ | |||
| for(last_index=63; last_index>=0; last_index--) //FIXME optimize | |||
| if(block[i][st[last_index]]) break; | |||
| @@ -685,7 +685,7 @@ void mpeg4_encode_mb(MpegEncContext * s, | |||
| } | |||
| }else{ | |||
| for(i=0; i<6; i++) | |||
| scan_table[i]= zigzag_direct; | |||
| scan_table[i]= s->intra_scantable.permutated; | |||
| } | |||
| /* compute cbp */ | |||
| @@ -746,10 +746,10 @@ void mpeg4_encode_mb(MpegEncContext * s, | |||
| if(dir[i]){ | |||
| for(j=1; j<8; j++) | |||
| block[i][block_permute_op(j )]= ac_val[j+8]; | |||
| block[i][s->idct_permutation[j ]]= ac_val[j+8]; | |||
| }else{ | |||
| for(j=1; j<8; j++) | |||
| block[i][block_permute_op(j<<3)]= ac_val[j ]; | |||
| block[i][s->idct_permutation[j<<3]]= ac_val[j ]; | |||
| } | |||
| s->block_last_index[i]= zigzag_last_index[i]; | |||
| } | |||
| @@ -974,7 +974,7 @@ void h263_pred_acdc(MpegEncContext * s, INT16 *block, int n) | |||
| if (a != 1024) { | |||
| ac_val -= 16; | |||
| for(i=1;i<8;i++) { | |||
| block[block_permute_op(i*8)] += ac_val[i]; | |||
| block[s->idct_permutation[i<<3]] += ac_val[i]; | |||
| } | |||
| pred_dc = a; | |||
| } | |||
| @@ -983,7 +983,7 @@ void h263_pred_acdc(MpegEncContext * s, INT16 *block, int n) | |||
| if (c != 1024) { | |||
| ac_val -= 16 * wrap; | |||
| for(i=1;i<8;i++) { | |||
| block[block_permute_op(i)] += ac_val[i + 8]; | |||
| block[s->idct_permutation[i ]] += ac_val[i + 8]; | |||
| } | |||
| pred_dc = c; | |||
| } | |||
| @@ -1011,10 +1011,10 @@ void h263_pred_acdc(MpegEncContext * s, INT16 *block, int n) | |||
| /* left copy */ | |||
| for(i=1;i<8;i++) | |||
| ac_val1[i] = block[block_permute_op(i * 8)]; | |||
| ac_val1[i ] = block[s->idct_permutation[i<<3]]; | |||
| /* top copy */ | |||
| for(i=1;i<8;i++) | |||
| ac_val1[8 + i] = block[block_permute_op(i)]; | |||
| ac_val1[8 + i] = block[s->idct_permutation[i ]]; | |||
| } | |||
| INT16 *h263_pred_motion(MpegEncContext * s, int block, | |||
| @@ -1425,7 +1425,7 @@ static void h263_encode_block(MpegEncContext * s, DCTELEM * block, int n) | |||
| last_index = s->block_last_index[n]; | |||
| last_non_zero = i - 1; | |||
| for (; i <= last_index; i++) { | |||
| j = zigzag_direct[i]; | |||
| j = s->intra_scantable.permutated[i]; | |||
| level = block[j]; | |||
| if (level) { | |||
| run = i - last_non_zero - 1; | |||
| @@ -1710,12 +1710,12 @@ void mpeg4_pred_ac(MpegEncContext * s, INT16 *block, int n, | |||
| if(s->mb_x==0 || s->qscale == s->qscale_table[xy] || n==1 || n==3){ | |||
| /* same qscale */ | |||
| for(i=1;i<8;i++) { | |||
| block[block_permute_op(i*8)] += ac_val[i]; | |||
| block[s->idct_permutation[i<<3]] += ac_val[i]; | |||
| } | |||
| }else{ | |||
| /* different qscale, we must rescale */ | |||
| for(i=1;i<8;i++) { | |||
| block[block_permute_op(i*8)] += ROUNDED_DIV(ac_val[i]*s->qscale_table[xy], s->qscale); | |||
| block[s->idct_permutation[i<<3]] += ROUNDED_DIV(ac_val[i]*s->qscale_table[xy], s->qscale); | |||
| } | |||
| } | |||
| } else { | |||
| @@ -1726,23 +1726,23 @@ void mpeg4_pred_ac(MpegEncContext * s, INT16 *block, int n, | |||
| if(s->mb_y==0 || s->qscale == s->qscale_table[xy] || n==2 || n==3){ | |||
| /* same qscale */ | |||
| for(i=1;i<8;i++) { | |||
| block[block_permute_op(i)] += ac_val[i + 8]; | |||
| block[s->idct_permutation[i]] += ac_val[i + 8]; | |||
| } | |||
| }else{ | |||
| /* different qscale, we must rescale */ | |||
| for(i=1;i<8;i++) { | |||
| block[block_permute_op(i)] += ROUNDED_DIV(ac_val[i + 8]*s->qscale_table[xy], s->qscale); | |||
| block[s->idct_permutation[i]] += ROUNDED_DIV(ac_val[i + 8]*s->qscale_table[xy], s->qscale); | |||
| } | |||
| } | |||
| } | |||
| } | |||
| /* left copy */ | |||
| for(i=1;i<8;i++) | |||
| ac_val1[i] = block[block_permute_op(i * 8)]; | |||
| ac_val1[i ] = block[s->idct_permutation[i<<3]]; | |||
| /* top copy */ | |||
| for(i=1;i<8;i++) | |||
| ac_val1[8 + i] = block[block_permute_op(i)]; | |||
| ac_val1[8 + i] = block[s->idct_permutation[i ]]; | |||
| } | |||
| @@ -1762,12 +1762,12 @@ static void mpeg4_inv_pred_ac(MpegEncContext * s, INT16 *block, int n, | |||
| if(s->mb_x==0 || s->qscale == s->qscale_table[xy] || n==1 || n==3){ | |||
| /* same qscale */ | |||
| for(i=1;i<8;i++) { | |||
| block[block_permute_op(i*8)] -= ac_val[i]; | |||
| block[s->idct_permutation[i<<3]] -= ac_val[i]; | |||
| } | |||
| }else{ | |||
| /* different qscale, we must rescale */ | |||
| for(i=1;i<8;i++) { | |||
| block[block_permute_op(i*8)] -= ROUNDED_DIV(ac_val[i]*s->qscale_table[xy], s->qscale); | |||
| block[s->idct_permutation[i<<3]] -= ROUNDED_DIV(ac_val[i]*s->qscale_table[xy], s->qscale); | |||
| } | |||
| } | |||
| } else { | |||
| @@ -1777,12 +1777,12 @@ static void mpeg4_inv_pred_ac(MpegEncContext * s, INT16 *block, int n, | |||
| if(s->mb_y==0 || s->qscale == s->qscale_table[xy] || n==2 || n==3){ | |||
| /* same qscale */ | |||
| for(i=1;i<8;i++) { | |||
| block[block_permute_op(i)] -= ac_val[i + 8]; | |||
| block[s->idct_permutation[i]] -= ac_val[i + 8]; | |||
| } | |||
| }else{ | |||
| /* different qscale, we must rescale */ | |||
| for(i=1;i<8;i++) { | |||
| block[block_permute_op(i)] -= ROUNDED_DIV(ac_val[i + 8]*s->qscale_table[xy], s->qscale); | |||
| block[s->idct_permutation[i]] -= ROUNDED_DIV(ac_val[i + 8]*s->qscale_table[xy], s->qscale); | |||
| } | |||
| } | |||
| } | |||
| @@ -3192,13 +3192,13 @@ intra: | |||
| static int h263_decode_motion(MpegEncContext * s, int pred, int f_code) | |||
| { | |||
| int code, val, sign, shift, l; | |||
| code = get_vlc2(&s->gb, mv_vlc.table, MV_VLC_BITS, 2); | |||
| if (code < 0) | |||
| return 0xffff; | |||
| if (code == 0) | |||
| return pred; | |||
| sign = get_bits1(&s->gb); | |||
| shift = f_code - 1; | |||
| val = (code - 1) << shift; | |||
| @@ -3211,7 +3211,7 @@ static int h263_decode_motion(MpegEncContext * s, int pred, int f_code) | |||
| /* modulo decoding */ | |||
| if (!s->h263_long_vectors) { | |||
| l = (1 << (f_code - 1)) * 32; | |||
| l = 1 << (f_code + 4); | |||
| if (val < -l) { | |||
| val += l<<1; | |||
| } else if (val >= l) { | |||
| @@ -3261,15 +3261,15 @@ static int h263_decode_block(MpegEncContext * s, DCTELEM * block, | |||
| RLTable *rl = &rl_inter; | |||
| const UINT8 *scan_table; | |||
| scan_table = zigzag_direct; | |||
| scan_table = s->intra_scantable.permutated; | |||
| if (s->h263_aic && s->mb_intra) { | |||
| rl = &rl_intra_aic; | |||
| i = 0; | |||
| if (s->ac_pred) { | |||
| if (s->h263_aic_dir) | |||
| scan_table = ff_alternate_vertical_scan; /* left */ | |||
| scan_table = s->intra_v_scantable.permutated; /* left */ | |||
| else | |||
| scan_table = ff_alternate_horizontal_scan; /* top */ | |||
| scan_table = s->intra_h_scantable.permutated; /* top */ | |||
| } | |||
| } else if (s->mb_intra) { | |||
| /* DC coef */ | |||
| @@ -3417,14 +3417,14 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block, | |||
| rl = &rl_intra; | |||
| rl_vlc = rl_intra.rl_vlc[0]; | |||
| if(s->alternate_scan) | |||
| scan_table = ff_alternate_vertical_scan; /* left */ | |||
| scan_table = s->intra_v_scantable.permutated; /* left */ | |||
| else if (s->ac_pred) { | |||
| if (dc_pred_dir == 0) | |||
| scan_table = ff_alternate_vertical_scan; /* left */ | |||
| scan_table = s->intra_v_scantable.permutated; /* left */ | |||
| else | |||
| scan_table = ff_alternate_horizontal_scan; /* top */ | |||
| scan_table = s->intra_h_scantable.permutated; /* top */ | |||
| } else { | |||
| scan_table = zigzag_direct; | |||
| scan_table = s->intra_scantable.permutated; | |||
| } | |||
| qmul=1; | |||
| qadd=0; | |||
| @@ -3437,9 +3437,9 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block, | |||
| rl = &rl_inter; | |||
| if(s->alternate_scan) | |||
| scan_table = ff_alternate_vertical_scan; /* left */ | |||
| scan_table = s->intra_v_scantable.permutated; /* left */ | |||
| else | |||
| scan_table = zigzag_direct; | |||
| scan_table = s->intra_scantable.permutated; | |||
| if(s->mpeg_quant){ | |||
| qmul=1; | |||
| @@ -4081,13 +4081,14 @@ int mpeg4_decode_picture_header(MpegEncContext * s) | |||
| /* load default matrixes */ | |||
| for(i=0; i<64; i++){ | |||
| int j= s->idct_permutation[i]; | |||
| v= ff_mpeg4_default_intra_matrix[i]; | |||
| s->intra_matrix[i]= v; | |||
| s->chroma_intra_matrix[i]= v; | |||
| s->intra_matrix[j]= v; | |||
| s->chroma_intra_matrix[j]= v; | |||
| v= ff_mpeg4_default_non_intra_matrix[i]; | |||
| s->inter_matrix[i]= v; | |||
| s->chroma_inter_matrix[i]= v; | |||
| s->inter_matrix[j]= v; | |||
| s->chroma_inter_matrix[j]= v; | |||
| } | |||
| /* load custom intra matrix */ | |||
| @@ -4096,7 +4097,7 @@ int mpeg4_decode_picture_header(MpegEncContext * s) | |||
| v= get_bits(&s->gb, 8); | |||
| if(v==0) break; | |||
| j= zigzag_direct[i]; | |||
| j= s->intra_scantable.permutated[i]; | |||
| s->intra_matrix[j]= v; | |||
| s->chroma_intra_matrix[j]= v; | |||
| } | |||
| @@ -4108,14 +4109,14 @@ int mpeg4_decode_picture_header(MpegEncContext * s) | |||
| v= get_bits(&s->gb, 8); | |||
| if(v==0) break; | |||
| j= zigzag_direct[i]; | |||
| j= s->intra_scantable.permutated[i]; | |||
| s->inter_matrix[j]= v; | |||
| s->chroma_inter_matrix[j]= v; | |||
| } | |||
| /* replicate last value */ | |||
| for(; i<64; i++){ | |||
| j= zigzag_direct[i]; | |||
| j= s->intra_scantable.permutated[i]; | |||
| s->inter_matrix[j]= v; | |||
| s->chroma_inter_matrix[j]= v; | |||
| } | |||
| @@ -20,7 +20,6 @@ | |||
| */ | |||
| #include "../dsputil.h" | |||
| #include "../simple_idct.h" | |||
| int mm_flags; /* multimedia extension flags */ | |||
| @@ -44,10 +43,6 @@ int pix_abs8x8_x2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); | |||
| int pix_abs8x8_y2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); | |||
| int pix_abs8x8_xy2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); | |||
| /* external functions, from idct_mmx.c */ | |||
| void ff_mmx_idct(DCTELEM *block); | |||
| void ff_mmxext_idct(DCTELEM *block); | |||
| /* pixel operations */ | |||
| static const uint64_t mm_bone __attribute__ ((aligned(8))) = 0x0101010101010101ULL; | |||
| static const uint64_t mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL; | |||
| @@ -588,17 +583,6 @@ void dsputil_init_mmx(void) | |||
| avg_pixels_tab[1][2] = avg_pixels8_y2_3dnow; | |||
| avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow; | |||
| } | |||
| /* idct */ | |||
| if (mm_flags & MM_MMXEXT) { | |||
| ff_idct = ff_mmxext_idct; | |||
| } else { | |||
| ff_idct = ff_mmx_idct; | |||
| } | |||
| #ifdef SIMPLE_IDCT | |||
| // ff_idct = simple_idct; | |||
| ff_idct = simple_idct_mmx; | |||
| #endif | |||
| } | |||
| #if 0 | |||
| @@ -637,28 +621,6 @@ void dsputil_init_mmx(void) | |||
| #endif | |||
| } | |||
| void gen_idct_put(UINT8 *dest, int line_size, DCTELEM *block); | |||
| /** | |||
| * this will send coeff matrixes which would have different results for the 16383 type MMX vs C IDCTs to the C IDCT | |||
| */ | |||
| void bit_exact_idct_put(UINT8 *dest, int line_size, INT16 *block){ | |||
| if( block[0]>1022 && block[1]==0 && block[4 ]==0 && block[5 ]==0 | |||
| && block[8]==0 && block[9]==0 && block[12]==0 && block[13]==0){ | |||
| int16_t tmp[64]; | |||
| int i; | |||
| for(i=0; i<64; i++) | |||
| tmp[i]= block[i]; | |||
| for(i=0; i<64; i++) | |||
| block[i]= tmp[block_permute_op(i)]; | |||
| simple_idct_put(dest, line_size, block); | |||
| } | |||
| else | |||
| gen_idct_put(dest, line_size, block); | |||
| } | |||
| /* remove any non bit exact operation (testing purpose). NOTE that | |||
| this function should be kept as small as possible because it is | |||
| always difficult to test automatically non bit exact cases. */ | |||
| @@ -682,9 +644,5 @@ void dsputil_set_bit_exact_mmx(void) | |||
| pix_abs8x8_y2 = pix_abs8x8_y2_mmx; | |||
| pix_abs8x8_xy2= pix_abs8x8_xy2_mmx; | |||
| } | |||
| #ifdef SIMPLE_IDCT | |||
| if(ff_idct_put==gen_idct_put && ff_idct == simple_idct_mmx) | |||
| ff_idct_put= bit_exact_idct_put; | |||
| #endif | |||
| } | |||
| } | |||
| @@ -23,53 +23,24 @@ | |||
| #include "../dsputil.h" | |||
| #include "../mpegvideo.h" | |||
| #include "../avcodec.h" | |||
| extern UINT8 zigzag_end[64]; | |||
| #include "../simple_idct.h" | |||
| /* Input permutation for the simple_idct_mmx */ | |||
| static UINT8 simple_mmx_permutation[64]={ | |||
| 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, | |||
| 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, | |||
| 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, | |||
| 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, | |||
| 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, | |||
| 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, | |||
| 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, | |||
| 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F, | |||
| }; | |||
| extern UINT8 zigzag_direct_noperm[64]; | |||
| extern UINT16 inv_zigzag_direct16[64]; | |||
| extern UINT32 inverse[256]; | |||
| #if 0 | |||
| /* XXX: GL: I don't understand why this function needs optimization | |||
| (it is called only once per frame!), so I disabled it */ | |||
/* Updates the picture arrays of s at the start of a frame. Each asm block
   moves exactly three 32-bit words per array (offsets 0, 4 and 8);
   presumably these are the three plane pointers - TODO confirm. */
| void MPV_frame_start(MpegEncContext *s) | |||
| { | |||
| if (s->pict_type == B_TYPE) { | |||
/* B frame: copy the three words of s->aux_picture (%1) into
   s->current_picture (%0). */
| __asm __volatile( | |||
| "movl (%1), %%eax\n\t" | |||
| "movl 4(%1), %%edx\n\t" | |||
| "movl 8(%1), %%ecx\n\t" | |||
| "movl %%eax, (%0)\n\t" | |||
| "movl %%edx, 4(%0)\n\t" | |||
| "movl %%ecx, 8(%0)\n\t" | |||
| : | |||
| :"r"(s->current_picture), "r"(s->aux_picture) | |||
| :"eax","edx","ecx","memory"); | |||
| } else { | |||
| /* swap next and last */ | |||
/* Operands: %0 = last_picture, %1 = next_picture, %2 = current_picture.
   The words of next_picture are loaded and exchanged with last_picture;
   the old last_picture words are then stored to both next_picture and
   current_picture. */
| __asm __volatile( | |||
| "movl (%1), %%eax\n\t" | |||
| "movl 4(%1), %%edx\n\t" | |||
| "movl 8(%1), %%ecx\n\t" | |||
| "xchgl (%0), %%eax\n\t" | |||
| "xchgl 4(%0), %%edx\n\t" | |||
| "xchgl 8(%0), %%ecx\n\t" | |||
| "movl %%eax, (%1)\n\t" | |||
| "movl %%edx, 4(%1)\n\t" | |||
| "movl %%ecx, 8(%1)\n\t" | |||
| "movl %%eax, (%2)\n\t" | |||
| "movl %%edx, 4(%2)\n\t" | |||
| "movl %%ecx, 8(%2)\n\t" | |||
| : | |||
| :"r"(s->last_picture), "r"(s->next_picture), "r"(s->current_picture) | |||
| :"eax","edx","ecx","memory"); | |||
| } | |||
| } | |||
| #endif | |||
| static const unsigned long long int mm_wabs __attribute__ ((aligned(8))) = 0xffffffffffffffffULL; | |||
| static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL; | |||
| @@ -77,36 +48,26 @@ static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x000 | |||
| static void dct_unquantize_h263_mmx(MpegEncContext *s, | |||
| DCTELEM *block, int n, int qscale) | |||
| { | |||
| int i, level, qmul, qadd, nCoeffs; | |||
| qmul = s->qscale << 1; | |||
| if (s->h263_aic && s->mb_intra) | |||
| qadd = 0; | |||
| else | |||
| qadd = (s->qscale - 1) | 1; | |||
| int level, qmul, qadd, nCoeffs; | |||
| qmul = qscale << 1; | |||
| qadd = (qscale - 1) | 1; | |||
| assert(s->block_last_index[n]>=0); | |||
| if (s->mb_intra) { | |||
| if (!s->h263_aic) { | |||
| if (n < 4) | |||
| block[0] = block[0] * s->y_dc_scale; | |||
| level = block[0] * s->y_dc_scale; | |||
| else | |||
| block[0] = block[0] * s->c_dc_scale; | |||
| level = block[0] * s->c_dc_scale; | |||
| }else{ | |||
| qadd = 0; | |||
| level= block[0]; | |||
| } | |||
| for(i=1; i<8; i++) { | |||
| level = block[i]; | |||
| if (level) { | |||
| if (level < 0) { | |||
| level = level * qmul - qadd; | |||
| } else { | |||
| level = level * qmul + qadd; | |||
| } | |||
| block[i] = level; | |||
| } | |||
| } | |||
| nCoeffs=64; | |||
| nCoeffs=63; | |||
| } else { | |||
| i = 0; | |||
| nCoeffs= zigzag_end[ s->block_last_index[n] ]; | |||
| nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ]; | |||
| } | |||
| //printf("%d %d ", qmul, qadd); | |||
| asm volatile( | |||
| @@ -152,10 +113,12 @@ asm volatile( | |||
| "movq %%mm1, 8(%0, %3) \n\t" | |||
| "addl $16, %3 \n\t" | |||
| "js 1b \n\t" | |||
| ::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(i-nCoeffs)) | |||
| "jng 1b \n\t" | |||
| ::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs)) | |||
| : "memory" | |||
| ); | |||
| if(s->mb_intra) | |||
| block[0]= level; | |||
| } | |||
| @@ -193,9 +156,10 @@ static void dct_unquantize_mpeg1_mmx(MpegEncContext *s, | |||
| { | |||
| int nCoeffs; | |||
| const UINT16 *quant_matrix; | |||
| if(s->alternate_scan) nCoeffs= 64; | |||
| else nCoeffs= zigzag_end[ s->block_last_index[n] ]; | |||
| assert(s->block_last_index[n]>=0); | |||
| nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1; | |||
| if (s->mb_intra) { | |||
| int block0; | |||
| @@ -312,6 +276,7 @@ asm volatile( | |||
| : "%eax", "memory" | |||
| ); | |||
| } | |||
| } | |||
| static void dct_unquantize_mpeg2_mmx(MpegEncContext *s, | |||
| @@ -320,8 +285,10 @@ static void dct_unquantize_mpeg2_mmx(MpegEncContext *s, | |||
| int nCoeffs; | |||
| const UINT16 *quant_matrix; | |||
| if(s->alternate_scan) nCoeffs= 64; | |||
| else nCoeffs= zigzag_end[ s->block_last_index[n] ]; | |||
| assert(s->block_last_index[n]>=0); | |||
| if(s->alternate_scan) nCoeffs= 63; //FIXME | |||
| else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]; | |||
| if (s->mb_intra) { | |||
| int block0; | |||
| @@ -371,7 +338,7 @@ asm volatile( | |||
| "movq %%mm5, 8(%0, %%eax) \n\t" | |||
| "addl $16, %%eax \n\t" | |||
| "js 1b \n\t" | |||
| "jng 1b \n\t" | |||
| ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs) | |||
| : "%eax", "memory" | |||
| ); | |||
| @@ -427,7 +394,7 @@ asm volatile( | |||
| "movq %%mm5, 8(%0, %%eax) \n\t" | |||
| "addl $16, %%eax \n\t" | |||
| "js 1b \n\t" | |||
| "jng 1b \n\t" | |||
| "movd 124(%0, %3), %%mm0 \n\t" | |||
| "movq %%mm7, %%mm6 \n\t" | |||
| "psrlq $32, %%mm7 \n\t" | |||
| @@ -534,12 +501,6 @@ static void draw_edges_mmx(UINT8 *buf, int wrap, int width, int height, int w) | |||
| } | |||
| } | |||
| static volatile int esp_temp; | |||
| void unused_var_warning_killer(){ | |||
| esp_temp++; | |||
| } | |||
| #undef HAVE_MMX2 | |||
| #define RENAME(a) a ## _MMX | |||
| #include "mpegvideo_mmx_template.c" | |||
| @@ -549,10 +510,40 @@ void unused_var_warning_killer(){ | |||
| #define RENAME(a) a ## _MMX2 | |||
| #include "mpegvideo_mmx_template.c" | |||
| /* external functions, from idct_mmx.c */ | |||
| void ff_mmx_idct(DCTELEM *block); | |||
| void ff_mmxext_idct(DCTELEM *block); | |||
| /* XXX: those functions should be suppressed ASAP when all IDCTs are | |||
| converted */ | |||
| static void ff_libmpeg2mmx_idct_put(UINT8 *dest, int line_size, DCTELEM *block) /* "put" adapter: ff_mmx_idct() followed by put_pixels_clamped() */ | |||
| { | |||
| ff_mmx_idct (block); /* external IDCT from idct_mmx.c; presumably transforms block in place -- TODO confirm */ | |||
| put_pixels_clamped(block, dest, line_size); /* hand the transformed block to the pixel writer for dest/line_size */ | |||
| } | |||
| static void ff_libmpeg2mmx_idct_add(UINT8 *dest, int line_size, DCTELEM *block) /* "add" adapter: ff_mmx_idct() followed by add_pixels_clamped() */ | |||
| { | |||
| ff_mmx_idct (block); /* external IDCT from idct_mmx.c; presumably transforms block in place -- TODO confirm */ | |||
| add_pixels_clamped(block, dest, line_size); /* presumably adds the transformed block onto the pixels already at dest -- TODO confirm */ | |||
| } | |||
| static void ff_libmpeg2mmx2_idct_put(UINT8 *dest, int line_size, DCTELEM *block) /* "put" adapter using the MMXEXT IDCT: ff_mmxext_idct() followed by put_pixels_clamped() */ | |||
| { | |||
| ff_mmxext_idct (block); /* external IDCT from idct_mmx.c; presumably transforms block in place -- TODO confirm */ | |||
| put_pixels_clamped(block, dest, line_size); /* hand the transformed block to the pixel writer for dest/line_size */ | |||
| } | |||
| static void ff_libmpeg2mmx2_idct_add(UINT8 *dest, int line_size, DCTELEM *block) /* "add" adapter using the MMXEXT IDCT: ff_mmxext_idct() followed by add_pixels_clamped() */ | |||
| { | |||
| ff_mmxext_idct (block); /* external IDCT from idct_mmx.c; presumably transforms block in place -- TODO confirm */ | |||
| add_pixels_clamped(block, dest, line_size); /* presumably adds the transformed block onto the pixels already at dest -- TODO confirm */ | |||
| } | |||
| void MPV_common_init_mmx(MpegEncContext *s) | |||
| { | |||
| if (mm_flags & MM_MMX) { | |||
| const int dct_algo= s->avctx->dct_algo; | |||
| int i; | |||
| const int dct_algo = s->avctx->dct_algo; | |||
| const int idct_algo= s->avctx->idct_algo; | |||
| s->dct_unquantize_h263 = dct_unquantize_h263_mmx; | |||
| s->dct_unquantize_mpeg1 = dct_unquantize_mpeg1_mmx; | |||
| s->dct_unquantize_mpeg2 = dct_unquantize_mpeg2_mmx; | |||
| @@ -568,5 +559,22 @@ void MPV_common_init_mmx(MpegEncContext *s) | |||
| s->dct_quantize= dct_quantize_MMX; | |||
| } | |||
| } | |||
| if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SIMPLEMMX){ | |||
| s->idct_put= ff_simple_idct_put_mmx; | |||
| s->idct_add= ff_simple_idct_add_mmx; | |||
| for(i=0; i<64; i++) | |||
| s->idct_permutation[i]= simple_mmx_permutation[i]; | |||
| }else if(idct_algo==FF_IDCT_LIBMPEG2MMX){ | |||
| if(mm_flags & MM_MMXEXT){ | |||
| s->idct_put= ff_libmpeg2mmx2_idct_put; | |||
| s->idct_add= ff_libmpeg2mmx2_idct_add; | |||
| }else{ | |||
| s->idct_put= ff_libmpeg2mmx_idct_put; | |||
| s->idct_add= ff_libmpeg2mmx_idct_add; | |||
| } | |||
| for(i=0; i<64; i++) | |||
| s->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2); | |||
| } | |||
| } | |||
| } | |||
| @@ -189,31 +189,143 @@ static int RENAME(dct_quantize)(MpegEncContext *s, | |||
| ); | |||
| } | |||
| if(s->mb_intra) temp_block[0]= level; //FIXME move afer permute | |||
| // last_non_zero_p1=64; | |||
| /* permute for IDCT */ | |||
| asm volatile( | |||
| "movl %0, %%eax \n\t" | |||
| "pushl %%ebp \n\t" | |||
| "movl %%esp, " MANGLE(esp_temp) "\n\t" | |||
| "1: \n\t" | |||
| "movzbl (%1, %%eax), %%ebx \n\t" | |||
| "movzbl 1(%1, %%eax), %%ebp \n\t" | |||
| "movw (%2, %%ebx, 2), %%cx \n\t" | |||
| "movw (%2, %%ebp, 2), %%sp \n\t" | |||
| "movzbl " MANGLE(permutation) "(%%ebx), %%ebx\n\t" | |||
| "movzbl " MANGLE(permutation) "(%%ebp), %%ebp\n\t" | |||
| "movw %%cx, (%3, %%ebx, 2) \n\t" | |||
| "movw %%sp, (%3, %%ebp, 2) \n\t" | |||
| "addl $2, %%eax \n\t" | |||
| " js 1b \n\t" | |||
| "movl " MANGLE(esp_temp) ", %%esp\n\t" | |||
| "popl %%ebp \n\t" | |||
| : | |||
| : "g" (-last_non_zero_p1), "d" (zigzag_direct_noperm+last_non_zero_p1), "S" (temp_block), "D" (block) | |||
| : "%eax", "%ebx", "%ecx" | |||
| ); | |||
| if(s->mb_intra) block[0]= level; | |||
| else block[0]= temp_block[0]; | |||
| if(s->idct_permutation[1]==8){ | |||
| if(last_non_zero_p1 <= 1) goto end; | |||
| block[0x08] = temp_block[0x01]; block[0x10] = temp_block[0x08]; | |||
| block[0x20] = temp_block[0x10]; | |||
| if(last_non_zero_p1 <= 4) goto end; | |||
| block[0x18] = temp_block[0x09]; block[0x04] = temp_block[0x02]; | |||
| block[0x09] = temp_block[0x03]; | |||
| if(last_non_zero_p1 <= 7) goto end; | |||
| block[0x14] = temp_block[0x0A]; block[0x28] = temp_block[0x11]; | |||
| block[0x12] = temp_block[0x18]; block[0x02] = temp_block[0x20]; | |||
| if(last_non_zero_p1 <= 11) goto end; | |||
| block[0x1A] = temp_block[0x19]; block[0x24] = temp_block[0x12]; | |||
| block[0x19] = temp_block[0x0B]; block[0x01] = temp_block[0x04]; | |||
| block[0x0C] = temp_block[0x05]; | |||
| if(last_non_zero_p1 <= 16) goto end; | |||
| block[0x11] = temp_block[0x0C]; block[0x29] = temp_block[0x13]; | |||
| block[0x16] = temp_block[0x1A]; block[0x0A] = temp_block[0x21]; | |||
| block[0x30] = temp_block[0x28]; block[0x22] = temp_block[0x30]; | |||
| block[0x38] = temp_block[0x29]; block[0x06] = temp_block[0x22]; | |||
| if(last_non_zero_p1 <= 24) goto end; | |||
| block[0x1B] = temp_block[0x1B]; block[0x21] = temp_block[0x14]; | |||
| block[0x1C] = temp_block[0x0D]; block[0x05] = temp_block[0x06]; | |||
| block[0x0D] = temp_block[0x07]; block[0x15] = temp_block[0x0E]; | |||
| block[0x2C] = temp_block[0x15]; block[0x13] = temp_block[0x1C]; | |||
| if(last_non_zero_p1 <= 32) goto end; | |||
| block[0x0B] = temp_block[0x23]; block[0x34] = temp_block[0x2A]; | |||
| block[0x2A] = temp_block[0x31]; block[0x32] = temp_block[0x38]; | |||
| block[0x3A] = temp_block[0x39]; block[0x26] = temp_block[0x32]; | |||
| block[0x39] = temp_block[0x2B]; block[0x03] = temp_block[0x24]; | |||
| if(last_non_zero_p1 <= 40) goto end; | |||
| block[0x1E] = temp_block[0x1D]; block[0x25] = temp_block[0x16]; | |||
| block[0x1D] = temp_block[0x0F]; block[0x2D] = temp_block[0x17]; | |||
| block[0x17] = temp_block[0x1E]; block[0x0E] = temp_block[0x25]; | |||
| block[0x31] = temp_block[0x2C]; block[0x2B] = temp_block[0x33]; | |||
| if(last_non_zero_p1 <= 48) goto end; | |||
| block[0x36] = temp_block[0x3A]; block[0x3B] = temp_block[0x3B]; | |||
| block[0x23] = temp_block[0x34]; block[0x3C] = temp_block[0x2D]; | |||
| block[0x07] = temp_block[0x26]; block[0x1F] = temp_block[0x1F]; | |||
| block[0x0F] = temp_block[0x27]; block[0x35] = temp_block[0x2E]; | |||
| if(last_non_zero_p1 <= 56) goto end; | |||
| block[0x2E] = temp_block[0x35]; block[0x33] = temp_block[0x3C]; | |||
| block[0x3E] = temp_block[0x3D]; block[0x27] = temp_block[0x36]; | |||
| block[0x3D] = temp_block[0x2F]; block[0x2F] = temp_block[0x37]; | |||
| block[0x37] = temp_block[0x3E]; block[0x3F] = temp_block[0x3F]; | |||
| }else if(s->idct_permutation[1]==4){ | |||
| if(last_non_zero_p1 <= 1) goto end; | |||
| block[0x04] = temp_block[0x01]; | |||
| block[0x08] = temp_block[0x08]; block[0x10] = temp_block[0x10]; | |||
| if(last_non_zero_p1 <= 4) goto end; | |||
| block[0x0C] = temp_block[0x09]; block[0x01] = temp_block[0x02]; | |||
| block[0x05] = temp_block[0x03]; | |||
| if(last_non_zero_p1 <= 7) goto end; | |||
| block[0x09] = temp_block[0x0A]; block[0x14] = temp_block[0x11]; | |||
| block[0x18] = temp_block[0x18]; block[0x20] = temp_block[0x20]; | |||
| if(last_non_zero_p1 <= 11) goto end; | |||
| block[0x1C] = temp_block[0x19]; | |||
| block[0x11] = temp_block[0x12]; block[0x0D] = temp_block[0x0B]; | |||
| block[0x02] = temp_block[0x04]; block[0x06] = temp_block[0x05]; | |||
| if(last_non_zero_p1 <= 16) goto end; | |||
| block[0x0A] = temp_block[0x0C]; block[0x15] = temp_block[0x13]; | |||
| block[0x19] = temp_block[0x1A]; block[0x24] = temp_block[0x21]; | |||
| block[0x28] = temp_block[0x28]; block[0x30] = temp_block[0x30]; | |||
| block[0x2C] = temp_block[0x29]; block[0x21] = temp_block[0x22]; | |||
| if(last_non_zero_p1 <= 24) goto end; | |||
| block[0x1D] = temp_block[0x1B]; block[0x12] = temp_block[0x14]; | |||
| block[0x0E] = temp_block[0x0D]; block[0x03] = temp_block[0x06]; | |||
| block[0x07] = temp_block[0x07]; block[0x0B] = temp_block[0x0E]; | |||
| block[0x16] = temp_block[0x15]; block[0x1A] = temp_block[0x1C]; | |||
| if(last_non_zero_p1 <= 32) goto end; | |||
| block[0x25] = temp_block[0x23]; block[0x29] = temp_block[0x2A]; | |||
| block[0x34] = temp_block[0x31]; block[0x38] = temp_block[0x38]; | |||
| block[0x3C] = temp_block[0x39]; block[0x31] = temp_block[0x32]; | |||
| block[0x2D] = temp_block[0x2B]; block[0x22] = temp_block[0x24]; | |||
| if(last_non_zero_p1 <= 40) goto end; | |||
| block[0x1E] = temp_block[0x1D]; block[0x13] = temp_block[0x16]; | |||
| block[0x0F] = temp_block[0x0F]; block[0x17] = temp_block[0x17]; | |||
| block[0x1B] = temp_block[0x1E]; block[0x26] = temp_block[0x25]; | |||
| block[0x2A] = temp_block[0x2C]; block[0x35] = temp_block[0x33]; | |||
| if(last_non_zero_p1 <= 48) goto end; | |||
| block[0x39] = temp_block[0x3A]; block[0x3D] = temp_block[0x3B]; | |||
| block[0x32] = temp_block[0x34]; block[0x2E] = temp_block[0x2D]; | |||
| block[0x23] = temp_block[0x26]; block[0x1F] = temp_block[0x1F]; | |||
| block[0x27] = temp_block[0x27]; block[0x2B] = temp_block[0x2E]; | |||
| if(last_non_zero_p1 <= 56) goto end; | |||
| block[0x36] = temp_block[0x35]; block[0x3A] = temp_block[0x3C]; | |||
| block[0x3E] = temp_block[0x3D]; block[0x33] = temp_block[0x36]; | |||
| block[0x2F] = temp_block[0x2F]; block[0x37] = temp_block[0x37]; | |||
| block[0x3B] = temp_block[0x3E]; block[0x3F] = temp_block[0x3F]; | |||
| }else{ | |||
| if(last_non_zero_p1 <= 1) goto end; | |||
| block[0x01] = temp_block[0x01]; | |||
| block[0x08] = temp_block[0x08]; block[0x10] = temp_block[0x10]; | |||
| if(last_non_zero_p1 <= 4) goto end; | |||
| block[0x09] = temp_block[0x09]; block[0x02] = temp_block[0x02]; | |||
| block[0x03] = temp_block[0x03]; | |||
| if(last_non_zero_p1 <= 7) goto end; | |||
| block[0x0A] = temp_block[0x0A]; block[0x11] = temp_block[0x11]; | |||
| block[0x18] = temp_block[0x18]; block[0x20] = temp_block[0x20]; | |||
| if(last_non_zero_p1 <= 11) goto end; | |||
| block[0x19] = temp_block[0x19]; | |||
| block[0x12] = temp_block[0x12]; block[0x0B] = temp_block[0x0B]; | |||
| block[0x04] = temp_block[0x04]; block[0x05] = temp_block[0x05]; | |||
| if(last_non_zero_p1 <= 16) goto end; | |||
| block[0x0C] = temp_block[0x0C]; block[0x13] = temp_block[0x13]; | |||
| block[0x1A] = temp_block[0x1A]; block[0x21] = temp_block[0x21]; | |||
| block[0x28] = temp_block[0x28]; block[0x30] = temp_block[0x30]; | |||
| block[0x29] = temp_block[0x29]; block[0x22] = temp_block[0x22]; | |||
| if(last_non_zero_p1 <= 24) goto end; | |||
| block[0x1B] = temp_block[0x1B]; block[0x14] = temp_block[0x14]; | |||
| block[0x0D] = temp_block[0x0D]; block[0x06] = temp_block[0x06]; | |||
| block[0x07] = temp_block[0x07]; block[0x0E] = temp_block[0x0E]; | |||
| block[0x15] = temp_block[0x15]; block[0x1C] = temp_block[0x1C]; | |||
| if(last_non_zero_p1 <= 32) goto end; | |||
| block[0x23] = temp_block[0x23]; block[0x2A] = temp_block[0x2A]; | |||
| block[0x31] = temp_block[0x31]; block[0x38] = temp_block[0x38]; | |||
| block[0x39] = temp_block[0x39]; block[0x32] = temp_block[0x32]; | |||
| block[0x2B] = temp_block[0x2B]; block[0x24] = temp_block[0x24]; | |||
| if(last_non_zero_p1 <= 40) goto end; | |||
| block[0x1D] = temp_block[0x1D]; block[0x16] = temp_block[0x16]; | |||
| block[0x0F] = temp_block[0x0F]; block[0x17] = temp_block[0x17]; | |||
| block[0x1E] = temp_block[0x1E]; block[0x25] = temp_block[0x25]; | |||
| block[0x2C] = temp_block[0x2C]; block[0x33] = temp_block[0x33]; | |||
| if(last_non_zero_p1 <= 48) goto end; | |||
| block[0x3A] = temp_block[0x3A]; block[0x3B] = temp_block[0x3B]; | |||
| block[0x34] = temp_block[0x34]; block[0x2D] = temp_block[0x2D]; | |||
| block[0x26] = temp_block[0x26]; block[0x1F] = temp_block[0x1F]; | |||
| block[0x27] = temp_block[0x27]; block[0x2E] = temp_block[0x2E]; | |||
| if(last_non_zero_p1 <= 56) goto end; | |||
| block[0x35] = temp_block[0x35]; block[0x3C] = temp_block[0x3C]; | |||
| block[0x3D] = temp_block[0x3D]; block[0x36] = temp_block[0x36]; | |||
| block[0x2F] = temp_block[0x2F]; block[0x37] = temp_block[0x37]; | |||
| block[0x3E] = temp_block[0x3E]; block[0x3F] = temp_block[0x3F]; | |||
| } | |||
| end: | |||
| /* | |||
| for(i=0; i<last_non_zero_p1; i++) | |||
| { | |||
| @@ -221,7 +333,6 @@ static int RENAME(dct_quantize)(MpegEncContext *s, | |||
| block[block_permute_op(j)]= temp_block[j]; | |||
| } | |||
| */ | |||
| //block_permute(block); | |||
| return last_non_zero_p1 - 1; | |||
| } | |||
| @@ -1291,7 +1291,20 @@ Temp | |||
| ); | |||
| } | |||
| void simple_idct_mmx(int16_t *block) | |||
| void ff_simple_idct_mmx(int16_t *block) | |||
| { | |||
| idct(block); | |||
| idct(block); | |||
| } | |||
| //FIXME merge add/put into the idct | |||
| void ff_simple_idct_put_mmx(UINT8 *dest, int line_size, DCTELEM *block) /* "put" entry point: idct() followed by put_pixels_clamped() */ | |||
| { | |||
| idct(block); /* this file's IDCT routine; presumably works in place on block -- TODO confirm */ | |||
| put_pixels_clamped(block, dest, line_size); /* hand the transformed block to the pixel writer for dest/line_size */ | |||
| } | |||
| void ff_simple_idct_add_mmx(UINT8 *dest, int line_size, DCTELEM *block) /* "add" entry point: idct() followed by add_pixels_clamped() */ | |||
| { | |||
| idct(block); /* this file's IDCT routine; presumably works in place on block -- TODO confirm */ | |||
| add_pixels_clamped(block, dest, line_size); /* presumably adds the transformed block onto the pixels already at dest -- TODO confirm */ | |||
| } | |||
| @@ -322,14 +322,14 @@ static void jpeg_table_header(MpegEncContext *s) | |||
| put_bits(p, 4, 0); /* 8 bit precision */ | |||
| put_bits(p, 4, 0); /* table 0 */ | |||
| for(i=0;i<64;i++) { | |||
| j = zigzag_direct[i]; | |||
| j = s->intra_scantable.permutated[i]; | |||
| put_bits(p, 8, s->intra_matrix[j]); | |||
| } | |||
| #ifdef TWOMATRIXES | |||
| put_bits(p, 4, 0); /* 8 bit precision */ | |||
| put_bits(p, 4, 1); /* table 1 */ | |||
| for(i=0;i<64;i++) { | |||
| j = zigzag_direct[i]; | |||
| j = s->intra_scantable.permutated[i]; | |||
| put_bits(p, 8, s->chroma_intra_matrix[j]); | |||
| } | |||
| #endif | |||
| @@ -535,7 +535,7 @@ static void encode_block(MpegEncContext *s, DCTELEM *block, int n) | |||
| run = 0; | |||
| last_index = s->block_last_index[n]; | |||
| for(i=1;i<=last_index;i++) { | |||
| j = zigzag_direct[i]; | |||
| j = s->intra_scantable.permutated[i]; | |||
| val = block[j]; | |||
| if (val == 0) { | |||
| run++; | |||
| @@ -620,6 +620,8 @@ typedef struct MJpegDecodeContext { | |||
| int restart_interval; | |||
| int restart_count; | |||
| int interleaved_rows; | |||
| ScanTable scantable; | |||
| void (*idct_put)(UINT8 *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); | |||
| } MJpegDecodeContext; | |||
| #define SKIP_REMAINING(gb, len) { \ | |||
| @@ -645,9 +647,23 @@ static void build_vlc(VLC *vlc, const UINT8 *bits_table, const UINT8 *val_table, | |||
| static int mjpeg_decode_init(AVCodecContext *avctx) | |||
| { | |||
| MJpegDecodeContext *s = avctx->priv_data; | |||
| MpegEncContext s2; | |||
| s->avctx = avctx; | |||
| /* ugly way to get the idct & scantable */ | |||
| memset(&s2, 0, sizeof(MpegEncContext)); | |||
| s2.flags= avctx->flags; | |||
| s2.avctx= avctx; | |||
| // s2->out_format = FMT_MJPEG; | |||
| s2.width = 8; | |||
| s2.height = 8; | |||
| if (MPV_common_init(&s2) < 0) | |||
| return -1; | |||
| s->scantable= s2.intra_scantable; | |||
| s->idct_put= s2.idct_put; | |||
| MPV_common_end(&s2); | |||
| s->header_state = 0; | |||
| s->mpeg_enc_ctx_allocated = 0; | |||
| s->buffer_size = PICTURE_BUFFER_SIZE - 1; /* minus 1 to take into | |||
| @@ -657,7 +673,7 @@ static int mjpeg_decode_init(AVCodecContext *avctx) | |||
| s->first_picture = 1; | |||
| s->org_width = avctx->width; | |||
| s->org_height = avctx->height; | |||
| build_vlc(&s->vlcs[0][0], bits_dc_luminance, val_dc_luminance, 12); | |||
| build_vlc(&s->vlcs[0][1], bits_dc_chrominance, val_dc_chrominance, 12); | |||
| build_vlc(&s->vlcs[1][0], bits_ac_luminance, val_ac_luminance, 251); | |||
| @@ -694,7 +710,7 @@ static int mjpeg_decode_dqt(MJpegDecodeContext *s, | |||
| dprintf("index=%d\n", index); | |||
| /* read quant table */ | |||
| for(i=0;i<64;i++) { | |||
| j = zigzag_direct[i]; | |||
| j = s->scantable.permutated[i]; | |||
| s->quant_matrixes[index][j] = get_bits(&s->gb, 8); | |||
| } | |||
| len -= 65; | |||
| @@ -897,7 +913,7 @@ static int decode_block(MJpegDecodeContext *s, DCTELEM *block, | |||
| dprintf("error count: %d\n", i); | |||
| return -1; | |||
| } | |||
| j = zigzag_direct[i]; | |||
| j = s->scantable.permutated[i]; | |||
| block[j] = level * quant_matrix[j]; | |||
| i++; | |||
| if (i >= 64) | |||
| @@ -1021,7 +1037,7 @@ static int mjpeg_decode_sos(MJpegDecodeContext *s, | |||
| (h * mb_x + x) * 8; | |||
| if (s->interlaced && s->bottom_field) | |||
| ptr += s->linesize[c] >> 1; | |||
| ff_idct_put(ptr, s->linesize[c], s->block); | |||
| s->idct_put(ptr, s->linesize[c], s->block); | |||
| if (++x == h) { | |||
| x = 0; | |||
| y++; | |||
| @@ -542,7 +542,7 @@ static void mpeg1_encode_block(MpegEncContext *s, | |||
| last_non_zero = i - 1; | |||
| for(;i<=last_index;i++) { | |||
| j = zigzag_direct[i]; | |||
| j = s->intra_scantable.permutated[i]; | |||
| level = block[j]; | |||
| next_coef: | |||
| #if 0 | |||
| @@ -552,26 +552,11 @@ static void mpeg1_encode_block(MpegEncContext *s, | |||
| /* encode using VLC */ | |||
| if (level != 0) { | |||
| run = i - last_non_zero - 1; | |||
| #ifdef ARCH_X86 | |||
| asm volatile( | |||
| "movl %2, %1 \n\t" | |||
| "movl %1, %0 \n\t" | |||
| "addl %1, %1 \n\t" | |||
| "sbbl %1, %1 \n\t" | |||
| "xorl %1, %0 \n\t" | |||
| "subl %1, %0 \n\t" | |||
| "andl $1, %1 \n\t" | |||
| : "=&r" (alevel), "=&r" (sign) | |||
| : "g" (level) | |||
| ); | |||
| #else | |||
| sign = 0; | |||
| alevel = level; | |||
| if (alevel < 0) { | |||
| sign = 1; | |||
| alevel = -alevel; | |||
| } | |||
| #endif | |||
| alevel= level; | |||
| MASK_ABS(sign, alevel) | |||
| sign&=1; | |||
| // code = get_rl_index(rl, 0, run, alevel); | |||
| if (alevel > mpeg1_max_level[0][run]) | |||
| code= 111; /*rl->n*/ | |||
| @@ -1040,6 +1025,7 @@ static int mpeg1_decode_block(MpegEncContext *s, | |||
| int level, dc, diff, i, j, run; | |||
| int code, component; | |||
| RLTable *rl = &rl_mpeg1; | |||
| UINT8 * const scantable= s->intra_scantable.permutated; | |||
| if (s->mb_intra) { | |||
| /* DC coef */ | |||
| @@ -1099,7 +1085,7 @@ static int mpeg1_decode_block(MpegEncContext *s, | |||
| return -1; | |||
| add_coef: | |||
| dprintf("%d: run=%d level=%d\n", n, run, level); | |||
| j = zigzag_direct[i]; | |||
| j = scantable[i]; | |||
| block[j] = level; | |||
| i++; | |||
| } | |||
| @@ -1121,9 +1107,9 @@ static int mpeg2_decode_block_non_intra(MpegEncContext *s, | |||
| int mismatch; | |||
| if (s->alternate_scan) | |||
| scan_table = ff_alternate_vertical_scan; | |||
| scan_table = s->intra_v_scantable.permutated; | |||
| else | |||
| scan_table = zigzag_direct; | |||
| scan_table = s->intra_scantable.permutated; | |||
| mismatch = 1; | |||
| { | |||
| @@ -1140,7 +1126,7 @@ static int mpeg2_decode_block_non_intra(MpegEncContext *s, | |||
| v= SHOW_UBITS(re, &s->gb, 2); | |||
| if (v & 2) { | |||
| run = 0; | |||
| level = 1 - ((v & 1) << 1); | |||
| level = 5 - (v << 1); | |||
| SKIP_BITS(re, &s->gb, 2); | |||
| CLOSE_READER(re, &s->gb); | |||
| goto add_coef; | |||
| @@ -1191,6 +1177,7 @@ static int mpeg2_decode_block_non_intra(MpegEncContext *s, | |||
| } | |||
| block[63] ^= (mismatch & 1); | |||
| s->block_last_index[n] = i; | |||
| return 0; | |||
| } | |||
| @@ -1206,9 +1193,9 @@ static int mpeg2_decode_block_intra(MpegEncContext *s, | |||
| int mismatch; | |||
| if (s->alternate_scan) | |||
| scan_table = ff_alternate_vertical_scan; | |||
| scan_table = s->intra_v_scantable.permutated; | |||
| else | |||
| scan_table = zigzag_direct; | |||
| scan_table = s->intra_scantable.permutated; | |||
| /* DC coef */ | |||
| component = (n <= 3 ? 0 : n - 4 + 1); | |||
| @@ -1402,7 +1389,7 @@ static void mpeg_decode_quant_matrix_extension(MpegEncContext *s) | |||
| if (get_bits1(&s->gb)) { | |||
| for(i=0;i<64;i++) { | |||
| v = get_bits(&s->gb, 8); | |||
| j = zigzag_direct[i]; | |||
| j = s->intra_scantable.permutated[i]; | |||
| s->intra_matrix[j] = v; | |||
| s->chroma_intra_matrix[j] = v; | |||
| } | |||
| @@ -1410,7 +1397,7 @@ static void mpeg_decode_quant_matrix_extension(MpegEncContext *s) | |||
| if (get_bits1(&s->gb)) { | |||
| for(i=0;i<64;i++) { | |||
| v = get_bits(&s->gb, 8); | |||
| j = zigzag_direct[i]; | |||
| j = s->intra_scantable.permutated[i]; | |||
| s->inter_matrix[j] = v; | |||
| s->chroma_inter_matrix[j] = v; | |||
| } | |||
| @@ -1418,14 +1405,14 @@ static void mpeg_decode_quant_matrix_extension(MpegEncContext *s) | |||
| if (get_bits1(&s->gb)) { | |||
| for(i=0;i<64;i++) { | |||
| v = get_bits(&s->gb, 8); | |||
| j = zigzag_direct[i]; | |||
| j = s->intra_scantable.permutated[i]; | |||
| s->chroma_intra_matrix[j] = v; | |||
| } | |||
| } | |||
| if (get_bits1(&s->gb)) { | |||
| for(i=0;i<64;i++) { | |||
| v = get_bits(&s->gb, 8); | |||
| j = zigzag_direct[i]; | |||
| j = s->intra_scantable.permutated[i]; | |||
| s->chroma_inter_matrix[j] = v; | |||
| } | |||
| } | |||
| @@ -1636,7 +1623,7 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx, | |||
| if (get_bits1(&s->gb)) { | |||
| for(i=0;i<64;i++) { | |||
| v = get_bits(&s->gb, 8); | |||
| j = zigzag_direct[i]; | |||
| j = s->intra_scantable.permutated[i]; | |||
| s->intra_matrix[j] = v; | |||
| s->chroma_intra_matrix[j] = v; | |||
| } | |||
| @@ -1648,15 +1635,16 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx, | |||
| #endif | |||
| } else { | |||
| for(i=0;i<64;i++) { | |||
| int j= s->idct_permutation[i]; | |||
| v = ff_mpeg1_default_intra_matrix[i]; | |||
| s->intra_matrix[i] = v; | |||
| s->chroma_intra_matrix[i] = v; | |||
| s->intra_matrix[j] = v; | |||
| s->chroma_intra_matrix[j] = v; | |||
| } | |||
| } | |||
| if (get_bits1(&s->gb)) { | |||
| for(i=0;i<64;i++) { | |||
| v = get_bits(&s->gb, 8); | |||
| j = zigzag_direct[i]; | |||
| j = s->intra_scantable.permutated[i]; | |||
| s->inter_matrix[j] = v; | |||
| s->chroma_inter_matrix[j] = v; | |||
| } | |||
| @@ -1668,9 +1656,10 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx, | |||
| #endif | |||
| } else { | |||
| for(i=0;i<64;i++) { | |||
| int j= s->idct_permutation[i]; | |||
| v = ff_mpeg1_default_non_intra_matrix[i]; | |||
| s->inter_matrix[i] = v; | |||
| s->chroma_inter_matrix[i] = v; | |||
| s->inter_matrix[j] = v; | |||
| s->chroma_inter_matrix[j] = v; | |||
| } | |||
| } | |||
| @@ -2,7 +2,7 @@ | |||
| * MPEG1/2 tables | |||
| */ | |||
| INT16 ff_mpeg1_default_intra_matrix[64] = { | |||
| const INT16 ff_mpeg1_default_intra_matrix[64] = { | |||
| 8, 16, 19, 22, 26, 27, 29, 34, | |||
| 16, 16, 22, 24, 27, 29, 34, 37, | |||
| 19, 22, 26, 27, 29, 34, 34, 38, | |||
| @@ -13,7 +13,7 @@ INT16 ff_mpeg1_default_intra_matrix[64] = { | |||
| 27, 29, 35, 38, 46, 56, 69, 83 | |||
| }; | |||
| INT16 ff_mpeg1_default_non_intra_matrix[64] = { | |||
| const INT16 ff_mpeg1_default_non_intra_matrix[64] = { | |||
| 16, 16, 16, 16, 16, 16, 16, 16, | |||
| 16, 16, 16, 16, 16, 16, 16, 16, | |||
| 16, 16, 16, 16, 16, 16, 16, 16, | |||
| @@ -135,7 +135,7 @@ static const UINT16 pixel_aspect[16][2]={ | |||
| }; | |||
| /* these matrixes will be permuted for the idct */ | |||
| INT16 ff_mpeg4_default_intra_matrix[64] = { | |||
| const INT16 ff_mpeg4_default_intra_matrix[64] = { | |||
| 8, 17, 18, 19, 21, 23, 25, 27, | |||
| 17, 18, 19, 21, 23, 25, 27, 28, | |||
| 20, 21, 22, 23, 24, 26, 28, 30, | |||
| @@ -146,7 +146,7 @@ INT16 ff_mpeg4_default_intra_matrix[64] = { | |||
| 27, 28, 30, 32, 35, 38, 41, 45, | |||
| }; | |||
| INT16 ff_mpeg4_default_non_intra_matrix[64] = { | |||
| const INT16 ff_mpeg4_default_non_intra_matrix[64] = { | |||
| 16, 17, 18, 19, 20, 21, 22, 23, | |||
| 17, 18, 19, 20, 21, 22, 23, 24, | |||
| 18, 19, 20, 21, 22, 23, 24, 25, | |||
| @@ -23,11 +23,15 @@ | |||
| #include "avcodec.h" | |||
| #include "dsputil.h" | |||
| #include "mpegvideo.h" | |||
| #include "simple_idct.h" | |||
| #ifdef USE_FASTMEMCPY | |||
| #include "fastmemcpy.h" | |||
| #endif | |||
| //#undef NDEBUG | |||
| //#include <assert.h> | |||
| static void encode_picture(MpegEncContext *s, int picture_number); | |||
| static void dct_unquantize_mpeg1_c(MpegEncContext *s, | |||
| DCTELEM *block, int n, int qscale); | |||
| @@ -72,8 +76,6 @@ static UINT8 h263_chroma_roundtab[16] = { | |||
| static UINT16 default_mv_penalty[MAX_FCODE+1][MAX_MV*2+1]; | |||
| static UINT8 default_fcode_tab[MAX_MV*2+1]; | |||
| extern UINT8 zigzag_end[64]; | |||
| /* default motion estimation */ | |||
| int motion_estimation_method = ME_EPZS; | |||
| @@ -86,7 +88,7 @@ static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16 | |||
| int i; | |||
| if (s->fdct == ff_jpeg_fdct_islow) { | |||
| for(i=0;i<64;i++) { | |||
| const int j= block_permute_op(i); | |||
| const int j= s->idct_permutation[i]; | |||
| /* 16 <= qscale * quant_matrix[i] <= 7905 */ | |||
| /* 19952 <= aanscales[i] * qscale * quant_matrix[i] <= 249205026 */ | |||
| /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */ | |||
| @@ -97,7 +99,7 @@ static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16 | |||
| } | |||
| } else if (s->fdct == fdct_ifast) { | |||
| for(i=0;i<64;i++) { | |||
| const int j= block_permute_op(i); | |||
| const int j= s->idct_permutation[i]; | |||
| /* 16 <= qscale * quant_matrix[i] <= 7905 */ | |||
| /* 19952 <= aanscales[i] * qscale * quant_matrix[i] <= 249205026 */ | |||
| /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */ | |||
| @@ -108,13 +110,14 @@ static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16 | |||
| } | |||
| } else { | |||
| for(i=0;i<64;i++) { | |||
| const int j= s->idct_permutation[i]; | |||
| /* We can safely suppose that 16 <= quant_matrix[i] <= 255 | |||
| So 16 <= qscale * quant_matrix[i] <= 7905 | |||
| so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905 | |||
| so 32768 >= (1<<19) / (qscale * quant_matrix[i]) >= 67 | |||
| */ | |||
| qmat [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]); | |||
| qmat16[qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[block_permute_op(i)]); | |||
| qmat16[qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]); | |||
| if(qmat16[qscale][i]==0 || qmat16[qscale][i]==128*256) qmat16[qscale][i]=128*256-1; | |||
| qmat16_bias[qscale][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][i]); | |||
| @@ -131,6 +134,50 @@ static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16 | |||
| goto fail;\ | |||
| }\ | |||
| } | |||
| /* | |||
| static void build_end(void) | |||
| { | |||
| int lastIndex; | |||
| int lastIndexAfterPerm=0; | |||
| for(lastIndex=0; lastIndex<64; lastIndex++) | |||
| { | |||
| if(ff_zigzag_direct[lastIndex] > lastIndexAfterPerm) | |||
| lastIndexAfterPerm= ff_zigzag_direct[lastIndex]; | |||
| zigzag_end[lastIndex]= lastIndexAfterPerm + 1; | |||
| } | |||
| } | |||
| */ | |||
| /** | |||
|  * Build a ScanTable from a 64-entry source scan order. | |||
|  * | |||
|  * st->permutated[k] receives the source entry k mapped through | |||
|  * s->idct_permutation; st->raster_end[k] receives the largest permuted | |||
|  * index seen among scan positions 0..k. | |||
|  * | |||
|  * @param s             context supplying idct_permutation (must already be filled) | |||
|  * @param st            table to fill | |||
|  * @param src_scantable 64 entries, each used as an index into s->idct_permutation | |||
|  */ | |||
| void ff_init_scantable(MpegEncContext *s, ScanTable *st, const UINT8 *src_scantable){ | |||
|     int pos; | |||
|     int highest; | |||
|  | |||
|     /* first pass: route every scan entry through the IDCT permutation */ | |||
|     for(pos=0; pos<64; pos++) | |||
|         st->permutated[pos] = s->idct_permutation[ src_scantable[pos] ]; | |||
|  | |||
|     /* second pass: running maximum of the permuted indices */ | |||
|     highest= -1; | |||
|     for(pos=0; pos<64; pos++){ | |||
|         const int idx= st->permutated[pos]; | |||
|         highest= idx > highest ? idx : highest; | |||
|         st->raster_end[pos]= highest; | |||
|     } | |||
| } | |||
| /* XXX: those functions should be suppressed ASAP when all IDCTs are | |||
| converted */ | |||
| static void ff_jref_idct_put(UINT8 *dest, int line_size, DCTELEM *block) /* "put" adapter: j_rev_dct() followed by put_pixels_clamped() */ | |||
| { | |||
| j_rev_dct (block); /* presumably an in-place inverse DCT on block -- TODO confirm */ | |||
| put_pixels_clamped(block, dest, line_size); /* hand the transformed block to the pixel writer for dest/line_size */ | |||
| } | |||
| static void ff_jref_idct_add(UINT8 *dest, int line_size, DCTELEM *block) /* "add" adapter: j_rev_dct() followed by add_pixels_clamped() */ | |||
| { | |||
| j_rev_dct (block); /* presumably an in-place inverse DCT on block -- TODO confirm */ | |||
| add_pixels_clamped(block, dest, line_size); /* presumably adds the transformed block onto the pixels already at dest -- TODO confirm */ | |||
| } | |||
| /* init common structure for both encoder and decoder */ | |||
| int MPV_common_init(MpegEncContext *s) | |||
| @@ -146,7 +193,19 @@ int MPV_common_init(MpegEncContext *s) | |||
| if(s->avctx->dct_algo==FF_DCT_FASTINT) | |||
| s->fdct = fdct_ifast; | |||
| else | |||
| s->fdct = ff_jpeg_fdct_islow; | |||
| s->fdct = ff_jpeg_fdct_islow; //slow/accurate/default | |||
| if(s->avctx->idct_algo==FF_IDCT_INT){ | |||
| s->idct_put= ff_jref_idct_put; | |||
| s->idct_add= ff_jref_idct_add; | |||
| for(i=0; i<64; i++) | |||
| s->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2); | |||
| }else{ //accurate/default | |||
| s->idct_put= simple_idct_put; | |||
| s->idct_add= simple_idct_add; | |||
| for(i=0; i<64; i++) | |||
| s->idct_permutation[i]= i; | |||
| } | |||
| #ifdef HAVE_MMX | |||
| MPV_common_init_mmx(s); | |||
| @@ -157,6 +216,15 @@ int MPV_common_init(MpegEncContext *s) | |||
| #ifdef HAVE_MLIB | |||
| MPV_common_init_mlib(s); | |||
| #endif | |||
| /* load & permutate scantables | |||
| note: only wmv uses different ones | |||
| */ | |||
| ff_init_scantable(s, &s->inter_scantable , ff_zigzag_direct); | |||
| ff_init_scantable(s, &s->intra_scantable , ff_zigzag_direct); | |||
| ff_init_scantable(s, &s->intra_h_scantable, ff_alternate_horizontal_scan); | |||
| ff_init_scantable(s, &s->intra_v_scantable, ff_alternate_vertical_scan); | |||
| s->mb_width = (s->width + 15) / 16; | |||
| s->mb_height = (s->height + 15) / 16; | |||
| @@ -577,13 +645,6 @@ int MPV_encode_init(AVCodecContext *avctx) | |||
| s->y_dc_scale_table= | |||
| s->c_dc_scale_table= ff_mpeg1_dc_scale_table; | |||
| if (s->out_format == FMT_H263) | |||
| h263_encode_init(s); | |||
| else if (s->out_format == FMT_MPEG1) | |||
| ff_mpeg1_encode_init(s); | |||
| if(s->msmpeg4_version) | |||
| ff_msmpeg4_encode_init(s); | |||
| /* dont use mv_penalty table for crap MV as it would be confused */ | |||
| if (s->me_method < ME_EPZS) s->mv_penalty = default_mv_penalty; | |||
| @@ -593,17 +654,25 @@ int MPV_encode_init(AVCodecContext *avctx) | |||
| if (MPV_common_init(s) < 0) | |||
| return -1; | |||
| if (s->out_format == FMT_H263) | |||
| h263_encode_init(s); | |||
| else if (s->out_format == FMT_MPEG1) | |||
| ff_mpeg1_encode_init(s); | |||
| if(s->msmpeg4_version) | |||
| ff_msmpeg4_encode_init(s); | |||
| /* init default q matrix */ | |||
| for(i=0;i<64;i++) { | |||
| int j= s->idct_permutation[i]; | |||
| if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){ | |||
| s->intra_matrix[i] = ff_mpeg4_default_intra_matrix[i]; | |||
| s->inter_matrix[i] = ff_mpeg4_default_non_intra_matrix[i]; | |||
| s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i]; | |||
| s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i]; | |||
| }else if(s->out_format == FMT_H263){ | |||
| s->intra_matrix[i] = | |||
| s->inter_matrix[i] = ff_mpeg1_default_non_intra_matrix[i]; | |||
| s->intra_matrix[j] = | |||
| s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i]; | |||
| }else{ /* mpeg1 */ | |||
| s->intra_matrix[i] = ff_mpeg1_default_intra_matrix[i]; | |||
| s->inter_matrix[i] = ff_mpeg1_default_non_intra_matrix[i]; | |||
| s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i]; | |||
| s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i]; | |||
| } | |||
| } | |||
| @@ -1450,7 +1519,7 @@ static inline void put_dct(MpegEncContext *s, | |||
| { | |||
| if (!s->mpeg2) | |||
| s->dct_unquantize(s, block, i, s->qscale); | |||
| ff_idct_put (dest, line_size, block); | |||
| s->idct_put (dest, line_size, block); | |||
| } | |||
| /* add block[] to dest[] */ | |||
| @@ -1458,7 +1527,7 @@ static inline void add_dct(MpegEncContext *s, | |||
| DCTELEM *block, int i, UINT8 *dest, int line_size) | |||
| { | |||
| if (s->block_last_index[i] >= 0) { | |||
| ff_idct_add (dest, line_size, block); | |||
| s->idct_add (dest, line_size, block); | |||
| } | |||
| } | |||
| @@ -1468,7 +1537,7 @@ static inline void add_dequant_dct(MpegEncContext *s, | |||
| if (s->block_last_index[i] >= 0) { | |||
| s->dct_unquantize(s, block, i, s->qscale); | |||
| ff_idct_add (dest, line_size, block); | |||
| s->idct_add (dest, line_size, block); | |||
| } | |||
| } | |||
| @@ -1720,7 +1789,7 @@ static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int th | |||
| if(last_index<=skip_dc - 1) return; | |||
| for(i=0; i<=last_index; i++){ | |||
| const int j = zigzag_direct[i]; | |||
| const int j = s->intra_scantable.permutated[i]; | |||
| const int level = ABS(block[j]); | |||
| if(level==1){ | |||
| if(skip_dc && i==0) continue; | |||
| @@ -1734,7 +1803,7 @@ static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int th | |||
| } | |||
| if(score >= threshold) return; | |||
| for(i=skip_dc; i<=last_index; i++){ | |||
| const int j = zigzag_direct[i]; | |||
| const int j = s->intra_scantable.permutated[i]; | |||
| block[j]=0; | |||
| } | |||
| if(block[0]) s->block_last_index[n]= 0; | |||
| @@ -1746,9 +1815,14 @@ static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index | |||
| int i; | |||
| const int maxlevel= s->max_qcoeff; | |||
| const int minlevel= s->min_qcoeff; | |||
| for(i=0;i<=last_index; i++){ | |||
| const int j = zigzag_direct[i]; | |||
| if(s->mb_intra){ | |||
| i=1; //skip clipping of intra dc | |||
| }else | |||
| i=0; | |||
| for(;i<=last_index; i++){ | |||
| const int j= s->intra_scantable.permutated[i]; | |||
| int level = block[j]; | |||
| if (level>maxlevel) level=maxlevel; | |||
| @@ -1760,22 +1834,22 @@ static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index | |||
| static inline void requantize_coeffs(MpegEncContext *s, DCTELEM block[64], int oldq, int newq, int n) | |||
| { | |||
| int i; | |||
| if(s->mb_intra){ | |||
| //FIXME requantize, note (mpeg1/h263/h263p-aic dont need it,...) | |||
| i=1; | |||
| if(s->mb_intra){ | |||
| i=1; //skip clipping of intra dc | |||
| //FIXME requantize, note (mpeg1/h263/h263p-aic dont need it,...) | |||
| }else | |||
| i=0; | |||
| for(;i<=s->block_last_index[n]; i++){ | |||
| const int j = zigzag_direct[i]; | |||
| const int j = s->intra_scantable.permutated[i]; | |||
| int level = block[j]; | |||
| block[j]= ROUNDED_DIV(level*oldq, newq); | |||
| } | |||
| for(i=s->block_last_index[n]; i>=0; i--){ | |||
| const int j = zigzag_direct[i]; //FIXME other scantabs | |||
| const int j = s->intra_scantable.permutated[i]; | |||
| if(block[j]) break; | |||
| } | |||
| s->block_last_index[n]= i; | |||
| @@ -1791,11 +1865,14 @@ static inline void auto_requantize_coeffs(MpegEncContext *s, DCTELEM block[6][64 | |||
| assert(s->adaptive_quant); | |||
| for(n=0; n<6; n++){ | |||
| if(s->mb_intra) i=1; | |||
| else i=0; | |||
| if(s->mb_intra){ | |||
| i=1; //skip clipping of intra dc | |||
| //FIXME requantize, note (mpeg1/h263/h263p-aic dont need it,...) | |||
| }else | |||
| i=0; | |||
| for(;i<=s->block_last_index[n]; i++){ | |||
| const int j = zigzag_direct[i]; //FIXME other scantabs | |||
| const int j = s->intra_scantable.permutated[i]; | |||
| int level = block[n][j]; | |||
| if(largest < level) largest = level; | |||
| if(smallest > level) smallest= level; | |||
| @@ -2379,8 +2456,11 @@ static void encode_picture(MpegEncContext *s, int picture_number) | |||
| if (s->out_format == FMT_MJPEG) { | |||
| /* for mjpeg, we do include qscale in the matrix */ | |||
| s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0]; | |||
| for(i=1;i<64;i++) | |||
| s->intra_matrix[i] = CLAMP_TO_8BIT((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3); | |||
| for(i=1;i<64;i++){ | |||
| int j= s->idct_permutation[i]; | |||
| s->intra_matrix[j] = CLAMP_TO_8BIT((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3); | |||
| } | |||
| convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16, | |||
| s->q_intra_matrix16_bias, s->intra_matrix, s->intra_quant_bias); | |||
| } | |||
| @@ -2752,7 +2832,7 @@ static int dct_quantize_c(MpegEncContext *s, | |||
| #ifndef ARCH_ALPHA /* Alpha uses unpermuted matrix */ | |||
| /* we need this permutation so that we correct the IDCT | |||
| permutation. will be moved into DCT code */ | |||
| block_permute(block); | |||
| block_permute(block, s->idct_permutation); //FIXME remove | |||
| #endif | |||
| if (s->mb_intra) { | |||
| @@ -2782,7 +2862,7 @@ static int dct_quantize_c(MpegEncContext *s, | |||
| threshold2= (threshold1<<1); | |||
| for(;i<64;i++) { | |||
| j = zigzag_direct[i]; | |||
| j = s->intra_scantable.permutated[i]; | |||
| level = block[j]; | |||
| level = level * qmat[j]; | |||
| @@ -2813,8 +2893,7 @@ static void dct_unquantize_mpeg1_c(MpegEncContext *s, | |||
| int i, level, nCoeffs; | |||
| const UINT16 *quant_matrix; | |||
| if(s->alternate_scan) nCoeffs= 64; | |||
| else nCoeffs= s->block_last_index[n]+1; | |||
| nCoeffs= s->block_last_index[n]; | |||
| if (s->mb_intra) { | |||
| if (n < 4) | |||
| @@ -2823,8 +2902,8 @@ static void dct_unquantize_mpeg1_c(MpegEncContext *s, | |||
| block[0] = block[0] * s->c_dc_scale; | |||
| /* XXX: only mpeg1 */ | |||
| quant_matrix = s->intra_matrix; | |||
| for(i=1;i<nCoeffs;i++) { | |||
| int j= zigzag_direct[i]; | |||
| for(i=1;i<=nCoeffs;i++) { | |||
| int j= s->intra_scantable.permutated[i]; | |||
| level = block[j]; | |||
| if (level) { | |||
| if (level < 0) { | |||
| @@ -2846,8 +2925,8 @@ static void dct_unquantize_mpeg1_c(MpegEncContext *s, | |||
| } else { | |||
| i = 0; | |||
| quant_matrix = s->inter_matrix; | |||
| for(;i<nCoeffs;i++) { | |||
| int j= zigzag_direct[i]; | |||
| for(;i<=nCoeffs;i++) { | |||
| int j= s->intra_scantable.permutated[i]; | |||
| level = block[j]; | |||
| if (level) { | |||
| if (level < 0) { | |||
| @@ -2877,8 +2956,8 @@ static void dct_unquantize_mpeg2_c(MpegEncContext *s, | |||
| int i, level, nCoeffs; | |||
| const UINT16 *quant_matrix; | |||
| if(s->alternate_scan) nCoeffs= 64; | |||
| else nCoeffs= s->block_last_index[n]+1; | |||
| if(s->alternate_scan) nCoeffs= 63; | |||
| else nCoeffs= s->block_last_index[n]; | |||
| if (s->mb_intra) { | |||
| if (n < 4) | |||
| @@ -2886,8 +2965,8 @@ static void dct_unquantize_mpeg2_c(MpegEncContext *s, | |||
| else | |||
| block[0] = block[0] * s->c_dc_scale; | |||
| quant_matrix = s->intra_matrix; | |||
| for(i=1;i<nCoeffs;i++) { | |||
| int j= zigzag_direct[i]; | |||
| for(i=1;i<=nCoeffs;i++) { | |||
| int j= s->intra_scantable.permutated[i]; | |||
| level = block[j]; | |||
| if (level) { | |||
| if (level < 0) { | |||
| @@ -2908,8 +2987,8 @@ static void dct_unquantize_mpeg2_c(MpegEncContext *s, | |||
| int sum=-1; | |||
| i = 0; | |||
| quant_matrix = s->inter_matrix; | |||
| for(;i<nCoeffs;i++) { | |||
| int j= zigzag_direct[i]; | |||
| for(;i<=nCoeffs;i++) { | |||
| int j= s->intra_scantable.permutated[i]; | |||
| level = block[j]; | |||
| if (level) { | |||
| if (level < 0) { | |||
| @@ -2940,27 +3019,27 @@ static void dct_unquantize_h263_c(MpegEncContext *s, | |||
| int i, level, qmul, qadd; | |||
| int nCoeffs; | |||
| assert(s->block_last_index[n]>=0); | |||
| qadd = (qscale - 1) | 1; | |||
| qmul = qscale << 1; | |||
| if (s->mb_intra) { | |||
| if (!s->h263_aic) { | |||
| if (n < 4) | |||
| block[0] = block[0] * s->y_dc_scale; | |||
| else | |||
| block[0] = block[0] * s->c_dc_scale; | |||
| } | |||
| }else | |||
| qadd = 0; | |||
| i = 1; | |||
| nCoeffs= 64; //does not allways use zigzag table | |||
| nCoeffs= 63; //does not always use zigzag table | |||
| } else { | |||
| i = 0; | |||
| nCoeffs= zigzag_end[ s->block_last_index[n] ]; | |||
| nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]; | |||
| } | |||
| qmul = s->qscale << 1; | |||
| if (s->h263_aic && s->mb_intra) | |||
| qadd = 0; | |||
| else | |||
| qadd = (s->qscale - 1) | 1; | |||
| for(;i<nCoeffs;i++) { | |||
| for(;i<=nCoeffs;i++) { | |||
| level = block[i]; | |||
| if (level) { | |||
| if (level < 0) { | |||
| @@ -99,6 +99,11 @@ typedef struct ReorderBuffer{ | |||
| int picture_in_gop_number; | |||
| } ReorderBuffer; | |||
| typedef struct ScanTable{ /* scan-order tables held per MpegEncContext; passed to ff_init_scantable() along with a source scantable (its body is not shown here) */ | |||
| UINT8 permutated[64]; /* maps scan position i to a coefficient index; callers read block[permutated[i]] */ | |||
| UINT8 raster_end[64]; /* indexed by a block's last scan position; dct_unquantize_h263_c uses the value as the inclusive upper bound of its block[] loop */ | |||
| } ScanTable; | |||
| typedef struct MpegEncContext { | |||
| struct AVCodecContext *avctx; | |||
| /* the following parameters must be initialized before encoding */ | |||
| @@ -286,6 +291,12 @@ typedef struct MpegEncContext { | |||
| UINT16 __align8 q_intra_matrix16_bias[32][64]; | |||
| UINT16 __align8 q_inter_matrix16_bias[32][64]; | |||
| int block_last_index[6]; /* last non zero coefficient in block */ | |||
| /* scantables */ | |||
| ScanTable intra_scantable; | |||
| ScanTable intra_h_scantable; | |||
| ScanTable intra_v_scantable; | |||
| ScanTable inter_scantable; // if inter == intra then intra should be used to reduce the cache usage | |||
| UINT8 idct_permutation[64]; | |||
| void *opaque; /* private data for the user */ | |||
| @@ -421,10 +432,6 @@ typedef struct MpegEncContext { | |||
| int per_mb_rl_table; | |||
| int esc3_level_length; | |||
| int esc3_run_length; | |||
| UINT8 *inter_scantable; | |||
| UINT8 *intra_scantable; | |||
| UINT8 *intra_v_scantable; | |||
| UINT8 *intra_h_scantable; | |||
| /* [mb_intra][isChroma][level][run][last] */ | |||
| int (*ac_stats)[2][MAX_LEVEL+1][MAX_RUN+1][2]; | |||
| int inter_intra_pred; | |||
| @@ -477,7 +484,9 @@ typedef struct MpegEncContext { | |||
| void (*dct_unquantize)(struct MpegEncContext *s, // unquantizer to use (mpeg4 can use both) | |||
| DCTELEM *block, int n, int qscale); | |||
| int (*dct_quantize)(struct MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow); | |||
| void (*fdct)(DCTELEM *block); | |||
| void (*fdct)(DCTELEM *block/* align 16*/); | |||
| void (*idct_put)(UINT8 *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); | |||
| void (*idct_add)(UINT8 *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); | |||
| } MpegEncContext; | |||
| int MPV_common_init(MpegEncContext *s); | |||
| @@ -498,6 +507,7 @@ extern void (*draw_edges)(UINT8 *buf, int wrap, int width, int height, int w); | |||
| void ff_conceal_past_errors(MpegEncContext *s, int conceal_all); | |||
| void ff_copy_bits(PutBitContext *pb, UINT8 *src, int length); | |||
| void ff_clean_intra_table_entries(MpegEncContext *s); | |||
| void ff_init_scantable(MpegEncContext *s, ScanTable *st, const UINT8 *src_scantable); | |||
| extern int ff_bit_exact; | |||
| @@ -511,8 +521,8 @@ void ff_fix_long_p_mvs(MpegEncContext * s); | |||
| void ff_fix_long_b_mvs(MpegEncContext * s, int16_t (*mv_table)[2], int f_code, int type); | |||
| /* mpeg12.c */ | |||
| extern INT16 ff_mpeg1_default_intra_matrix[64]; | |||
| extern INT16 ff_mpeg1_default_non_intra_matrix[64]; | |||
| extern const INT16 ff_mpeg1_default_intra_matrix[64]; | |||
| extern const INT16 ff_mpeg1_default_non_intra_matrix[64]; | |||
| extern UINT8 ff_mpeg1_dc_scale_table[128]; | |||
| void mpeg1_encode_picture_header(MpegEncContext *s, int picture_number); | |||
| @@ -551,8 +561,8 @@ static inline int get_rl_index(const RLTable *rl, int last, int run, int level) | |||
| extern UINT8 ff_mpeg4_y_dc_scale_table[32]; | |||
| extern UINT8 ff_mpeg4_c_dc_scale_table[32]; | |||
| extern INT16 ff_mpeg4_default_intra_matrix[64]; | |||
| extern INT16 ff_mpeg4_default_non_intra_matrix[64]; | |||
| extern const INT16 ff_mpeg4_default_intra_matrix[64]; | |||
| extern const INT16 ff_mpeg4_default_non_intra_matrix[64]; | |||
| void h263_encode_mb(MpegEncContext *s, | |||
| DCTELEM block[6][64], | |||
| @@ -164,32 +164,21 @@ static void common_init(MpegEncContext * s) | |||
| break; | |||
| } | |||
| if(s->msmpeg4_version==4){ | |||
| s->intra_scantable = wmv1_scantable[1]; | |||
| s->intra_h_scantable= wmv1_scantable[2]; | |||
| s->intra_v_scantable= wmv1_scantable[3]; | |||
| s->inter_scantable = wmv1_scantable[0]; | |||
| }else{ | |||
| s->intra_scantable = zigzag_direct; | |||
| s->intra_h_scantable= ff_alternate_horizontal_scan; | |||
| s->intra_v_scantable= ff_alternate_vertical_scan; | |||
| s->inter_scantable = zigzag_direct; | |||
| int i; | |||
| ff_init_scantable(s, &s->intra_scantable , wmv1_scantable[1]); | |||
| ff_init_scantable(s, &s->intra_h_scantable, wmv1_scantable[2]); | |||
| ff_init_scantable(s, &s->intra_v_scantable, wmv1_scantable[3]); | |||
| ff_init_scantable(s, &s->inter_scantable , wmv1_scantable[0]); | |||
| } | |||
| //Note the default tables are set in common_init in mpegvideo.c | |||
| if(!inited){ | |||
| int i; | |||
| inited=1; | |||
| init_h263_dc_for_msmpeg4(); | |||
| /* permute for IDCT */ | |||
| for(i=0; i<WMV1_SCANTABLE_COUNT; i++){ | |||
| int k; | |||
| for(k=0;k<64;k++) { | |||
| int j = wmv1_scantable[i][k]; | |||
| wmv1_scantable[i][k]= block_permute_op(j); | |||
| } | |||
| } | |||
| } | |||
| } | |||
| @@ -936,7 +925,7 @@ static inline void msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int | |||
| rl = &rl_table[3 + s->rl_chroma_table_index]; | |||
| } | |||
| run_diff = 0; | |||
| scantable= s->intra_scantable; | |||
| scantable= s->intra_scantable.permutated; | |||
| set_stat(ST_INTRA_AC); | |||
| } else { | |||
| i = 0; | |||
| @@ -945,12 +934,12 @@ static inline void msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int | |||
| run_diff = 0; | |||
| else | |||
| run_diff = 1; | |||
| scantable= s->inter_scantable; | |||
| scantable= s->inter_scantable.permutated; | |||
| set_stat(ST_INTER_AC); | |||
| } | |||
| /* recalculate block_last_index for M$ wmv1 */ | |||
| if(scantable!=zigzag_direct && s->block_last_index[n]>0){ | |||
| if(s->msmpeg4_version==4 && s->block_last_index[n]>0){ | |||
| for(last_index=63; last_index>=0; last_index--){ | |||
| if(block[scantable[last_index]]) break; | |||
| } | |||
| @@ -1704,11 +1693,11 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block, | |||
| } | |||
| if (s->ac_pred) { | |||
| if (dc_pred_dir == 0) | |||
| scan_table = s->intra_v_scantable; /* left */ | |||
| scan_table = s->intra_v_scantable.permutated; /* left */ | |||
| else | |||
| scan_table = s->intra_h_scantable; /* top */ | |||
| scan_table = s->intra_h_scantable.permutated; /* top */ | |||
| } else { | |||
| scan_table = s->intra_scantable; | |||
| scan_table = s->intra_scantable.permutated; | |||
| } | |||
| set_stat(ST_INTRA_AC); | |||
| rl_vlc= rl->rl_vlc[0]; | |||
| @@ -1727,7 +1716,7 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block, | |||
| s->block_last_index[n] = i; | |||
| return 0; | |||
| } | |||
| scan_table = s->inter_scantable; | |||
| scan_table = s->inter_scantable.permutated; | |||
| set_stat(ST_INTER_AC); | |||
| rl_vlc= rl->rl_vlc[s->qscale]; | |||
| } | |||
| @@ -1819,7 +1819,7 @@ static UINT8 old_ff_c_dc_scale_table[32]={ | |||
| #define WMV1_SCANTABLE_COUNT 4 | |||
| static UINT8 wmv1_scantable00[64]= { | |||
| static const UINT8 wmv1_scantable00[64]= { | |||
| 0x00, 0x08, 0x01, 0x02, 0x09, 0x10, 0x18, 0x11, | |||
| 0x0A, 0x03, 0x04, 0x0B, 0x12, 0x19, 0x20, 0x28, | |||
| 0x30, 0x38, 0x29, 0x21, 0x1A, 0x13, 0x0C, 0x05, | |||
| @@ -1829,7 +1829,7 @@ static UINT8 wmv1_scantable00[64]= { | |||
| 0x2C, 0x25, 0x1E, 0x17, 0x1F, 0x26, 0x2D, 0x35, | |||
| 0x3D, 0x3E, 0x36, 0x2E, 0x27, 0x2F, 0x37, 0x3F, | |||
| }; | |||
| static UINT8 wmv1_scantable01[64]= { | |||
| static const UINT8 wmv1_scantable01[64]= { | |||
| 0x00, 0x08, 0x01, 0x02, 0x09, 0x10, 0x18, 0x11, | |||
| 0x0A, 0x03, 0x04, 0x0B, 0x12, 0x19, 0x20, 0x28, | |||
| 0x21, 0x30, 0x1A, 0x13, 0x0C, 0x05, 0x06, 0x0D, | |||
| @@ -1839,7 +1839,7 @@ static UINT8 wmv1_scantable01[64]= { | |||
| 0x1E, 0x17, 0x1F, 0x26, 0x2D, 0x34, 0x3C, 0x35, | |||
| 0x3D, 0x2E, 0x27, 0x2F, 0x36, 0x3E, 0x37, 0x3F, | |||
| }; | |||
| static UINT8 wmv1_scantable02[64]= { | |||
| static const UINT8 wmv1_scantable02[64]= { | |||
| 0x00, 0x01, 0x08, 0x02, 0x03, 0x09, 0x10, 0x18, | |||
| 0x11, 0x0A, 0x04, 0x05, 0x0B, 0x12, 0x19, 0x20, | |||
| 0x28, 0x30, 0x21, 0x1A, 0x13, 0x0C, 0x06, 0x07, | |||
| @@ -1849,7 +1849,7 @@ static UINT8 wmv1_scantable02[64]= { | |||
| 0x17, 0x1F, 0x26, 0x2D, 0x34, 0x3B, 0x3C, 0x35, | |||
| 0x2E, 0x27, 0x2F, 0x36, 0x3D, 0x3E, 0x37, 0x3F, | |||
| }; | |||
| static UINT8 wmv1_scantable03[64]= { | |||
| static const UINT8 wmv1_scantable03[64]= { | |||
| 0x00, 0x08, 0x10, 0x01, 0x18, 0x20, 0x28, 0x09, | |||
| 0x02, 0x03, 0x0A, 0x11, 0x19, 0x30, 0x38, 0x29, | |||
| 0x21, 0x1A, 0x12, 0x0B, 0x04, 0x05, 0x0C, 0x13, | |||
| @@ -1860,7 +1860,7 @@ static UINT8 wmv1_scantable03[64]= { | |||
| 0x2E, 0x27, 0x2F, 0x36, 0x3D, 0x3E, 0x37, 0x3F, | |||
| }; | |||
| static UINT8 *wmv1_scantable[WMV1_SCANTABLE_COUNT+1]={ | |||
| static const UINT8 *wmv1_scantable[WMV1_SCANTABLE_COUNT+1]={ | |||
| wmv1_scantable00, | |||
| wmv1_scantable01, | |||
| wmv1_scantable02, | |||
| @@ -20,5 +20,7 @@ | |||
| void simple_idct_put(UINT8 *dest, int line_size, INT16 *block); | |||
| void simple_idct_add(UINT8 *dest, int line_size, INT16 *block); | |||
| void simple_idct_mmx(short *block); | |||
| void ff_simple_idct_mmx(short *block); | |||
| void ff_simple_idct_add_mmx(UINT8 *dest, int line_size, INT16 *block); | |||
| void ff_simple_idct_put_mmx(UINT8 *dest, int line_size, INT16 *block); | |||
| void simple_idct(short *block); | |||
| @@ -51,8 +51,8 @@ do_ffmpeg() | |||
| { | |||
| f="$1" | |||
| shift | |||
| echo $ffmpeg -bitexact -dct_algo 1 $* | |||
| $ffmpeg -bitexact -dct_algo 1 -benchmark $* > $datadir/bench.tmp | |||
| echo $ffmpeg -bitexact -dct_algo 1 -idct_algo 2 $* | |||
| $ffmpeg -bitexact -dct_algo 1 -idct_algo 2 -benchmark $* > $datadir/bench.tmp | |||
| md5sum -b $f >> $logfile | |||
| expr "`cat $datadir/bench.tmp`" : '.*utime=\(.*s\)' > $datadir/bench2.tmp | |||
| echo `cat $datadir/bench2.tmp` $f >> $benchfile | |||
| @@ -62,8 +62,8 @@ do_ffmpeg_crc() | |||
| { | |||
| f="$1" | |||
| shift | |||
| echo $ffmpeg -y -bitexact -dct_algo 1 $* -f crc $datadir/ffmpeg.crc | |||
| $ffmpeg -y -bitexact -dct_algo 1 $* -f crc $datadir/ffmpeg.crc | |||
| echo $ffmpeg -y -bitexact -dct_algo 1 -idct_algo 2 $* -f crc $datadir/ffmpeg.crc | |||
| $ffmpeg -y -bitexact -dct_algo 1 -idct_algo 2 $* -f crc $datadir/ffmpeg.crc | |||
| echo -n "$f " >> $logfile | |||
| cat $datadir/ffmpeg.crc >> $logfile | |||
| } | |||