* qatar/master: (35 commits) flvdec: Do not call parse_keyframes_index with a NULL stream libspeexdec: include system headers before local headers libspeexdec: return meaningful error codes libspeexdec: cosmetics: reindent libspeexdec: decode one frame at a time. swscale: fix signed shift overflows in ff_yuv2rgb_c_init_tables() Move timefilter code from lavf to lavd. mov: add support for hdvd and pgapmetadata atoms mov: rename function _stik, some indentation cosmetics mov: rename function _int8 to remove ambiguity, some indentation cosmetics mov: parse the gnre atom mp3on4: check for allocation failures in decode_init_mp3on4() mp3on4: create a separate flush function for MP3onMP4. mp3on4: ensure that the frame channel count does not exceed the codec channel count. mp3on4: set channel layout mp3on4: fix the output channel order mp3on4: allocate temp buffer with av_malloc() instead of on the stack. mp3on4: copy MPADSPContext from first context to all contexts. fmtconvert: port float_to_int16_interleave() 2-channel x86 inline asm to yasm fmtconvert: port int32_to_float_fmul_scalar() x86 inline asm to yasm ... Conflicts: libavcodec/arm/h264dsp_init_arm.c libavcodec/h264.c libavcodec/h264.h libavcodec/h264_cabac.c libavcodec/h264_cavlc.c libavcodec/h264_ps.c libavcodec/h264dsp_template.c libavcodec/h264idct_template.c libavcodec/h264pred.c libavcodec/h264pred_template.c libavcodec/x86/h264dsp_mmx.c libavdevice/Makefile libavdevice/jack_audio.c libavformat/Makefile libavformat/flvdec.c libavformat/flvenc.c libavutil/pixfmt.h libswscale/utils.c Merged-by: Michael Niedermayer <michaelni@gmx.at>tags/n0.9
| @@ -67,6 +67,7 @@ easier to use. The changes are: | |||||
| - aevalsrc audio source added | - aevalsrc audio source added | ||||
| - Ut Video decoder | - Ut Video decoder | ||||
| - Speex encoding via libspeex | - Speex encoding via libspeex | ||||
| - 4:2:2 H.264 decoding support | |||||
| version 0.8: | version 0.8: | ||||
| @@ -32,47 +32,22 @@ void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha, | |||||
| void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha, | void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha, | ||||
| int beta, int8_t *tc0); | int beta, int8_t *tc0); | ||||
| void ff_weight_h264_pixels_16x16_neon(uint8_t *ds, int stride, int log2_den, | |||||
| int weight, int offset); | |||||
| void ff_weight_h264_pixels_16x8_neon(uint8_t *ds, int stride, int log2_den, | |||||
| int weight, int offset); | |||||
| void ff_weight_h264_pixels_8x16_neon(uint8_t *ds, int stride, int log2_den, | |||||
| int weight, int offset); | |||||
| void ff_weight_h264_pixels_8x8_neon(uint8_t *ds, int stride, int log2_den, | |||||
| int weight, int offset); | |||||
| void ff_weight_h264_pixels_8x4_neon(uint8_t *ds, int stride, int log2_den, | |||||
| int weight, int offset); | |||||
| void ff_weight_h264_pixels_4x8_neon(uint8_t *ds, int stride, int log2_den, | |||||
| int weight, int offset); | |||||
| void ff_weight_h264_pixels_4x4_neon(uint8_t *ds, int stride, int log2_den, | |||||
| int weight, int offset); | |||||
| void ff_weight_h264_pixels_4x2_neon(uint8_t *ds, int stride, int log2_den, | |||||
| int weight, int offset); | |||||
| void ff_weight_h264_pixels_16_neon(uint8_t *dst, int stride, int height, | |||||
| int log2_den, int weight, int offset); | |||||
| void ff_weight_h264_pixels_8_neon(uint8_t *dst, int stride, int height, | |||||
| int log2_den, int weight, int offset); | |||||
| void ff_weight_h264_pixels_4_neon(uint8_t *dst, int stride, int height, | |||||
| int log2_den, int weight, int offset); | |||||
| void ff_biweight_h264_pixels_16x16_neon(uint8_t *dst, uint8_t *src, int stride, | |||||
| int log2_den, int weightd, int weights, | |||||
| int offset); | |||||
| void ff_biweight_h264_pixels_16x8_neon(uint8_t *dst, uint8_t *src, int stride, | |||||
| int log2_den, int weightd, int weights, | |||||
| int offset); | |||||
| void ff_biweight_h264_pixels_8x16_neon(uint8_t *dst, uint8_t *src, int stride, | |||||
| int log2_den, int weightd, int weights, | |||||
| int offset); | |||||
| void ff_biweight_h264_pixels_8x8_neon(uint8_t *dst, uint8_t *src, int stride, | |||||
| int log2_den, int weightd, int weights, | |||||
| int offset); | |||||
| void ff_biweight_h264_pixels_8x4_neon(uint8_t *dst, uint8_t *src, int stride, | |||||
| int log2_den, int weightd, int weights, | |||||
| int offset); | |||||
| void ff_biweight_h264_pixels_4x8_neon(uint8_t *dst, uint8_t *src, int stride, | |||||
| int log2_den, int weightd, int weights, | |||||
| int offset); | |||||
| void ff_biweight_h264_pixels_4x4_neon(uint8_t *dst, uint8_t *src, int stride, | |||||
| int log2_den, int weightd, int weights, | |||||
| int offset); | |||||
| void ff_biweight_h264_pixels_4x2_neon(uint8_t *dst, uint8_t *src, int stride, | |||||
| int log2_den, int weightd, int weights, | |||||
| int offset); | |||||
| void ff_biweight_h264_pixels_16_neon(uint8_t *dst, uint8_t *src, int stride, | |||||
| int height, int log2_den, int weightd, | |||||
| int weights, int offset); | |||||
| void ff_biweight_h264_pixels_8_neon(uint8_t *dst, uint8_t *src, int stride, | |||||
| int height, int log2_den, int weightd, | |||||
| int weights, int offset); | |||||
| void ff_biweight_h264_pixels_4_neon(uint8_t *dst, uint8_t *src, int stride, | |||||
| int height, int log2_den, int weightd, | |||||
| int weights, int offset); | |||||
| void ff_h264_idct_add_neon(uint8_t *dst, DCTELEM *block, int stride); | void ff_h264_idct_add_neon(uint8_t *dst, DCTELEM *block, int stride); | ||||
| void ff_h264_idct_dc_add_neon(uint8_t *dst, DCTELEM *block, int stride); | void ff_h264_idct_dc_add_neon(uint8_t *dst, DCTELEM *block, int stride); | ||||
| @@ -101,23 +76,14 @@ static void ff_h264dsp_init_neon(H264DSPContext *c, const int bit_depth, const i | |||||
| c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon; | c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon; | ||||
| c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon; | c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon; | ||||
| } | } | ||||
| c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels_16x16_neon; | |||||
| c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels_16x8_neon; | |||||
| c->weight_h264_pixels_tab[2] = ff_weight_h264_pixels_8x16_neon; | |||||
| c->weight_h264_pixels_tab[3] = ff_weight_h264_pixels_8x8_neon; | |||||
| c->weight_h264_pixels_tab[4] = ff_weight_h264_pixels_8x4_neon; | |||||
| c->weight_h264_pixels_tab[5] = ff_weight_h264_pixels_4x8_neon; | |||||
| c->weight_h264_pixels_tab[6] = ff_weight_h264_pixels_4x4_neon; | |||||
| c->weight_h264_pixels_tab[7] = ff_weight_h264_pixels_4x2_neon; | |||||
| c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels_16x16_neon; | |||||
| c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels_16x8_neon; | |||||
| c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels_8x16_neon; | |||||
| c->biweight_h264_pixels_tab[3] = ff_biweight_h264_pixels_8x8_neon; | |||||
| c->biweight_h264_pixels_tab[4] = ff_biweight_h264_pixels_8x4_neon; | |||||
| c->biweight_h264_pixels_tab[5] = ff_biweight_h264_pixels_4x8_neon; | |||||
| c->biweight_h264_pixels_tab[6] = ff_biweight_h264_pixels_4x4_neon; | |||||
| c->biweight_h264_pixels_tab[7] = ff_biweight_h264_pixels_4x2_neon; | |||||
| c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels_16_neon; | |||||
| c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels_8_neon; | |||||
| c->weight_h264_pixels_tab[2] = ff_weight_h264_pixels_4_neon; | |||||
| c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels_16_neon; | |||||
| c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels_8_neon; | |||||
| c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels_4_neon; | |||||
| c->h264_idct_add = ff_h264_idct_add_neon; | c->h264_idct_add = ff_h264_idct_add_neon; | ||||
| c->h264_idct_dc_add = ff_h264_idct_dc_add_neon; | c->h264_idct_dc_add = ff_h264_idct_dc_add_neon; | ||||
| @@ -1592,7 +1592,7 @@ endfunc | |||||
| vdup.8 d1, r5 | vdup.8 d1, r5 | ||||
| vmov q2, q8 | vmov q2, q8 | ||||
| vmov q3, q8 | vmov q3, q8 | ||||
| 1: subs ip, ip, #2 | |||||
| 1: subs r3, r3, #2 | |||||
| vld1.8 {d20-d21},[r0,:128], r2 | vld1.8 {d20-d21},[r0,:128], r2 | ||||
| \macd q2, d0, d20 | \macd q2, d0, d20 | ||||
| pld [r0] | pld [r0] | ||||
| @@ -1632,7 +1632,7 @@ endfunc | |||||
| vdup.8 d1, r5 | vdup.8 d1, r5 | ||||
| vmov q1, q8 | vmov q1, q8 | ||||
| vmov q10, q8 | vmov q10, q8 | ||||
| 1: subs ip, ip, #2 | |||||
| 1: subs r3, r3, #2 | |||||
| vld1.8 {d4},[r0,:64], r2 | vld1.8 {d4},[r0,:64], r2 | ||||
| \macd q1, d0, d4 | \macd q1, d0, d4 | ||||
| pld [r0] | pld [r0] | ||||
| @@ -1662,7 +1662,7 @@ endfunc | |||||
| vdup.8 d1, r5 | vdup.8 d1, r5 | ||||
| vmov q1, q8 | vmov q1, q8 | ||||
| vmov q10, q8 | vmov q10, q8 | ||||
| 1: subs ip, ip, #4 | |||||
| 1: subs r3, r3, #4 | |||||
| vld1.32 {d4[0]},[r0,:32], r2 | vld1.32 {d4[0]},[r0,:32], r2 | ||||
| vld1.32 {d4[1]},[r0,:32], r2 | vld1.32 {d4[1]},[r0,:32], r2 | ||||
| \macd q1, d0, d4 | \macd q1, d0, d4 | ||||
| @@ -1700,16 +1700,17 @@ endfunc | |||||
| .endm | .endm | ||||
| .macro biweight_func w | .macro biweight_func w | ||||
| function biweight_h264_pixels_\w\()_neon | |||||
| function ff_biweight_h264_pixels_\w\()_neon, export=1 | |||||
| push {r4-r6, lr} | push {r4-r6, lr} | ||||
| add r4, sp, #16 | |||||
| ldr r12, [sp, #16] | |||||
| add r4, sp, #20 | |||||
| ldm r4, {r4-r6} | ldm r4, {r4-r6} | ||||
| lsr lr, r4, #31 | lsr lr, r4, #31 | ||||
| add r6, r6, #1 | add r6, r6, #1 | ||||
| eors lr, lr, r5, lsr #30 | eors lr, lr, r5, lsr #30 | ||||
| orr r6, r6, #1 | orr r6, r6, #1 | ||||
| vdup.16 q9, r3 | |||||
| lsl r6, r6, r3 | |||||
| vdup.16 q9, r12 | |||||
| lsl r6, r6, r12 | |||||
| vmvn q9, q9 | vmvn q9, q9 | ||||
| vdup.16 q8, r6 | vdup.16 q8, r6 | ||||
| mov r6, r0 | mov r6, r0 | ||||
| @@ -1730,34 +1731,15 @@ function biweight_h264_pixels_\w\()_neon | |||||
| endfunc | endfunc | ||||
| .endm | .endm | ||||
| .macro biweight_entry w, h, b=1 | |||||
| function ff_biweight_h264_pixels_\w\()x\h\()_neon, export=1 | |||||
| mov ip, #\h | |||||
| .if \b | |||||
| b biweight_h264_pixels_\w\()_neon | |||||
| .endif | |||||
| endfunc | |||||
| .endm | |||||
| biweight_entry 16, 8 | |||||
| biweight_entry 16, 16, b=0 | |||||
| biweight_func 16 | biweight_func 16 | ||||
| biweight_entry 8, 16 | |||||
| biweight_entry 8, 4 | |||||
| biweight_entry 8, 8, b=0 | |||||
| biweight_func 8 | biweight_func 8 | ||||
| biweight_entry 4, 8 | |||||
| biweight_entry 4, 2 | |||||
| biweight_entry 4, 4, b=0 | |||||
| biweight_func 4 | biweight_func 4 | ||||
| @ Weighted prediction | @ Weighted prediction | ||||
| .macro weight_16 add | .macro weight_16 add | ||||
| vdup.8 d0, r3 | |||||
| 1: subs ip, ip, #2 | |||||
| vdup.8 d0, r12 | |||||
| 1: subs r2, r2, #2 | |||||
| vld1.8 {d20-d21},[r0,:128], r1 | vld1.8 {d20-d21},[r0,:128], r1 | ||||
| vmull.u8 q2, d0, d20 | vmull.u8 q2, d0, d20 | ||||
| pld [r0] | pld [r0] | ||||
| @@ -1785,8 +1767,8 @@ endfunc | |||||
| .endm | .endm | ||||
| .macro weight_8 add | .macro weight_8 add | ||||
| vdup.8 d0, r3 | |||||
| 1: subs ip, ip, #2 | |||||
| vdup.8 d0, r12 | |||||
| 1: subs r2, r2, #2 | |||||
| vld1.8 {d4},[r0,:64], r1 | vld1.8 {d4},[r0,:64], r1 | ||||
| vmull.u8 q1, d0, d4 | vmull.u8 q1, d0, d4 | ||||
| pld [r0] | pld [r0] | ||||
| @@ -1806,10 +1788,10 @@ endfunc | |||||
| .endm | .endm | ||||
| .macro weight_4 add | .macro weight_4 add | ||||
| vdup.8 d0, r3 | |||||
| vdup.8 d0, r12 | |||||
| vmov q1, q8 | vmov q1, q8 | ||||
| vmov q10, q8 | vmov q10, q8 | ||||
| 1: subs ip, ip, #4 | |||||
| 1: subs r2, r2, #4 | |||||
| vld1.32 {d4[0]},[r0,:32], r1 | vld1.32 {d4[0]},[r0,:32], r1 | ||||
| vld1.32 {d4[1]},[r0,:32], r1 | vld1.32 {d4[1]},[r0,:32], r1 | ||||
| vmull.u8 q1, d0, d4 | vmull.u8 q1, d0, d4 | ||||
| @@ -1842,50 +1824,32 @@ endfunc | |||||
| .endm | .endm | ||||
| .macro weight_func w | .macro weight_func w | ||||
| function weight_h264_pixels_\w\()_neon | |||||
| function ff_weight_h264_pixels_\w\()_neon, export=1 | |||||
| push {r4, lr} | push {r4, lr} | ||||
| ldr r4, [sp, #8] | |||||
| cmp r2, #1 | |||||
| lsl r4, r4, r2 | |||||
| ldr r12, [sp, #8] | |||||
| ldr r4, [sp, #12] | |||||
| cmp r3, #1 | |||||
| lsl r4, r4, r3 | |||||
| vdup.16 q8, r4 | vdup.16 q8, r4 | ||||
| mov r4, r0 | mov r4, r0 | ||||
| ble 20f | ble 20f | ||||
| rsb lr, r2, #1 | |||||
| rsb lr, r3, #1 | |||||
| vdup.16 q9, lr | vdup.16 q9, lr | ||||
| cmp r3, #0 | |||||
| cmp r12, #0 | |||||
| blt 10f | blt 10f | ||||
| weight_\w vhadd.s16 | weight_\w vhadd.s16 | ||||
| 10: rsb r3, r3, #0 | |||||
| 10: rsb r12, r12, #0 | |||||
| weight_\w vhsub.s16 | weight_\w vhsub.s16 | ||||
| 20: rsb lr, r2, #0 | |||||
| 20: rsb lr, r3, #0 | |||||
| vdup.16 q9, lr | vdup.16 q9, lr | ||||
| cmp r3, #0 | |||||
| cmp r12, #0 | |||||
| blt 10f | blt 10f | ||||
| weight_\w vadd.s16 | weight_\w vadd.s16 | ||||
| 10: rsb r3, r3, #0 | |||||
| 10: rsb r12, r12, #0 | |||||
| weight_\w vsub.s16 | weight_\w vsub.s16 | ||||
| endfunc | endfunc | ||||
| .endm | .endm | ||||
| .macro weight_entry w, h, b=1 | |||||
| function ff_weight_h264_pixels_\w\()x\h\()_neon, export=1 | |||||
| mov ip, #\h | |||||
| .if \b | |||||
| b weight_h264_pixels_\w\()_neon | |||||
| .endif | |||||
| endfunc | |||||
| .endm | |||||
| weight_entry 16, 8 | |||||
| weight_entry 16, 16, b=0 | |||||
| weight_func 16 | weight_func 16 | ||||
| weight_entry 8, 16 | |||||
| weight_entry 8, 4 | |||||
| weight_entry 8, 8, b=0 | |||||
| weight_func 8 | weight_func 8 | ||||
| weight_entry 4, 8 | |||||
| weight_entry 4, 2 | |||||
| weight_entry 4, 4, b=0 | |||||
| weight_func 4 | weight_func 4 | ||||
| @@ -70,7 +70,15 @@ typedef struct FmtConvertContext { | |||||
| long len, int channels); | long len, int channels); | ||||
| /** | /** | ||||
| * Convert an array of interleaved float to multiple arrays of float. | |||||
| * Convert multiple arrays of float to an array of interleaved float. | |||||
| * | |||||
| * @param dst destination array of interleaved float. | |||||
| * constraints: 16-byte aligned | |||||
| * @param src source array of float arrays, one for each channel. | |||||
| * constraints: 16-byte aligned | |||||
| * @param len number of elements to convert. | |||||
| * constraints: multiple of 8 | |||||
| * @param channels number of channels | |||||
| */ | */ | ||||
| void (*float_interleave)(float *dst, const float **src, unsigned int len, | void (*float_interleave)(float *dst, const float **src, unsigned int len, | ||||
| int channels); | int channels); | ||||
| @@ -460,11 +460,14 @@ static void chroma_dc_dct_c(DCTELEM *block){ | |||||
| } | } | ||||
| #endif | #endif | ||||
| static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list, | |||||
| uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, | |||||
| int src_x_offset, int src_y_offset, | |||||
| qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op, | |||||
| int pixel_shift, int chroma444){ | |||||
| static av_always_inline void | |||||
| mc_dir_part(H264Context *h, Picture *pic, int n, int square, | |||||
| int height, int delta, int list, | |||||
| uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, | |||||
| int src_x_offset, int src_y_offset, | |||||
| qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op, | |||||
| int pixel_shift, int chroma_idc) | |||||
| { | |||||
| MpegEncContext * const s = &h->s; | MpegEncContext * const s = &h->s; | ||||
| const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8; | const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8; | ||||
| int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8; | int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8; | ||||
| @@ -479,6 +482,7 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, | |||||
| const int full_my= my>>2; | const int full_my= my>>2; | ||||
| const int pic_width = 16*s->mb_width; | const int pic_width = 16*s->mb_width; | ||||
| const int pic_height = 16*s->mb_height >> MB_FIELD; | const int pic_height = 16*s->mb_height >> MB_FIELD; | ||||
| int ysh; | |||||
| if(mx&7) extra_width -= 3; | if(mx&7) extra_width -= 3; | ||||
| if(my&7) extra_height -= 3; | if(my&7) extra_height -= 3; | ||||
| @@ -487,7 +491,8 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, | |||||
| || full_my < 0-extra_height | || full_my < 0-extra_height | ||||
| || full_mx + 16/*FIXME*/ > pic_width + extra_width | || full_mx + 16/*FIXME*/ > pic_width + extra_width | ||||
| || full_my + 16/*FIXME*/ > pic_height + extra_height){ | || full_my + 16/*FIXME*/ > pic_height + extra_height){ | ||||
| s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height); | |||||
| s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize, | |||||
| 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height); | |||||
| src_y= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize; | src_y= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize; | ||||
| emu=1; | emu=1; | ||||
| } | } | ||||
| @@ -499,7 +504,7 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, | |||||
| if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return; | if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return; | ||||
| if(chroma444){ | |||||
| if(chroma_idc == 3 /* yuv444 */){ | |||||
| src_cb = pic->f.data[1] + offset; | src_cb = pic->f.data[1] + offset; | ||||
| if(emu){ | if(emu){ | ||||
| s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize, | s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize, | ||||
| @@ -524,42 +529,55 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, | |||||
| return; | return; | ||||
| } | } | ||||
| if(MB_FIELD){ | |||||
| ysh = 3 - (chroma_idc == 2 /* yuv422 */); | |||||
| if(chroma_idc == 1 /* yuv420 */ && MB_FIELD){ | |||||
| // chroma offset when predicting from a field of opposite parity | // chroma offset when predicting from a field of opposite parity | ||||
| my += 2 * ((s->mb_y & 1) - (pic->f.reference - 1)); | my += 2 * ((s->mb_y & 1) - (pic->f.reference - 1)); | ||||
| emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1); | emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1); | ||||
| } | } | ||||
| src_cb = pic->f.data[1] + ((mx >> 3) << pixel_shift) + (my >> 3) * h->mb_uvlinesize; | |||||
| src_cr = pic->f.data[2] + ((mx >> 3) << pixel_shift) + (my >> 3) * h->mb_uvlinesize; | |||||
| src_cb = pic->f.data[1] + ((mx >> 3) << pixel_shift) + (my >> ysh) * h->mb_uvlinesize; | |||||
| src_cr = pic->f.data[2] + ((mx >> 3) << pixel_shift) + (my >> ysh) * h->mb_uvlinesize; | |||||
| if(emu){ | if(emu){ | ||||
| s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1); | |||||
| s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, | |||||
| 9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh), | |||||
| pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */)); | |||||
| src_cb= s->edge_emu_buffer; | src_cb= s->edge_emu_buffer; | ||||
| } | } | ||||
| chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7); | |||||
| chroma_op(dest_cb, src_cb, h->mb_uvlinesize, height >> (chroma_idc == 1 /* yuv420 */), | |||||
| mx&7, (my << (chroma_idc == 2 /* yuv422 */)) &7); | |||||
| if(emu){ | if(emu){ | ||||
| s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1); | |||||
| s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, | |||||
| 9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh), | |||||
| pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */)); | |||||
| src_cr= s->edge_emu_buffer; | src_cr= s->edge_emu_buffer; | ||||
| } | } | ||||
| chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7); | |||||
| chroma_op(dest_cr, src_cr, h->mb_uvlinesize, height >> (chroma_idc == 1 /* yuv420 */), | |||||
| mx&7, (my << (chroma_idc == 2 /* yuv422 */)) &7); | |||||
| } | } | ||||
| static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta, | |||||
| uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, | |||||
| int x_offset, int y_offset, | |||||
| qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, | |||||
| qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg, | |||||
| int list0, int list1, int pixel_shift, int chroma444){ | |||||
| static av_always_inline void | |||||
| mc_part_std(H264Context *h, int n, int square, int height, int delta, | |||||
| uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, | |||||
| int x_offset, int y_offset, | |||||
| qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, | |||||
| qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg, | |||||
| int list0, int list1, int pixel_shift, int chroma_idc) | |||||
| { | |||||
| MpegEncContext * const s = &h->s; | MpegEncContext * const s = &h->s; | ||||
| qpel_mc_func *qpix_op= qpix_put; | qpel_mc_func *qpix_op= qpix_put; | ||||
| h264_chroma_mc_func chroma_op= chroma_put; | h264_chroma_mc_func chroma_op= chroma_put; | ||||
| dest_y += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize; | dest_y += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize; | ||||
| if(chroma444){ | |||||
| if (chroma_idc == 3 /* yuv444 */) { | |||||
| dest_cb += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize; | dest_cb += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize; | ||||
| dest_cr += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize; | dest_cr += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize; | ||||
| }else{ | |||||
| } else if (chroma_idc == 2 /* yuv422 */) { | |||||
| dest_cb += ( x_offset << pixel_shift) + 2*y_offset*h->mb_uvlinesize; | |||||
| dest_cr += ( x_offset << pixel_shift) + 2*y_offset*h->mb_uvlinesize; | |||||
| } else /* yuv420 */ { | |||||
| dest_cb += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize; | dest_cb += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize; | ||||
| dest_cr += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize; | dest_cr += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize; | ||||
| } | } | ||||
| @@ -568,9 +586,9 @@ static inline void mc_part_std(H264Context *h, int n, int square, int chroma_hei | |||||
| if(list0){ | if(list0){ | ||||
| Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ]; | Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ]; | ||||
| mc_dir_part(h, ref, n, square, chroma_height, delta, 0, | |||||
| mc_dir_part(h, ref, n, square, height, delta, 0, | |||||
| dest_y, dest_cb, dest_cr, x_offset, y_offset, | dest_y, dest_cb, dest_cr, x_offset, y_offset, | ||||
| qpix_op, chroma_op, pixel_shift, chroma444); | |||||
| qpix_op, chroma_op, pixel_shift, chroma_idc); | |||||
| qpix_op= qpix_avg; | qpix_op= qpix_avg; | ||||
| chroma_op= chroma_avg; | chroma_op= chroma_avg; | ||||
| @@ -578,28 +596,36 @@ static inline void mc_part_std(H264Context *h, int n, int square, int chroma_hei | |||||
| if(list1){ | if(list1){ | ||||
| Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ]; | Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ]; | ||||
| mc_dir_part(h, ref, n, square, chroma_height, delta, 1, | |||||
| mc_dir_part(h, ref, n, square, height, delta, 1, | |||||
| dest_y, dest_cb, dest_cr, x_offset, y_offset, | dest_y, dest_cb, dest_cr, x_offset, y_offset, | ||||
| qpix_op, chroma_op, pixel_shift, chroma444); | |||||
| qpix_op, chroma_op, pixel_shift, chroma_idc); | |||||
| } | } | ||||
| } | } | ||||
| static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta, | |||||
| uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, | |||||
| int x_offset, int y_offset, | |||||
| qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, | |||||
| h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op, | |||||
| h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg, | |||||
| int list0, int list1, int pixel_shift, int chroma444){ | |||||
| static av_always_inline void | |||||
| mc_part_weighted(H264Context *h, int n, int square, int height, int delta, | |||||
| uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, | |||||
| int x_offset, int y_offset, | |||||
| qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, | |||||
| h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op, | |||||
| h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg, | |||||
| int list0, int list1, int pixel_shift, int chroma_idc){ | |||||
| MpegEncContext * const s = &h->s; | MpegEncContext * const s = &h->s; | ||||
| int chroma_height; | |||||
| dest_y += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize; | dest_y += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize; | ||||
| if(chroma444){ | |||||
| if (chroma_idc == 3 /* yuv444 */) { | |||||
| chroma_height = height; | |||||
| chroma_weight_avg = luma_weight_avg; | chroma_weight_avg = luma_weight_avg; | ||||
| chroma_weight_op = luma_weight_op; | chroma_weight_op = luma_weight_op; | ||||
| dest_cb += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize; | dest_cb += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize; | ||||
| dest_cr += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize; | dest_cr += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize; | ||||
| }else{ | |||||
| } else if (chroma_idc == 2 /* yuv422 */) { | |||||
| chroma_height = height; | |||||
| dest_cb += ( x_offset << pixel_shift) + 2*y_offset*h->mb_uvlinesize; | |||||
| dest_cr += ( x_offset << pixel_shift) + 2*y_offset*h->mb_uvlinesize; | |||||
| } else /* yuv420 */ { | |||||
| chroma_height = height >> 1; | |||||
| dest_cb += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize; | dest_cb += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize; | ||||
| dest_cr += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize; | dest_cr += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize; | ||||
| } | } | ||||
| @@ -615,27 +641,32 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom | |||||
| int refn0 = h->ref_cache[0][ scan8[n] ]; | int refn0 = h->ref_cache[0][ scan8[n] ]; | ||||
| int refn1 = h->ref_cache[1][ scan8[n] ]; | int refn1 = h->ref_cache[1][ scan8[n] ]; | ||||
| mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0, | |||||
| mc_dir_part(h, &h->ref_list[0][refn0], n, square, height, delta, 0, | |||||
| dest_y, dest_cb, dest_cr, | dest_y, dest_cb, dest_cr, | ||||
| x_offset, y_offset, qpix_put, chroma_put, pixel_shift, chroma444); | |||||
| mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1, | |||||
| x_offset, y_offset, qpix_put, chroma_put, | |||||
| pixel_shift, chroma_idc); | |||||
| mc_dir_part(h, &h->ref_list[1][refn1], n, square, height, delta, 1, | |||||
| tmp_y, tmp_cb, tmp_cr, | tmp_y, tmp_cb, tmp_cr, | ||||
| x_offset, y_offset, qpix_put, chroma_put, pixel_shift, chroma444); | |||||
| x_offset, y_offset, qpix_put, chroma_put, | |||||
| pixel_shift, chroma_idc); | |||||
| if(h->use_weight == 2){ | if(h->use_weight == 2){ | ||||
| int weight0 = h->implicit_weight[refn0][refn1][s->mb_y&1]; | int weight0 = h->implicit_weight[refn0][refn1][s->mb_y&1]; | ||||
| int weight1 = 64 - weight0; | int weight1 = 64 - weight0; | ||||
| luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0); | |||||
| chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0); | |||||
| chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0); | |||||
| luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, | |||||
| height, 5, weight0, weight1, 0); | |||||
| chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, | |||||
| chroma_height, 5, weight0, weight1, 0); | |||||
| chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, | |||||
| chroma_height, 5, weight0, weight1, 0); | |||||
| }else{ | }else{ | ||||
| luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom, | |||||
| luma_weight_avg(dest_y, tmp_y, h->mb_linesize, height, h->luma_log2_weight_denom, | |||||
| h->luma_weight[refn0][0][0] , h->luma_weight[refn1][1][0], | h->luma_weight[refn0][0][0] , h->luma_weight[refn1][1][0], | ||||
| h->luma_weight[refn0][0][1] + h->luma_weight[refn1][1][1]); | h->luma_weight[refn0][0][1] + h->luma_weight[refn1][1][1]); | ||||
| chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom, | |||||
| chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, chroma_height, h->chroma_log2_weight_denom, | |||||
| h->chroma_weight[refn0][0][0][0] , h->chroma_weight[refn1][1][0][0], | h->chroma_weight[refn0][0][0][0] , h->chroma_weight[refn1][1][0][0], | ||||
| h->chroma_weight[refn0][0][0][1] + h->chroma_weight[refn1][1][0][1]); | h->chroma_weight[refn0][0][0][1] + h->chroma_weight[refn1][1][0][1]); | ||||
| chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom, | |||||
| chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, chroma_height, h->chroma_log2_weight_denom, | |||||
| h->chroma_weight[refn0][0][1][0] , h->chroma_weight[refn1][1][1][0], | h->chroma_weight[refn0][0][1][0] , h->chroma_weight[refn1][1][1][0], | ||||
| h->chroma_weight[refn0][0][1][1] + h->chroma_weight[refn1][1][1][1]); | h->chroma_weight[refn0][0][1][1] + h->chroma_weight[refn1][1][1][1]); | ||||
| } | } | ||||
| @@ -643,42 +674,46 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom | |||||
| int list = list1 ? 1 : 0; | int list = list1 ? 1 : 0; | ||||
| int refn = h->ref_cache[list][ scan8[n] ]; | int refn = h->ref_cache[list][ scan8[n] ]; | ||||
| Picture *ref= &h->ref_list[list][refn]; | Picture *ref= &h->ref_list[list][refn]; | ||||
| mc_dir_part(h, ref, n, square, chroma_height, delta, list, | |||||
| mc_dir_part(h, ref, n, square, height, delta, list, | |||||
| dest_y, dest_cb, dest_cr, x_offset, y_offset, | dest_y, dest_cb, dest_cr, x_offset, y_offset, | ||||
| qpix_put, chroma_put, pixel_shift, chroma444); | |||||
| qpix_put, chroma_put, pixel_shift, chroma_idc); | |||||
| luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom, | |||||
| luma_weight_op(dest_y, h->mb_linesize, height, h->luma_log2_weight_denom, | |||||
| h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]); | h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]); | ||||
| if(h->use_weight_chroma){ | if(h->use_weight_chroma){ | ||||
| chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom, | |||||
| chroma_weight_op(dest_cb, h->mb_uvlinesize, chroma_height, h->chroma_log2_weight_denom, | |||||
| h->chroma_weight[refn][list][0][0], h->chroma_weight[refn][list][0][1]); | h->chroma_weight[refn][list][0][0], h->chroma_weight[refn][list][0][1]); | ||||
| chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom, | |||||
| chroma_weight_op(dest_cr, h->mb_uvlinesize, chroma_height, h->chroma_log2_weight_denom, | |||||
| h->chroma_weight[refn][list][1][0], h->chroma_weight[refn][list][1][1]); | h->chroma_weight[refn][list][1][0], h->chroma_weight[refn][list][1][1]); | ||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta, | |||||
| uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, | |||||
| int x_offset, int y_offset, | |||||
| qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, | |||||
| qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg, | |||||
| h264_weight_func *weight_op, h264_biweight_func *weight_avg, | |||||
| int list0, int list1, int pixel_shift, int chroma444){ | |||||
| static av_always_inline void | |||||
| mc_part(H264Context *h, int n, int square, int height, int delta, | |||||
| uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, | |||||
| int x_offset, int y_offset, | |||||
| qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, | |||||
| qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg, | |||||
| h264_weight_func *weight_op, h264_biweight_func *weight_avg, | |||||
| int list0, int list1, int pixel_shift, int chroma_idc) | |||||
| { | |||||
| if((h->use_weight==2 && list0 && list1 | if((h->use_weight==2 && list0 && list1 | ||||
| && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ][h->s.mb_y&1] != 32)) | && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ][h->s.mb_y&1] != 32)) | ||||
| || h->use_weight==1) | || h->use_weight==1) | ||||
| mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr, | |||||
| mc_part_weighted(h, n, square, height, delta, dest_y, dest_cb, dest_cr, | |||||
| x_offset, y_offset, qpix_put, chroma_put, | x_offset, y_offset, qpix_put, chroma_put, | ||||
| weight_op[0], weight_op[3], weight_avg[0], | |||||
| weight_avg[3], list0, list1, pixel_shift, chroma444); | |||||
| weight_op[0], weight_op[1], weight_avg[0], | |||||
| weight_avg[1], list0, list1, pixel_shift, chroma_idc); | |||||
| else | else | ||||
| mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr, | |||||
| mc_part_std(h, n, square, height, delta, dest_y, dest_cb, dest_cr, | |||||
| x_offset, y_offset, qpix_put, chroma_put, qpix_avg, | x_offset, y_offset, qpix_put, chroma_put, qpix_avg, | ||||
| chroma_avg, list0, list1, pixel_shift, chroma444); | |||||
| chroma_avg, list0, list1, pixel_shift, chroma_idc); | |||||
| } | } | ||||
| static inline void prefetch_motion(H264Context *h, int list, int pixel_shift, int chroma444){ | |||||
| static av_always_inline void | |||||
| prefetch_motion(H264Context *h, int list, int pixel_shift, int chroma_idc) | |||||
| { | |||||
| /* fetch pixels for estimated mv 4 macroblocks ahead | /* fetch pixels for estimated mv 4 macroblocks ahead | ||||
| * optimized for 64byte cache lines */ | * optimized for 64byte cache lines */ | ||||
| MpegEncContext * const s = &h->s; | MpegEncContext * const s = &h->s; | ||||
| @@ -689,7 +724,7 @@ static inline void prefetch_motion(H264Context *h, int list, int pixel_shift, in | |||||
| uint8_t **src = h->ref_list[list][refn].f.data; | uint8_t **src = h->ref_list[list][refn].f.data; | ||||
| int off= (mx << pixel_shift) + (my + (s->mb_x&3)*4)*h->mb_linesize + (64 << pixel_shift); | int off= (mx << pixel_shift) + (my + (s->mb_x&3)*4)*h->mb_linesize + (64 << pixel_shift); | ||||
| s->dsp.prefetch(src[0]+off, s->linesize, 4); | s->dsp.prefetch(src[0]+off, s->linesize, 4); | ||||
| if(chroma444){ | |||||
| if (chroma_idc == 3 /* yuv444 */) { | |||||
| s->dsp.prefetch(src[1]+off, s->linesize, 4); | s->dsp.prefetch(src[1]+off, s->linesize, 4); | ||||
| s->dsp.prefetch(src[2]+off, s->linesize, 4); | s->dsp.prefetch(src[2]+off, s->linesize, 4); | ||||
| }else{ | }else{ | ||||
| @@ -703,7 +738,8 @@ static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t | |||||
| qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put), | qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put), | ||||
| qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg), | qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg), | ||||
| h264_weight_func *weight_op, h264_biweight_func *weight_avg, | h264_weight_func *weight_op, h264_biweight_func *weight_avg, | ||||
| int pixel_shift, int chroma444){ | |||||
| int pixel_shift, int chroma_idc) | |||||
| { | |||||
| MpegEncContext * const s = &h->s; | MpegEncContext * const s = &h->s; | ||||
| const int mb_xy= h->mb_xy; | const int mb_xy= h->mb_xy; | ||||
| const int mb_type = s->current_picture.f.mb_type[mb_xy]; | const int mb_type = s->current_picture.f.mb_type[mb_xy]; | ||||
| @@ -712,36 +748,36 @@ static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t | |||||
| if(HAVE_THREADS && (s->avctx->active_thread_type & FF_THREAD_FRAME)) | if(HAVE_THREADS && (s->avctx->active_thread_type & FF_THREAD_FRAME)) | ||||
| await_references(h); | await_references(h); | ||||
| prefetch_motion(h, 0, pixel_shift, chroma444); | |||||
| prefetch_motion(h, 0, pixel_shift, chroma_idc); | |||||
| if(IS_16X16(mb_type)){ | if(IS_16X16(mb_type)){ | ||||
| mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0, | |||||
| mc_part(h, 0, 1, 16, 0, dest_y, dest_cb, dest_cr, 0, 0, | |||||
| qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0], | qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0], | ||||
| weight_op, weight_avg, | weight_op, weight_avg, | ||||
| IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), | IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), | ||||
| pixel_shift, chroma444); | |||||
| pixel_shift, chroma_idc); | |||||
| }else if(IS_16X8(mb_type)){ | }else if(IS_16X8(mb_type)){ | ||||
| mc_part(h, 0, 0, 4, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 0, | |||||
| mc_part(h, 0, 0, 8, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 0, | |||||
| qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0], | qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0], | ||||
| &weight_op[1], &weight_avg[1], | |||||
| weight_op, weight_avg, | |||||
| IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), | IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), | ||||
| pixel_shift, chroma444); | |||||
| mc_part(h, 8, 0, 4, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 4, | |||||
| pixel_shift, chroma_idc); | |||||
| mc_part(h, 8, 0, 8, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 4, | |||||
| qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0], | qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0], | ||||
| &weight_op[1], &weight_avg[1], | |||||
| weight_op, weight_avg, | |||||
| IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), | IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), | ||||
| pixel_shift, chroma444); | |||||
| pixel_shift, chroma_idc); | |||||
| }else if(IS_8X16(mb_type)){ | }else if(IS_8X16(mb_type)){ | ||||
| mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0, | |||||
| mc_part(h, 0, 0, 16, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0, | |||||
| qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], | qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], | ||||
| &weight_op[2], &weight_avg[2], | |||||
| &weight_op[1], &weight_avg[1], | |||||
| IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), | IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), | ||||
| pixel_shift, chroma444); | |||||
| mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0, | |||||
| pixel_shift, chroma_idc); | |||||
| mc_part(h, 4, 0, 16, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0, | |||||
| qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], | qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], | ||||
| &weight_op[2], &weight_avg[2], | |||||
| &weight_op[1], &weight_avg[1], | |||||
| IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), | IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), | ||||
| pixel_shift, chroma444); | |||||
| pixel_shift, chroma_idc); | |||||
| }else{ | }else{ | ||||
| int i; | int i; | ||||
| @@ -754,50 +790,72 @@ static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t | |||||
| int y_offset= (i&2)<<1; | int y_offset= (i&2)<<1; | ||||
| if(IS_SUB_8X8(sub_mb_type)){ | if(IS_SUB_8X8(sub_mb_type)){ | ||||
| mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset, | |||||
| mc_part(h, n, 1, 8, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset, | |||||
| qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], | qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], | ||||
| &weight_op[3], &weight_avg[3], | |||||
| &weight_op[1], &weight_avg[1], | |||||
| IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), | IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), | ||||
| pixel_shift, chroma444); | |||||
| pixel_shift, chroma_idc); | |||||
| }else if(IS_SUB_8X4(sub_mb_type)){ | }else if(IS_SUB_8X4(sub_mb_type)){ | ||||
| mc_part(h, n , 0, 2, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset, | |||||
| mc_part(h, n , 0, 4, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset, | |||||
| qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1], | qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1], | ||||
| &weight_op[4], &weight_avg[4], | |||||
| &weight_op[1], &weight_avg[1], | |||||
| IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), | IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), | ||||
| pixel_shift, chroma444); | |||||
| mc_part(h, n+2, 0, 2, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset+2, | |||||
| pixel_shift, chroma_idc); | |||||
| mc_part(h, n+2, 0, 4, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset+2, | |||||
| qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1], | qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1], | ||||
| &weight_op[4], &weight_avg[4], | |||||
| &weight_op[1], &weight_avg[1], | |||||
| IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), | IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), | ||||
| pixel_shift, chroma444); | |||||
| pixel_shift, chroma_idc); | |||||
| }else if(IS_SUB_4X8(sub_mb_type)){ | }else if(IS_SUB_4X8(sub_mb_type)){ | ||||
| mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset, | |||||
| mc_part(h, n , 0, 8, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset, | |||||
| qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], | qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], | ||||
| &weight_op[5], &weight_avg[5], | |||||
| &weight_op[2], &weight_avg[2], | |||||
| IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), | IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), | ||||
| pixel_shift, chroma444); | |||||
| mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset, | |||||
| pixel_shift, chroma_idc); | |||||
| mc_part(h, n+1, 0, 8, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset, | |||||
| qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], | qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], | ||||
| &weight_op[5], &weight_avg[5], | |||||
| &weight_op[2], &weight_avg[2], | |||||
| IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), | IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), | ||||
| pixel_shift, chroma444); | |||||
| pixel_shift, chroma_idc); | |||||
| }else{ | }else{ | ||||
| int j; | int j; | ||||
| assert(IS_SUB_4X4(sub_mb_type)); | assert(IS_SUB_4X4(sub_mb_type)); | ||||
| for(j=0; j<4; j++){ | for(j=0; j<4; j++){ | ||||
| int sub_x_offset= x_offset + 2*(j&1); | int sub_x_offset= x_offset + 2*(j&1); | ||||
| int sub_y_offset= y_offset + (j&2); | int sub_y_offset= y_offset + (j&2); | ||||
| mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset, | |||||
| mc_part(h, n+j, 1, 4, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset, | |||||
| qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], | qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], | ||||
| &weight_op[6], &weight_avg[6], | |||||
| &weight_op[2], &weight_avg[2], | |||||
| IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), | IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), | ||||
| pixel_shift, chroma444); | |||||
| pixel_shift, chroma_idc); | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| prefetch_motion(h, 1, pixel_shift, chroma444); | |||||
| prefetch_motion(h, 1, pixel_shift, chroma_idc); | |||||
| } | |||||
| static av_always_inline void | |||||
| hl_motion_420(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, | |||||
| qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put), | |||||
| qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg), | |||||
| h264_weight_func *weight_op, h264_biweight_func *weight_avg, | |||||
| int pixel_shift) | |||||
| { | |||||
| hl_motion(h, dest_y, dest_cb, dest_cr, qpix_put, chroma_put, | |||||
| qpix_avg, chroma_avg, weight_op, weight_avg, pixel_shift, 1); | |||||
| } | |||||
| static av_always_inline void | |||||
| hl_motion_422(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, | |||||
| qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put), | |||||
| qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg), | |||||
| h264_weight_func *weight_op, h264_biweight_func *weight_avg, | |||||
| int pixel_shift) | |||||
| { | |||||
| hl_motion(h, dest_y, dest_cb, dest_cr, qpix_put, chroma_put, | |||||
| qpix_avg, chroma_avg, weight_op, weight_avg, pixel_shift, 2); | |||||
| } | } | ||||
| static void free_tables(H264Context *h, int free_rbsp){ | static void free_tables(H264Context *h, int free_rbsp){ | ||||
| @@ -1468,7 +1526,10 @@ static void decode_postinit(H264Context *h, int setup_finished){ | |||||
| ff_thread_finish_setup(s->avctx); | ff_thread_finish_setup(s->avctx); | ||||
| } | } | ||||
| static av_always_inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){ | |||||
| static av_always_inline void backup_mb_border(H264Context *h, uint8_t *src_y, | |||||
| uint8_t *src_cb, uint8_t *src_cr, | |||||
| int linesize, int uvlinesize, int simple) | |||||
| { | |||||
| MpegEncContext * const s = &h->s; | MpegEncContext * const s = &h->s; | ||||
| uint8_t *top_border; | uint8_t *top_border; | ||||
| int top_idx = 1; | int top_idx = 1; | ||||
| @@ -1813,7 +1874,8 @@ static av_always_inline void hl_decode_mb_idct_luma(H264Context *h, int mb_type, | |||||
| } | } | ||||
| } | } | ||||
| static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, int pixel_shift){ | |||||
| static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, int pixel_shift) | |||||
| { | |||||
| MpegEncContext * const s = &h->s; | MpegEncContext * const s = &h->s; | ||||
| const int mb_x= s->mb_x; | const int mb_x= s->mb_x; | ||||
| const int mb_y= s->mb_y; | const int mb_y= s->mb_y; | ||||
| @@ -1827,7 +1889,8 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i | |||||
| /* is_h264 should always be true if SVQ3 is disabled. */ | /* is_h264 should always be true if SVQ3 is disabled. */ | ||||
| const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264; | const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264; | ||||
| void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride); | void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride); | ||||
| const int block_h = 16>>s->chroma_y_shift; | |||||
| const int block_h = 16 >> s->chroma_y_shift; | |||||
| const int chroma422 = CHROMA422; | |||||
| dest_y = s->current_picture.f.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize ) * 16; | dest_y = s->current_picture.f.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize ) * 16; | ||||
| dest_cb = s->current_picture.f.data[1] + (mb_x << pixel_shift)*8 + mb_y * s->uvlinesize * block_h; | dest_cb = s->current_picture.f.data[1] + (mb_x << pixel_shift)*8 + mb_y * s->uvlinesize * block_h; | ||||
| @@ -1844,8 +1907,8 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i | |||||
| block_offset = &h->block_offset[48]; | block_offset = &h->block_offset[48]; | ||||
| if(mb_y&1){ //FIXME move out of this function? | if(mb_y&1){ //FIXME move out of this function? | ||||
| dest_y -= s->linesize*15; | dest_y -= s->linesize*15; | ||||
| dest_cb-= s->uvlinesize*(block_h-1); | |||||
| dest_cr-= s->uvlinesize*(block_h-1); | |||||
| dest_cb-= s->uvlinesize * (block_h - 1); | |||||
| dest_cr-= s->uvlinesize * (block_h - 1); | |||||
| } | } | ||||
| if(FRAME_MBAFF) { | if(FRAME_MBAFF) { | ||||
| int list; | int list; | ||||
| @@ -1884,7 +1947,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i | |||||
| } | } | ||||
| if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ | if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ | ||||
| if (!h->sps.chroma_format_idc) { | if (!h->sps.chroma_format_idc) { | ||||
| for (i = 0; i < 8; i++) { | |||||
| for (i = 0; i < block_h; i++) { | |||||
| uint16_t *tmp_cb = (uint16_t*)(dest_cb + i*uvlinesize); | uint16_t *tmp_cb = (uint16_t*)(dest_cb + i*uvlinesize); | ||||
| uint16_t *tmp_cr = (uint16_t*)(dest_cr + i*uvlinesize); | uint16_t *tmp_cr = (uint16_t*)(dest_cr + i*uvlinesize); | ||||
| for (j = 0; j < 8; j++) { | for (j = 0; j < 8; j++) { | ||||
| @@ -1911,13 +1974,13 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i | |||||
| if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ | if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ | ||||
| if (!h->sps.chroma_format_idc) { | if (!h->sps.chroma_format_idc) { | ||||
| for (i=0; i<8; i++) { | for (i=0; i<8; i++) { | ||||
| memset(dest_cb+ i*uvlinesize, 1 << (bit_depth - 1), 8); | |||||
| memset(dest_cr+ i*uvlinesize, 1 << (bit_depth - 1), 8); | |||||
| memset(dest_cb + i*uvlinesize, 1 << (bit_depth - 1), 8); | |||||
| memset(dest_cr + i*uvlinesize, 1 << (bit_depth - 1), 8); | |||||
| } | } | ||||
| } else { | } else { | ||||
| for (i=0; i<block_h; i++) { | for (i=0; i<block_h; i++) { | ||||
| memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8); | |||||
| memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8); | |||||
| memcpy(dest_cb + i*uvlinesize, h->mb + 128 + i*4, 8); | |||||
| memcpy(dest_cr + i*uvlinesize, h->mb + 160 + i*4, 8); | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| @@ -1937,11 +2000,21 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i | |||||
| if(h->deblocking_filter) | if(h->deblocking_filter) | ||||
| xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, 0, simple, pixel_shift); | xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, 0, simple, pixel_shift); | ||||
| }else if(is_h264){ | }else if(is_h264){ | ||||
| hl_motion(h, dest_y, dest_cb, dest_cr, | |||||
| s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab, | |||||
| s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab, | |||||
| h->h264dsp.weight_h264_pixels_tab, | |||||
| h->h264dsp.biweight_h264_pixels_tab, pixel_shift, 0); | |||||
| if (chroma422) { | |||||
| hl_motion_422(h, dest_y, dest_cb, dest_cr, | |||||
| s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab, | |||||
| s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab, | |||||
| h->h264dsp.weight_h264_pixels_tab, | |||||
| h->h264dsp.biweight_h264_pixels_tab, | |||||
| pixel_shift); | |||||
| } else { | |||||
| hl_motion_420(h, dest_y, dest_cb, dest_cr, | |||||
| s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab, | |||||
| s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab, | |||||
| h->h264dsp.weight_h264_pixels_tab, | |||||
| h->h264dsp.biweight_h264_pixels_tab, | |||||
| pixel_shift); | |||||
| } | |||||
| } | } | ||||
| hl_decode_mb_idct_luma(h, mb_type, is_h264, simple, transform_bypass, pixel_shift, block_offset, linesize, dest_y, 0); | hl_decode_mb_idct_luma(h, mb_type, is_h264, simple, transform_bypass, pixel_shift, block_offset, linesize, dest_y, 0); | ||||
| @@ -1959,14 +2032,20 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i | |||||
| if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h->mb, pixel_shift, i*16)) | if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h->mb, pixel_shift, i*16)) | ||||
| idct_add (dest[j-1] + block_offset[i], h->mb + (i*16 << pixel_shift), uvlinesize); | idct_add (dest[j-1] + block_offset[i], h->mb + (i*16 << pixel_shift), uvlinesize); | ||||
| } | } | ||||
| if (chroma422) { | |||||
| for(i=j*16+4; i<j*16+8; i++){ | |||||
| if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h->mb, pixel_shift, i*16)) | |||||
| idct_add (dest[j-1] + block_offset[i+4], h->mb + (i*16 << pixel_shift), uvlinesize); | |||||
| } | |||||
| } | |||||
| } | } | ||||
| } | } | ||||
| }else{ | }else{ | ||||
| if(is_h264){ | if(is_h264){ | ||||
| int qp[2]; | int qp[2]; | ||||
| if (CHROMA422) { | |||||
| qp[0] = h->chroma_qp[0]+3; | |||||
| qp[1] = h->chroma_qp[1]+3; | |||||
| if (chroma422) { | |||||
| qp[0] = h->chroma_qp[0] + 3; | |||||
| qp[1] = h->chroma_qp[1] + 3; | |||||
| } else { | } else { | ||||
| qp[0] = h->chroma_qp[0]; | qp[0] = h->chroma_qp[0]; | ||||
| qp[1] = h->chroma_qp[1]; | qp[1] = h->chroma_qp[1]; | ||||
| @@ -2086,7 +2165,7 @@ static av_always_inline void hl_decode_mb_444_internal(H264Context *h, int simpl | |||||
| s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab, | s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab, | ||||
| s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab, | s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab, | ||||
| h->h264dsp.weight_h264_pixels_tab, | h->h264dsp.weight_h264_pixels_tab, | ||||
| h->h264dsp.biweight_h264_pixels_tab, pixel_shift, 1); | |||||
| h->h264dsp.biweight_h264_pixels_tab, pixel_shift, 3); | |||||
| } | } | ||||
| for (p = 0; p < plane_count; p++) | for (p = 0; p < plane_count; p++) | ||||
| @@ -2690,6 +2769,8 @@ static int decode_slice_header(H264Context *h, H264Context *h0){ | |||||
| case 9 : | case 9 : | ||||
| if (CHROMA444) | if (CHROMA444) | ||||
| s->avctx->pix_fmt = PIX_FMT_YUV444P9; | s->avctx->pix_fmt = PIX_FMT_YUV444P9; | ||||
| else if (CHROMA422) | |||||
| s->avctx->pix_fmt = PIX_FMT_YUV422P9; | |||||
| else | else | ||||
| s->avctx->pix_fmt = PIX_FMT_YUV420P9; | s->avctx->pix_fmt = PIX_FMT_YUV420P9; | ||||
| break; | break; | ||||
| @@ -2708,7 +2789,7 @@ static int decode_slice_header(H264Context *h, H264Context *h0){ | |||||
| s->avctx->pix_fmt = PIX_FMT_GBR24P; | s->avctx->pix_fmt = PIX_FMT_GBR24P; | ||||
| av_log(h->s.avctx, AV_LOG_DEBUG, "Detected GBR colorspace.\n"); | av_log(h->s.avctx, AV_LOG_DEBUG, "Detected GBR colorspace.\n"); | ||||
| } | } | ||||
| }else if (CHROMA422) { | |||||
| } else if (CHROMA422) { | |||||
| s->avctx->pix_fmt = s->avctx->color_range == AVCOL_RANGE_JPEG ? PIX_FMT_YUVJ422P : PIX_FMT_YUV422P; | s->avctx->pix_fmt = s->avctx->color_range == AVCOL_RANGE_JPEG ? PIX_FMT_YUVJ422P : PIX_FMT_YUV422P; | ||||
| }else{ | }else{ | ||||
| s->avctx->pix_fmt = s->avctx->get_format(s->avctx, | s->avctx->pix_fmt = s->avctx->get_format(s->avctx, | ||||
| @@ -3384,7 +3465,7 @@ static void loop_filter(H264Context *h, int start_x, int end_x){ | |||||
| const int end_mb_y= s->mb_y + FRAME_MBAFF; | const int end_mb_y= s->mb_y + FRAME_MBAFF; | ||||
| const int old_slice_type= h->slice_type; | const int old_slice_type= h->slice_type; | ||||
| const int pixel_shift = h->pixel_shift; | const int pixel_shift = h->pixel_shift; | ||||
| const int block_h = 16>>s->chroma_y_shift; | |||||
| const int block_h = 16 >> s->chroma_y_shift; | |||||
| if(h->deblocking_filter) { | if(h->deblocking_filter) { | ||||
| for(mb_x= start_x; mb_x<end_x; mb_x++){ | for(mb_x= start_x; mb_x<end_x; mb_x++){ | ||||
| @@ -3401,8 +3482,8 @@ static void loop_filter(H264Context *h, int start_x, int end_x){ | |||||
| s->mb_x= mb_x; | s->mb_x= mb_x; | ||||
| s->mb_y= mb_y; | s->mb_y= mb_y; | ||||
| dest_y = s->current_picture.f.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize ) * 16; | dest_y = s->current_picture.f.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize ) * 16; | ||||
| dest_cb = s->current_picture.f.data[1] + (mb_x << pixel_shift)*(8<<CHROMA444) + mb_y * s->uvlinesize * block_h; | |||||
| dest_cr = s->current_picture.f.data[2] + (mb_x << pixel_shift)*(8<<CHROMA444) + mb_y * s->uvlinesize * block_h; | |||||
| dest_cb = s->current_picture.f.data[1] + (mb_x << pixel_shift) * (8 << CHROMA444) + mb_y * s->uvlinesize * block_h; | |||||
| dest_cr = s->current_picture.f.data[2] + (mb_x << pixel_shift) * (8 << CHROMA444) + mb_y * s->uvlinesize * block_h; | |||||
| //FIXME simplify above | //FIXME simplify above | ||||
| if (MB_FIELD) { | if (MB_FIELD) { | ||||
| @@ -3410,8 +3491,8 @@ static void loop_filter(H264Context *h, int start_x, int end_x){ | |||||
| uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2; | uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2; | ||||
| if(mb_y&1){ //FIXME move out of this function? | if(mb_y&1){ //FIXME move out of this function? | ||||
| dest_y -= s->linesize*15; | dest_y -= s->linesize*15; | ||||
| dest_cb-= s->uvlinesize*(block_h-1); | |||||
| dest_cr-= s->uvlinesize*(block_h-1); | |||||
| dest_cb-= s->uvlinesize * (block_h - 1); | |||||
| dest_cr-= s->uvlinesize * (block_h - 1); | |||||
| } | } | ||||
| } else { | } else { | ||||
| linesize = h->mb_linesize = s->linesize; | linesize = h->mb_linesize = s->linesize; | ||||
| @@ -1565,7 +1565,12 @@ DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8)[63] = { | |||||
| 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8 | 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8 | ||||
| }; | }; | ||||
| static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) { | |||||
| static av_always_inline void | |||||
| decode_cabac_residual_internal(H264Context *h, DCTELEM *block, | |||||
| int cat, int n, const uint8_t *scantable, | |||||
| const uint32_t *qmul, int max_coeff, | |||||
| int is_dc, int chroma422) | |||||
| { | |||||
| static const int significant_coeff_flag_offset[2][14] = { | static const int significant_coeff_flag_offset[2][14] = { | ||||
| { 105+0, 105+15, 105+29, 105+44, 105+47, 402, 484+0, 484+15, 484+29, 660, 528+0, 528+15, 528+29, 718 }, | { 105+0, 105+15, 105+29, 105+44, 105+47, 402, 484+0, 484+15, 484+29, 660, 528+0, 528+15, 528+29, 718 }, | ||||
| { 277+0, 277+15, 277+29, 277+44, 277+47, 436, 776+0, 776+15, 776+29, 675, 820+0, 820+15, 820+29, 733 } | { 277+0, 277+15, 277+29, 277+44, 277+47, 436, 776+0, 776+15, 776+29, 675, 820+0, 820+15, 820+29, 733 } | ||||
| @@ -1593,7 +1598,10 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT | |||||
| * map node ctx => cabac ctx for level=1 */ | * map node ctx => cabac ctx for level=1 */ | ||||
| static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 }; | static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 }; | ||||
| /* map node ctx => cabac ctx for level>1 */ | /* map node ctx => cabac ctx for level>1 */ | ||||
| static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 }; | |||||
| static const uint8_t coeff_abs_levelgt1_ctx[2][8] = { | |||||
| { 5, 5, 5, 5, 6, 7, 8, 9 }, | |||||
| { 5, 5, 5, 5, 6, 7, 8, 8 }, // 422/dc case | |||||
| }; | |||||
| static const uint8_t coeff_abs_level_transition[2][8] = { | static const uint8_t coeff_abs_level_transition[2][8] = { | ||||
| /* update node ctx after decoding a level=1 */ | /* update node ctx after decoding a level=1 */ | ||||
| { 1, 2, 3, 3, 4, 5, 6, 7 }, | { 1, 2, 3, 3, 4, 5, 6, 7 }, | ||||
| @@ -1652,7 +1660,7 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT | |||||
| coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, | coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, | ||||
| last_coeff_ctx_base, sig_off); | last_coeff_ctx_base, sig_off); | ||||
| } else { | } else { | ||||
| if (is_dc && max_coeff == 8) { // dc 422 | |||||
| if (is_dc && chroma422) { // dc 422 | |||||
| DECODE_SIGNIFICANCE(7, sig_coeff_offset_dc[last], sig_coeff_offset_dc[last]); | DECODE_SIGNIFICANCE(7, sig_coeff_offset_dc[last], sig_coeff_offset_dc[last]); | ||||
| } else { | } else { | ||||
| coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index, | coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index, | ||||
| @@ -1661,7 +1669,7 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT | |||||
| #else | #else | ||||
| DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] ); | DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] ); | ||||
| } else { | } else { | ||||
| if (is_dc && max_coeff == 8) { // dc 422 | |||||
| if (is_dc && chroma422) { // dc 422 | |||||
| DECODE_SIGNIFICANCE(7, sig_coeff_offset_dc[last], sig_coeff_offset_dc[last]); | DECODE_SIGNIFICANCE(7, sig_coeff_offset_dc[last], sig_coeff_offset_dc[last]); | ||||
| } else { | } else { | ||||
| DECODE_SIGNIFICANCE(max_coeff - 1, last, last); | DECODE_SIGNIFICANCE(max_coeff - 1, last, last); | ||||
| @@ -1701,9 +1709,7 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT | |||||
| } \ | } \ | ||||
| } else { \ | } else { \ | ||||
| int coeff_abs = 2; \ | int coeff_abs = 2; \ | ||||
| if (is_dc && max_coeff == 8) \ | |||||
| node_ctx = FFMIN(node_ctx, 6); \ | |||||
| ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base; \ | |||||
| ctx = coeff_abs_levelgt1_ctx[is_dc && chroma422][node_ctx] + abs_level_m1_ctx_base; \ | |||||
| node_ctx = coeff_abs_level_transition[1][node_ctx]; \ | node_ctx = coeff_abs_level_transition[1][node_ctx]; \ | ||||
| \ | \ | ||||
| while( coeff_abs < 15 && get_cabac( CC, ctx ) ) { \ | while( coeff_abs < 15 && get_cabac( CC, ctx ) ) { \ | ||||
| @@ -1745,11 +1751,18 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT | |||||
| } | } | ||||
| static void decode_cabac_residual_dc_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, int max_coeff ) { | static void decode_cabac_residual_dc_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, int max_coeff ) { | ||||
| decode_cabac_residual_internal(h, block, cat, n, scantable, NULL, max_coeff, 1); | |||||
| decode_cabac_residual_internal(h, block, cat, n, scantable, NULL, max_coeff, 1, 0); | |||||
| } | |||||
| static void decode_cabac_residual_dc_internal_422(H264Context *h, DCTELEM *block, | |||||
| int cat, int n, const uint8_t *scantable, | |||||
| int max_coeff) | |||||
| { | |||||
| decode_cabac_residual_internal(h, block, cat, n, scantable, NULL, max_coeff, 1, 1); | |||||
| } | } | ||||
| static void decode_cabac_residual_nondc_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) { | static void decode_cabac_residual_nondc_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) { | ||||
| decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0); | |||||
| decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0, 0); | |||||
| } | } | ||||
| /* cat: 0-> DC 16x16 n = 0 | /* cat: 0-> DC 16x16 n = 0 | ||||
| @@ -1773,6 +1786,19 @@ static av_always_inline void decode_cabac_residual_dc( H264Context *h, DCTELEM * | |||||
| decode_cabac_residual_dc_internal( h, block, cat, n, scantable, max_coeff ); | decode_cabac_residual_dc_internal( h, block, cat, n, scantable, max_coeff ); | ||||
| } | } | ||||
| static av_always_inline void | |||||
| decode_cabac_residual_dc_422(H264Context *h, DCTELEM *block, | |||||
| int cat, int n, const uint8_t *scantable, | |||||
| int max_coeff) | |||||
| { | |||||
| /* read coded block flag */ | |||||
| if (get_cabac(&h->cabac, &h->cabac_state[get_cabac_cbf_ctx(h, cat, n, max_coeff, 1)]) == 0) { | |||||
| h->non_zero_count_cache[scan8[n]] = 0; | |||||
| return; | |||||
| } | |||||
| decode_cabac_residual_dc_internal_422(h, block, cat, n, scantable, max_coeff); | |||||
| } | |||||
| static av_always_inline void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) { | static av_always_inline void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) { | ||||
| /* read coded block flag */ | /* read coded block flag */ | ||||
| if( (cat != 5 || CHROMA444) && get_cabac( &h->cabac, &h->cabac_state[get_cabac_cbf_ctx( h, cat, n, max_coeff, 0 ) ] ) == 0 ) { | if( (cat != 5 || CHROMA444) && get_cabac( &h->cabac, &h->cabac_state[get_cabac_cbf_ctx( h, cat, n, max_coeff, 0 ) ] ) == 0 ) { | ||||
| @@ -2325,17 +2351,14 @@ decode_intra_mb: | |||||
| if(CHROMA444){ | if(CHROMA444){ | ||||
| decode_cabac_luma_residual(h, scan, scan8x8, pixel_shift, mb_type, cbp, 1); | decode_cabac_luma_residual(h, scan, scan8x8, pixel_shift, mb_type, cbp, 1); | ||||
| decode_cabac_luma_residual(h, scan, scan8x8, pixel_shift, mb_type, cbp, 2); | decode_cabac_luma_residual(h, scan, scan8x8, pixel_shift, mb_type, cbp, 2); | ||||
| } else { | |||||
| const int num_c8x8 = h->sps.chroma_format_idc; | |||||
| } else if (CHROMA422) { | |||||
| if( cbp&0x30 ){ | if( cbp&0x30 ){ | ||||
| int c; | int c; | ||||
| for( c = 0; c < 2; c++ ) { | for( c = 0; c < 2; c++ ) { | ||||
| //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c ); | //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c ); | ||||
| decode_cabac_residual_dc(h, h->mb + ((256 + 16*16*c) << pixel_shift), 3, | |||||
| CHROMA_DC_BLOCK_INDEX+c, | |||||
| CHROMA422 ? chroma422_dc_scan : chroma_dc_scan, | |||||
| 4*num_c8x8); | |||||
| decode_cabac_residual_dc_422(h, h->mb + ((256 + 16*16*c) << pixel_shift), 3, | |||||
| CHROMA_DC_BLOCK_INDEX + c, | |||||
| chroma422_dc_scan, 8); | |||||
| } | } | ||||
| } | } | ||||
| @@ -2344,7 +2367,7 @@ decode_intra_mb: | |||||
| for( c = 0; c < 2; c++ ) { | for( c = 0; c < 2; c++ ) { | ||||
| DCTELEM *mb = h->mb + (16*(16 + 16*c) << pixel_shift); | DCTELEM *mb = h->mb + (16*(16 + 16*c) << pixel_shift); | ||||
| qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]]; | qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]]; | ||||
| for (i8x8 = 0; i8x8 < num_c8x8; i8x8++) { | |||||
| for (i8x8 = 0; i8x8 < 2; i8x8++) { | |||||
| for (i = 0; i < 4; i++) { | for (i = 0; i < 4; i++) { | ||||
| const int index = 16 + 16 * c + 8*i8x8 + i; | const int index = 16 + 16 * c + 8*i8x8 + i; | ||||
| //av_log(s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16); | //av_log(s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16); | ||||
| @@ -2357,6 +2380,29 @@ decode_intra_mb: | |||||
| fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1); | fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1); | ||||
| fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1); | fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1); | ||||
| } | } | ||||
| } else /* yuv420 */ { | |||||
| if( cbp&0x30 ){ | |||||
| int c; | |||||
| for( c = 0; c < 2; c++ ) { | |||||
| //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c ); | |||||
| decode_cabac_residual_dc(h, h->mb + ((256 + 16*16*c) << pixel_shift), 3, CHROMA_DC_BLOCK_INDEX+c, chroma_dc_scan, 4); | |||||
| } | |||||
| } | |||||
| if( cbp&0x20 ) { | |||||
| int c, i; | |||||
| for( c = 0; c < 2; c++ ) { | |||||
| qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]]; | |||||
| for( i = 0; i < 4; i++ ) { | |||||
| const int index = 16 + 16 * c + i; | |||||
| //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 ); | |||||
| decode_cabac_residual_nondc(h, h->mb + (16*index << pixel_shift), 4, index, scan + 1, qmul, 15); | |||||
| } | |||||
| } | |||||
| } else { | |||||
| fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1); | |||||
| fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1); | |||||
| } | |||||
| } | } | ||||
| } else { | } else { | ||||
| fill_rectangle(&h->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1); | fill_rectangle(&h->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1); | ||||
| @@ -415,7 +415,7 @@ int ff_h264_decode_seq_parameter_set(H264Context *h){ | |||||
| #endif | #endif | ||||
| sps->crop= get_bits1(&s->gb); | sps->crop= get_bits1(&s->gb); | ||||
| if(sps->crop){ | if(sps->crop){ | ||||
| int crop_vertical_limit = sps->chroma_format_idc & 2 ? 16 : 8; | |||||
| int crop_vertical_limit = sps->chroma_format_idc & 2 ? 16 : 8; | |||||
| int crop_horizontal_limit = sps->chroma_format_idc == 3 ? 16 : 8; | int crop_horizontal_limit = sps->chroma_format_idc == 3 ? 16 : 8; | ||||
| sps->crop_left = get_ue_golomb(&s->gb); | sps->crop_left = get_ue_golomb(&s->gb); | ||||
| sps->crop_right = get_ue_golomb(&s->gb); | sps->crop_right = get_ue_golomb(&s->gb); | ||||
| @@ -64,26 +64,14 @@ void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, const int chroma_fo | |||||
| else\ | else\ | ||||
| c->h264_chroma_dc_dequant_idct= FUNC(ff_h264_chroma422_dc_dequant_idct, depth);\ | c->h264_chroma_dc_dequant_idct= FUNC(ff_h264_chroma422_dc_dequant_idct, depth);\ | ||||
| \ | \ | ||||
| c->weight_h264_pixels_tab[0]= FUNC(weight_h264_pixels16x16, depth);\ | |||||
| c->weight_h264_pixels_tab[1]= FUNC(weight_h264_pixels16x8, depth);\ | |||||
| c->weight_h264_pixels_tab[2]= FUNC(weight_h264_pixels8x16, depth);\ | |||||
| c->weight_h264_pixels_tab[3]= FUNC(weight_h264_pixels8x8, depth);\ | |||||
| c->weight_h264_pixels_tab[4]= FUNC(weight_h264_pixels8x4, depth);\ | |||||
| c->weight_h264_pixels_tab[5]= FUNC(weight_h264_pixels4x8, depth);\ | |||||
| c->weight_h264_pixels_tab[6]= FUNC(weight_h264_pixels4x4, depth);\ | |||||
| c->weight_h264_pixels_tab[7]= FUNC(weight_h264_pixels4x2, depth);\ | |||||
| c->weight_h264_pixels_tab[8]= FUNC(weight_h264_pixels2x4, depth);\ | |||||
| c->weight_h264_pixels_tab[9]= FUNC(weight_h264_pixels2x2, depth);\ | |||||
| c->biweight_h264_pixels_tab[0]= FUNC(biweight_h264_pixels16x16, depth);\ | |||||
| c->biweight_h264_pixels_tab[1]= FUNC(biweight_h264_pixels16x8, depth);\ | |||||
| c->biweight_h264_pixels_tab[2]= FUNC(biweight_h264_pixels8x16, depth);\ | |||||
| c->biweight_h264_pixels_tab[3]= FUNC(biweight_h264_pixels8x8, depth);\ | |||||
| c->biweight_h264_pixels_tab[4]= FUNC(biweight_h264_pixels8x4, depth);\ | |||||
| c->biweight_h264_pixels_tab[5]= FUNC(biweight_h264_pixels4x8, depth);\ | |||||
| c->biweight_h264_pixels_tab[6]= FUNC(biweight_h264_pixels4x4, depth);\ | |||||
| c->biweight_h264_pixels_tab[7]= FUNC(biweight_h264_pixels4x2, depth);\ | |||||
| c->biweight_h264_pixels_tab[8]= FUNC(biweight_h264_pixels2x4, depth);\ | |||||
| c->biweight_h264_pixels_tab[9]= FUNC(biweight_h264_pixels2x2, depth);\ | |||||
| c->weight_h264_pixels_tab[0]= FUNC(weight_h264_pixels16, depth);\ | |||||
| c->weight_h264_pixels_tab[1]= FUNC(weight_h264_pixels8, depth);\ | |||||
| c->weight_h264_pixels_tab[2]= FUNC(weight_h264_pixels4, depth);\ | |||||
| c->weight_h264_pixels_tab[3]= FUNC(weight_h264_pixels2, depth);\ | |||||
| c->biweight_h264_pixels_tab[0]= FUNC(biweight_h264_pixels16, depth);\ | |||||
| c->biweight_h264_pixels_tab[1]= FUNC(biweight_h264_pixels8, depth);\ | |||||
| c->biweight_h264_pixels_tab[2]= FUNC(biweight_h264_pixels4, depth);\ | |||||
| c->biweight_h264_pixels_tab[3]= FUNC(biweight_h264_pixels2, depth);\ | |||||
| \ | \ | ||||
| c->h264_v_loop_filter_luma= FUNC(h264_v_loop_filter_luma, depth);\ | c->h264_v_loop_filter_luma= FUNC(h264_v_loop_filter_luma, depth);\ | ||||
| c->h264_h_loop_filter_luma= FUNC(h264_h_loop_filter_luma, depth);\ | c->h264_h_loop_filter_luma= FUNC(h264_h_loop_filter_luma, depth);\ | ||||
| @@ -31,16 +31,18 @@ | |||||
| #include "dsputil.h" | #include "dsputil.h" | ||||
| //typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y); | //typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y); | ||||
| typedef void (*h264_weight_func)(uint8_t *block, int stride, int log2_denom, int weight, int offset); | |||||
| typedef void (*h264_biweight_func)(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset); | |||||
| typedef void (*h264_weight_func)(uint8_t *block, int stride, int height, | |||||
| int log2_denom, int weight, int offset); | |||||
| typedef void (*h264_biweight_func)(uint8_t *dst, uint8_t *src, int stride, int height, | |||||
| int log2_denom, int weightd, int weights, int offset); | |||||
| /** | /** | ||||
| * Context for storing H.264 DSP functions | * Context for storing H.264 DSP functions | ||||
| */ | */ | ||||
| typedef struct H264DSPContext{ | typedef struct H264DSPContext{ | ||||
| /* weighted MC */ | /* weighted MC */ | ||||
| h264_weight_func weight_h264_pixels_tab[10]; | |||||
| h264_biweight_func biweight_h264_pixels_tab[10]; | |||||
| h264_weight_func weight_h264_pixels_tab[4]; | |||||
| h264_biweight_func biweight_h264_pixels_tab[4]; | |||||
| /* loop filter */ | /* loop filter */ | ||||
| void (*h264_v_loop_filter_luma)(uint8_t *pix/*align 16*/, int stride, int alpha, int beta, int8_t *tc0); | void (*h264_v_loop_filter_luma)(uint8_t *pix/*align 16*/, int stride, int alpha, int beta, int8_t *tc0); | ||||
| @@ -29,14 +29,16 @@ | |||||
| #define op_scale1(x) block[x] = av_clip_pixel( (block[x]*weight + offset) >> log2_denom ) | #define op_scale1(x) block[x] = av_clip_pixel( (block[x]*weight + offset) >> log2_denom ) | ||||
| #define op_scale2(x) dst[x] = av_clip_pixel( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1)) | #define op_scale2(x) dst[x] = av_clip_pixel( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1)) | ||||
| #define H264_WEIGHT(W,H) \ | |||||
| static void FUNCC(weight_h264_pixels ## W ## x ## H)(uint8_t *p_block, int stride, int log2_denom, int weight, int offset){ \ | |||||
| #define H264_WEIGHT(W) \ | |||||
| static void FUNCC(weight_h264_pixels ## W)(uint8_t *_block, int stride, int height, \ | |||||
| int log2_denom, int weight, int offset) \ | |||||
| { \ | |||||
| int y; \ | int y; \ | ||||
| pixel *block = (pixel*)p_block; \ | |||||
| pixel *block = (pixel*)_block; \ | |||||
| stride >>= sizeof(pixel)-1; \ | stride >>= sizeof(pixel)-1; \ | ||||
| offset <<= (log2_denom + (BIT_DEPTH-8)); \ | offset <<= (log2_denom + (BIT_DEPTH-8)); \ | ||||
| if(log2_denom) offset += 1<<(log2_denom-1); \ | if(log2_denom) offset += 1<<(log2_denom-1); \ | ||||
| for(y=0; y<H; y++, block += stride){ \ | |||||
| for (y = 0; y < height; y++, block += stride) { \ | |||||
| op_scale1(0); \ | op_scale1(0); \ | ||||
| op_scale1(1); \ | op_scale1(1); \ | ||||
| if(W==2) continue; \ | if(W==2) continue; \ | ||||
| @@ -58,14 +60,16 @@ static void FUNCC(weight_h264_pixels ## W ## x ## H)(uint8_t *p_block, int strid | |||||
| op_scale1(15); \ | op_scale1(15); \ | ||||
| } \ | } \ | ||||
| } \ | } \ | ||||
| static void FUNCC(biweight_h264_pixels ## W ## x ## H)(uint8_t *_dst, uint8_t *_src, int stride, int log2_denom, int weightd, int weights, int offset){ \ | |||||
| static void FUNCC(biweight_h264_pixels ## W)(uint8_t *_dst, uint8_t *_src, int stride, int height, \ | |||||
| int log2_denom, int weightd, int weights, int offset) \ | |||||
| { \ | |||||
| int y; \ | int y; \ | ||||
| pixel *dst = (pixel*)_dst; \ | pixel *dst = (pixel*)_dst; \ | ||||
| pixel *src = (pixel*)_src; \ | pixel *src = (pixel*)_src; \ | ||||
| stride >>= sizeof(pixel)-1; \ | stride >>= sizeof(pixel)-1; \ | ||||
| offset <<= (BIT_DEPTH-8); \ | offset <<= (BIT_DEPTH-8); \ | ||||
| offset = ((offset + 1) | 1) << log2_denom; \ | offset = ((offset + 1) | 1) << log2_denom; \ | ||||
| for(y=0; y<H; y++, dst += stride, src += stride){ \ | |||||
| for (y = 0; y < height; y++, dst += stride, src += stride) { \ | |||||
| op_scale2(0); \ | op_scale2(0); \ | ||||
| op_scale2(1); \ | op_scale2(1); \ | ||||
| if(W==2) continue; \ | if(W==2) continue; \ | ||||
| @@ -88,16 +92,10 @@ static void FUNCC(biweight_h264_pixels ## W ## x ## H)(uint8_t *_dst, uint8_t *_ | |||||
| } \ | } \ | ||||
| } | } | ||||
| H264_WEIGHT(16,16) | |||||
| H264_WEIGHT(16,8) | |||||
| H264_WEIGHT(8,16) | |||||
| H264_WEIGHT(8,8) | |||||
| H264_WEIGHT(8,4) | |||||
| H264_WEIGHT(4,8) | |||||
| H264_WEIGHT(4,4) | |||||
| H264_WEIGHT(4,2) | |||||
| H264_WEIGHT(2,4) | |||||
| H264_WEIGHT(2,2) | |||||
| H264_WEIGHT(16) | |||||
| H264_WEIGHT(8) | |||||
| H264_WEIGHT(4) | |||||
| H264_WEIGHT(2) | |||||
| #undef op_scale1 | #undef op_scale1 | ||||
| #undef op_scale2 | #undef op_scale2 | ||||
| @@ -228,16 +228,6 @@ void FUNCC(ff_h264_idct_add8)(uint8_t **dest, const int *block_offset, DCTELEM * | |||||
| void FUNCC(ff_h264_idct_add8_422)(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[15*8]){ | void FUNCC(ff_h264_idct_add8_422)(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[15*8]){ | ||||
| int i, j; | int i, j; | ||||
| #if 0 | |||||
| av_log(NULL, AV_LOG_INFO, "idct\n"); | |||||
| int32_t *b = block; | |||||
| for (int i = 0; i < 256; i++) { | |||||
| av_log(NULL, AV_LOG_INFO, "%5d ", b[i+256]); | |||||
| if (!((i+1) % 16)) | |||||
| av_log(NULL, AV_LOG_INFO, "\n"); | |||||
| } | |||||
| #endif | |||||
| for(j=1; j<3; j++){ | for(j=1; j<3; j++){ | ||||
| for(i=j*16; i<j*16+4; i++){ | for(i=j*16; i<j*16+4; i++){ | ||||
| if(nnzc[ scan8[i] ]) | if(nnzc[ scan8[i] ]) | ||||
| @@ -296,13 +286,13 @@ void FUNCC(ff_h264_luma_dc_dequant_idct)(DCTELEM *p_output, DCTELEM *p_input, in | |||||
| #undef stride | #undef stride | ||||
| } | } | ||||
| void FUNCC(ff_h264_chroma422_dc_dequant_idct)(DCTELEM *p_block, int qmul){ | |||||
| void FUNCC(ff_h264_chroma422_dc_dequant_idct)(DCTELEM *_block, int qmul){ | |||||
| const int stride= 16*2; | const int stride= 16*2; | ||||
| const int xStride= 16; | const int xStride= 16; | ||||
| int i; | int i; | ||||
| int temp[8]; | int temp[8]; | ||||
| static const uint8_t x_offset[2]={0, 16}; | static const uint8_t x_offset[2]={0, 16}; | ||||
| dctcoef *block = (dctcoef*)p_block; | |||||
| dctcoef *block = (dctcoef*)_block; | |||||
| for(i=0; i<4; i++){ | for(i=0; i<4; i++){ | ||||
| temp[2*i+0] = block[stride*i + xStride*0] + block[stride*i + xStride*1]; | temp[2*i+0] = block[stride*i + xStride*0] + block[stride*i + xStride*1]; | ||||
| @@ -321,22 +311,13 @@ void FUNCC(ff_h264_chroma422_dc_dequant_idct)(DCTELEM *p_block, int qmul){ | |||||
| block[stride*2+offset]= ((z1 - z2)*qmul + 128) >> 8; | block[stride*2+offset]= ((z1 - z2)*qmul + 128) >> 8; | ||||
| block[stride*3+offset]= ((z0 - z3)*qmul + 128) >> 8; | block[stride*3+offset]= ((z0 - z3)*qmul + 128) >> 8; | ||||
| } | } | ||||
| #if 0 | |||||
| av_log(NULL, AV_LOG_INFO, "after chroma dc\n"); | |||||
| for (int i = 0; i < 256; i++) { | |||||
| av_log(NULL, AV_LOG_INFO, "%5d ", block[i]); | |||||
| if (!((i+1) % 16)) | |||||
| av_log(NULL, AV_LOG_INFO, "\n"); | |||||
| } | |||||
| #endif | |||||
| } | } | ||||
| void FUNCC(ff_h264_chroma_dc_dequant_idct)(DCTELEM *p_block, int qmul){ | |||||
| void FUNCC(ff_h264_chroma_dc_dequant_idct)(DCTELEM *_block, int qmul){ | |||||
| const int stride= 16*2; | const int stride= 16*2; | ||||
| const int xStride= 16; | const int xStride= 16; | ||||
| int a,b,c,d,e; | int a,b,c,d,e; | ||||
| dctcoef *block = (dctcoef*)p_block; | |||||
| dctcoef *block = (dctcoef*)_block; | |||||
| a= block[stride*0 + xStride*0]; | a= block[stride*0 + xStride*0]; | ||||
| b= block[stride*0 + xStride*1]; | b= block[stride*0 + xStride*1]; | ||||
| @@ -462,10 +462,10 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth, co | |||||
| h->pred8x8[DC_PRED8x8 ]= FUNCC(pred8x16_dc , depth);\ | h->pred8x8[DC_PRED8x8 ]= FUNCC(pred8x16_dc , depth);\ | ||||
| h->pred8x8[LEFT_DC_PRED8x8]= FUNCC(pred8x16_left_dc , depth);\ | h->pred8x8[LEFT_DC_PRED8x8]= FUNCC(pred8x16_left_dc , depth);\ | ||||
| h->pred8x8[TOP_DC_PRED8x8 ]= FUNCC(pred8x16_top_dc , depth);\ | h->pred8x8[TOP_DC_PRED8x8 ]= FUNCC(pred8x16_top_dc , depth);\ | ||||
| h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l0t, depth);\ | |||||
| h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0lt, depth);\ | |||||
| h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l00, depth);\ | |||||
| h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0l0, depth);\ | |||||
| h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= FUNC(pred8x16_mad_cow_dc_l0t, depth);\ | |||||
| h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= FUNC(pred8x16_mad_cow_dc_0lt, depth);\ | |||||
| h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= FUNC(pred8x16_mad_cow_dc_l00, depth);\ | |||||
| h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= FUNC(pred8x16_mad_cow_dc_0l0, depth);\ | |||||
| }\ | }\ | ||||
| }else{\ | }else{\ | ||||
| h->pred8x8[DC_PRED8x8 ]= FUNCD(pred8x8_dc_rv40);\ | h->pred8x8[DC_PRED8x8 ]= FUNCD(pred8x8_dc_rv40);\ | ||||
| @@ -510,8 +510,13 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth, co | |||||
| h->pred4x4_add [ HOR_PRED ]= FUNCC(pred4x4_horizontal_add , depth);\ | h->pred4x4_add [ HOR_PRED ]= FUNCC(pred4x4_horizontal_add , depth);\ | ||||
| h->pred8x8l_add [VERT_PRED ]= FUNCC(pred8x8l_vertical_add , depth);\ | h->pred8x8l_add [VERT_PRED ]= FUNCC(pred8x8l_vertical_add , depth);\ | ||||
| h->pred8x8l_add [ HOR_PRED ]= FUNCC(pred8x8l_horizontal_add , depth);\ | h->pred8x8l_add [ HOR_PRED ]= FUNCC(pred8x8l_horizontal_add , depth);\ | ||||
| if (chroma_format_idc == 1) {\ | |||||
| h->pred8x8_add [VERT_PRED8x8]= FUNCC(pred8x8_vertical_add , depth);\ | h->pred8x8_add [VERT_PRED8x8]= FUNCC(pred8x8_vertical_add , depth);\ | ||||
| h->pred8x8_add [ HOR_PRED8x8]= FUNCC(pred8x8_horizontal_add , depth);\ | h->pred8x8_add [ HOR_PRED8x8]= FUNCC(pred8x8_horizontal_add , depth);\ | ||||
| } else {\ | |||||
| h->pred8x8_add [VERT_PRED8x8]= FUNCC(pred8x16_vertical_add , depth);\ | |||||
| h->pred8x8_add [ HOR_PRED8x8]= FUNCC(pred8x16_horizontal_add , depth);\ | |||||
| }\ | |||||
| h->pred16x16_add[VERT_PRED8x8]= FUNCC(pred16x16_vertical_add , depth);\ | h->pred16x16_add[VERT_PRED8x8]= FUNCC(pred16x16_vertical_add , depth);\ | ||||
| h->pred16x16_add[ HOR_PRED8x8]= FUNCC(pred16x16_horizontal_add , depth);\ | h->pred16x16_add[ HOR_PRED8x8]= FUNCC(pred16x16_horizontal_add , depth);\ | ||||
| @@ -663,23 +663,45 @@ static void FUNC(pred8x8_mad_cow_dc_l0t)(uint8_t *src, int stride){ | |||||
| FUNCC(pred4x4_dc)(src, NULL, stride); | FUNCC(pred4x4_dc)(src, NULL, stride); | ||||
| } | } | ||||
| static void FUNC(pred8x16_mad_cow_dc_l0t)(uint8_t *src, int stride){ | |||||
| FUNCC(pred8x16_top_dc)(src, stride); | |||||
| FUNCC(pred4x4_dc)(src, NULL, stride); | |||||
| } | |||||
| static void FUNC(pred8x8_mad_cow_dc_0lt)(uint8_t *src, int stride){ | static void FUNC(pred8x8_mad_cow_dc_0lt)(uint8_t *src, int stride){ | ||||
| FUNCC(pred8x8_dc)(src, stride); | FUNCC(pred8x8_dc)(src, stride); | ||||
| FUNCC(pred4x4_top_dc)(src, NULL, stride); | FUNCC(pred4x4_top_dc)(src, NULL, stride); | ||||
| } | } | ||||
| static void FUNC(pred8x16_mad_cow_dc_0lt)(uint8_t *src, int stride){ | |||||
| FUNCC(pred8x16_dc)(src, stride); | |||||
| FUNCC(pred4x4_top_dc)(src, NULL, stride); | |||||
| } | |||||
| static void FUNC(pred8x8_mad_cow_dc_l00)(uint8_t *src, int stride){ | static void FUNC(pred8x8_mad_cow_dc_l00)(uint8_t *src, int stride){ | ||||
| FUNCC(pred8x8_left_dc)(src, stride); | FUNCC(pred8x8_left_dc)(src, stride); | ||||
| FUNCC(pred4x4_128_dc)(src + 4*stride , NULL, stride); | FUNCC(pred4x4_128_dc)(src + 4*stride , NULL, stride); | ||||
| FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride); | FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride); | ||||
| } | } | ||||
| static void FUNC(pred8x16_mad_cow_dc_l00)(uint8_t *src, int stride){ | |||||
| FUNCC(pred8x16_left_dc)(src, stride); | |||||
| FUNCC(pred4x4_128_dc)(src + 4*stride , NULL, stride); | |||||
| FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride); | |||||
| } | |||||
| static void FUNC(pred8x8_mad_cow_dc_0l0)(uint8_t *src, int stride){ | static void FUNC(pred8x8_mad_cow_dc_0l0)(uint8_t *src, int stride){ | ||||
| FUNCC(pred8x8_left_dc)(src, stride); | FUNCC(pred8x8_left_dc)(src, stride); | ||||
| FUNCC(pred4x4_128_dc)(src , NULL, stride); | FUNCC(pred4x4_128_dc)(src , NULL, stride); | ||||
| FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride); | FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride); | ||||
| } | } | ||||
| static void FUNC(pred8x16_mad_cow_dc_0l0)(uint8_t *src, int stride){ | |||||
| FUNCC(pred8x16_left_dc)(src, stride); | |||||
| FUNCC(pred4x4_128_dc)(src , NULL, stride); | |||||
| FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride); | |||||
| } | |||||
| static void FUNCC(pred8x8_plane)(uint8_t *_src, int _stride){ | static void FUNCC(pred8x8_plane)(uint8_t *_src, int _stride){ | ||||
| int j, k; | int j, k; | ||||
| int a; | int a; | ||||
| @@ -1126,8 +1148,24 @@ static void FUNCC(pred8x8_vertical_add)(uint8_t *pix, const int *block_offset, c | |||||
| FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride); | FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride); | ||||
| } | } | ||||
| static void FUNCC(pred8x16_vertical_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){ | |||||
| int i; | |||||
| for(i=0; i<4; i++) | |||||
| FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride); | |||||
| for(i=4; i<8; i++) | |||||
| FUNCC(pred4x4_vertical_add)(pix + block_offset[i+4], block + i*16*sizeof(pixel), stride); | |||||
| } | |||||
| static void FUNCC(pred8x8_horizontal_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){ | static void FUNCC(pred8x8_horizontal_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){ | ||||
| int i; | int i; | ||||
| for(i=0; i<4; i++) | for(i=0; i<4; i++) | ||||
| FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride); | FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride); | ||||
| } | } | ||||
| static void FUNCC(pred8x16_horizontal_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){ | |||||
| int i; | |||||
| for(i=0; i<4; i++) | |||||
| FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride); | |||||
| for(i=4; i<8; i++) | |||||
| FUNCC(pred4x4_horizontal_add)(pix + block_offset[i+4], block + i*16*sizeof(pixel), stride); | |||||
| } | |||||
| @@ -18,11 +18,11 @@ | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||
| */ | */ | ||||
| #include "avcodec.h" | |||||
| #include <speex/speex.h> | #include <speex/speex.h> | ||||
| #include <speex/speex_header.h> | #include <speex/speex_header.h> | ||||
| #include <speex/speex_stereo.h> | #include <speex/speex_stereo.h> | ||||
| #include <speex/speex_callbacks.h> | #include <speex/speex_callbacks.h> | ||||
| #include "avcodec.h" | |||||
| typedef struct { | typedef struct { | ||||
| SpeexBits bits; | SpeexBits bits; | ||||
| @@ -60,14 +60,14 @@ static av_cold int libspeex_decode_init(AVCodecContext *avctx) | |||||
| mode = speex_lib_get_mode(s->header->mode); | mode = speex_lib_get_mode(s->header->mode); | ||||
| if (!mode) { | if (!mode) { | ||||
| av_log(avctx, AV_LOG_ERROR, "Unknown Speex mode %d", s->header->mode); | av_log(avctx, AV_LOG_ERROR, "Unknown Speex mode %d", s->header->mode); | ||||
| return -1; | |||||
| return AVERROR_INVALIDDATA; | |||||
| } | } | ||||
| } else | } else | ||||
| av_log(avctx, AV_LOG_INFO, "Missing Speex header, assuming defaults.\n"); | av_log(avctx, AV_LOG_INFO, "Missing Speex header, assuming defaults.\n"); | ||||
| if (avctx->channels > 2) { | if (avctx->channels > 2) { | ||||
| av_log(avctx, AV_LOG_ERROR, "Only stereo and mono are supported.\n"); | av_log(avctx, AV_LOG_ERROR, "Only stereo and mono are supported.\n"); | ||||
| return -1; | |||||
| return AVERROR(EINVAL); | |||||
| } | } | ||||
| speex_bits_init(&s->bits); | speex_bits_init(&s->bits); | ||||
| @@ -99,32 +99,42 @@ static int libspeex_decode_frame(AVCodecContext *avctx, | |||||
| uint8_t *buf = avpkt->data; | uint8_t *buf = avpkt->data; | ||||
| int buf_size = avpkt->size; | int buf_size = avpkt->size; | ||||
| LibSpeexContext *s = avctx->priv_data; | LibSpeexContext *s = avctx->priv_data; | ||||
| int16_t *output = data, *end; | |||||
| int i, num_samples; | |||||
| num_samples = s->frame_size * avctx->channels; | |||||
| end = output + *data_size / sizeof(*output); | |||||
| speex_bits_read_from(&s->bits, buf, buf_size); | |||||
| for (i = 0; speex_bits_remaining(&s->bits) && output + num_samples < end; i++) { | |||||
| int ret = speex_decode_int(s->dec_state, &s->bits, output); | |||||
| if (ret <= -2) { | |||||
| av_log(avctx, AV_LOG_ERROR, "Error decoding Speex frame.\n"); | |||||
| return -1; | |||||
| } else if (ret == -1) | |||||
| // end of stream | |||||
| break; | |||||
| int16_t *output = data; | |||||
| int out_size, ret, consumed = 0; | |||||
| /* check output buffer size */ | |||||
| out_size = s->frame_size * avctx->channels * | |||||
| av_get_bytes_per_sample(avctx->sample_fmt); | |||||
| if (*data_size < out_size) { | |||||
| av_log(avctx, AV_LOG_ERROR, "Output buffer is too small\n"); | |||||
| return AVERROR(EINVAL); | |||||
| } | |||||
| if (avctx->channels == 2) | |||||
| speex_decode_stereo_int(output, s->frame_size, &s->stereo); | |||||
| /* if there is not enough data left for the smallest possible frame, | |||||
| reset the libspeex buffer using the current packet, otherwise ignore | |||||
| the current packet and keep decoding frames from the libspeex buffer. */ | |||||
| if (speex_bits_remaining(&s->bits) < 43) { | |||||
| /* check for flush packet */ | |||||
| if (!buf || !buf_size) { | |||||
| *data_size = 0; | |||||
| return buf_size; | |||||
| } | |||||
| /* set new buffer */ | |||||
| speex_bits_read_from(&s->bits, buf, buf_size); | |||||
| consumed = buf_size; | |||||
| } | |||||
| output += num_samples; | |||||
| /* decode a single frame */ | |||||
| ret = speex_decode_int(s->dec_state, &s->bits, output); | |||||
| if (ret <= -2) { | |||||
| av_log(avctx, AV_LOG_ERROR, "Error decoding Speex frame.\n"); | |||||
| return AVERROR_INVALIDDATA; | |||||
| } | } | ||||
| if (avctx->channels == 2) | |||||
| speex_decode_stereo_int(output, s->frame_size, &s->stereo); | |||||
| avctx->frame_size = s->frame_size * i; | |||||
| *data_size = avctx->channels * avctx->frame_size * sizeof(*output); | |||||
| return buf_size; | |||||
| *data_size = out_size; | |||||
| return consumed; | |||||
| } | } | ||||
| static av_cold int libspeex_decode_close(AVCodecContext *avctx) | static av_cold int libspeex_decode_close(AVCodecContext *avctx) | ||||
| @@ -138,6 +148,12 @@ static av_cold int libspeex_decode_close(AVCodecContext *avctx) | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| static av_cold void libspeex_decode_flush(AVCodecContext *avctx) | |||||
| { | |||||
| LibSpeexContext *s = avctx->priv_data; | |||||
| speex_bits_reset(&s->bits); | |||||
| } | |||||
| AVCodec ff_libspeex_decoder = { | AVCodec ff_libspeex_decoder = { | ||||
| .name = "libspeex", | .name = "libspeex", | ||||
| .type = AVMEDIA_TYPE_AUDIO, | .type = AVMEDIA_TYPE_AUDIO, | ||||
| @@ -146,5 +162,7 @@ AVCodec ff_libspeex_decoder = { | |||||
| .init = libspeex_decode_init, | .init = libspeex_decode_init, | ||||
| .close = libspeex_decode_close, | .close = libspeex_decode_close, | ||||
| .decode = libspeex_decode_frame, | .decode = libspeex_decode_frame, | ||||
| .flush = libspeex_decode_flush, | |||||
| .capabilities = CODEC_CAP_SUBFRAMES | CODEC_CAP_DELAY, | |||||
| .long_name = NULL_IF_CONFIG_SMALL("libspeex Speex"), | .long_name = NULL_IF_CONFIG_SMALL("libspeex Speex"), | ||||
| }; | }; | ||||
| @@ -1893,24 +1893,50 @@ typedef struct MP3On4DecodeContext { | |||||
| int syncword; ///< syncword patch | int syncword; ///< syncword patch | ||||
| const uint8_t *coff; ///< channels offsets in output buffer | const uint8_t *coff; ///< channels offsets in output buffer | ||||
| MPADecodeContext *mp3decctx[5]; ///< MPADecodeContext for every decoder instance | MPADecodeContext *mp3decctx[5]; ///< MPADecodeContext for every decoder instance | ||||
| OUT_INT *decoded_buf; ///< output buffer for decoded samples | |||||
| } MP3On4DecodeContext; | } MP3On4DecodeContext; | ||||
| #include "mpeg4audio.h" | #include "mpeg4audio.h" | ||||
| /* Next 3 arrays are indexed by channel config number (passed via codecdata) */ | /* Next 3 arrays are indexed by channel config number (passed via codecdata) */ | ||||
| static const uint8_t mp3Frames[8] = {0,1,1,2,3,3,4,5}; /* number of mp3 decoder instances */ | static const uint8_t mp3Frames[8] = {0,1,1,2,3,3,4,5}; /* number of mp3 decoder instances */ | ||||
| /* offsets into output buffer, assume output order is FL FR BL BR C LFE */ | |||||
| /* offsets into output buffer, assume output order is FL FR C LFE BL BR SL SR */ | |||||
| static const uint8_t chan_offset[8][5] = { | static const uint8_t chan_offset[8][5] = { | ||||
| {0}, | {0}, | ||||
| {0}, // C | {0}, // C | ||||
| {0}, // FLR | {0}, // FLR | ||||
| {2,0}, // C FLR | {2,0}, // C FLR | ||||
| {2,0,3}, // C FLR BS | {2,0,3}, // C FLR BS | ||||
| {4,0,2}, // C FLR BLRS | |||||
| {4,0,2,5}, // C FLR BLRS LFE | |||||
| {4,0,2,6,5}, // C FLR BLRS BLR LFE | |||||
| {2,0,3}, // C FLR BLRS | |||||
| {2,0,4,3}, // C FLR BLRS LFE | |||||
| {2,0,6,4,3}, // C FLR BLRS BLR LFE | |||||
| }; | }; | ||||
| /* mp3on4 channel layouts */ | |||||
| static const int16_t chan_layout[8] = { | |||||
| 0, | |||||
| AV_CH_LAYOUT_MONO, | |||||
| AV_CH_LAYOUT_STEREO, | |||||
| AV_CH_LAYOUT_SURROUND, | |||||
| AV_CH_LAYOUT_4POINT0, | |||||
| AV_CH_LAYOUT_5POINT0, | |||||
| AV_CH_LAYOUT_5POINT1, | |||||
| AV_CH_LAYOUT_7POINT1 | |||||
| }; | |||||
| static av_cold int decode_close_mp3on4(AVCodecContext * avctx) | |||||
| { | |||||
| MP3On4DecodeContext *s = avctx->priv_data; | |||||
| int i; | |||||
| for (i = 0; i < s->frames; i++) | |||||
| av_free(s->mp3decctx[i]); | |||||
| av_freep(&s->decoded_buf); | |||||
| return 0; | |||||
| } | |||||
| static int decode_init_mp3on4(AVCodecContext * avctx) | static int decode_init_mp3on4(AVCodecContext * avctx) | ||||
| { | { | ||||
| @@ -1931,6 +1957,7 @@ static int decode_init_mp3on4(AVCodecContext * avctx) | |||||
| s->frames = mp3Frames[cfg.chan_config]; | s->frames = mp3Frames[cfg.chan_config]; | ||||
| s->coff = chan_offset[cfg.chan_config]; | s->coff = chan_offset[cfg.chan_config]; | ||||
| avctx->channels = ff_mpeg4audio_channels[cfg.chan_config]; | avctx->channels = ff_mpeg4audio_channels[cfg.chan_config]; | ||||
| avctx->channel_layout = chan_layout[cfg.chan_config]; | |||||
| if (cfg.sample_rate < 16000) | if (cfg.sample_rate < 16000) | ||||
| s->syncword = 0xffe00000; | s->syncword = 0xffe00000; | ||||
| @@ -1944,6 +1971,8 @@ static int decode_init_mp3on4(AVCodecContext * avctx) | |||||
| */ | */ | ||||
| // Allocate zeroed memory for the first decoder context | // Allocate zeroed memory for the first decoder context | ||||
| s->mp3decctx[0] = av_mallocz(sizeof(MPADecodeContext)); | s->mp3decctx[0] = av_mallocz(sizeof(MPADecodeContext)); | ||||
| if (!s->mp3decctx[0]) | |||||
| goto alloc_fail; | |||||
| // Put decoder context in place to make init_decode() happy | // Put decoder context in place to make init_decode() happy | ||||
| avctx->priv_data = s->mp3decctx[0]; | avctx->priv_data = s->mp3decctx[0]; | ||||
| decode_init(avctx); | decode_init(avctx); | ||||
| @@ -1956,23 +1985,38 @@ static int decode_init_mp3on4(AVCodecContext * avctx) | |||||
| */ | */ | ||||
| for (i = 1; i < s->frames; i++) { | for (i = 1; i < s->frames; i++) { | ||||
| s->mp3decctx[i] = av_mallocz(sizeof(MPADecodeContext)); | s->mp3decctx[i] = av_mallocz(sizeof(MPADecodeContext)); | ||||
| if (!s->mp3decctx[i]) | |||||
| goto alloc_fail; | |||||
| s->mp3decctx[i]->adu_mode = 1; | s->mp3decctx[i]->adu_mode = 1; | ||||
| s->mp3decctx[i]->avctx = avctx; | s->mp3decctx[i]->avctx = avctx; | ||||
| s->mp3decctx[i]->mpadsp = s->mp3decctx[0]->mpadsp; | |||||
| } | |||||
| /* Allocate buffer for multi-channel output if needed */ | |||||
| if (s->frames > 1) { | |||||
| s->decoded_buf = av_malloc(MPA_FRAME_SIZE * MPA_MAX_CHANNELS * | |||||
| sizeof(*s->decoded_buf)); | |||||
| if (!s->decoded_buf) | |||||
| goto alloc_fail; | |||||
| } | } | ||||
| return 0; | return 0; | ||||
| alloc_fail: | |||||
| decode_close_mp3on4(avctx); | |||||
| return AVERROR(ENOMEM); | |||||
| } | } | ||||
| static av_cold int decode_close_mp3on4(AVCodecContext * avctx) | |||||
| static void flush_mp3on4(AVCodecContext *avctx) | |||||
| { | { | ||||
| MP3On4DecodeContext *s = avctx->priv_data; | |||||
| int i; | int i; | ||||
| MP3On4DecodeContext *s = avctx->priv_data; | |||||
| for (i = 0; i < s->frames; i++) | |||||
| av_free(s->mp3decctx[i]); | |||||
| return 0; | |||||
| for (i = 0; i < s->frames; i++) { | |||||
| MPADecodeContext *m = s->mp3decctx[i]; | |||||
| memset(m->synth_buf, 0, sizeof(m->synth_buf)); | |||||
| m->last_buf_size = 0; | |||||
| } | |||||
| } | } | ||||
| @@ -1987,12 +2031,13 @@ static int decode_frame_mp3on4(AVCodecContext * avctx, | |||||
| int fsize, len = buf_size, out_size = 0; | int fsize, len = buf_size, out_size = 0; | ||||
| uint32_t header; | uint32_t header; | ||||
| OUT_INT *out_samples = data; | OUT_INT *out_samples = data; | ||||
| OUT_INT decoded_buf[MPA_FRAME_SIZE * MPA_MAX_CHANNELS]; | |||||
| OUT_INT *outptr, *bp; | OUT_INT *outptr, *bp; | ||||
| int fr, j, n; | |||||
| int fr, j, n, ch; | |||||
| if(*data_size < MPA_FRAME_SIZE * MPA_MAX_CHANNELS * s->frames * sizeof(OUT_INT)) | |||||
| return -1; | |||||
| if (*data_size < MPA_FRAME_SIZE * avctx->channels * sizeof(OUT_INT)) { | |||||
| av_log(avctx, AV_LOG_ERROR, "output buffer is too small\n"); | |||||
| return AVERROR(EINVAL); | |||||
| } | |||||
| *data_size = 0; | *data_size = 0; | ||||
| // Discard too short frames | // Discard too short frames | ||||
| @@ -2000,10 +2045,11 @@ static int decode_frame_mp3on4(AVCodecContext * avctx, | |||||
| return -1; | return -1; | ||||
| // If only one decoder interleave is not needed | // If only one decoder interleave is not needed | ||||
| outptr = s->frames == 1 ? out_samples : decoded_buf; | |||||
| outptr = s->frames == 1 ? out_samples : s->decoded_buf; | |||||
| avctx->bit_rate = 0; | avctx->bit_rate = 0; | ||||
| ch = 0; | |||||
| for (fr = 0; fr < s->frames; fr++) { | for (fr = 0; fr < s->frames; fr++) { | ||||
| fsize = AV_RB16(buf) >> 4; | fsize = AV_RB16(buf) >> 4; | ||||
| fsize = FFMIN3(fsize, len, MPA_MAX_CODED_FRAME_SIZE); | fsize = FFMIN3(fsize, len, MPA_MAX_CODED_FRAME_SIZE); | ||||
| @@ -2016,6 +2062,14 @@ static int decode_frame_mp3on4(AVCodecContext * avctx, | |||||
| break; | break; | ||||
| avpriv_mpegaudio_decode_header((MPADecodeHeader *)m, header); | avpriv_mpegaudio_decode_header((MPADecodeHeader *)m, header); | ||||
| if (ch + m->nb_channels > avctx->channels) { | |||||
| av_log(avctx, AV_LOG_ERROR, "frame channel count exceeds codec " | |||||
| "channel count\n"); | |||||
| return AVERROR_INVALIDDATA; | |||||
| } | |||||
| ch += m->nb_channels; | |||||
| out_size += mp_decode_frame(m, outptr, buf, fsize); | out_size += mp_decode_frame(m, outptr, buf, fsize); | ||||
| buf += fsize; | buf += fsize; | ||||
| len -= fsize; | len -= fsize; | ||||
| @@ -2026,13 +2080,13 @@ static int decode_frame_mp3on4(AVCodecContext * avctx, | |||||
| bp = out_samples + s->coff[fr]; | bp = out_samples + s->coff[fr]; | ||||
| if(m->nb_channels == 1) { | if(m->nb_channels == 1) { | ||||
| for(j = 0; j < n; j++) { | for(j = 0; j < n; j++) { | ||||
| *bp = decoded_buf[j]; | |||||
| *bp = s->decoded_buf[j]; | |||||
| bp += avctx->channels; | bp += avctx->channels; | ||||
| } | } | ||||
| } else { | } else { | ||||
| for(j = 0; j < n; j++) { | for(j = 0; j < n; j++) { | ||||
| bp[0] = decoded_buf[j++]; | |||||
| bp[1] = decoded_buf[j]; | |||||
| bp[0] = s->decoded_buf[j++]; | |||||
| bp[1] = s->decoded_buf[j]; | |||||
| bp += avctx->channels; | bp += avctx->channels; | ||||
| } | } | ||||
| } | } | ||||
| @@ -2110,7 +2164,7 @@ AVCodec ff_mp3on4_decoder = { | |||||
| .init = decode_init_mp3on4, | .init = decode_init_mp3on4, | ||||
| .close = decode_close_mp3on4, | .close = decode_close_mp3on4, | ||||
| .decode = decode_frame_mp3on4, | .decode = decode_frame_mp3on4, | ||||
| .flush = flush, | |||||
| .flush = flush_mp3on4, | |||||
| .long_name = NULL_IF_CONFIG_SMALL("MP3onMP4"), | .long_name = NULL_IF_CONFIG_SMALL("MP3onMP4"), | ||||
| }; | }; | ||||
| #endif | #endif | ||||
| @@ -83,7 +83,7 @@ AVCodec ff_mp3on4float_decoder = { | |||||
| .init = decode_init_mp3on4, | .init = decode_init_mp3on4, | ||||
| .close = decode_close_mp3on4, | .close = decode_close_mp3on4, | ||||
| .decode = decode_frame_mp3on4, | .decode = decode_frame_mp3on4, | ||||
| .flush = flush, | |||||
| .flush = flush_mp3on4, | |||||
| .long_name = NULL_IF_CONFIG_SMALL("MP3onMP4"), | .long_name = NULL_IF_CONFIG_SMALL("MP3onMP4"), | ||||
| }; | }; | ||||
| #endif | #endif | ||||
| @@ -843,7 +843,8 @@ static void h264_h_loop_filter_luma_altivec(uint8_t *pix, int stride, int alpha, | |||||
| } | } | ||||
| static av_always_inline | static av_always_inline | ||||
| void weight_h264_WxH_altivec(uint8_t *block, int stride, int log2_denom, int weight, int offset, int w, int h) | |||||
| void weight_h264_W_altivec(uint8_t *block, int stride, int height, | |||||
| int log2_denom, int weight, int offset, int w) | |||||
| { | { | ||||
| int y, aligned; | int y, aligned; | ||||
| vec_u8 vblock; | vec_u8 vblock; | ||||
| @@ -864,7 +865,7 @@ void weight_h264_WxH_altivec(uint8_t *block, int stride, int log2_denom, int wei | |||||
| voffset = vec_splat(vtemp, 5); | voffset = vec_splat(vtemp, 5); | ||||
| aligned = !((unsigned long)block & 0xf); | aligned = !((unsigned long)block & 0xf); | ||||
| for (y=0; y<h; y++) { | |||||
| for (y = 0; y < height; y++) { | |||||
| vblock = vec_ld(0, block); | vblock = vec_ld(0, block); | ||||
| v0 = (vec_s16)vec_mergeh(zero_u8v, vblock); | v0 = (vec_s16)vec_mergeh(zero_u8v, vblock); | ||||
| @@ -888,8 +889,8 @@ void weight_h264_WxH_altivec(uint8_t *block, int stride, int log2_denom, int wei | |||||
| } | } | ||||
| static av_always_inline | static av_always_inline | ||||
| void biweight_h264_WxH_altivec(uint8_t *dst, uint8_t *src, int stride, int log2_denom, | |||||
| int weightd, int weights, int offset, int w, int h) | |||||
| void biweight_h264_W_altivec(uint8_t *dst, uint8_t *src, int stride, int height, | |||||
| int log2_denom, int weightd, int weights, int offset, int w) | |||||
| { | { | ||||
| int y, dst_aligned, src_aligned; | int y, dst_aligned, src_aligned; | ||||
| vec_u8 vsrc, vdst; | vec_u8 vsrc, vdst; | ||||
| @@ -912,7 +913,7 @@ void biweight_h264_WxH_altivec(uint8_t *dst, uint8_t *src, int stride, int log2_ | |||||
| dst_aligned = !((unsigned long)dst & 0xf); | dst_aligned = !((unsigned long)dst & 0xf); | ||||
| src_aligned = !((unsigned long)src & 0xf); | src_aligned = !((unsigned long)src & 0xf); | ||||
| for (y=0; y<h; y++) { | |||||
| for (y = 0; y < height; y++) { | |||||
| vdst = vec_ld(0, dst); | vdst = vec_ld(0, dst); | ||||
| vsrc = vec_ld(0, src); | vsrc = vec_ld(0, src); | ||||
| @@ -952,19 +953,18 @@ void biweight_h264_WxH_altivec(uint8_t *dst, uint8_t *src, int stride, int log2_ | |||||
| } | } | ||||
| } | } | ||||
| #define H264_WEIGHT(W,H) \ | |||||
| static void ff_weight_h264_pixels ## W ## x ## H ## _altivec(uint8_t *block, int stride, int log2_denom, int weight, int offset){ \ | |||||
| weight_h264_WxH_altivec(block, stride, log2_denom, weight, offset, W, H); \ | |||||
| #define H264_WEIGHT(W) \ | |||||
| static void ff_weight_h264_pixels ## W ## _altivec(uint8_t *block, int stride, int height, \ | |||||
| int log2_denom, int weight, int offset){ \ | |||||
| weight_h264_W_altivec(block, stride, height, log2_denom, weight, offset, W); \ | |||||
| }\ | }\ | ||||
| static void ff_biweight_h264_pixels ## W ## x ## H ## _altivec(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset){ \ | |||||
| biweight_h264_WxH_altivec(dst, src, stride, log2_denom, weightd, weights, offset, W, H); \ | |||||
| static void ff_biweight_h264_pixels ## W ## _altivec(uint8_t *dst, uint8_t *src, int stride, int height, \ | |||||
| int log2_denom, int weightd, int weights, int offset){ \ | |||||
| biweight_h264_W_altivec(dst, src, stride, height, log2_denom, weightd, weights, offset, W); \ | |||||
| } | } | ||||
| H264_WEIGHT(16,16) | |||||
| H264_WEIGHT(16, 8) | |||||
| H264_WEIGHT( 8,16) | |||||
| H264_WEIGHT( 8, 8) | |||||
| H264_WEIGHT( 8, 4) | |||||
| H264_WEIGHT(16) | |||||
| H264_WEIGHT( 8) | |||||
| void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) { | void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) { | ||||
| const int high_bit_depth = avctx->bits_per_raw_sample > 8; | const int high_bit_depth = avctx->bits_per_raw_sample > 8; | ||||
| @@ -1015,16 +1015,10 @@ void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth, const int chrom | |||||
| c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_altivec; | c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_altivec; | ||||
| c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_altivec; | c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_altivec; | ||||
| c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels16x16_altivec; | |||||
| c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels16x8_altivec; | |||||
| c->weight_h264_pixels_tab[2] = ff_weight_h264_pixels8x16_altivec; | |||||
| c->weight_h264_pixels_tab[3] = ff_weight_h264_pixels8x8_altivec; | |||||
| c->weight_h264_pixels_tab[4] = ff_weight_h264_pixels8x4_altivec; | |||||
| c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels16x16_altivec; | |||||
| c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels16x8_altivec; | |||||
| c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels8x16_altivec; | |||||
| c->biweight_h264_pixels_tab[3] = ff_biweight_h264_pixels8x8_altivec; | |||||
| c->biweight_h264_pixels_tab[4] = ff_biweight_h264_pixels8x4_altivec; | |||||
| c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels16_altivec; | |||||
| c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels8_altivec; | |||||
| c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels16_altivec; | |||||
| c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels8_altivec; | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| @@ -158,6 +158,8 @@ void avcodec_align_dimensions2(AVCodecContext *s, int *width, int *height, int l | |||||
| case PIX_FMT_YUV420P9BE: | case PIX_FMT_YUV420P9BE: | ||||
| case PIX_FMT_YUV420P10LE: | case PIX_FMT_YUV420P10LE: | ||||
| case PIX_FMT_YUV420P10BE: | case PIX_FMT_YUV420P10BE: | ||||
| case PIX_FMT_YUV422P9LE: | |||||
| case PIX_FMT_YUV422P9BE: | |||||
| case PIX_FMT_YUV422P10LE: | case PIX_FMT_YUV422P10LE: | ||||
| case PIX_FMT_YUV422P10BE: | case PIX_FMT_YUV422P10BE: | ||||
| case PIX_FMT_YUV444P9LE: | case PIX_FMT_YUV444P9LE: | ||||
| @@ -41,24 +41,57 @@ static void free_buffers(VP8Context *s) | |||||
| av_freep(&s->top_nnz); | av_freep(&s->top_nnz); | ||||
| av_freep(&s->edge_emu_buffer); | av_freep(&s->edge_emu_buffer); | ||||
| av_freep(&s->top_border); | av_freep(&s->top_border); | ||||
| av_freep(&s->segmentation_map); | |||||
| s->macroblocks = NULL; | s->macroblocks = NULL; | ||||
| } | } | ||||
| static void vp8_decode_flush(AVCodecContext *avctx) | |||||
| static int vp8_alloc_frame(VP8Context *s, AVFrame *f) | |||||
| { | |||||
| int ret; | |||||
| if ((ret = ff_thread_get_buffer(s->avctx, f)) < 0) | |||||
| return ret; | |||||
| if (!s->maps_are_invalid && s->num_maps_to_be_freed) { | |||||
| f->ref_index[0] = s->segmentation_maps[--s->num_maps_to_be_freed]; | |||||
| } else if (!(f->ref_index[0] = av_mallocz(s->mb_width * s->mb_height))) { | |||||
| ff_thread_release_buffer(s->avctx, f); | |||||
| return AVERROR(ENOMEM); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| static void vp8_release_frame(VP8Context *s, AVFrame *f, int is_close) | |||||
| { | |||||
| if (!is_close) { | |||||
| if (f->ref_index[0]) { | |||||
| assert(s->num_maps_to_be_freed < FF_ARRAY_ELEMS(s->segmentation_maps)); | |||||
| s->segmentation_maps[s->num_maps_to_be_freed++] = f->ref_index[0]; | |||||
| f->ref_index[0] = NULL; | |||||
| } | |||||
| } else { | |||||
| av_freep(&f->ref_index[0]); | |||||
| } | |||||
| ff_thread_release_buffer(s->avctx, f); | |||||
| } | |||||
| static void vp8_decode_flush_impl(AVCodecContext *avctx, int force, int is_close) | |||||
| { | { | ||||
| VP8Context *s = avctx->priv_data; | VP8Context *s = avctx->priv_data; | ||||
| int i; | int i; | ||||
| if (!avctx->is_copy) { | |||||
| if (!avctx->is_copy || force) { | |||||
| for (i = 0; i < 5; i++) | for (i = 0; i < 5; i++) | ||||
| if (s->frames[i].data[0]) | if (s->frames[i].data[0]) | ||||
| ff_thread_release_buffer(avctx, &s->frames[i]); | |||||
| vp8_release_frame(s, &s->frames[i], is_close); | |||||
| } | } | ||||
| memset(s->framep, 0, sizeof(s->framep)); | memset(s->framep, 0, sizeof(s->framep)); | ||||
| free_buffers(s); | free_buffers(s); | ||||
| s->maps_are_invalid = 1; | |||||
| } | |||||
| static void vp8_decode_flush(AVCodecContext *avctx) | |||||
| { | |||||
| vp8_decode_flush_impl(avctx, 0, 0); | |||||
| } | } | ||||
| static int update_dimensions(VP8Context *s, int width, int height) | static int update_dimensions(VP8Context *s, int width, int height) | ||||
| @@ -68,7 +101,7 @@ static int update_dimensions(VP8Context *s, int width, int height) | |||||
| if (av_image_check_size(width, height, 0, s->avctx)) | if (av_image_check_size(width, height, 0, s->avctx)) | ||||
| return AVERROR_INVALIDDATA; | return AVERROR_INVALIDDATA; | ||||
| vp8_decode_flush(s->avctx); | |||||
| vp8_decode_flush_impl(s->avctx, 1, 0); | |||||
| avcodec_set_dimensions(s->avctx, width, height); | avcodec_set_dimensions(s->avctx, width, height); | ||||
| } | } | ||||
| @@ -81,10 +114,9 @@ static int update_dimensions(VP8Context *s, int width, int height) | |||||
| s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4); | s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4); | ||||
| s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz)); | s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz)); | ||||
| s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border)); | s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border)); | ||||
| s->segmentation_map = av_mallocz(s->mb_width*s->mb_height); | |||||
| if (!s->macroblocks_base || !s->filter_strength || !s->intra4x4_pred_mode_top || | if (!s->macroblocks_base || !s->filter_strength || !s->intra4x4_pred_mode_top || | ||||
| !s->top_nnz || !s->top_border || !s->segmentation_map) | |||||
| !s->top_nnz || !s->top_border) | |||||
| return AVERROR(ENOMEM); | return AVERROR(ENOMEM); | ||||
| s->macroblocks = s->macroblocks_base + 1; | s->macroblocks = s->macroblocks_base + 1; | ||||
| @@ -1508,6 +1540,14 @@ static void filter_mb_row_simple(VP8Context *s, AVFrame *curframe, int mb_y) | |||||
| } | } | ||||
| } | } | ||||
| static void release_queued_segmaps(VP8Context *s, int is_close) | |||||
| { | |||||
| int leave_behind = is_close ? 0 : !s->maps_are_invalid; | |||||
| while (s->num_maps_to_be_freed > leave_behind) | |||||
| av_freep(&s->segmentation_maps[--s->num_maps_to_be_freed]); | |||||
| s->maps_are_invalid = 0; | |||||
| } | |||||
| static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size, | static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size, | ||||
| AVPacket *avpkt) | AVPacket *avpkt) | ||||
| { | { | ||||
| @@ -1516,6 +1556,8 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size, | |||||
| enum AVDiscard skip_thresh; | enum AVDiscard skip_thresh; | ||||
| AVFrame *av_uninit(curframe), *prev_frame = s->framep[VP56_FRAME_CURRENT]; | AVFrame *av_uninit(curframe), *prev_frame = s->framep[VP56_FRAME_CURRENT]; | ||||
| release_queued_segmaps(s, 0); | |||||
| if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0) | if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0) | ||||
| return ret; | return ret; | ||||
| @@ -1538,7 +1580,7 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size, | |||||
| &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] && | &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] && | ||||
| &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] && | &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] && | ||||
| &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) | &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) | ||||
| ff_thread_release_buffer(avctx, &s->frames[i]); | |||||
| vp8_release_frame(s, &s->frames[i], 0); | |||||
| // find a free buffer | // find a free buffer | ||||
| for (i = 0; i < 5; i++) | for (i = 0; i < 5; i++) | ||||
| @@ -1559,8 +1601,7 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size, | |||||
| curframe->key_frame = s->keyframe; | curframe->key_frame = s->keyframe; | ||||
| curframe->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P; | curframe->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P; | ||||
| curframe->reference = referenced ? 3 : 0; | curframe->reference = referenced ? 3 : 0; | ||||
| curframe->ref_index[0] = s->segmentation_map; | |||||
| if ((ret = ff_thread_get_buffer(avctx, curframe))) { | |||||
| if ((ret = vp8_alloc_frame(s, curframe))) { | |||||
| av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n"); | av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n"); | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| @@ -1652,8 +1693,8 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size, | |||||
| s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4); | s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4); | ||||
| s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2); | s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2); | ||||
| decode_mb_mode(s, mb, mb_x, mb_y, s->segmentation_map + mb_xy, | |||||
| prev_frame ? prev_frame->ref_index[0] + mb_xy : NULL); | |||||
| decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy, | |||||
| prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL); | |||||
| prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS); | prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS); | ||||
| @@ -1736,7 +1777,8 @@ static av_cold int vp8_decode_init(AVCodecContext *avctx) | |||||
| static av_cold int vp8_decode_free(AVCodecContext *avctx) | static av_cold int vp8_decode_free(AVCodecContext *avctx) | ||||
| { | { | ||||
| vp8_decode_flush(avctx); | |||||
| vp8_decode_flush_impl(avctx, 0, 1); | |||||
| release_queued_segmaps(avctx->priv_data, 1); | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| @@ -130,7 +130,6 @@ typedef struct { | |||||
| uint8_t *intra4x4_pred_mode_top; | uint8_t *intra4x4_pred_mode_top; | ||||
| uint8_t intra4x4_pred_mode_left[4]; | uint8_t intra4x4_pred_mode_left[4]; | ||||
| uint8_t *segmentation_map; | |||||
| /** | /** | ||||
| * Macroblocks can have one of 4 different quants in a frame when | * Macroblocks can have one of 4 different quants in a frame when | ||||
| @@ -237,6 +236,16 @@ typedef struct { | |||||
| H264PredContext hpc; | H264PredContext hpc; | ||||
| vp8_mc_func put_pixels_tab[3][3][3]; | vp8_mc_func put_pixels_tab[3][3][3]; | ||||
| AVFrame frames[5]; | AVFrame frames[5]; | ||||
| /** | |||||
| * A list of segmentation_map buffers that are to be free()'ed in | |||||
| * the next decoding iteration. We can't free() them right away | |||||
| * because the map may still be used by subsequent decoding threads. | |||||
| * Unused if frame threading is off. | |||||
| */ | |||||
| uint8_t *segmentation_maps[5]; | |||||
| int num_maps_to_be_freed; | |||||
| int maps_are_invalid; | |||||
| } VP8Context; | } VP8Context; | ||||
| #endif /* AVCODEC_VP8_H */ | #endif /* AVCODEC_VP8_H */ | ||||
| @@ -1055,14 +1055,6 @@ emu_edge mmx | |||||
| ; int32_t max, unsigned int len) | ; int32_t max, unsigned int len) | ||||
| ;----------------------------------------------------------------------------- | ;----------------------------------------------------------------------------- | ||||
| %macro SPLATD_MMX 1 | |||||
| punpckldq %1, %1 | |||||
| %endmacro | |||||
| %macro SPLATD_SSE2 1 | |||||
| pshufd %1, %1, 0 | |||||
| %endmacro | |||||
| %macro VECTOR_CLIP_INT32 4 | %macro VECTOR_CLIP_INT32 4 | ||||
| cglobal vector_clip_int32_%1, 5,5,%2, dst, src, min, max, len | cglobal vector_clip_int32_%1, 5,5,%2, dst, src, min, max, len | ||||
| %ifidn %1, sse2 | %ifidn %1, sse2 | ||||
| @@ -24,6 +24,146 @@ | |||||
| SECTION_TEXT | SECTION_TEXT | ||||
| ;--------------------------------------------------------------------------------- | |||||
| ; void int32_to_float_fmul_scalar(float *dst, const int *src, float mul, int len); | |||||
| ;--------------------------------------------------------------------------------- | |||||
| %macro INT32_TO_FLOAT_FMUL_SCALAR 2 | |||||
| %ifdef ARCH_X86_64 | |||||
| cglobal int32_to_float_fmul_scalar_%1, 3,3,%2, dst, src, len | |||||
| %else | |||||
| cglobal int32_to_float_fmul_scalar_%1, 4,4,%2, dst, src, mul, len | |||||
| movss m0, mulm | |||||
| %endif | |||||
| SPLATD m0 | |||||
| shl lenq, 2 | |||||
| add srcq, lenq | |||||
| add dstq, lenq | |||||
| neg lenq | |||||
| .loop: | |||||
| %ifidn %1, sse2 | |||||
| cvtdq2ps m1, [srcq+lenq ] | |||||
| cvtdq2ps m2, [srcq+lenq+16] | |||||
| %else | |||||
| cvtpi2ps m1, [srcq+lenq ] | |||||
| cvtpi2ps m3, [srcq+lenq+ 8] | |||||
| cvtpi2ps m2, [srcq+lenq+16] | |||||
| cvtpi2ps m4, [srcq+lenq+24] | |||||
| movlhps m1, m3 | |||||
| movlhps m2, m4 | |||||
| %endif | |||||
| mulps m1, m0 | |||||
| mulps m2, m0 | |||||
| mova [dstq+lenq ], m1 | |||||
| mova [dstq+lenq+16], m2 | |||||
| add lenq, 32 | |||||
| jl .loop | |||||
| REP_RET | |||||
| %endmacro | |||||
| INIT_XMM | |||||
| %define SPLATD SPLATD_SSE | |||||
| %define movdqa movaps | |||||
| INT32_TO_FLOAT_FMUL_SCALAR sse, 5 | |||||
| %undef movdqa | |||||
| %define SPLATD SPLATD_SSE2 | |||||
| INT32_TO_FLOAT_FMUL_SCALAR sse2, 3 | |||||
| %undef SPLATD | |||||
| ;------------------------------------------------------------------------------ | |||||
| ; void ff_float_to_int16(int16_t *dst, const float *src, long len); | |||||
| ;------------------------------------------------------------------------------ | |||||
| %macro FLOAT_TO_INT16 2 | |||||
| cglobal float_to_int16_%1, 3,3,%2, dst, src, len | |||||
| add lenq, lenq | |||||
| lea srcq, [srcq+2*lenq] | |||||
| add dstq, lenq | |||||
| neg lenq | |||||
| .loop: | |||||
| %ifidn %1, sse2 | |||||
| cvtps2dq m0, [srcq+2*lenq ] | |||||
| cvtps2dq m1, [srcq+2*lenq+16] | |||||
| packssdw m0, m1 | |||||
| mova [dstq+lenq], m0 | |||||
| %else | |||||
| cvtps2pi m0, [srcq+2*lenq ] | |||||
| cvtps2pi m1, [srcq+2*lenq+ 8] | |||||
| cvtps2pi m2, [srcq+2*lenq+16] | |||||
| cvtps2pi m3, [srcq+2*lenq+24] | |||||
| packssdw m0, m1 | |||||
| packssdw m2, m3 | |||||
| mova [dstq+lenq ], m0 | |||||
| mova [dstq+lenq+8], m2 | |||||
| %endif | |||||
| add lenq, 16 | |||||
| js .loop | |||||
| %ifnidn %1, sse2 | |||||
| emms | |||||
| %endif | |||||
| REP_RET | |||||
| %endmacro | |||||
| INIT_XMM | |||||
| FLOAT_TO_INT16 sse2, 2 | |||||
| INIT_MMX | |||||
| FLOAT_TO_INT16 sse, 0 | |||||
| %define cvtps2pi pf2id | |||||
| FLOAT_TO_INT16 3dnow, 0 | |||||
| %undef cvtps2pi | |||||
| ;------------------------------------------------------------------------------- | |||||
| ; void ff_float_to_int16_interleave2(int16_t *dst, const float **src, long len); | |||||
| ;------------------------------------------------------------------------------- | |||||
| %macro FLOAT_TO_INT16_INTERLEAVE2 1 | |||||
| cglobal float_to_int16_interleave2_%1, 3,4,2, dst, src0, src1, len | |||||
| lea lenq, [4*r2q] | |||||
| mov src1q, [src0q+gprsize] | |||||
| mov src0q, [src0q] | |||||
| add dstq, lenq | |||||
| add src0q, lenq | |||||
| add src1q, lenq | |||||
| neg lenq | |||||
| .loop: | |||||
| %ifidn %1, sse2 | |||||
| cvtps2dq m0, [src0q+lenq] | |||||
| cvtps2dq m1, [src1q+lenq] | |||||
| packssdw m0, m1 | |||||
| movhlps m1, m0 | |||||
| punpcklwd m0, m1 | |||||
| mova [dstq+lenq], m0 | |||||
| %else | |||||
| cvtps2pi m0, [src0q+lenq ] | |||||
| cvtps2pi m1, [src0q+lenq+8] | |||||
| cvtps2pi m2, [src1q+lenq ] | |||||
| cvtps2pi m3, [src1q+lenq+8] | |||||
| packssdw m0, m1 | |||||
| packssdw m2, m3 | |||||
| mova m1, m0 | |||||
| punpcklwd m0, m2 | |||||
| punpckhwd m1, m2 | |||||
| mova [dstq+lenq ], m0 | |||||
| mova [dstq+lenq+8], m1 | |||||
| %endif | |||||
| add lenq, 16 | |||||
| js .loop | |||||
| %ifnidn %1, sse2 | |||||
| emms | |||||
| %endif | |||||
| REP_RET | |||||
| %endmacro | |||||
| INIT_MMX | |||||
| %define cvtps2pi pf2id | |||||
| FLOAT_TO_INT16_INTERLEAVE2 3dnow | |||||
| %undef cvtps2pi | |||||
| %define movdqa movaps | |||||
| FLOAT_TO_INT16_INTERLEAVE2 sse | |||||
| %undef movdqa | |||||
| INIT_XMM | |||||
| FLOAT_TO_INT16_INTERLEAVE2 sse2 | |||||
| %macro PSWAPD_SSE 2 | %macro PSWAPD_SSE 2 | ||||
| pshufw %1, %2, 0x4e | pshufw %1, %2, 0x4e | ||||
| %endmacro | %endmacro | ||||
| @@ -26,133 +26,32 @@ | |||||
| #include "libavutil/x86_cpu.h" | #include "libavutil/x86_cpu.h" | ||||
| #include "libavcodec/fmtconvert.h" | #include "libavcodec/fmtconvert.h" | ||||
| static void int32_to_float_fmul_scalar_sse(float *dst, const int *src, float mul, int len) | |||||
| { | |||||
| x86_reg i = -4*len; | |||||
| __asm__ volatile( | |||||
| "movss %3, %%xmm4 \n" | |||||
| "shufps $0, %%xmm4, %%xmm4 \n" | |||||
| "1: \n" | |||||
| "cvtpi2ps (%2,%0), %%xmm0 \n" | |||||
| "cvtpi2ps 8(%2,%0), %%xmm1 \n" | |||||
| "cvtpi2ps 16(%2,%0), %%xmm2 \n" | |||||
| "cvtpi2ps 24(%2,%0), %%xmm3 \n" | |||||
| "movlhps %%xmm1, %%xmm0 \n" | |||||
| "movlhps %%xmm3, %%xmm2 \n" | |||||
| "mulps %%xmm4, %%xmm0 \n" | |||||
| "mulps %%xmm4, %%xmm2 \n" | |||||
| "movaps %%xmm0, (%1,%0) \n" | |||||
| "movaps %%xmm2, 16(%1,%0) \n" | |||||
| "add $32, %0 \n" | |||||
| "jl 1b \n" | |||||
| :"+r"(i) | |||||
| :"r"(dst+len), "r"(src+len), "m"(mul) | |||||
| ); | |||||
| } | |||||
| static void int32_to_float_fmul_scalar_sse2(float *dst, const int *src, float mul, int len) | |||||
| { | |||||
| x86_reg i = -4*len; | |||||
| __asm__ volatile( | |||||
| "movss %3, %%xmm4 \n" | |||||
| "shufps $0, %%xmm4, %%xmm4 \n" | |||||
| "1: \n" | |||||
| "cvtdq2ps (%2,%0), %%xmm0 \n" | |||||
| "cvtdq2ps 16(%2,%0), %%xmm1 \n" | |||||
| "mulps %%xmm4, %%xmm0 \n" | |||||
| "mulps %%xmm4, %%xmm1 \n" | |||||
| "movaps %%xmm0, (%1,%0) \n" | |||||
| "movaps %%xmm1, 16(%1,%0) \n" | |||||
| "add $32, %0 \n" | |||||
| "jl 1b \n" | |||||
| :"+r"(i) | |||||
| :"r"(dst+len), "r"(src+len), "m"(mul) | |||||
| ); | |||||
| } | |||||
| #if HAVE_YASM | |||||
| static void float_to_int16_3dnow(int16_t *dst, const float *src, long len){ | |||||
| x86_reg reglen = len; | |||||
| // not bit-exact: pf2id uses different rounding than C and SSE | |||||
| __asm__ volatile( | |||||
| "add %0 , %0 \n\t" | |||||
| "lea (%2,%0,2) , %2 \n\t" | |||||
| "add %0 , %1 \n\t" | |||||
| "neg %0 \n\t" | |||||
| "1: \n\t" | |||||
| "pf2id (%2,%0,2) , %%mm0 \n\t" | |||||
| "pf2id 8(%2,%0,2) , %%mm1 \n\t" | |||||
| "pf2id 16(%2,%0,2) , %%mm2 \n\t" | |||||
| "pf2id 24(%2,%0,2) , %%mm3 \n\t" | |||||
| "packssdw %%mm1 , %%mm0 \n\t" | |||||
| "packssdw %%mm3 , %%mm2 \n\t" | |||||
| "movq %%mm0 , (%1,%0) \n\t" | |||||
| "movq %%mm2 , 8(%1,%0) \n\t" | |||||
| "add $16 , %0 \n\t" | |||||
| " js 1b \n\t" | |||||
| "femms \n\t" | |||||
| :"+r"(reglen), "+r"(dst), "+r"(src) | |||||
| ); | |||||
| } | |||||
| void ff_int32_to_float_fmul_scalar_sse (float *dst, const int *src, float mul, int len); | |||||
| void ff_int32_to_float_fmul_scalar_sse2(float *dst, const int *src, float mul, int len); | |||||
| static void float_to_int16_sse(int16_t *dst, const float *src, long len){ | |||||
| x86_reg reglen = len; | |||||
| __asm__ volatile( | |||||
| "add %0 , %0 \n\t" | |||||
| "lea (%2,%0,2) , %2 \n\t" | |||||
| "add %0 , %1 \n\t" | |||||
| "neg %0 \n\t" | |||||
| "1: \n\t" | |||||
| "cvtps2pi (%2,%0,2) , %%mm0 \n\t" | |||||
| "cvtps2pi 8(%2,%0,2) , %%mm1 \n\t" | |||||
| "cvtps2pi 16(%2,%0,2) , %%mm2 \n\t" | |||||
| "cvtps2pi 24(%2,%0,2) , %%mm3 \n\t" | |||||
| "packssdw %%mm1 , %%mm0 \n\t" | |||||
| "packssdw %%mm3 , %%mm2 \n\t" | |||||
| "movq %%mm0 , (%1,%0) \n\t" | |||||
| "movq %%mm2 , 8(%1,%0) \n\t" | |||||
| "add $16 , %0 \n\t" | |||||
| " js 1b \n\t" | |||||
| "emms \n\t" | |||||
| :"+r"(reglen), "+r"(dst), "+r"(src) | |||||
| ); | |||||
| } | |||||
| void ff_float_to_int16_3dnow(int16_t *dst, const float *src, long len); | |||||
| void ff_float_to_int16_sse (int16_t *dst, const float *src, long len); | |||||
| void ff_float_to_int16_sse2 (int16_t *dst, const float *src, long len); | |||||
| static void float_to_int16_sse2(int16_t *dst, const float *src, long len){ | |||||
| x86_reg reglen = len; | |||||
| __asm__ volatile( | |||||
| "add %0 , %0 \n\t" | |||||
| "lea (%2,%0,2) , %2 \n\t" | |||||
| "add %0 , %1 \n\t" | |||||
| "neg %0 \n\t" | |||||
| "1: \n\t" | |||||
| "cvtps2dq (%2,%0,2) , %%xmm0 \n\t" | |||||
| "cvtps2dq 16(%2,%0,2) , %%xmm1 \n\t" | |||||
| "packssdw %%xmm1 , %%xmm0 \n\t" | |||||
| "movdqa %%xmm0 , (%1,%0) \n\t" | |||||
| "add $16 , %0 \n\t" | |||||
| " js 1b \n\t" | |||||
| :"+r"(reglen), "+r"(dst), "+r"(src) | |||||
| ); | |||||
| } | |||||
| void ff_float_to_int16_interleave2_3dnow(int16_t *dst, const float **src, long len); | |||||
| void ff_float_to_int16_interleave2_sse (int16_t *dst, const float **src, long len); | |||||
| void ff_float_to_int16_interleave2_sse2 (int16_t *dst, const float **src, long len); | |||||
| void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len); | void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len); | ||||
| void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len); | void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len); | ||||
| void ff_float_to_int16_interleave6_3dn2(int16_t *dst, const float **src, int len); | void ff_float_to_int16_interleave6_3dn2(int16_t *dst, const float **src, int len); | ||||
| #if !HAVE_YASM | |||||
| #define ff_float_to_int16_interleave6_sse(a,b,c) float_to_int16_interleave_misc_sse(a,b,c,6) | |||||
| #define ff_float_to_int16_interleave6_3dnow(a,b,c) float_to_int16_interleave_misc_3dnow(a,b,c,6) | |||||
| #define ff_float_to_int16_interleave6_3dn2(a,b,c) float_to_int16_interleave_misc_3dnow(a,b,c,6) | |||||
| #endif | |||||
| #define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse | #define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse | ||||
| #define FLOAT_TO_INT16_INTERLEAVE(cpu, body) \ | |||||
| #define FLOAT_TO_INT16_INTERLEAVE(cpu) \ | |||||
| /* gcc pessimizes register allocation if this is in the same function as float_to_int16_interleave_sse2*/\ | /* gcc pessimizes register allocation if this is in the same function as float_to_int16_interleave_sse2*/\ | ||||
| static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const float **src, long len, int channels){\ | static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const float **src, long len, int channels){\ | ||||
| DECLARE_ALIGNED(16, int16_t, tmp)[len];\ | DECLARE_ALIGNED(16, int16_t, tmp)[len];\ | ||||
| int i,j,c;\ | int i,j,c;\ | ||||
| for(c=0; c<channels; c++){\ | for(c=0; c<channels; c++){\ | ||||
| float_to_int16_##cpu(tmp, src[c], len);\ | |||||
| ff_float_to_int16_##cpu(tmp, src[c], len);\ | |||||
| for(i=0, j=c; i<len; i++, j+=channels)\ | for(i=0, j=c; i<len; i++, j+=channels)\ | ||||
| dst[j] = tmp[i];\ | dst[j] = tmp[i];\ | ||||
| }\ | }\ | ||||
| @@ -160,73 +59,18 @@ static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const | |||||
| \ | \ | ||||
| static void float_to_int16_interleave_##cpu(int16_t *dst, const float **src, long len, int channels){\ | static void float_to_int16_interleave_##cpu(int16_t *dst, const float **src, long len, int channels){\ | ||||
| if(channels==1)\ | if(channels==1)\ | ||||
| float_to_int16_##cpu(dst, src[0], len);\ | |||||
| ff_float_to_int16_##cpu(dst, src[0], len);\ | |||||
| else if(channels==2){\ | else if(channels==2){\ | ||||
| x86_reg reglen = len; \ | |||||
| const float *src0 = src[0];\ | |||||
| const float *src1 = src[1];\ | |||||
| __asm__ volatile(\ | |||||
| "shl $2, %0 \n"\ | |||||
| "add %0, %1 \n"\ | |||||
| "add %0, %2 \n"\ | |||||
| "add %0, %3 \n"\ | |||||
| "neg %0 \n"\ | |||||
| body\ | |||||
| :"+r"(reglen), "+r"(dst), "+r"(src0), "+r"(src1)\ | |||||
| );\ | |||||
| ff_float_to_int16_interleave2_##cpu(dst, src, len);\ | |||||
| }else if(channels==6){\ | }else if(channels==6){\ | ||||
| ff_float_to_int16_interleave6_##cpu(dst, src, len);\ | ff_float_to_int16_interleave6_##cpu(dst, src, len);\ | ||||
| }else\ | }else\ | ||||
| float_to_int16_interleave_misc_##cpu(dst, src, len, channels);\ | float_to_int16_interleave_misc_##cpu(dst, src, len, channels);\ | ||||
| } | } | ||||
| FLOAT_TO_INT16_INTERLEAVE(3dnow, | |||||
| "1: \n" | |||||
| "pf2id (%2,%0), %%mm0 \n" | |||||
| "pf2id 8(%2,%0), %%mm1 \n" | |||||
| "pf2id (%3,%0), %%mm2 \n" | |||||
| "pf2id 8(%3,%0), %%mm3 \n" | |||||
| "packssdw %%mm1, %%mm0 \n" | |||||
| "packssdw %%mm3, %%mm2 \n" | |||||
| "movq %%mm0, %%mm1 \n" | |||||
| "punpcklwd %%mm2, %%mm0 \n" | |||||
| "punpckhwd %%mm2, %%mm1 \n" | |||||
| "movq %%mm0, (%1,%0)\n" | |||||
| "movq %%mm1, 8(%1,%0)\n" | |||||
| "add $16, %0 \n" | |||||
| "js 1b \n" | |||||
| "femms \n" | |||||
| ) | |||||
| FLOAT_TO_INT16_INTERLEAVE(sse, | |||||
| "1: \n" | |||||
| "cvtps2pi (%2,%0), %%mm0 \n" | |||||
| "cvtps2pi 8(%2,%0), %%mm1 \n" | |||||
| "cvtps2pi (%3,%0), %%mm2 \n" | |||||
| "cvtps2pi 8(%3,%0), %%mm3 \n" | |||||
| "packssdw %%mm1, %%mm0 \n" | |||||
| "packssdw %%mm3, %%mm2 \n" | |||||
| "movq %%mm0, %%mm1 \n" | |||||
| "punpcklwd %%mm2, %%mm0 \n" | |||||
| "punpckhwd %%mm2, %%mm1 \n" | |||||
| "movq %%mm0, (%1,%0)\n" | |||||
| "movq %%mm1, 8(%1,%0)\n" | |||||
| "add $16, %0 \n" | |||||
| "js 1b \n" | |||||
| "emms \n" | |||||
| ) | |||||
| FLOAT_TO_INT16_INTERLEAVE(sse2, | |||||
| "1: \n" | |||||
| "cvtps2dq (%2,%0), %%xmm0 \n" | |||||
| "cvtps2dq (%3,%0), %%xmm1 \n" | |||||
| "packssdw %%xmm1, %%xmm0 \n" | |||||
| "movhlps %%xmm0, %%xmm1 \n" | |||||
| "punpcklwd %%xmm1, %%xmm0 \n" | |||||
| "movdqa %%xmm0, (%1,%0) \n" | |||||
| "add $16, %0 \n" | |||||
| "js 1b \n" | |||||
| ) | |||||
| FLOAT_TO_INT16_INTERLEAVE(3dnow) | |||||
| FLOAT_TO_INT16_INTERLEAVE(sse) | |||||
| FLOAT_TO_INT16_INTERLEAVE(sse2) | |||||
| static void float_to_int16_interleave_3dn2(int16_t *dst, const float **src, long len, int channels){ | static void float_to_int16_interleave_3dn2(int16_t *dst, const float **src, long len, int channels){ | ||||
| if(channels==6) | if(channels==6) | ||||
| @@ -235,7 +79,6 @@ static void float_to_int16_interleave_3dn2(int16_t *dst, const float **src, long | |||||
| float_to_int16_interleave_3dnow(dst, src, len, channels); | float_to_int16_interleave_3dnow(dst, src, len, channels); | ||||
| } | } | ||||
| #if HAVE_YASM | |||||
| void ff_float_interleave2_mmx(float *dst, const float **src, unsigned int len); | void ff_float_interleave2_mmx(float *dst, const float **src, unsigned int len); | ||||
| void ff_float_interleave2_sse(float *dst, const float **src, unsigned int len); | void ff_float_interleave2_sse(float *dst, const float **src, unsigned int len); | ||||
| @@ -269,34 +112,32 @@ void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx) | |||||
| { | { | ||||
| int mm_flags = av_get_cpu_flags(); | int mm_flags = av_get_cpu_flags(); | ||||
| if (mm_flags & AV_CPU_FLAG_MMX) { | |||||
| #if HAVE_YASM | #if HAVE_YASM | ||||
| if (mm_flags & AV_CPU_FLAG_MMX) { | |||||
| c->float_interleave = float_interleave_mmx; | c->float_interleave = float_interleave_mmx; | ||||
| #endif | |||||
| if(mm_flags & AV_CPU_FLAG_3DNOW){ | |||||
| if (HAVE_AMD3DNOW && mm_flags & AV_CPU_FLAG_3DNOW) { | |||||
| if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ | if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ | ||||
| c->float_to_int16 = float_to_int16_3dnow; | |||||
| c->float_to_int16 = ff_float_to_int16_3dnow; | |||||
| c->float_to_int16_interleave = float_to_int16_interleave_3dnow; | c->float_to_int16_interleave = float_to_int16_interleave_3dnow; | ||||
| } | } | ||||
| } | } | ||||
| if(mm_flags & AV_CPU_FLAG_3DNOWEXT){ | |||||
| if (HAVE_AMD3DNOWEXT && mm_flags & AV_CPU_FLAG_3DNOWEXT) { | |||||
| if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ | if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ | ||||
| c->float_to_int16_interleave = float_to_int16_interleave_3dn2; | c->float_to_int16_interleave = float_to_int16_interleave_3dn2; | ||||
| } | } | ||||
| } | } | ||||
| if(mm_flags & AV_CPU_FLAG_SSE){ | |||||
| c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse; | |||||
| c->float_to_int16 = float_to_int16_sse; | |||||
| if (HAVE_SSE && mm_flags & AV_CPU_FLAG_SSE) { | |||||
| c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_sse; | |||||
| c->float_to_int16 = ff_float_to_int16_sse; | |||||
| c->float_to_int16_interleave = float_to_int16_interleave_sse; | c->float_to_int16_interleave = float_to_int16_interleave_sse; | ||||
| #if HAVE_YASM | |||||
| c->float_interleave = float_interleave_sse; | c->float_interleave = float_interleave_sse; | ||||
| #endif | |||||
| } | } | ||||
| if(mm_flags & AV_CPU_FLAG_SSE2){ | |||||
| c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse2; | |||||
| c->float_to_int16 = float_to_int16_sse2; | |||||
| if (HAVE_SSE && mm_flags & AV_CPU_FLAG_SSE2) { | |||||
| c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_sse2; | |||||
| c->float_to_int16 = ff_float_to_int16_sse2; | |||||
| c->float_to_int16_interleave = float_to_int16_interleave_sse2; | c->float_to_int16_interleave = float_to_int16_interleave_sse2; | ||||
| } | } | ||||
| } | } | ||||
| #endif | |||||
| } | } | ||||
| @@ -28,21 +28,20 @@ SECTION .text | |||||
| ;----------------------------------------------------------------------------- | ;----------------------------------------------------------------------------- | ||||
| ; biweight pred: | ; biweight pred: | ||||
| ; | ; | ||||
| ; void h264_biweight_16x16_sse2(uint8_t *dst, uint8_t *src, int stride, | |||||
| ; int log2_denom, int weightd, int weights, | |||||
| ; int offset); | |||||
| ; void h264_biweight_16_sse2(uint8_t *dst, uint8_t *src, int stride, | |||||
| ; int height, int log2_denom, int weightd, | |||||
| ; int weights, int offset); | |||||
| ; and | ; and | ||||
| ; void h264_weight_16x16_sse2(uint8_t *dst, int stride, | |||||
| ; int log2_denom, int weight, | |||||
| ; int offset); | |||||
| ; void h264_weight_16_sse2(uint8_t *dst, int stride, int height, | |||||
| ; int log2_denom, int weight, int offset); | |||||
| ;----------------------------------------------------------------------------- | ;----------------------------------------------------------------------------- | ||||
| %macro WEIGHT_SETUP 0 | %macro WEIGHT_SETUP 0 | ||||
| add r4, r4 | |||||
| inc r4 | |||||
| movd m3, r3d | |||||
| movd m5, r4d | |||||
| movd m6, r2d | |||||
| add r5, r5 | |||||
| inc r5 | |||||
| movd m3, r4d | |||||
| movd m5, r5d | |||||
| movd m6, r3d | |||||
| pslld m5, m6 | pslld m5, m6 | ||||
| psrld m5, 1 | psrld m5, 1 | ||||
| %if mmsize == 16 | %if mmsize == 16 | ||||
| @@ -71,60 +70,41 @@ SECTION .text | |||||
| packuswb m0, m1 | packuswb m0, m1 | ||||
| %endmacro | %endmacro | ||||
| %macro WEIGHT_FUNC_DBL_MM 1 | |||||
| cglobal h264_weight_16x%1_mmx2, 5, 5, 0 | |||||
| INIT_MMX | |||||
| cglobal h264_weight_16_mmx2, 6, 6, 0 | |||||
| WEIGHT_SETUP | WEIGHT_SETUP | ||||
| mov r2, %1 | |||||
| %if %1 == 16 | |||||
| .nextrow | .nextrow | ||||
| WEIGHT_OP 0, 4 | WEIGHT_OP 0, 4 | ||||
| mova [r0 ], m0 | mova [r0 ], m0 | ||||
| WEIGHT_OP 8, 12 | WEIGHT_OP 8, 12 | ||||
| mova [r0+8], m0 | mova [r0+8], m0 | ||||
| add r0, r1 | add r0, r1 | ||||
| dec r2 | |||||
| dec r2d | |||||
| jnz .nextrow | jnz .nextrow | ||||
| REP_RET | REP_RET | ||||
| %else | |||||
| jmp mangle(ff_h264_weight_16x16_mmx2.nextrow) | |||||
| %endif | |||||
| %endmacro | |||||
| INIT_MMX | |||||
| WEIGHT_FUNC_DBL_MM 16 | |||||
| WEIGHT_FUNC_DBL_MM 8 | |||||
| %macro WEIGHT_FUNC_MM 4 | |||||
| cglobal h264_weight_%1x%2_%4, 7, 7, %3 | |||||
| %macro WEIGHT_FUNC_MM 3 | |||||
| cglobal h264_weight_%1_%3, 6, 6, %2 | |||||
| WEIGHT_SETUP | WEIGHT_SETUP | ||||
| mov r2, %2 | |||||
| %if %2 == 16 | |||||
| .nextrow | .nextrow | ||||
| WEIGHT_OP 0, mmsize/2 | WEIGHT_OP 0, mmsize/2 | ||||
| mova [r0], m0 | mova [r0], m0 | ||||
| add r0, r1 | add r0, r1 | ||||
| dec r2 | |||||
| dec r2d | |||||
| jnz .nextrow | jnz .nextrow | ||||
| REP_RET | REP_RET | ||||
| %else | |||||
| jmp mangle(ff_h264_weight_%1x16_%4.nextrow) | |||||
| %endif | |||||
| %endmacro | %endmacro | ||||
| INIT_MMX | INIT_MMX | ||||
| WEIGHT_FUNC_MM 8, 16, 0, mmx2 | |||||
| WEIGHT_FUNC_MM 8, 8, 0, mmx2 | |||||
| WEIGHT_FUNC_MM 8, 4, 0, mmx2 | |||||
| WEIGHT_FUNC_MM 8, 0, mmx2 | |||||
| INIT_XMM | INIT_XMM | ||||
| WEIGHT_FUNC_MM 16, 16, 8, sse2 | |||||
| WEIGHT_FUNC_MM 16, 8, 8, sse2 | |||||
| WEIGHT_FUNC_MM 16, 8, sse2 | |||||
| %macro WEIGHT_FUNC_HALF_MM 5 | |||||
| cglobal h264_weight_%1x%2_%5, 5, 5, %4 | |||||
| %macro WEIGHT_FUNC_HALF_MM 3 | |||||
| cglobal h264_weight_%1_%3, 6, 6, %2 | |||||
| WEIGHT_SETUP | WEIGHT_SETUP | ||||
| mov r2, %2/2 | |||||
| sar r2d, 1 | |||||
| lea r3, [r1*2] | lea r3, [r1*2] | ||||
| %if %2 == mmsize | |||||
| .nextrow | .nextrow | ||||
| WEIGHT_OP 0, r1 | WEIGHT_OP 0, r1 | ||||
| movh [r0], m0 | movh [r0], m0 | ||||
| @@ -135,31 +115,34 @@ cglobal h264_weight_%1x%2_%5, 5, 5, %4 | |||||
| movh [r0+r1], m0 | movh [r0+r1], m0 | ||||
| %endif | %endif | ||||
| add r0, r3 | add r0, r3 | ||||
| dec r2 | |||||
| dec r2d | |||||
| jnz .nextrow | jnz .nextrow | ||||
| REP_RET | REP_RET | ||||
| %else | |||||
| jmp mangle(ff_h264_weight_%1x%3_%5.nextrow) | |||||
| %endif | |||||
| %endmacro | %endmacro | ||||
| INIT_MMX | INIT_MMX | ||||
| WEIGHT_FUNC_HALF_MM 4, 8, 8, 0, mmx2 | |||||
| WEIGHT_FUNC_HALF_MM 4, 4, 8, 0, mmx2 | |||||
| WEIGHT_FUNC_HALF_MM 4, 2, 8, 0, mmx2 | |||||
| WEIGHT_FUNC_HALF_MM 4, 0, mmx2 | |||||
| WEIGHT_FUNC_HALF_MM 4, 0, mmx2 | |||||
| WEIGHT_FUNC_HALF_MM 4, 0, mmx2 | |||||
| INIT_XMM | INIT_XMM | ||||
| WEIGHT_FUNC_HALF_MM 8, 16, 16, 8, sse2 | |||||
| WEIGHT_FUNC_HALF_MM 8, 8, 16, 8, sse2 | |||||
| WEIGHT_FUNC_HALF_MM 8, 4, 16, 8, sse2 | |||||
| WEIGHT_FUNC_HALF_MM 8, 8, sse2 | |||||
| WEIGHT_FUNC_HALF_MM 8, 8, sse2 | |||||
| WEIGHT_FUNC_HALF_MM 8, 8, sse2 | |||||
| %macro BIWEIGHT_SETUP 0 | %macro BIWEIGHT_SETUP 0 | ||||
| add r6, 1 | |||||
| or r6, 1 | |||||
| add r3, 1 | |||||
| movd m3, r4d | |||||
| movd m4, r5d | |||||
| movd m5, r6d | |||||
| movd m6, r3d | |||||
| %ifdef ARCH_X86_64 | |||||
| %define off_regd r11d | |||||
| %else | |||||
| %define off_regd r3d | |||||
| %endif | |||||
| mov off_regd, r7m | |||||
| add off_regd, 1 | |||||
| or off_regd, 1 | |||||
| add r4, 1 | |||||
| movd m3, r5d | |||||
| movd m4, r6d | |||||
| movd m5, off_regd | |||||
| movd m6, r4d | |||||
| pslld m5, m6 | pslld m5, m6 | ||||
| psrld m5, 1 | psrld m5, 1 | ||||
| %if mmsize == 16 | %if mmsize == 16 | ||||
| @@ -195,11 +178,10 @@ WEIGHT_FUNC_HALF_MM 8, 4, 16, 8, sse2 | |||||
| packuswb m0, m1 | packuswb m0, m1 | ||||
| %endmacro | %endmacro | ||||
| %macro BIWEIGHT_FUNC_DBL_MM 1 | |||||
| cglobal h264_biweight_16x%1_mmx2, 7, 7, 0 | |||||
| INIT_MMX | |||||
| cglobal h264_biweight_16_mmx2, 7, 7, 0 | |||||
| BIWEIGHT_SETUP | BIWEIGHT_SETUP | ||||
| mov r3, %1 | |||||
| %if %1 == 16 | |||||
| movifnidn r3d, r3m | |||||
| .nextrow | .nextrow | ||||
| BIWEIGHT_STEPA 0, 1, 0 | BIWEIGHT_STEPA 0, 1, 0 | ||||
| BIWEIGHT_STEPA 1, 2, 4 | BIWEIGHT_STEPA 1, 2, 4 | ||||
| @@ -211,23 +193,14 @@ cglobal h264_biweight_16x%1_mmx2, 7, 7, 0 | |||||
| mova [r0+8], m0 | mova [r0+8], m0 | ||||
| add r0, r2 | add r0, r2 | ||||
| add r1, r2 | add r1, r2 | ||||
| dec r3 | |||||
| dec r3d | |||||
| jnz .nextrow | jnz .nextrow | ||||
| REP_RET | REP_RET | ||||
| %else | |||||
| jmp mangle(ff_h264_biweight_16x16_mmx2.nextrow) | |||||
| %endif | |||||
| %endmacro | |||||
| INIT_MMX | |||||
| BIWEIGHT_FUNC_DBL_MM 16 | |||||
| BIWEIGHT_FUNC_DBL_MM 8 | |||||
| %macro BIWEIGHT_FUNC_MM 4 | |||||
| cglobal h264_biweight_%1x%2_%4, 7, 7, %3 | |||||
| %macro BIWEIGHT_FUNC_MM 3 | |||||
| cglobal h264_biweight_%1_%3, 7, 7, %2 | |||||
| BIWEIGHT_SETUP | BIWEIGHT_SETUP | ||||
| mov r3, %2 | |||||
| %if %2 == 16 | |||||
| movifnidn r3d, r3m | |||||
| .nextrow | .nextrow | ||||
| BIWEIGHT_STEPA 0, 1, 0 | BIWEIGHT_STEPA 0, 1, 0 | ||||
| BIWEIGHT_STEPA 1, 2, mmsize/2 | BIWEIGHT_STEPA 1, 2, mmsize/2 | ||||
| @@ -235,28 +208,22 @@ cglobal h264_biweight_%1x%2_%4, 7, 7, %3 | |||||
| mova [r0], m0 | mova [r0], m0 | ||||
| add r0, r2 | add r0, r2 | ||||
| add r1, r2 | add r1, r2 | ||||
| dec r3 | |||||
| dec r3d | |||||
| jnz .nextrow | jnz .nextrow | ||||
| REP_RET | REP_RET | ||||
| %else | |||||
| jmp mangle(ff_h264_biweight_%1x16_%4.nextrow) | |||||
| %endif | |||||
| %endmacro | %endmacro | ||||
| INIT_MMX | INIT_MMX | ||||
| BIWEIGHT_FUNC_MM 8, 16, 0, mmx2 | |||||
| BIWEIGHT_FUNC_MM 8, 8, 0, mmx2 | |||||
| BIWEIGHT_FUNC_MM 8, 4, 0, mmx2 | |||||
| BIWEIGHT_FUNC_MM 8, 0, mmx2 | |||||
| INIT_XMM | INIT_XMM | ||||
| BIWEIGHT_FUNC_MM 16, 16, 8, sse2 | |||||
| BIWEIGHT_FUNC_MM 16, 8, 8, sse2 | |||||
| BIWEIGHT_FUNC_MM 16, 8, sse2 | |||||
| %macro BIWEIGHT_FUNC_HALF_MM 5 | |||||
| cglobal h264_biweight_%1x%2_%5, 7, 7, %4 | |||||
| %macro BIWEIGHT_FUNC_HALF_MM 3 | |||||
| cglobal h264_biweight_%1_%3, 7, 7, %2 | |||||
| BIWEIGHT_SETUP | BIWEIGHT_SETUP | ||||
| mov r3, %2/2 | |||||
| movifnidn r3d, r3m | |||||
| sar r3, 1 | |||||
| lea r4, [r2*2] | lea r4, [r2*2] | ||||
| %if %2 == mmsize | |||||
| .nextrow | .nextrow | ||||
| BIWEIGHT_STEPA 0, 1, 0 | BIWEIGHT_STEPA 0, 1, 0 | ||||
| BIWEIGHT_STEPA 1, 2, r2 | BIWEIGHT_STEPA 1, 2, r2 | ||||
| @@ -270,31 +237,30 @@ cglobal h264_biweight_%1x%2_%5, 7, 7, %4 | |||||
| %endif | %endif | ||||
| add r0, r4 | add r0, r4 | ||||
| add r1, r4 | add r1, r4 | ||||
| dec r3 | |||||
| dec r3d | |||||
| jnz .nextrow | jnz .nextrow | ||||
| REP_RET | REP_RET | ||||
| %else | |||||
| jmp mangle(ff_h264_biweight_%1x%3_%5.nextrow) | |||||
| %endif | |||||
| %endmacro | %endmacro | ||||
| INIT_MMX | INIT_MMX | ||||
| BIWEIGHT_FUNC_HALF_MM 4, 8, 8, 0, mmx2 | |||||
| BIWEIGHT_FUNC_HALF_MM 4, 4, 8, 0, mmx2 | |||||
| BIWEIGHT_FUNC_HALF_MM 4, 2, 8, 0, mmx2 | |||||
| BIWEIGHT_FUNC_HALF_MM 4, 0, mmx2 | |||||
| INIT_XMM | INIT_XMM | ||||
| BIWEIGHT_FUNC_HALF_MM 8, 16, 16, 8, sse2 | |||||
| BIWEIGHT_FUNC_HALF_MM 8, 8, 16, 8, sse2 | |||||
| BIWEIGHT_FUNC_HALF_MM 8, 4, 16, 8, sse2 | |||||
| BIWEIGHT_FUNC_HALF_MM 8, 8, sse2 | |||||
| %macro BIWEIGHT_SSSE3_SETUP 0 | %macro BIWEIGHT_SSSE3_SETUP 0 | ||||
| add r6, 1 | |||||
| or r6, 1 | |||||
| add r3, 1 | |||||
| movd m4, r4d | |||||
| movd m0, r5d | |||||
| movd m5, r6d | |||||
| movd m6, r3d | |||||
| %ifdef ARCH_X86_64 | |||||
| %define off_regd r11d | |||||
| %else | |||||
| %define off_regd r3d | |||||
| %endif | |||||
| mov off_regd, r7m | |||||
| add off_regd, 1 | |||||
| or off_regd, 1 | |||||
| add r4, 1 | |||||
| movd m4, r5d | |||||
| movd m0, r6d | |||||
| movd m5, off_regd | |||||
| movd m6, r4d | |||||
| pslld m5, m6 | pslld m5, m6 | ||||
| psrld m5, 1 | psrld m5, 1 | ||||
| punpcklbw m4, m0 | punpcklbw m4, m0 | ||||
| @@ -314,12 +280,11 @@ BIWEIGHT_FUNC_HALF_MM 8, 4, 16, 8, sse2 | |||||
| packuswb m0, m2 | packuswb m0, m2 | ||||
| %endmacro | %endmacro | ||||
| %macro BIWEIGHT_SSSE3_16 1 | |||||
| cglobal h264_biweight_16x%1_ssse3, 7, 7, 8 | |||||
| INIT_XMM | |||||
| cglobal h264_biweight_16_ssse3, 7, 7, 8 | |||||
| BIWEIGHT_SSSE3_SETUP | BIWEIGHT_SSSE3_SETUP | ||||
| mov r3, %1 | |||||
| movifnidn r3d, r3m | |||||
| %if %1 == 16 | |||||
| .nextrow | .nextrow | ||||
| movh m0, [r0] | movh m0, [r0] | ||||
| movh m2, [r0+8] | movh m2, [r0+8] | ||||
| @@ -330,25 +295,17 @@ cglobal h264_biweight_16x%1_ssse3, 7, 7, 8 | |||||
| mova [r0], m0 | mova [r0], m0 | ||||
| add r0, r2 | add r0, r2 | ||||
| add r1, r2 | add r1, r2 | ||||
| dec r3 | |||||
| dec r3d | |||||
| jnz .nextrow | jnz .nextrow | ||||
| REP_RET | REP_RET | ||||
| %else | |||||
| jmp mangle(ff_h264_biweight_16x16_ssse3.nextrow) | |||||
| %endif | |||||
| %endmacro | |||||
| INIT_XMM | INIT_XMM | ||||
| BIWEIGHT_SSSE3_16 16 | |||||
| BIWEIGHT_SSSE3_16 8 | |||||
| %macro BIWEIGHT_SSSE3_8 1 | |||||
| cglobal h264_biweight_8x%1_ssse3, 7, 7, 8 | |||||
| cglobal h264_biweight_8_ssse3, 7, 7, 8 | |||||
| BIWEIGHT_SSSE3_SETUP | BIWEIGHT_SSSE3_SETUP | ||||
| mov r3, %1/2 | |||||
| movifnidn r3d, r3m | |||||
| sar r3, 1 | |||||
| lea r4, [r2*2] | lea r4, [r2*2] | ||||
| %if %1 == 16 | |||||
| .nextrow | .nextrow | ||||
| movh m0, [r0] | movh m0, [r0] | ||||
| movh m1, [r1] | movh m1, [r1] | ||||
| @@ -361,15 +318,6 @@ cglobal h264_biweight_8x%1_ssse3, 7, 7, 8 | |||||
| movhps [r0+r2], m0 | movhps [r0+r2], m0 | ||||
| add r0, r4 | add r0, r4 | ||||
| add r1, r4 | add r1, r4 | ||||
| dec r3 | |||||
| dec r3d | |||||
| jnz .nextrow | jnz .nextrow | ||||
| REP_RET | REP_RET | ||||
| %else | |||||
| jmp mangle(ff_h264_biweight_8x16_ssse3.nextrow) | |||||
| %endif | |||||
| %endmacro | |||||
| INIT_XMM | |||||
| BIWEIGHT_SSSE3_8 16 | |||||
| BIWEIGHT_SSSE3_8 8 | |||||
| BIWEIGHT_SSSE3_8 4 | |||||
| @@ -36,33 +36,26 @@ cextern pw_1 | |||||
| SECTION .text | SECTION .text | ||||
| ;----------------------------------------------------------------------------- | ;----------------------------------------------------------------------------- | ||||
| ; void h264_weight(uint8_t *dst, int stride, int log2_denom, | |||||
| ; void h264_weight(uint8_t *dst, int stride, int height, int log2_denom, | |||||
| ; int weight, int offset); | ; int weight, int offset); | ||||
| ;----------------------------------------------------------------------------- | ;----------------------------------------------------------------------------- | ||||
| %ifdef ARCH_X86_32 | |||||
| DECLARE_REG_TMP 2 | |||||
| %else | |||||
| DECLARE_REG_TMP 10 | |||||
| %endif | |||||
| %macro WEIGHT_PROLOGUE 1 | |||||
| mov t0, %1 | |||||
| %macro WEIGHT_PROLOGUE 0 | |||||
| .prologue | .prologue | ||||
| PROLOGUE 0,5,8 | |||||
| PROLOGUE 0,6,8 | |||||
| movifnidn r0, r0mp | movifnidn r0, r0mp | ||||
| movifnidn r1d, r1m | movifnidn r1d, r1m | ||||
| movifnidn r3d, r3m | |||||
| movifnidn r4d, r4m | movifnidn r4d, r4m | ||||
| movifnidn r5d, r5m | |||||
| %endmacro | %endmacro | ||||
| %macro WEIGHT_SETUP 1 | %macro WEIGHT_SETUP 1 | ||||
| mova m0, [pw_1] | mova m0, [pw_1] | ||||
| movd m2, r2m | |||||
| movd m2, r3m | |||||
| pslld m0, m2 ; 1<<log2_denom | pslld m0, m2 ; 1<<log2_denom | ||||
| SPLATW m0, m0 | SPLATW m0, m0 | ||||
| shl r4, 19 ; *8, move to upper half of dword | |||||
| lea r4, [r4+r3*2+0x10000] | |||||
| movd m3, r4d ; weight<<1 | 1+(offset<<(3)) | |||||
| shl r5, 19 ; *8, move to upper half of dword | |||||
| lea r5, [r5+r4*2+0x10000] | |||||
| movd m3, r5d ; weight<<1 | 1+(offset<<(3)) | |||||
| pshufd m3, m3, 0 | pshufd m3, m3, 0 | ||||
| mova m4, [pw_pixel_max] | mova m4, [pw_pixel_max] | ||||
| paddw m2, [sq_1] ; log2_denom+1 | paddw m2, [sq_1] ; log2_denom+1 | ||||
| @@ -96,8 +89,8 @@ DECLARE_REG_TMP 10 | |||||
| %endmacro | %endmacro | ||||
| %macro WEIGHT_FUNC_DBL 1 | %macro WEIGHT_FUNC_DBL 1 | ||||
| cglobal h264_weight_16x16_10_%1 | |||||
| WEIGHT_PROLOGUE 16 | |||||
| cglobal h264_weight_16_10_%1 | |||||
| WEIGHT_PROLOGUE | |||||
| WEIGHT_SETUP %1 | WEIGHT_SETUP %1 | ||||
| .nextrow | .nextrow | ||||
| WEIGHT_OP %1, 0 | WEIGHT_OP %1, 0 | ||||
| @@ -105,13 +98,9 @@ cglobal h264_weight_16x16_10_%1 | |||||
| WEIGHT_OP %1, 16 | WEIGHT_OP %1, 16 | ||||
| mova [r0+16], m5 | mova [r0+16], m5 | ||||
| add r0, r1 | add r0, r1 | ||||
| dec t0 | |||||
| dec r2d | |||||
| jnz .nextrow | jnz .nextrow | ||||
| REP_RET | REP_RET | ||||
| cglobal h264_weight_16x8_10_%1 | |||||
| mov t0, 8 | |||||
| jmp mangle(ff_h264_weight_16x16_10_%1.prologue) | |||||
| %endmacro | %endmacro | ||||
| INIT_XMM | INIT_XMM | ||||
| @@ -120,24 +109,16 @@ WEIGHT_FUNC_DBL sse4 | |||||
| %macro WEIGHT_FUNC_MM 1 | %macro WEIGHT_FUNC_MM 1 | ||||
| cglobal h264_weight_8x16_10_%1 | |||||
| WEIGHT_PROLOGUE 16 | |||||
| cglobal h264_weight_8_10_%1 | |||||
| WEIGHT_PROLOGUE | |||||
| WEIGHT_SETUP %1 | WEIGHT_SETUP %1 | ||||
| .nextrow | .nextrow | ||||
| WEIGHT_OP %1, 0 | WEIGHT_OP %1, 0 | ||||
| mova [r0], m5 | mova [r0], m5 | ||||
| add r0, r1 | add r0, r1 | ||||
| dec t0 | |||||
| dec r2d | |||||
| jnz .nextrow | jnz .nextrow | ||||
| REP_RET | REP_RET | ||||
| cglobal h264_weight_8x8_10_%1 | |||||
| mov t0, 8 | |||||
| jmp mangle(ff_h264_weight_8x16_10_%1.prologue) | |||||
| cglobal h264_weight_8x4_10_%1 | |||||
| mov t0, 4 | |||||
| jmp mangle(ff_h264_weight_8x16_10_%1.prologue) | |||||
| %endmacro | %endmacro | ||||
| INIT_XMM | INIT_XMM | ||||
| @@ -146,8 +127,9 @@ WEIGHT_FUNC_MM sse4 | |||||
| %macro WEIGHT_FUNC_HALF_MM 1 | %macro WEIGHT_FUNC_HALF_MM 1 | ||||
| cglobal h264_weight_4x8_10_%1 | |||||
| WEIGHT_PROLOGUE 4 | |||||
| cglobal h264_weight_4_10_%1 | |||||
| WEIGHT_PROLOGUE | |||||
| sar r2d, 1 | |||||
| WEIGHT_SETUP %1 | WEIGHT_SETUP %1 | ||||
| lea r3, [r1*2] | lea r3, [r1*2] | ||||
| .nextrow | .nextrow | ||||
| @@ -155,17 +137,9 @@ cglobal h264_weight_4x8_10_%1 | |||||
| movh [r0], m5 | movh [r0], m5 | ||||
| movhps [r0+r1], m5 | movhps [r0+r1], m5 | ||||
| add r0, r3 | add r0, r3 | ||||
| dec t0 | |||||
| dec r2d | |||||
| jnz .nextrow | jnz .nextrow | ||||
| REP_RET | REP_RET | ||||
| cglobal h264_weight_4x4_10_%1 | |||||
| mov t0, 2 | |||||
| jmp mangle(ff_h264_weight_4x8_10_%1.prologue) | |||||
| cglobal h264_weight_4x2_10_%1 | |||||
| mov t0, 1 | |||||
| jmp mangle(ff_h264_weight_4x8_10_%1.prologue) | |||||
| %endmacro | %endmacro | ||||
| INIT_XMM | INIT_XMM | ||||
| @@ -174,40 +148,40 @@ WEIGHT_FUNC_HALF_MM sse4 | |||||
| ;----------------------------------------------------------------------------- | ;----------------------------------------------------------------------------- | ||||
| ; void h264_biweight(uint8_t *dst, uint8_t *src, int stride, int log2_denom, | |||||
| ; int weightd, int weights, int offset); | |||||
| ; void h264_biweight(uint8_t *dst, uint8_t *src, int stride, int height, | |||||
| ; int log2_denom, int weightd, int weights, int offset); | |||||
| ;----------------------------------------------------------------------------- | ;----------------------------------------------------------------------------- | ||||
| %ifdef ARCH_X86_32 | %ifdef ARCH_X86_32 | ||||
| DECLARE_REG_TMP 2,3 | |||||
| DECLARE_REG_TMP 3 | |||||
| %else | %else | ||||
| DECLARE_REG_TMP 10,2 | |||||
| DECLARE_REG_TMP 10 | |||||
| %endif | %endif | ||||
| %macro BIWEIGHT_PROLOGUE 1 | |||||
| mov t0, %1 | |||||
| %macro BIWEIGHT_PROLOGUE 0 | |||||
| .prologue | .prologue | ||||
| PROLOGUE 0,7,8 | PROLOGUE 0,7,8 | ||||
| movifnidn r0, r0mp | movifnidn r0, r0mp | ||||
| movifnidn r1, r1mp | movifnidn r1, r1mp | ||||
| movifnidn t1d, r2m | |||||
| movifnidn r4d, r4m | |||||
| movifnidn r2d, r2m | |||||
| movifnidn r5d, r5m | movifnidn r5d, r5m | ||||
| movifnidn r6d, r6m | movifnidn r6d, r6m | ||||
| movifnidn t0d, r7m | |||||
| %endmacro | %endmacro | ||||
| %macro BIWEIGHT_SETUP 1 | %macro BIWEIGHT_SETUP 1 | ||||
| lea r6, [r6*4+1] ; (offset<<2)+1 | |||||
| or r6, 1 | |||||
| shl r5, 16 | |||||
| or r4, r5 | |||||
| movd m4, r4d ; weightd | weights | |||||
| movd m5, r6d ; (offset+1)|1 | |||||
| movd m6, r3m ; log2_denom | |||||
| lea t0, [t0*4+1] ; (offset<<2)+1 | |||||
| or t0, 1 | |||||
| shl r6, 16 | |||||
| or r5, r6 | |||||
| movd m4, r5d ; weightd | weights | |||||
| movd m5, t0d ; (offset+1)|1 | |||||
| movd m6, r4m ; log2_denom | |||||
| pslld m5, m6 ; (((offset<<2)+1)|1)<<log2_denom | pslld m5, m6 ; (((offset<<2)+1)|1)<<log2_denom | ||||
| paddd m6, [sq_1] | paddd m6, [sq_1] | ||||
| pshufd m4, m4, 0 | pshufd m4, m4, 0 | ||||
| pshufd m5, m5, 0 | pshufd m5, m5, 0 | ||||
| mova m3, [pw_pixel_max] | mova m3, [pw_pixel_max] | ||||
| movifnidn r3d, r3m | |||||
| %ifnidn %1, sse4 | %ifnidn %1, sse4 | ||||
| pxor m7, m7 | pxor m7, m7 | ||||
| %endif | %endif | ||||
| @@ -243,23 +217,19 @@ DECLARE_REG_TMP 10,2 | |||||
| %endmacro | %endmacro | ||||
| %macro BIWEIGHT_FUNC_DBL 1 | %macro BIWEIGHT_FUNC_DBL 1 | ||||
| cglobal h264_biweight_16x16_10_%1 | |||||
| BIWEIGHT_PROLOGUE 16 | |||||
| cglobal h264_biweight_16_10_%1 | |||||
| BIWEIGHT_PROLOGUE | |||||
| BIWEIGHT_SETUP %1 | BIWEIGHT_SETUP %1 | ||||
| .nextrow | .nextrow | ||||
| BIWEIGHT %1, 0 | BIWEIGHT %1, 0 | ||||
| mova [r0 ], m0 | mova [r0 ], m0 | ||||
| BIWEIGHT %1, 16 | BIWEIGHT %1, 16 | ||||
| mova [r0+16], m0 | mova [r0+16], m0 | ||||
| add r0, t1 | |||||
| add r1, t1 | |||||
| dec t0 | |||||
| add r0, r2 | |||||
| add r1, r2 | |||||
| dec r3d | |||||
| jnz .nextrow | jnz .nextrow | ||||
| REP_RET | REP_RET | ||||
| cglobal h264_biweight_16x8_10_%1 | |||||
| mov t0, 8 | |||||
| jmp mangle(ff_h264_biweight_16x16_10_%1.prologue) | |||||
| %endmacro | %endmacro | ||||
| INIT_XMM | INIT_XMM | ||||
| @@ -267,25 +237,17 @@ BIWEIGHT_FUNC_DBL sse2 | |||||
| BIWEIGHT_FUNC_DBL sse4 | BIWEIGHT_FUNC_DBL sse4 | ||||
| %macro BIWEIGHT_FUNC 1 | %macro BIWEIGHT_FUNC 1 | ||||
| cglobal h264_biweight_8x16_10_%1 | |||||
| BIWEIGHT_PROLOGUE 16 | |||||
| cglobal h264_biweight_8_10_%1 | |||||
| BIWEIGHT_PROLOGUE | |||||
| BIWEIGHT_SETUP %1 | BIWEIGHT_SETUP %1 | ||||
| .nextrow | .nextrow | ||||
| BIWEIGHT %1, 0 | BIWEIGHT %1, 0 | ||||
| mova [r0], m0 | mova [r0], m0 | ||||
| add r0, t1 | |||||
| add r1, t1 | |||||
| dec t0 | |||||
| add r0, r2 | |||||
| add r1, r2 | |||||
| dec r3d | |||||
| jnz .nextrow | jnz .nextrow | ||||
| REP_RET | REP_RET | ||||
| cglobal h264_biweight_8x8_10_%1 | |||||
| mov t0, 8 | |||||
| jmp mangle(ff_h264_biweight_8x16_10_%1.prologue) | |||||
| cglobal h264_biweight_8x4_10_%1 | |||||
| mov t0, 4 | |||||
| jmp mangle(ff_h264_biweight_8x16_10_%1.prologue) | |||||
| %endmacro | %endmacro | ||||
| INIT_XMM | INIT_XMM | ||||
| @@ -293,27 +255,20 @@ BIWEIGHT_FUNC sse2 | |||||
| BIWEIGHT_FUNC sse4 | BIWEIGHT_FUNC sse4 | ||||
| %macro BIWEIGHT_FUNC_HALF 1 | %macro BIWEIGHT_FUNC_HALF 1 | ||||
| cglobal h264_biweight_4x8_10_%1 | |||||
| BIWEIGHT_PROLOGUE 4 | |||||
| cglobal h264_biweight_4_10_%1 | |||||
| BIWEIGHT_PROLOGUE | |||||
| BIWEIGHT_SETUP %1 | BIWEIGHT_SETUP %1 | ||||
| lea r4, [t1*2] | |||||
| sar r3d, 1 | |||||
| lea r4, [r2*2] | |||||
| .nextrow | .nextrow | ||||
| BIWEIGHT %1, 0, t1 | |||||
| BIWEIGHT %1, 0, r2 | |||||
| movh [r0 ], m0 | movh [r0 ], m0 | ||||
| movhps [r0+t1], m0 | |||||
| movhps [r0+r2], m0 | |||||
| add r0, r4 | add r0, r4 | ||||
| add r1, r4 | add r1, r4 | ||||
| dec t0 | |||||
| dec r3d | |||||
| jnz .nextrow | jnz .nextrow | ||||
| REP_RET | REP_RET | ||||
| cglobal h264_biweight_4x4_10_%1 | |||||
| mov t0, 2 | |||||
| jmp mangle(ff_h264_biweight_4x8_10_%1.prologue) | |||||
| cglobal h264_biweight_4x2_10_%1 | |||||
| mov t0, 1 | |||||
| jmp mangle(ff_h264_biweight_4x8_10_%1.prologue) | |||||
| %endmacro | %endmacro | ||||
| INIT_XMM | INIT_XMM | ||||
| @@ -298,63 +298,53 @@ LF_IFUNC(v, luma_intra, 10, mmxext) | |||||
| /***********************************/ | /***********************************/ | ||||
| /* weighted prediction */ | /* weighted prediction */ | ||||
| #define H264_WEIGHT(W, H, OPT) \ | |||||
| void ff_h264_weight_ ## W ## x ## H ## _ ## OPT(uint8_t *dst, \ | |||||
| int stride, int log2_denom, int weight, int offset); | |||||
| #define H264_WEIGHT(W, OPT) \ | |||||
| void ff_h264_weight_ ## W ## _ ## OPT(uint8_t *dst, \ | |||||
| int stride, int height, int log2_denom, int weight, int offset); | |||||
| #define H264_BIWEIGHT(W, H, OPT) \ | |||||
| void ff_h264_biweight_ ## W ## x ## H ## _ ## OPT(uint8_t *dst, \ | |||||
| uint8_t *src, int stride, int log2_denom, int weightd, \ | |||||
| #define H264_BIWEIGHT(W, OPT) \ | |||||
| void ff_h264_biweight_ ## W ## _ ## OPT(uint8_t *dst, \ | |||||
| uint8_t *src, int stride, int height, int log2_denom, int weightd, \ | |||||
| int weights, int offset); | int weights, int offset); | ||||
| #define H264_BIWEIGHT_MMX(W,H) \ | |||||
| H264_WEIGHT (W, H, mmx2) \ | |||||
| H264_BIWEIGHT(W, H, mmx2) | |||||
| #define H264_BIWEIGHT_MMX_SSE(W,H) \ | |||||
| H264_BIWEIGHT_MMX(W, H) \ | |||||
| H264_WEIGHT (W, H, sse2) \ | |||||
| H264_BIWEIGHT (W, H, sse2) \ | |||||
| H264_BIWEIGHT (W, H, ssse3) | |||||
| H264_BIWEIGHT_MMX_SSE(16, 16) | |||||
| H264_BIWEIGHT_MMX_SSE(16, 8) | |||||
| H264_BIWEIGHT_MMX_SSE( 8, 16) | |||||
| H264_BIWEIGHT_MMX_SSE( 8, 8) | |||||
| H264_BIWEIGHT_MMX_SSE( 8, 4) | |||||
| H264_BIWEIGHT_MMX ( 4, 8) | |||||
| H264_BIWEIGHT_MMX ( 4, 4) | |||||
| H264_BIWEIGHT_MMX ( 4, 2) | |||||
| #define H264_WEIGHT_10(W, H, DEPTH, OPT) \ | |||||
| void ff_h264_weight_ ## W ## x ## H ## _ ## DEPTH ## _ ## OPT(uint8_t *dst, \ | |||||
| int stride, int log2_denom, int weight, int offset); | |||||
| #define H264_BIWEIGHT_10(W, H, DEPTH, OPT) \ | |||||
| void ff_h264_biweight_ ## W ## x ## H ## _ ## DEPTH ## _ ## OPT \ | |||||
| (uint8_t *dst, uint8_t *src, int stride, int log2_denom, \ | |||||
| #define H264_BIWEIGHT_MMX(W) \ | |||||
| H264_WEIGHT (W, mmx2) \ | |||||
| H264_BIWEIGHT(W, mmx2) | |||||
| #define H264_BIWEIGHT_MMX_SSE(W) \ | |||||
| H264_BIWEIGHT_MMX(W) \ | |||||
| H264_WEIGHT (W, sse2) \ | |||||
| H264_BIWEIGHT (W, sse2) \ | |||||
| H264_BIWEIGHT (W, ssse3) | |||||
| H264_BIWEIGHT_MMX_SSE(16) | |||||
| H264_BIWEIGHT_MMX_SSE( 8) | |||||
| H264_BIWEIGHT_MMX ( 4) | |||||
| #define H264_WEIGHT_10(W, DEPTH, OPT) \ | |||||
| void ff_h264_weight_ ## W ## _ ## DEPTH ## _ ## OPT(uint8_t *dst, \ | |||||
| int stride, int height, int log2_denom, int weight, int offset); | |||||
| #define H264_BIWEIGHT_10(W, DEPTH, OPT) \ | |||||
| void ff_h264_biweight_ ## W ## _ ## DEPTH ## _ ## OPT \ | |||||
| (uint8_t *dst, uint8_t *src, int stride, int height, int log2_denom, \ | |||||
| int weightd, int weights, int offset); | int weightd, int weights, int offset); | ||||
| #define H264_BIWEIGHT_10_SSE(W, H, DEPTH) \ | |||||
| H264_WEIGHT_10 (W, H, DEPTH, sse2) \ | |||||
| H264_WEIGHT_10 (W, H, DEPTH, sse4) \ | |||||
| H264_BIWEIGHT_10(W, H, DEPTH, sse2) \ | |||||
| H264_BIWEIGHT_10(W, H, DEPTH, sse4) | |||||
| H264_BIWEIGHT_10_SSE(16, 16, 10) | |||||
| H264_BIWEIGHT_10_SSE(16, 8, 10) | |||||
| H264_BIWEIGHT_10_SSE( 8, 16, 10) | |||||
| H264_BIWEIGHT_10_SSE( 8, 8, 10) | |||||
| H264_BIWEIGHT_10_SSE( 8, 4, 10) | |||||
| H264_BIWEIGHT_10_SSE( 4, 8, 10) | |||||
| H264_BIWEIGHT_10_SSE( 4, 4, 10) | |||||
| H264_BIWEIGHT_10_SSE( 4, 2, 10) | |||||
| #define H264_BIWEIGHT_10_SSE(W, DEPTH) \ | |||||
| H264_WEIGHT_10 (W, DEPTH, sse2) \ | |||||
| H264_WEIGHT_10 (W, DEPTH, sse4) \ | |||||
| H264_BIWEIGHT_10(W, DEPTH, sse2) \ | |||||
| H264_BIWEIGHT_10(W, DEPTH, sse4) | |||||
| H264_BIWEIGHT_10_SSE(16, 10) | |||||
| H264_BIWEIGHT_10_SSE( 8, 10) | |||||
| H264_BIWEIGHT_10_SSE( 4, 10) | |||||
| void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chroma_format_idc) | void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chroma_format_idc) | ||||
| { | { | ||||
| int mm_flags = av_get_cpu_flags(); | int mm_flags = av_get_cpu_flags(); | ||||
| if (mm_flags & AV_CPU_FLAG_MMX2) { | |||||
| if (chroma_format_idc == 1 && mm_flags & AV_CPU_FLAG_MMX2) { | |||||
| c->h264_loop_filter_strength= h264_loop_filter_strength_mmx2; | c->h264_loop_filter_strength= h264_loop_filter_strength_mmx2; | ||||
| } | } | ||||
| @@ -394,23 +384,13 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chrom | |||||
| c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_mmxext; | c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_mmxext; | ||||
| c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_mmxext; | c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_mmxext; | ||||
| #endif | #endif | ||||
| c->weight_h264_pixels_tab[0]= ff_h264_weight_16x16_mmx2; | |||||
| c->weight_h264_pixels_tab[1]= ff_h264_weight_16x8_mmx2; | |||||
| c->weight_h264_pixels_tab[2]= ff_h264_weight_8x16_mmx2; | |||||
| c->weight_h264_pixels_tab[3]= ff_h264_weight_8x8_mmx2; | |||||
| c->weight_h264_pixels_tab[4]= ff_h264_weight_8x4_mmx2; | |||||
| c->weight_h264_pixels_tab[5]= ff_h264_weight_4x8_mmx2; | |||||
| c->weight_h264_pixels_tab[6]= ff_h264_weight_4x4_mmx2; | |||||
| c->weight_h264_pixels_tab[7]= ff_h264_weight_4x2_mmx2; | |||||
| c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_mmx2; | |||||
| c->biweight_h264_pixels_tab[1]= ff_h264_biweight_16x8_mmx2; | |||||
| c->biweight_h264_pixels_tab[2]= ff_h264_biweight_8x16_mmx2; | |||||
| c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_mmx2; | |||||
| c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_mmx2; | |||||
| c->biweight_h264_pixels_tab[5]= ff_h264_biweight_4x8_mmx2; | |||||
| c->biweight_h264_pixels_tab[6]= ff_h264_biweight_4x4_mmx2; | |||||
| c->biweight_h264_pixels_tab[7]= ff_h264_biweight_4x2_mmx2; | |||||
| c->weight_h264_pixels_tab[0]= ff_h264_weight_16_mmx2; | |||||
| c->weight_h264_pixels_tab[1]= ff_h264_weight_8_mmx2; | |||||
| c->weight_h264_pixels_tab[2]= ff_h264_weight_4_mmx2; | |||||
| c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16_mmx2; | |||||
| c->biweight_h264_pixels_tab[1]= ff_h264_biweight_8_mmx2; | |||||
| c->biweight_h264_pixels_tab[2]= ff_h264_biweight_4_mmx2; | |||||
| if (mm_flags&AV_CPU_FLAG_SSE2) { | if (mm_flags&AV_CPU_FLAG_SSE2) { | ||||
| c->h264_idct8_add = ff_h264_idct8_add_8_sse2; | c->h264_idct8_add = ff_h264_idct8_add_8_sse2; | ||||
| @@ -422,17 +402,11 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chrom | |||||
| c->h264_idct_add16intra = ff_h264_idct_add16intra_8_sse2; | c->h264_idct_add16intra = ff_h264_idct_add16intra_8_sse2; | ||||
| c->h264_luma_dc_dequant_idct= ff_h264_luma_dc_dequant_idct_sse2; | c->h264_luma_dc_dequant_idct= ff_h264_luma_dc_dequant_idct_sse2; | ||||
| c->weight_h264_pixels_tab[0]= ff_h264_weight_16x16_sse2; | |||||
| c->weight_h264_pixels_tab[1]= ff_h264_weight_16x8_sse2; | |||||
| c->weight_h264_pixels_tab[2]= ff_h264_weight_8x16_sse2; | |||||
| c->weight_h264_pixels_tab[3]= ff_h264_weight_8x8_sse2; | |||||
| c->weight_h264_pixels_tab[4]= ff_h264_weight_8x4_sse2; | |||||
| c->weight_h264_pixels_tab[0]= ff_h264_weight_16_sse2; | |||||
| c->weight_h264_pixels_tab[1]= ff_h264_weight_8_sse2; | |||||
| c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_sse2; | |||||
| c->biweight_h264_pixels_tab[1]= ff_h264_biweight_16x8_sse2; | |||||
| c->biweight_h264_pixels_tab[2]= ff_h264_biweight_8x16_sse2; | |||||
| c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_sse2; | |||||
| c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_sse2; | |||||
| c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16_sse2; | |||||
| c->biweight_h264_pixels_tab[1]= ff_h264_biweight_8_sse2; | |||||
| #if HAVE_ALIGNED_STACK | #if HAVE_ALIGNED_STACK | ||||
| c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_sse2; | c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_sse2; | ||||
| @@ -442,11 +416,8 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chrom | |||||
| #endif | #endif | ||||
| } | } | ||||
| if (mm_flags&AV_CPU_FLAG_SSSE3) { | if (mm_flags&AV_CPU_FLAG_SSSE3) { | ||||
| c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_ssse3; | |||||
| c->biweight_h264_pixels_tab[1]= ff_h264_biweight_16x8_ssse3; | |||||
| c->biweight_h264_pixels_tab[2]= ff_h264_biweight_8x16_ssse3; | |||||
| c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_ssse3; | |||||
| c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_ssse3; | |||||
| c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16_ssse3; | |||||
| c->biweight_h264_pixels_tab[1]= ff_h264_biweight_8_ssse3; | |||||
| } | } | ||||
| if (HAVE_AVX && mm_flags&AV_CPU_FLAG_AVX) { | if (HAVE_AVX && mm_flags&AV_CPU_FLAG_AVX) { | ||||
| #if HAVE_ALIGNED_STACK | #if HAVE_ALIGNED_STACK | ||||
| @@ -485,23 +456,13 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chrom | |||||
| c->h264_idct8_add4 = ff_h264_idct8_add4_10_sse2; | c->h264_idct8_add4 = ff_h264_idct8_add4_10_sse2; | ||||
| #endif | #endif | ||||
| c->weight_h264_pixels_tab[0] = ff_h264_weight_16x16_10_sse2; | |||||
| c->weight_h264_pixels_tab[1] = ff_h264_weight_16x8_10_sse2; | |||||
| c->weight_h264_pixels_tab[2] = ff_h264_weight_8x16_10_sse2; | |||||
| c->weight_h264_pixels_tab[3] = ff_h264_weight_8x8_10_sse2; | |||||
| c->weight_h264_pixels_tab[4] = ff_h264_weight_8x4_10_sse2; | |||||
| c->weight_h264_pixels_tab[5] = ff_h264_weight_4x8_10_sse2; | |||||
| c->weight_h264_pixels_tab[6] = ff_h264_weight_4x4_10_sse2; | |||||
| c->weight_h264_pixels_tab[7] = ff_h264_weight_4x2_10_sse2; | |||||
| c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16x16_10_sse2; | |||||
| c->biweight_h264_pixels_tab[1] = ff_h264_biweight_16x8_10_sse2; | |||||
| c->biweight_h264_pixels_tab[2] = ff_h264_biweight_8x16_10_sse2; | |||||
| c->biweight_h264_pixels_tab[3] = ff_h264_biweight_8x8_10_sse2; | |||||
| c->biweight_h264_pixels_tab[4] = ff_h264_biweight_8x4_10_sse2; | |||||
| c->biweight_h264_pixels_tab[5] = ff_h264_biweight_4x8_10_sse2; | |||||
| c->biweight_h264_pixels_tab[6] = ff_h264_biweight_4x4_10_sse2; | |||||
| c->biweight_h264_pixels_tab[7] = ff_h264_biweight_4x2_10_sse2; | |||||
| c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse2; | |||||
| c->weight_h264_pixels_tab[1] = ff_h264_weight_8_10_sse2; | |||||
| c->weight_h264_pixels_tab[2] = ff_h264_weight_4_10_sse2; | |||||
| c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_10_sse2; | |||||
| c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse2; | |||||
| c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse2; | |||||
| c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_10_sse2; | c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_10_sse2; | ||||
| c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_10_sse2; | c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_10_sse2; | ||||
| @@ -513,23 +474,13 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chrom | |||||
| #endif | #endif | ||||
| } | } | ||||
| if (mm_flags&AV_CPU_FLAG_SSE4) { | if (mm_flags&AV_CPU_FLAG_SSE4) { | ||||
| c->weight_h264_pixels_tab[0] = ff_h264_weight_16x16_10_sse4; | |||||
| c->weight_h264_pixels_tab[1] = ff_h264_weight_16x8_10_sse4; | |||||
| c->weight_h264_pixels_tab[2] = ff_h264_weight_8x16_10_sse4; | |||||
| c->weight_h264_pixels_tab[3] = ff_h264_weight_8x8_10_sse4; | |||||
| c->weight_h264_pixels_tab[4] = ff_h264_weight_8x4_10_sse4; | |||||
| c->weight_h264_pixels_tab[5] = ff_h264_weight_4x8_10_sse4; | |||||
| c->weight_h264_pixels_tab[6] = ff_h264_weight_4x4_10_sse4; | |||||
| c->weight_h264_pixels_tab[7] = ff_h264_weight_4x2_10_sse4; | |||||
| c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16x16_10_sse4; | |||||
| c->biweight_h264_pixels_tab[1] = ff_h264_biweight_16x8_10_sse4; | |||||
| c->biweight_h264_pixels_tab[2] = ff_h264_biweight_8x16_10_sse4; | |||||
| c->biweight_h264_pixels_tab[3] = ff_h264_biweight_8x8_10_sse4; | |||||
| c->biweight_h264_pixels_tab[4] = ff_h264_biweight_8x4_10_sse4; | |||||
| c->biweight_h264_pixels_tab[5] = ff_h264_biweight_4x8_10_sse4; | |||||
| c->biweight_h264_pixels_tab[6] = ff_h264_biweight_4x4_10_sse4; | |||||
| c->biweight_h264_pixels_tab[7] = ff_h264_biweight_4x2_10_sse4; | |||||
| c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse4; | |||||
| c->weight_h264_pixels_tab[1] = ff_h264_weight_8_10_sse4; | |||||
| c->weight_h264_pixels_tab[2] = ff_h264_weight_4_10_sse4; | |||||
| c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_10_sse4; | |||||
| c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse4; | |||||
| c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse4; | |||||
| } | } | ||||
| #if HAVE_AVX | #if HAVE_AVX | ||||
| if (mm_flags&AV_CPU_FLAG_AVX) { | if (mm_flags&AV_CPU_FLAG_AVX) { | ||||
| @@ -10,7 +10,7 @@ OBJS = alldevices.o avdevice.o | |||||
| # input/output devices | # input/output devices | ||||
| OBJS-$(CONFIG_ALSA_INDEV) += alsa-audio-common.o \ | OBJS-$(CONFIG_ALSA_INDEV) += alsa-audio-common.o \ | ||||
| alsa-audio-dec.o | |||||
| alsa-audio-dec.o timefilter.o | |||||
| OBJS-$(CONFIG_ALSA_OUTDEV) += alsa-audio-common.o \ | OBJS-$(CONFIG_ALSA_OUTDEV) += alsa-audio-common.o \ | ||||
| alsa-audio-enc.o | alsa-audio-enc.o | ||||
| OBJS-$(CONFIG_BKTR_INDEV) += bktr.o | OBJS-$(CONFIG_BKTR_INDEV) += bktr.o | ||||
| @@ -19,7 +19,7 @@ OBJS-$(CONFIG_DSHOW_INDEV) += dshow.o dshow_enummediatypes.o \ | |||||
| dshow_pin.o dshow_common.o | dshow_pin.o dshow_common.o | ||||
| OBJS-$(CONFIG_DV1394_INDEV) += dv1394.o | OBJS-$(CONFIG_DV1394_INDEV) += dv1394.o | ||||
| OBJS-$(CONFIG_FBDEV_INDEV) += fbdev.o | OBJS-$(CONFIG_FBDEV_INDEV) += fbdev.o | ||||
| OBJS-$(CONFIG_JACK_INDEV) += jack_audio.o | |||||
| OBJS-$(CONFIG_JACK_INDEV) += jack_audio.o timefilter.o | |||||
| OBJS-$(CONFIG_LAVFI_INDEV) += lavfi.o | OBJS-$(CONFIG_LAVFI_INDEV) += lavfi.o | ||||
| OBJS-$(CONFIG_OPENAL_INDEV) += openal-dec.o | OBJS-$(CONFIG_OPENAL_INDEV) += openal-dec.o | ||||
| OBJS-$(CONFIG_OSS_INDEV) += oss_audio.o | OBJS-$(CONFIG_OSS_INDEV) += oss_audio.o | ||||
| @@ -39,4 +39,6 @@ OBJS-$(CONFIG_LIBDC1394_INDEV) += libdc1394.o | |||||
| SKIPHEADERS-$(HAVE_ALSA_ASOUNDLIB_H) += alsa-audio.h | SKIPHEADERS-$(HAVE_ALSA_ASOUNDLIB_H) += alsa-audio.h | ||||
| SKIPHEADERS-$(HAVE_SNDIO_H) += sndio_common.h | SKIPHEADERS-$(HAVE_SNDIO_H) += sndio_common.h | ||||
| TESTPROGS = timefilter | |||||
| include $(SRC_PATH)/subdir.mak | include $(SRC_PATH)/subdir.mak | ||||
| @@ -33,7 +33,7 @@ | |||||
| #include <alsa/asoundlib.h> | #include <alsa/asoundlib.h> | ||||
| #include "config.h" | #include "config.h" | ||||
| #include "libavutil/log.h" | #include "libavutil/log.h" | ||||
| #include "libavformat/timefilter.h" | |||||
| #include "timefilter.h" | |||||
| #include "avdevice.h" | #include "avdevice.h" | ||||
| /* XXX: we make the assumption that the soundcard accepts this format */ | /* XXX: we make the assumption that the soundcard accepts this format */ | ||||
| @@ -28,7 +28,8 @@ | |||||
| #include "libavutil/fifo.h" | #include "libavutil/fifo.h" | ||||
| #include "libavutil/opt.h" | #include "libavutil/opt.h" | ||||
| #include "libavcodec/avcodec.h" | #include "libavcodec/avcodec.h" | ||||
| #include "libavformat/timefilter.h" | |||||
| #include "libavformat/avformat.h" | |||||
| #include "timefilter.h" | |||||
| #include "avdevice.h" | #include "avdevice.h" | ||||
| /** | /** | ||||
| @@ -24,8 +24,8 @@ | |||||
| #include "config.h" | #include "config.h" | ||||
| #include "avformat.h" | |||||
| #include "timefilter.h" | #include "timefilter.h" | ||||
| #include "libavutil/mem.h" | |||||
| struct TimeFilter { | struct TimeFilter { | ||||
| /// Delay Locked Loop data. These variables refer to mathematical | /// Delay Locked Loop data. These variables refer to mathematical | ||||
| @@ -22,8 +22,8 @@ | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||
| */ | */ | ||||
| #ifndef AVFORMAT_TIMEFILTER_H | |||||
| #define AVFORMAT_TIMEFILTER_H | |||||
| #ifndef AVDEVICE_TIMEFILTER_H | |||||
| #define AVDEVICE_TIMEFILTER_H | |||||
| /** | /** | ||||
| * Opaque type representing a time filter state | * Opaque type representing a time filter state | ||||
| @@ -94,4 +94,4 @@ void ff_timefilter_reset(TimeFilter *); | |||||
| */ | */ | ||||
| void ff_timefilter_destroy(TimeFilter *); | void ff_timefilter_destroy(TimeFilter *); | ||||
| #endif /* AVFORMAT_TIMEFILTER_H */ | |||||
| #endif /* AVDEVICE_TIMEFILTER_H */ | |||||
| @@ -354,11 +354,8 @@ OBJS-$(CONFIG_RTP_PROTOCOL) += rtpproto.o | |||||
| OBJS-$(CONFIG_TCP_PROTOCOL) += tcp.o | OBJS-$(CONFIG_TCP_PROTOCOL) += tcp.o | ||||
| OBJS-$(CONFIG_UDP_PROTOCOL) += udp.o | OBJS-$(CONFIG_UDP_PROTOCOL) += udp.o | ||||
| # libavdevice dependencies | |||||
| OBJS-$(CONFIG_ALSA_INDEV) += timefilter.o | |||||
| OBJS-$(CONFIG_JACK_INDEV) += timefilter.o | |||||
| TESTPROGS = seek timefilter | |||||
| TESTPROGS = seek | |||||
| TOOLS = pktdumper probetest | TOOLS = pktdumper probetest | ||||
| include $(SRC_PATH)/subdir.mak | include $(SRC_PATH)/subdir.mak | ||||
| @@ -228,8 +228,9 @@ static int amf_parse_object(AVFormatContext *s, AVStream *astream, AVStream *vst | |||||
| case AMF_DATA_TYPE_OBJECT: { | case AMF_DATA_TYPE_OBJECT: { | ||||
| unsigned int keylen; | unsigned int keylen; | ||||
| if (vstream && ioc->seekable && key && !strcmp(KEYFRAMES_TAG, key) && depth == 1) | |||||
| if (parse_keyframes_index(s, ioc, vstream, max_pos) < 0) | |||||
| if ((vstream || astream) && ioc->seekable && key && !strcmp(KEYFRAMES_TAG, key) && depth == 1) | |||||
| if (parse_keyframes_index(s, ioc, vstream ? vstream : astream, | |||||
| max_pos) < 0) | |||||
| av_log(s, AV_LOG_ERROR, "Keyframe index parsing failed\n"); | av_log(s, AV_LOG_ERROR, "Keyframe index parsing failed\n"); | ||||
| while(avio_tell(ioc) < max_pos - 2 && (keylen = avio_rb16(ioc))) { | while(avio_tell(ioc) < max_pos - 2 && (keylen = avio_rb16(ioc))) { | ||||
| @@ -60,10 +60,13 @@ typedef struct FLVContext { | |||||
| int64_t duration_offset; | int64_t duration_offset; | ||||
| int64_t filesize_offset; | int64_t filesize_offset; | ||||
| int64_t duration; | int64_t duration; | ||||
| int delay; ///< first dts delay for AVC | |||||
| int64_t last_ts; | |||||
| } FLVContext; | } FLVContext; | ||||
| typedef struct FLVStreamContext { | |||||
| int delay; ///< first dts delay for each stream (needed for AVC & Speex) | |||||
| int64_t last_ts; ///< last timestamp for each stream | |||||
| } FLVStreamContext; | |||||
| static int get_audio_flags(AVCodecContext *enc){ | static int get_audio_flags(AVCodecContext *enc){ | ||||
| int flags = (enc->bits_per_coded_sample == 16) ? FLV_SAMPLESSIZE_16BIT : FLV_SAMPLESSIZE_8BIT; | int flags = (enc->bits_per_coded_sample == 16) ? FLV_SAMPLESSIZE_16BIT : FLV_SAMPLESSIZE_8BIT; | ||||
| @@ -182,6 +185,7 @@ static int flv_write_header(AVFormatContext *s) | |||||
| for(i=0; i<s->nb_streams; i++){ | for(i=0; i<s->nb_streams; i++){ | ||||
| AVCodecContext *enc = s->streams[i]->codec; | AVCodecContext *enc = s->streams[i]->codec; | ||||
| FLVStreamContext *sc; | |||||
| if (enc->codec_type == AVMEDIA_TYPE_VIDEO) { | if (enc->codec_type == AVMEDIA_TYPE_VIDEO) { | ||||
| if (s->streams[i]->r_frame_rate.den && s->streams[i]->r_frame_rate.num) { | if (s->streams[i]->r_frame_rate.den && s->streams[i]->r_frame_rate.num) { | ||||
| framerate = av_q2d(s->streams[i]->r_frame_rate); | framerate = av_q2d(s->streams[i]->r_frame_rate); | ||||
| @@ -199,6 +203,12 @@ static int flv_write_header(AVFormatContext *s) | |||||
| return -1; | return -1; | ||||
| } | } | ||||
| av_set_pts_info(s->streams[i], 32, 1, 1000); /* 32 bit pts in ms */ | av_set_pts_info(s->streams[i], 32, 1, 1000); /* 32 bit pts in ms */ | ||||
| sc = av_mallocz(sizeof(FLVStreamContext)); | |||||
| if (!sc) | |||||
| return AVERROR(ENOMEM); | |||||
| s->streams[i]->priv_data = sc; | |||||
| sc->last_ts = -1; | |||||
| } | } | ||||
| avio_write(pb, "FLV", 3); | avio_write(pb, "FLV", 3); | ||||
| avio_w8(pb,1); | avio_w8(pb,1); | ||||
| @@ -218,8 +228,6 @@ static int flv_write_header(AVFormatContext *s) | |||||
| } | } | ||||
| } | } | ||||
| flv->last_ts = -1; | |||||
| /* write meta_tag */ | /* write meta_tag */ | ||||
| avio_w8(pb, 18); // tag type META | avio_w8(pb, 18); // tag type META | ||||
| metadata_size_pos= avio_tell(pb); | metadata_size_pos= avio_tell(pb); | ||||
| @@ -361,9 +369,10 @@ static int flv_write_trailer(AVFormatContext *s) | |||||
| /* Add EOS tag */ | /* Add EOS tag */ | ||||
| for (i = 0; i < s->nb_streams; i++) { | for (i = 0; i < s->nb_streams; i++) { | ||||
| AVCodecContext *enc = s->streams[i]->codec; | AVCodecContext *enc = s->streams[i]->codec; | ||||
| FLVStreamContext *sc = s->streams[i]->priv_data; | |||||
| if (enc->codec_type == AVMEDIA_TYPE_VIDEO && | if (enc->codec_type == AVMEDIA_TYPE_VIDEO && | ||||
| (enc->codec_id == CODEC_ID_H264 || enc->codec_id == CODEC_ID_MPEG4)) { | (enc->codec_id == CODEC_ID_H264 || enc->codec_id == CODEC_ID_MPEG4)) { | ||||
| put_avc_eos_tag(pb, flv->last_ts); | |||||
| put_avc_eos_tag(pb, sc->last_ts); | |||||
| } | } | ||||
| } | } | ||||
| @@ -384,6 +393,7 @@ static int flv_write_packet(AVFormatContext *s, AVPacket *pkt) | |||||
| AVIOContext *pb = s->pb; | AVIOContext *pb = s->pb; | ||||
| AVCodecContext *enc = s->streams[pkt->stream_index]->codec; | AVCodecContext *enc = s->streams[pkt->stream_index]->codec; | ||||
| FLVContext *flv = s->priv_data; | FLVContext *flv = s->priv_data; | ||||
| FLVStreamContext *sc = s->streams[pkt->stream_index]->priv_data; | |||||
| unsigned ts; | unsigned ts; | ||||
| int size= pkt->size; | int size= pkt->size; | ||||
| uint8_t *data= NULL; | uint8_t *data= NULL; | ||||
| @@ -434,20 +444,20 @@ static int flv_write_packet(AVFormatContext *s, AVPacket *pkt) | |||||
| av_log(s, AV_LOG_ERROR, "malformated aac bitstream, use -absf aac_adtstoasc\n"); | av_log(s, AV_LOG_ERROR, "malformated aac bitstream, use -absf aac_adtstoasc\n"); | ||||
| return -1; | return -1; | ||||
| } | } | ||||
| if (!flv->delay && pkt->dts < 0) | |||||
| flv->delay = -pkt->dts; | |||||
| if (!sc->delay && pkt->dts < 0) | |||||
| sc->delay = -pkt->dts; | |||||
| ts = pkt->dts + flv->delay; // add delay to force positive dts | |||||
| ts = pkt->dts + sc->delay; // add delay to force positive dts | |||||
| /* check Speex packet duration */ | /* check Speex packet duration */ | ||||
| if (enc->codec_id == CODEC_ID_SPEEX && ts - flv->last_ts > 160) { | |||||
| if (enc->codec_id == CODEC_ID_SPEEX && ts - sc->last_ts > 160) { | |||||
| av_log(s, AV_LOG_WARNING, "Warning: Speex stream has more than " | av_log(s, AV_LOG_WARNING, "Warning: Speex stream has more than " | ||||
| "8 frames per packet. Adobe Flash " | "8 frames per packet. Adobe Flash " | ||||
| "Player cannot handle this!\n"); | "Player cannot handle this!\n"); | ||||
| } | } | ||||
| if (flv->last_ts < ts) | |||||
| flv->last_ts = ts; | |||||
| if (sc->last_ts < ts) | |||||
| sc->last_ts = ts; | |||||
| avio_wb24(pb,size + flags_size); | avio_wb24(pb,size + flags_size); | ||||
| avio_wb24(pb,ts); | avio_wb24(pb,ts); | ||||
| @@ -471,7 +481,7 @@ static int flv_write_packet(AVFormatContext *s, AVPacket *pkt) | |||||
| avio_write(pb, data ? data : pkt->data, size); | avio_write(pb, data ? data : pkt->data, size); | ||||
| avio_wb32(pb,size+flags_size+11); // previous tag size | avio_wb32(pb,size+flags_size+11); // previous tag size | ||||
| flv->duration = FFMAX(flv->duration, pkt->pts + flv->delay + pkt->duration); | |||||
| flv->duration = FFMAX(flv->duration, pkt->pts + sc->delay + pkt->duration); | |||||
| avio_flush(pb); | avio_flush(pb); | ||||
| @@ -35,6 +35,7 @@ | |||||
| #include "riff.h" | #include "riff.h" | ||||
| #include "isom.h" | #include "isom.h" | ||||
| #include "libavcodec/get_bits.h" | #include "libavcodec/get_bits.h" | ||||
| #include "id3v1.h" | |||||
| #if CONFIG_ZLIB | #if CONFIG_ZLIB | ||||
| #include <zlib.h> | #include <zlib.h> | ||||
| @@ -99,31 +100,48 @@ static int mov_metadata_track_or_disc_number(MOVContext *c, AVIOContext *pb, | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| static int mov_metadata_int8(MOVContext *c, AVIOContext *pb, | |||||
| unsigned len, const char *key) | |||||
| static int mov_metadata_int8_bypass_padding(MOVContext *c, AVIOContext *pb, | |||||
| unsigned len, const char *key) | |||||
| { | { | ||||
| char buf[16]; | |||||
| char buf[16]; | |||||
| /* bypass padding bytes */ | |||||
| avio_r8(pb); | |||||
| avio_r8(pb); | |||||
| avio_r8(pb); | |||||
| snprintf(buf, sizeof(buf), "%hu", avio_r8(pb)); | |||||
| av_dict_set(&c->fc->metadata, key, buf, 0); | |||||
| return 0; | |||||
| } | |||||
| /* bypass padding bytes */ | |||||
| avio_r8(pb); | |||||
| avio_r8(pb); | |||||
| avio_r8(pb); | |||||
| static int mov_metadata_int8_no_padding(MOVContext *c, AVIOContext *pb, | |||||
| unsigned len, const char *key) | |||||
| { | |||||
| char buf[16]; | |||||
| snprintf(buf, sizeof(buf), "%hu", avio_r8(pb)); | |||||
| av_dict_set(&c->fc->metadata, key, buf, 0); | |||||
| snprintf(buf, sizeof(buf), "%hu", avio_r8(pb)); | |||||
| av_dict_set(&c->fc->metadata, key, buf, 0); | |||||
| return 0; | |||||
| return 0; | |||||
| } | } | ||||
| static int mov_metadata_stik(MOVContext *c, AVIOContext *pb, | |||||
| static int mov_metadata_gnre(MOVContext *c, AVIOContext *pb, | |||||
| unsigned len, const char *key) | unsigned len, const char *key) | ||||
| { | { | ||||
| char buf[16]; | |||||
| short genre; | |||||
| char buf[20]; | |||||
| snprintf(buf, sizeof(buf), "%hu", avio_r8(pb)); | |||||
| av_dict_set(&c->fc->metadata, key, buf, 0); | |||||
| avio_r8(pb); // unknown | |||||
| return 0; | |||||
| genre = avio_r8(pb); | |||||
| if (genre < 1 || genre > ID3v1_GENRE_MAX) | |||||
| return 0; | |||||
| snprintf(buf, sizeof(buf), "%s", ff_id3v1_genre_str[genre-1]); | |||||
| av_dict_set(&c->fc->metadata, key, buf, 0); | |||||
| return 0; | |||||
| } | } | ||||
| static const uint32_t mac_to_unicode[128] = { | static const uint32_t mac_to_unicode[128] = { | ||||
| @@ -189,6 +207,8 @@ static int mov_read_udta_string(MOVContext *c, AVIOContext *pb, MOVAtom atom) | |||||
| case MKTAG(0xa9,'a','l','b'): key = "album"; break; | case MKTAG(0xa9,'a','l','b'): key = "album"; break; | ||||
| case MKTAG(0xa9,'d','a','y'): key = "date"; break; | case MKTAG(0xa9,'d','a','y'): key = "date"; break; | ||||
| case MKTAG(0xa9,'g','e','n'): key = "genre"; break; | case MKTAG(0xa9,'g','e','n'): key = "genre"; break; | ||||
| case MKTAG( 'g','n','r','e'): key = "genre"; | |||||
| parse = mov_metadata_gnre; break; | |||||
| case MKTAG(0xa9,'t','o','o'): | case MKTAG(0xa9,'t','o','o'): | ||||
| case MKTAG(0xa9,'s','w','r'): key = "encoder"; break; | case MKTAG(0xa9,'s','w','r'): key = "encoder"; break; | ||||
| case MKTAG(0xa9,'e','n','c'): key = "encoder"; break; | case MKTAG(0xa9,'e','n','c'): key = "encoder"; break; | ||||
| @@ -202,11 +222,15 @@ static int mov_read_udta_string(MOVContext *c, AVIOContext *pb, MOVAtom atom) | |||||
| case MKTAG( 'd','i','s','k'): key = "disc"; | case MKTAG( 'd','i','s','k'): key = "disc"; | ||||
| parse = mov_metadata_track_or_disc_number; break; | parse = mov_metadata_track_or_disc_number; break; | ||||
| case MKTAG( 't','v','e','s'): key = "episode_sort"; | case MKTAG( 't','v','e','s'): key = "episode_sort"; | ||||
| parse = mov_metadata_int8; break; | |||||
| parse = mov_metadata_int8_bypass_padding; break; | |||||
| case MKTAG( 't','v','s','n'): key = "season_number"; | case MKTAG( 't','v','s','n'): key = "season_number"; | ||||
| parse = mov_metadata_int8; break; | |||||
| parse = mov_metadata_int8_bypass_padding; break; | |||||
| case MKTAG( 's','t','i','k'): key = "media_type"; | case MKTAG( 's','t','i','k'): key = "media_type"; | ||||
| parse = mov_metadata_stik; break; | |||||
| parse = mov_metadata_int8_no_padding; break; | |||||
| case MKTAG( 'h','d','v','d'): key = "hd_video"; | |||||
| parse = mov_metadata_int8_no_padding; break; | |||||
| case MKTAG( 'p','g','a','p'): key = "gapless_playback"; | |||||
| parse = mov_metadata_int8_no_padding; break; | |||||
| } | } | ||||
| if (c->itunes_metadata && atom.size > 8) { | if (c->itunes_metadata && atom.size > 8) { | ||||
| @@ -859,6 +859,29 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[PIX_FMT_NB] = { | |||||
| }, | }, | ||||
| .flags = PIX_FMT_BE, | .flags = PIX_FMT_BE, | ||||
| }, | }, | ||||
| [PIX_FMT_YUV422P9LE] = { | |||||
| .name = "yuv422p9le", | |||||
| .nb_components= 3, | |||||
| .log2_chroma_w= 1, | |||||
| .log2_chroma_h= 0, | |||||
| .comp = { | |||||
| {0,1,1,0,8}, /* Y */ | |||||
| {1,1,1,0,8}, /* U */ | |||||
| {2,1,1,0,8}, /* V */ | |||||
| }, | |||||
| }, | |||||
| [PIX_FMT_YUV422P9BE] = { | |||||
| .name = "yuv422p9be", | |||||
| .nb_components= 3, | |||||
| .log2_chroma_w= 1, | |||||
| .log2_chroma_h= 0, | |||||
| .comp = { | |||||
| {0,1,1,0,8}, /* Y */ | |||||
| {1,1,1,0,8}, /* U */ | |||||
| {2,1,1,0,8}, /* V */ | |||||
| }, | |||||
| .flags = PIX_FMT_BE, | |||||
| }, | |||||
| [PIX_FMT_YUV422P10LE] = { | [PIX_FMT_YUV422P10LE] = { | ||||
| .name = "yuv422p10le", | .name = "yuv422p10le", | ||||
| .nb_components= 3, | .nb_components= 3, | ||||
| @@ -149,12 +149,15 @@ enum PixelFormat { | |||||
| PIX_FMT_YUV444P9LE, ///< planar YUV 4:4:4, 27bpp, (1 Cr & Cb sample per 1x1 Y samples), little-endian | PIX_FMT_YUV444P9LE, ///< planar YUV 4:4:4, 27bpp, (1 Cr & Cb sample per 1x1 Y samples), little-endian | ||||
| PIX_FMT_YUV444P10BE,///< planar YUV 4:4:4, 30bpp, (1 Cr & Cb sample per 1x1 Y samples), big-endian | PIX_FMT_YUV444P10BE,///< planar YUV 4:4:4, 30bpp, (1 Cr & Cb sample per 1x1 Y samples), big-endian | ||||
| PIX_FMT_YUV444P10LE,///< planar YUV 4:4:4, 30bpp, (1 Cr & Cb sample per 1x1 Y samples), little-endian | PIX_FMT_YUV444P10LE,///< planar YUV 4:4:4, 30bpp, (1 Cr & Cb sample per 1x1 Y samples), little-endian | ||||
| PIX_FMT_YUV422P9BE, ///< planar YUV 4:2:2, 18bpp, (1 Cr & Cb sample per 2x1 Y samples), big-endian | |||||
| PIX_FMT_YUV422P9LE, ///< planar YUV 4:2:2, 18bpp, (1 Cr & Cb sample per 2x1 Y samples), little-endian | |||||
| PIX_FMT_RGBA64BE, ///< packed RGBA 16:16:16:16, 64bpp, 16R, 16G, 16B, 16A, the 2-byte value for each R/G/B/A component is stored as big-endian | PIX_FMT_RGBA64BE, ///< packed RGBA 16:16:16:16, 64bpp, 16R, 16G, 16B, 16A, the 2-byte value for each R/G/B/A component is stored as big-endian | ||||
| PIX_FMT_RGBA64LE, ///< packed RGBA 16:16:16:16, 64bpp, 16R, 16G, 16B, 16A, the 2-byte value for each R/G/B/A component is stored as little-endian | PIX_FMT_RGBA64LE, ///< packed RGBA 16:16:16:16, 64bpp, 16R, 16G, 16B, 16A, the 2-byte value for each R/G/B/A component is stored as little-endian | ||||
| PIX_FMT_BGRA64BE, ///< packed RGBA 16:16:16:16, 64bpp, 16B, 16G, 16R, 16A, the 2-byte value for each R/G/B/A component is stored as big-endian | PIX_FMT_BGRA64BE, ///< packed RGBA 16:16:16:16, 64bpp, 16B, 16G, 16R, 16A, the 2-byte value for each R/G/B/A component is stored as big-endian | ||||
| PIX_FMT_BGRA64LE, ///< packed RGBA 16:16:16:16, 64bpp, 16B, 16G, 16R, 16A, the 2-byte value for each R/G/B/A component is stored as little-endian | PIX_FMT_BGRA64LE, ///< packed RGBA 16:16:16:16, 64bpp, 16B, 16G, 16R, 16A, the 2-byte value for each R/G/B/A component is stored as little-endian | ||||
| PIX_FMT_GBR24P, ///< planar GBR, 24bpp, 8G, 8B, 8R. | PIX_FMT_GBR24P, ///< planar GBR, 24bpp, 8G, 8B, 8R. | ||||
| PIX_FMT_NB, ///< number of pixel formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions | PIX_FMT_NB, ///< number of pixel formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions | ||||
| }; | }; | ||||
| @@ -182,6 +185,7 @@ enum PixelFormat { | |||||
| #define PIX_FMT_BGR444 PIX_FMT_NE(BGR444BE, BGR444LE) | #define PIX_FMT_BGR444 PIX_FMT_NE(BGR444BE, BGR444LE) | ||||
| #define PIX_FMT_YUV420P9 PIX_FMT_NE(YUV420P9BE , YUV420P9LE) | #define PIX_FMT_YUV420P9 PIX_FMT_NE(YUV420P9BE , YUV420P9LE) | ||||
| #define PIX_FMT_YUV422P9 PIX_FMT_NE(YUV422P9BE , YUV422P9LE) | |||||
| #define PIX_FMT_YUV444P9 PIX_FMT_NE(YUV444P9BE , YUV444P9LE) | #define PIX_FMT_YUV444P9 PIX_FMT_NE(YUV444P9BE , YUV444P9LE) | ||||
| #define PIX_FMT_YUV420P10 PIX_FMT_NE(YUV420P10BE, YUV420P10LE) | #define PIX_FMT_YUV420P10 PIX_FMT_NE(YUV420P10BE, YUV420P10LE) | ||||
| #define PIX_FMT_YUV422P10 PIX_FMT_NE(YUV422P10BE, YUV422P10LE) | #define PIX_FMT_YUV422P10 PIX_FMT_NE(YUV422P10BE, YUV422P10LE) | ||||
| @@ -536,6 +536,18 @@ | |||||
| %endif | %endif | ||||
| %endmacro | %endmacro | ||||
| %macro SPLATD_MMX 1 | |||||
| punpckldq %1, %1 | |||||
| %endmacro | |||||
| %macro SPLATD_SSE 1 | |||||
| shufps %1, %1, 0 | |||||
| %endmacro | |||||
| %macro SPLATD_SSE2 1 | |||||
| pshufd %1, %1, 0 | |||||
| %endmacro | |||||
| %macro CLIPW 3 ;(dst, min, max) | %macro CLIPW 3 ;(dst, min, max) | ||||
| pmaxsw %1, %2 | pmaxsw %1, %2 | ||||
| pminsw %1, %3 | pminsw %1, %3 | ||||
| @@ -2843,6 +2843,7 @@ static av_cold void sws_init_swScale_c(SwsContext *c) | |||||
| case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break; | case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break; | ||||
| #if HAVE_BIGENDIAN | #if HAVE_BIGENDIAN | ||||
| case PIX_FMT_YUV444P9LE: | case PIX_FMT_YUV444P9LE: | ||||
| case PIX_FMT_YUV422P9LE: | |||||
| case PIX_FMT_YUV420P9LE: | case PIX_FMT_YUV420P9LE: | ||||
| case PIX_FMT_YUV422P10LE: | case PIX_FMT_YUV422P10LE: | ||||
| case PIX_FMT_YUV420P10LE: | case PIX_FMT_YUV420P10LE: | ||||
| @@ -2852,6 +2853,7 @@ static av_cold void sws_init_swScale_c(SwsContext *c) | |||||
| case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break; | case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break; | ||||
| #else | #else | ||||
| case PIX_FMT_YUV444P9BE: | case PIX_FMT_YUV444P9BE: | ||||
| case PIX_FMT_YUV422P9BE: | |||||
| case PIX_FMT_YUV420P9BE: | case PIX_FMT_YUV420P9BE: | ||||
| case PIX_FMT_YUV444P10BE: | case PIX_FMT_YUV444P10BE: | ||||
| case PIX_FMT_YUV422P10BE: | case PIX_FMT_YUV422P10BE: | ||||
| @@ -2912,6 +2914,7 @@ static av_cold void sws_init_swScale_c(SwsContext *c) | |||||
| switch (srcFormat) { | switch (srcFormat) { | ||||
| #if HAVE_BIGENDIAN | #if HAVE_BIGENDIAN | ||||
| case PIX_FMT_YUV444P9LE: | case PIX_FMT_YUV444P9LE: | ||||
| case PIX_FMT_YUV422P9LE: | |||||
| case PIX_FMT_YUV420P9LE: | case PIX_FMT_YUV420P9LE: | ||||
| case PIX_FMT_YUV422P10LE: | case PIX_FMT_YUV422P10LE: | ||||
| case PIX_FMT_YUV420P10LE: | case PIX_FMT_YUV420P10LE: | ||||
| @@ -2922,6 +2925,7 @@ static av_cold void sws_init_swScale_c(SwsContext *c) | |||||
| case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break; | case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break; | ||||
| #else | #else | ||||
| case PIX_FMT_YUV444P9BE: | case PIX_FMT_YUV444P9BE: | ||||
| case PIX_FMT_YUV422P9BE: | |||||
| case PIX_FMT_YUV420P9BE: | case PIX_FMT_YUV420P9BE: | ||||
| case PIX_FMT_YUV444P10BE: | case PIX_FMT_YUV444P10BE: | ||||
| case PIX_FMT_YUV422P10BE: | case PIX_FMT_YUV422P10BE: | ||||
| @@ -547,6 +547,8 @@ const char *sws_format_name(enum PixelFormat format); | |||||
| #define isNBPS(x) ( \ | #define isNBPS(x) ( \ | ||||
| (x)==PIX_FMT_YUV420P9LE \ | (x)==PIX_FMT_YUV420P9LE \ | ||||
| || (x)==PIX_FMT_YUV420P9BE \ | || (x)==PIX_FMT_YUV420P9BE \ | ||||
| || (x)==PIX_FMT_YUV422P9LE \ | |||||
| || (x)==PIX_FMT_YUV422P9BE \ | |||||
| || (x)==PIX_FMT_YUV444P9BE \ | || (x)==PIX_FMT_YUV444P9BE \ | ||||
| || (x)==PIX_FMT_YUV444P9LE \ | || (x)==PIX_FMT_YUV444P9LE \ | ||||
| || (x)==PIX_FMT_YUV422P10BE \ | || (x)==PIX_FMT_YUV422P10BE \ | ||||
| @@ -574,6 +576,7 @@ const char *sws_format_name(enum PixelFormat format); | |||||
| #define isPlanarYUV(x) ( \ | #define isPlanarYUV(x) ( \ | ||||
| isPlanar8YUV(x) \ | isPlanar8YUV(x) \ | ||||
| || (x)==PIX_FMT_YUV420P9LE \ | || (x)==PIX_FMT_YUV420P9LE \ | ||||
| || (x)==PIX_FMT_YUV422P9LE \ | |||||
| || (x)==PIX_FMT_YUV444P9LE \ | || (x)==PIX_FMT_YUV444P9LE \ | ||||
| || (x)==PIX_FMT_YUV420P10LE \ | || (x)==PIX_FMT_YUV420P10LE \ | ||||
| || (x)==PIX_FMT_YUV422P10LE \ | || (x)==PIX_FMT_YUV422P10LE \ | ||||
| @@ -583,6 +586,7 @@ const char *sws_format_name(enum PixelFormat format); | |||||
| || (x)==PIX_FMT_YUV422P16LE \ | || (x)==PIX_FMT_YUV422P16LE \ | ||||
| || (x)==PIX_FMT_YUV444P16LE \ | || (x)==PIX_FMT_YUV444P16LE \ | ||||
| || (x)==PIX_FMT_YUV420P9BE \ | || (x)==PIX_FMT_YUV420P9BE \ | ||||
| || (x)==PIX_FMT_YUV422P9BE \ | |||||
| || (x)==PIX_FMT_YUV444P9BE \ | || (x)==PIX_FMT_YUV444P9BE \ | ||||
| || (x)==PIX_FMT_YUV420P10BE \ | || (x)==PIX_FMT_YUV420P10BE \ | ||||
| || (x)==PIX_FMT_YUV422P10BE \ | || (x)==PIX_FMT_YUV422P10BE \ | ||||
| @@ -136,6 +136,8 @@ const static FormatEntry format_entries[PIX_FMT_NB] = { | |||||
| [PIX_FMT_YUV420P9LE] = { 1 , 1 }, | [PIX_FMT_YUV420P9LE] = { 1 , 1 }, | ||||
| [PIX_FMT_YUV420P10BE] = { 1 , 1 }, | [PIX_FMT_YUV420P10BE] = { 1 , 1 }, | ||||
| [PIX_FMT_YUV420P10LE] = { 1 , 1 }, | [PIX_FMT_YUV420P10LE] = { 1 , 1 }, | ||||
| [PIX_FMT_YUV422P9BE] = { 1 , 1 }, | |||||
| [PIX_FMT_YUV422P9LE] = { 1 , 1 }, | |||||
| [PIX_FMT_YUV422P10BE] = { 1 , 1 }, | [PIX_FMT_YUV422P10BE] = { 1 , 1 }, | ||||
| [PIX_FMT_YUV422P10LE] = { 1 , 1 }, | [PIX_FMT_YUV422P10LE] = { 1 , 1 }, | ||||
| [PIX_FMT_YUV444P9BE] = { 1 , 1 }, | [PIX_FMT_YUV444P9BE] = { 1 , 1 }, | ||||
| @@ -280,15 +282,18 @@ static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSi | |||||
| if (flags & SWS_BICUBIC) { | if (flags & SWS_BICUBIC) { | ||||
| int64_t B= (param[0] != SWS_PARAM_DEFAULT ? param[0] : 0) * (1<<24); | int64_t B= (param[0] != SWS_PARAM_DEFAULT ? param[0] : 0) * (1<<24); | ||||
| int64_t C= (param[1] != SWS_PARAM_DEFAULT ? param[1] : 0.6) * (1<<24); | int64_t C= (param[1] != SWS_PARAM_DEFAULT ? param[1] : 0.6) * (1<<24); | ||||
| int64_t dd = ( d*d)>>30; | |||||
| int64_t ddd= (dd*d)>>30; | |||||
| if (d < 1LL<<30) | |||||
| coeff = (12*(1<<24)-9*B-6*C)*ddd + (-18*(1<<24)+12*B+6*C)*dd + (6*(1<<24)-2*B)*(1<<30); | |||||
| else if (d < 1LL<<31) | |||||
| coeff = (-B-6*C)*ddd + (6*B+30*C)*dd + (-12*B-48*C)*d + (8*B+24*C)*(1<<30); | |||||
| else | |||||
| coeff=0.0; | |||||
| if (d >= 1LL<<31) { | |||||
| coeff = 0.0; | |||||
| } else { | |||||
| int64_t dd = (d * d) >> 30; | |||||
| int64_t ddd = (dd * d) >> 30; | |||||
| if (d < 1LL<<30) | |||||
| coeff = (12*(1<<24)-9*B-6*C)*ddd + (-18*(1<<24)+12*B+6*C)*dd + (6*(1<<24)-2*B)*(1<<30); | |||||
| else | |||||
| coeff = (-B-6*C)*ddd + (6*B+30*C)*dd + (-12*B-48*C)*d + (8*B+24*C)*(1<<30); | |||||
| } | |||||
| coeff *= fone>>(30+24); | coeff *= fone>>(30+24); | ||||
| } | } | ||||
| /* else if (flags & SWS_X) { | /* else if (flags & SWS_X) { | ||||
| @@ -790,8 +790,8 @@ av_cold int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4], int | |||||
| y_table32 = c->yuvTable; | y_table32 = c->yuvTable; | ||||
| yb = -(384<<16) - oy; | yb = -(384<<16) - oy; | ||||
| for (i = 0; i < 1024; i++) { | for (i = 0; i < 1024; i++) { | ||||
| uint8_t yval = av_clip_uint8((yb + 0x8000) >> 16); | |||||
| y_table32[i ] = (yval << rbase) + (needAlpha ? 0 : (255 << abase)); | |||||
| unsigned yval = av_clip_uint8((yb + 0x8000) >> 16); | |||||
| y_table32[i ] = (yval << rbase) + (needAlpha ? 0 : (255u << abase)); | |||||
| y_table32[i+1024] = yval << gbase; | y_table32[i+1024] = yval << gbase; | ||||
| y_table32[i+2048] = yval << bbase; | y_table32[i+2048] = yval << bbase; | ||||
| yb += cy; | yb += cy; | ||||
| @@ -42,6 +42,8 @@ yuv422p10be bdc13b630fd668b34c6fe1aae28dfc71 | |||||
| yuv422p10le d0607c260a45c973e6639f4e449730ad | yuv422p10le d0607c260a45c973e6639f4e449730ad | ||||
| yuv422p16be 4e9b3b3467aeebb6a528cee5966800ed | yuv422p16be 4e9b3b3467aeebb6a528cee5966800ed | ||||
| yuv422p16le f87c81bf16916b64d201359be0b4b6f4 | yuv422p16le f87c81bf16916b64d201359be0b4b6f4 | ||||
| yuv422p9be 29b71579946940a8c00fa844c9dff507 | |||||
| yuv422p9le 062b7f9cbb972bf36b5bdb1a7623701a | |||||
| yuv440p 5a064afe2b453bb52cdb3f176b1aa1cf | yuv440p 5a064afe2b453bb52cdb3f176b1aa1cf | ||||
| yuv444p 0a98447b78fd476aa39686da6a74fa2e | yuv444p 0a98447b78fd476aa39686da6a74fa2e | ||||
| yuv444p10be e65cbae7e4f1892c23defbc8e8052cf6 | yuv444p10be e65cbae7e4f1892c23defbc8e8052cf6 | ||||
| @@ -42,6 +42,8 @@ yuv422p10be bdc13b630fd668b34c6fe1aae28dfc71 | |||||
| yuv422p10le d0607c260a45c973e6639f4e449730ad | yuv422p10le d0607c260a45c973e6639f4e449730ad | ||||
| yuv422p16be 4e9b3b3467aeebb6a528cee5966800ed | yuv422p16be 4e9b3b3467aeebb6a528cee5966800ed | ||||
| yuv422p16le f87c81bf16916b64d201359be0b4b6f4 | yuv422p16le f87c81bf16916b64d201359be0b4b6f4 | ||||
| yuv422p9be 29b71579946940a8c00fa844c9dff507 | |||||
| yuv422p9le 062b7f9cbb972bf36b5bdb1a7623701a | |||||
| yuv440p 5a064afe2b453bb52cdb3f176b1aa1cf | yuv440p 5a064afe2b453bb52cdb3f176b1aa1cf | ||||
| yuv444p 0a98447b78fd476aa39686da6a74fa2e | yuv444p 0a98447b78fd476aa39686da6a74fa2e | ||||
| yuv444p10be e65cbae7e4f1892c23defbc8e8052cf6 | yuv444p10be e65cbae7e4f1892c23defbc8e8052cf6 | ||||
| @@ -42,6 +42,8 @@ yuv422p10be bdc13b630fd668b34c6fe1aae28dfc71 | |||||
| yuv422p10le d0607c260a45c973e6639f4e449730ad | yuv422p10le d0607c260a45c973e6639f4e449730ad | ||||
| yuv422p16be 4e9b3b3467aeebb6a528cee5966800ed | yuv422p16be 4e9b3b3467aeebb6a528cee5966800ed | ||||
| yuv422p16le f87c81bf16916b64d201359be0b4b6f4 | yuv422p16le f87c81bf16916b64d201359be0b4b6f4 | ||||
| yuv422p9be 29b71579946940a8c00fa844c9dff507 | |||||
| yuv422p9le 062b7f9cbb972bf36b5bdb1a7623701a | |||||
| yuv440p 5a064afe2b453bb52cdb3f176b1aa1cf | yuv440p 5a064afe2b453bb52cdb3f176b1aa1cf | ||||
| yuv444p 0a98447b78fd476aa39686da6a74fa2e | yuv444p 0a98447b78fd476aa39686da6a74fa2e | ||||
| yuv444p10be e65cbae7e4f1892c23defbc8e8052cf6 | yuv444p10be e65cbae7e4f1892c23defbc8e8052cf6 | ||||
| @@ -42,6 +42,8 @@ yuv422p10be cea7ca6b0e66d6f29539885896c88603 | |||||
| yuv422p10le a10c4a5837547716f13cd61918b145f9 | yuv422p10le a10c4a5837547716f13cd61918b145f9 | ||||
| yuv422p16be 285993ee0c0f4f8e511ee46f93c5f38c | yuv422p16be 285993ee0c0f4f8e511ee46f93c5f38c | ||||
| yuv422p16le 61bfcee8e54465f760164f5a75d40b5e | yuv422p16le 61bfcee8e54465f760164f5a75d40b5e | ||||
| yuv422p9be 82494823944912f73cebc58ad2979bbd | |||||
| yuv422p9le fc69c8a21f473916a4b4225636b97e06 | |||||
| yuv440p 461503fdb9b90451020aa3b25ddf041c | yuv440p 461503fdb9b90451020aa3b25ddf041c | ||||
| yuv444p 81b2eba962d12e8d64f003ac56f6faf2 | yuv444p 81b2eba962d12e8d64f003ac56f6faf2 | ||||
| yuv444p10be e9d3c8e744b8b0d8187ca092fa203fc9 | yuv444p10be e9d3c8e744b8b0d8187ca092fa203fc9 | ||||
| @@ -42,6 +42,8 @@ yuv422p10be 588fe319b96513c32e21d3e32b45447f | |||||
| yuv422p10le 11b57f2bd9661024153f3973b9090cdb | yuv422p10le 11b57f2bd9661024153f3973b9090cdb | ||||
| yuv422p16be c092d083548c2a144c372a98c46875c7 | yuv422p16be c092d083548c2a144c372a98c46875c7 | ||||
| yuv422p16le c071b9397a416d51cbe339345cbcba84 | yuv422p16le c071b9397a416d51cbe339345cbcba84 | ||||
| yuv422p9be 7c6f1e140b3999ee7d923854e507752a | |||||
| yuv422p9le 51f10d79c07989060dd06e767e6d7d60 | |||||
| yuv440p 876385e96165acf51271b20e5d85a416 | yuv440p 876385e96165acf51271b20e5d85a416 | ||||
| yuv444p 9c3c667d1613b72d15bc6d851c5eb8f7 | yuv444p 9c3c667d1613b72d15bc6d851c5eb8f7 | ||||
| yuv444p10be 944a4997c4edb3a8dd0f0493cfd5a1fd | yuv444p10be 944a4997c4edb3a8dd0f0493cfd5a1fd | ||||