* qatar/master: get_bits: remove x86 inline asm in A32 bitstream reader doc: Remove outdated information about our issue tracker avidec: Factor out the sync functionality. fate-aac: Expand coverage. ac3dsp: add x86-optimized versions of ac3dsp.extract_exponents(). ac3dsp: simplify extract_exponents() now that it does not need to do clipping. ac3enc: clip coefficients after MDCT. ac3enc: add int32_t array clipping function to DSPUtil, including x86 versions. swscale: for >8bit scaling, read in native bit-depth. matroskadec: matroska_read_seek after EBML_STOP leads to failure. doxygen: fix usage of @file directive in libavutil/{dict,file}.h doxygen: Help doxygen parser to understand the DECLARE_ALIGNED and offsetof macros Conflicts: doc/issue_tracker.txt libavformat/avidec.c libavutil/dict.h libswscale/swscale.c libswscale/utils.c tests/ref/lavfi/pixfmts_scale Merged-by: Michael Niedermayer <michaelni@gmx.at> tags/n0.9
| @@ -1160,6 +1160,7 @@ INCLUDE_FILE_PATTERNS = | |||
| PREDEFINED = __attribute__(x)="" "RENAME(x)=x ## _TMPL" "DEF(x)=x ## _TMPL" \ | |||
| HAVE_AV_CONFIG_H HAVE_MMX HAVE_MMX2 HAVE_AMD3DNOW \ | |||
| "DECLARE_ALIGNED(a,t,n)=t n" "offsetof(x,y)=0x42" \ | |||
| # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then | |||
| # this tag can be used to specify a list of macro names that should be expanded. | |||
| @@ -164,21 +164,8 @@ static void ac3_extract_exponents_c(uint8_t *exp, int32_t *coef, int nb_coefs) | |||
| int i; | |||
| for (i = 0; i < nb_coefs; i++) { | |||
| int e; | |||
| int v = abs(coef[i]); | |||
| if (v == 0) | |||
| e = 24; | |||
| else { | |||
| e = 23 - av_log2(v); | |||
| if (e >= 24) { | |||
| e = 24; | |||
| coef[i] = 0; | |||
| } else if (e < 0) { | |||
| e = 0; | |||
| coef[i] = av_clip(coef[i], -16777215, 16777215); | |||
| } | |||
| } | |||
| exp[i] = e; | |||
| exp[i] = v ? 23 - av_log2(v) : 24; | |||
| } | |||
| } | |||
| @@ -50,12 +50,16 @@ | |||
| #if CONFIG_AC3ENC_FLOAT | |||
| #define AC3_NAME(x) ff_ac3_float_ ## x | |||
| #define MAC_COEF(d,a,b) ((d)+=(a)*(b)) | |||
| #define COEF_MIN (-16777215.0/16777216.0) | |||
| #define COEF_MAX ( 16777215.0/16777216.0) | |||
| typedef float SampleType; | |||
| typedef float CoefType; | |||
| typedef float CoefSumType; | |||
| #else | |||
| #define AC3_NAME(x) ff_ac3_fixed_ ## x | |||
| #define MAC_COEF(d,a,b) MAC64(d,a,b) | |||
| #define COEF_MIN -16777215 | |||
| #define COEF_MAX 16777215 | |||
| typedef int16_t SampleType; | |||
| typedef int32_t CoefType; | |||
| typedef int64_t CoefSumType; | |||
| @@ -104,6 +104,15 @@ static void scale_coefficients(AC3EncodeContext *s) | |||
| } | |||
| /** | |||
| * Clip MDCT coefficients to allowable range. | |||
| */ | |||
| static void clip_coefficients(DSPContext *dsp, int32_t *coef, unsigned int len) | |||
| { | |||
| dsp->vector_clip_int32(coef, coef, COEF_MIN, COEF_MAX, len); | |||
| } | |||
| static av_cold int ac3_fixed_encode_init(AVCodecContext *avctx) | |||
| { | |||
| AC3EncodeContext *s = avctx->priv_data; | |||
| @@ -111,6 +111,15 @@ static void scale_coefficients(AC3EncodeContext *s) | |||
| } | |||
| /** | |||
| * Clip MDCT coefficients to allowable range. | |||
| */ | |||
| static void clip_coefficients(DSPContext *dsp, float *coef, unsigned int len) | |||
| { | |||
| dsp->vector_clipf(coef, coef, COEF_MIN, COEF_MAX, len); | |||
| } | |||
| #if CONFIG_AC3_ENCODER | |||
| AVCodec ff_ac3_float_encoder = { | |||
| "ac3_float", | |||
| @@ -41,6 +41,8 @@ static void apply_window(DSPContext *dsp, SampleType *output, | |||
| static int normalize_samples(AC3EncodeContext *s); | |||
| static void clip_coefficients(DSPContext *dsp, CoefType *coef, unsigned int len); | |||
| int AC3_NAME(allocate_sample_buffers)(AC3EncodeContext *s) | |||
| { | |||
| @@ -171,8 +173,8 @@ static void apply_channel_coupling(AC3EncodeContext *s) | |||
| cpl_coef[i] += ch_coef[i]; | |||
| } | |||
| /* coefficients must be clipped to +/- 1.0 in order to be encoded */ | |||
| s->dsp.vector_clipf(cpl_coef, cpl_coef, -1.0f, 1.0f, num_cpl_coefs); | |||
| /* coefficients must be clipped in order to be encoded */ | |||
| clip_coefficients(&s->dsp, cpl_coef, num_cpl_coefs); | |||
| /* scale coupling coefficients from float to 24-bit fixed-point */ | |||
| s->ac3dsp.float_to_fixed24(&block->fixed_coef[CPL_CH][cpl_start], | |||
| @@ -300,6 +302,7 @@ static void apply_channel_coupling(AC3EncodeContext *s) | |||
| if (!block->cpl_in_use || !block->new_cpl_coords) | |||
| continue; | |||
| clip_coefficients(&s->dsp, cpl_coords[blk][1], s->fbw_channels * 16); | |||
| s->ac3dsp.float_to_fixed24(fixed_cpl_coords[blk][1], | |||
| cpl_coords[blk][1], | |||
| s->fbw_channels * 16); | |||
| @@ -433,7 +436,11 @@ int AC3_NAME(encode_frame)(AVCodecContext *avctx, unsigned char *frame, | |||
| apply_mdct(s); | |||
| scale_coefficients(s); | |||
| if (s->fixed_point) | |||
| scale_coefficients(s); | |||
| clip_coefficients(&s->dsp, s->blocks[0].mdct_coef[1], | |||
| AC3_MAX_COEFS * AC3_MAX_BLOCKS * s->channels); | |||
| s->cpl_on = s->cpl_enabled; | |||
| ff_ac3_compute_coupling_strategy(s); | |||
| @@ -443,6 +450,9 @@ int AC3_NAME(encode_frame)(AVCodecContext *avctx, unsigned char *frame, | |||
| compute_rematrixing_strategy(s); | |||
| if (!s->fixed_point) | |||
| scale_coefficients(s); | |||
| ff_ac3_apply_rematrixing(s); | |||
| ff_ac3_process_exponents(s); | |||
| @@ -2664,6 +2664,22 @@ static void apply_window_int16_c(int16_t *output, const int16_t *input, | |||
| } | |||
| } | |||
| static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min, | |||
| int32_t max, unsigned int len) | |||
| { | |||
| do { | |||
| *dst++ = av_clip(*src++, min, max); | |||
| *dst++ = av_clip(*src++, min, max); | |||
| *dst++ = av_clip(*src++, min, max); | |||
| *dst++ = av_clip(*src++, min, max); | |||
| *dst++ = av_clip(*src++, min, max); | |||
| *dst++ = av_clip(*src++, min, max); | |||
| *dst++ = av_clip(*src++, min, max); | |||
| *dst++ = av_clip(*src++, min, max); | |||
| len -= 8; | |||
| } while (len > 0); | |||
| } | |||
| #define W0 2048 | |||
| #define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */ | |||
| #define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */ | |||
| @@ -3106,6 +3122,7 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx) | |||
| c->scalarproduct_int16 = scalarproduct_int16_c; | |||
| c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c; | |||
| c->apply_window_int16 = apply_window_int16_c; | |||
| c->vector_clip_int32 = vector_clip_int32_c; | |||
| c->scalarproduct_float = scalarproduct_float_c; | |||
| c->butterflies_float = butterflies_float_c; | |||
| c->vector_fmul_scalar = vector_fmul_scalar_c; | |||
| @@ -553,6 +553,22 @@ typedef struct DSPContext { | |||
| void (*apply_window_int16)(int16_t *output, const int16_t *input, | |||
| const int16_t *window, unsigned int len); | |||
| /** | |||
| * Clip each element in an array of int32_t to a given minimum and maximum value. | |||
| * @param dst destination array | |||
| * constraints: 16-byte aligned | |||
| * @param src source array | |||
| * constraints: 16-byte aligned | |||
| * @param min minimum value | |||
| * constraints: must be in the range [-(1<<24), 1<<24] | |||
| * @param max maximum value | |||
| * constraints: must be in the range [-(1<<24), 1<<24] | |||
| * @param len number of elements in the array | |||
| * constraints: multiple of 32 greater than zero | |||
| */ | |||
| void (*vector_clip_int32)(int32_t *dst, const int32_t *src, int32_t min, | |||
| int32_t max, unsigned int len); | |||
| /* rv30 functions */ | |||
| qpel_mc_func put_rv30_tpel_pixels_tab[4][16]; | |||
| qpel_mc_func avg_rv30_tpel_pixels_tab[4][16]; | |||
| @@ -201,19 +201,11 @@ static inline void skip_bits_long(GetBitContext *s, int n){ | |||
| } \ | |||
| } while (0) | |||
| #if ARCH_X86 | |||
| # define SKIP_CACHE(name, gb, num) \ | |||
| __asm__("shldl %2, %1, %0 \n\t" \ | |||
| "shll %2, %1 \n\t" \ | |||
| : "+r" (name##_cache0), "+r" (name##_cache1) \ | |||
| : "Ic" ((uint8_t)(num))) | |||
| #else | |||
| # define SKIP_CACHE(name, gb, num) do { \ | |||
| name##_cache0 <<= (num); \ | |||
| name##_cache0 |= NEG_USR32(name##_cache1,num); \ | |||
| name##_cache1 <<= (num); \ | |||
| } while (0) | |||
| #endif | |||
| # define SKIP_COUNTER(name, gb, num) name##_bit_count += (num) | |||
| @@ -32,6 +32,11 @@ cextern ac3_bap_bits | |||
| pw_bap_mul1: dw 21846, 21846, 0, 32768, 21846, 21846, 0, 32768 | |||
| pw_bap_mul2: dw 5, 7, 0, 7, 5, 7, 0, 7 | |||
| ; used in ff_ac3_extract_exponents() | |||
| pd_1: times 4 dd 1 | |||
| pd_151: times 4 dd 151 | |||
| pb_shuf_4dwb: db 0, 4, 8, 12 | |||
| SECTION .text | |||
| ;----------------------------------------------------------------------------- | |||
| @@ -346,3 +351,100 @@ cglobal ac3_compute_mantissa_size_sse2, 1,2,4, mant_cnt, sum | |||
| movd eax, m0 | |||
| add eax, sumd | |||
| RET | |||
| ;------------------------------------------------------------------------------ | |||
| ; void ff_ac3_extract_exponents(uint8_t *exp, int32_t *coef, int nb_coefs) | |||
| ;------------------------------------------------------------------------------ | |||
| %macro PABSD_MMX 2 ; src/dst, tmp | |||
| pxor %2, %2 | |||
| pcmpgtd %2, %1 | |||
| pxor %1, %2 | |||
| psubd %1, %2 | |||
| %endmacro | |||
| %macro PABSD_SSSE3 1-2 ; src/dst, unused | |||
| pabsd %1, %1 | |||
| %endmacro | |||
| %ifdef HAVE_AMD3DNOW | |||
| INIT_MMX | |||
| cglobal ac3_extract_exponents_3dnow, 3,3,0, exp, coef, len | |||
| add expq, lenq | |||
| lea coefq, [coefq+4*lenq] | |||
| neg lenq | |||
| movq m3, [pd_1] | |||
| movq m4, [pd_151] | |||
| .loop: | |||
| movq m0, [coefq+4*lenq ] | |||
| movq m1, [coefq+4*lenq+8] | |||
| PABSD_MMX m0, m2 | |||
| PABSD_MMX m1, m2 | |||
| pslld m0, 1 | |||
| por m0, m3 | |||
| pi2fd m2, m0 | |||
| psrld m2, 23 | |||
| movq m0, m4 | |||
| psubd m0, m2 | |||
| pslld m1, 1 | |||
| por m1, m3 | |||
| pi2fd m2, m1 | |||
| psrld m2, 23 | |||
| movq m1, m4 | |||
| psubd m1, m2 | |||
| packssdw m0, m0 | |||
| packuswb m0, m0 | |||
| packssdw m1, m1 | |||
| packuswb m1, m1 | |||
| punpcklwd m0, m1 | |||
| movd [expq+lenq], m0 | |||
| add lenq, 4 | |||
| jl .loop | |||
| REP_RET | |||
| %endif | |||
| %macro AC3_EXTRACT_EXPONENTS 1 | |||
| cglobal ac3_extract_exponents_%1, 3,3,5, exp, coef, len | |||
| add expq, lenq | |||
| lea coefq, [coefq+4*lenq] | |||
| neg lenq | |||
| mova m2, [pd_1] | |||
| mova m3, [pd_151] | |||
| %ifidn %1, ssse3 ; | |||
| movd m4, [pb_shuf_4dwb] | |||
| %endif | |||
| .loop: | |||
| ; move 4 32-bit coefs to xmm0 | |||
| mova m0, [coefq+4*lenq] | |||
| ; absolute value | |||
| PABSD m0, m1 | |||
| ; convert to float and extract exponents | |||
| pslld m0, 1 | |||
| por m0, m2 | |||
| cvtdq2ps m1, m0 | |||
| psrld m1, 23 | |||
| mova m0, m3 | |||
| psubd m0, m1 | |||
| ; move the lowest byte in each of 4 dwords to the low dword | |||
| %ifidn %1, ssse3 | |||
| pshufb m0, m4 | |||
| %else | |||
| packssdw m0, m0 | |||
| packuswb m0, m0 | |||
| %endif | |||
| movd [expq+lenq], m0 | |||
| add lenq, 4 | |||
| jl .loop | |||
| REP_RET | |||
| %endmacro | |||
| %ifdef HAVE_SSE | |||
| INIT_XMM | |||
| %define PABSD PABSD_MMX | |||
| AC3_EXTRACT_EXPONENTS sse2 | |||
| %ifdef HAVE_SSSE3 | |||
| %define PABSD PABSD_SSSE3 | |||
| AC3_EXTRACT_EXPONENTS ssse3 | |||
| %endif | |||
| %endif | |||
| @@ -44,6 +44,10 @@ extern void ff_float_to_fixed24_sse2 (int32_t *dst, const float *src, unsigned i | |||
| extern int ff_ac3_compute_mantissa_size_sse2(uint16_t mant_cnt[6][16]); | |||
| extern void ff_ac3_extract_exponents_3dnow(uint8_t *exp, int32_t *coef, int nb_coefs); | |||
| extern void ff_ac3_extract_exponents_sse2 (uint8_t *exp, int32_t *coef, int nb_coefs); | |||
| extern void ff_ac3_extract_exponents_ssse3(uint8_t *exp, int32_t *coef, int nb_coefs); | |||
| av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact) | |||
| { | |||
| int mm_flags = av_get_cpu_flags(); | |||
| @@ -56,6 +60,7 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact) | |||
| c->ac3_rshift_int32 = ff_ac3_rshift_int32_mmx; | |||
| } | |||
| if (mm_flags & AV_CPU_FLAG_3DNOW && HAVE_AMD3DNOW) { | |||
| c->extract_exponents = ff_ac3_extract_exponents_3dnow; | |||
| if (!bit_exact) { | |||
| c->float_to_fixed24 = ff_float_to_fixed24_3dnow; | |||
| } | |||
| @@ -72,6 +77,7 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact) | |||
| c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_sse2; | |||
| c->float_to_fixed24 = ff_float_to_fixed24_sse2; | |||
| c->compute_mantissa_size = ff_ac3_compute_mantissa_size_sse2; | |||
| c->extract_exponents = ff_ac3_extract_exponents_sse2; | |||
| if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) { | |||
| c->ac3_lshift_int16 = ff_ac3_lshift_int16_sse2; | |||
| c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2; | |||
| @@ -79,6 +85,9 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact) | |||
| } | |||
| if (mm_flags & AV_CPU_FLAG_SSSE3 && HAVE_SSSE3) { | |||
| c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_ssse3; | |||
| if (!(mm_flags & AV_CPU_FLAG_ATOM)) { | |||
| c->extract_exponents = ff_ac3_extract_exponents_ssse3; | |||
| } | |||
| } | |||
| #endif | |||
| } | |||
| @@ -2333,6 +2333,15 @@ int ff_add_hfyu_left_prediction_sse4(uint8_t *dst, const uint8_t *src, int w, i | |||
| float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order); | |||
| void ff_vector_clip_int32_mmx (int32_t *dst, const int32_t *src, int32_t min, | |||
| int32_t max, unsigned int len); | |||
| void ff_vector_clip_int32_sse2 (int32_t *dst, const int32_t *src, int32_t min, | |||
| int32_t max, unsigned int len); | |||
| void ff_vector_clip_int32_sse2_int(int32_t *dst, const int32_t *src, int32_t min, | |||
| int32_t max, unsigned int len); | |||
| void ff_vector_clip_int32_sse41 (int32_t *dst, const int32_t *src, int32_t min, | |||
| int32_t max, unsigned int len); | |||
| void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) | |||
| { | |||
| int mm_flags = av_get_cpu_flags(); | |||
| @@ -2473,6 +2482,8 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) | |||
| c->put_rv40_chroma_pixels_tab[0]= ff_put_rv40_chroma_mc8_mmx; | |||
| c->put_rv40_chroma_pixels_tab[1]= ff_put_rv40_chroma_mc4_mmx; | |||
| c->vector_clip_int32 = ff_vector_clip_int32_mmx; | |||
| #endif | |||
| if (mm_flags & AV_CPU_FLAG_MMX2) { | |||
| @@ -2756,6 +2767,11 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) | |||
| #if HAVE_YASM | |||
| c->scalarproduct_int16 = ff_scalarproduct_int16_sse2; | |||
| c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_sse2; | |||
| if (mm_flags & AV_CPU_FLAG_ATOM) { | |||
| c->vector_clip_int32 = ff_vector_clip_int32_sse2_int; | |||
| } else { | |||
| c->vector_clip_int32 = ff_vector_clip_int32_sse2; | |||
| } | |||
| if (avctx->flags & CODEC_FLAG_BITEXACT) { | |||
| c->apply_window_int16 = ff_apply_window_int16_sse2_ba; | |||
| } else { | |||
| @@ -2781,6 +2797,13 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) | |||
| } | |||
| #endif | |||
| } | |||
| if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) { | |||
| #if HAVE_YASM | |||
| c->vector_clip_int32 = ff_vector_clip_int32_sse41; | |||
| #endif | |||
| } | |||
| #if HAVE_AVX && HAVE_YASM | |||
| if (mm_flags & AV_CPU_FLAG_AVX) { | |||
| if (bit_depth == 10) { | |||
| @@ -1048,3 +1048,118 @@ emu_edge sse | |||
| %ifdef ARCH_X86_32 | |||
| emu_edge mmx | |||
| %endif | |||
| ;----------------------------------------------------------------------------- | |||
| ; void ff_vector_clip_int32(int32_t *dst, const int32_t *src, int32_t min, | |||
| ; int32_t max, unsigned int len) | |||
| ;----------------------------------------------------------------------------- | |||
| %macro PMINSD_MMX 3 ; dst, src, tmp | |||
| mova %3, %2 | |||
| pcmpgtd %3, %1 | |||
| pxor %1, %2 | |||
| pand %1, %3 | |||
| pxor %1, %2 | |||
| %endmacro | |||
| %macro PMAXSD_MMX 3 ; dst, src, tmp | |||
| mova %3, %1 | |||
| pcmpgtd %3, %2 | |||
| pand %1, %3 | |||
| pandn %3, %2 | |||
| por %1, %3 | |||
| %endmacro | |||
| %macro CLIPD_MMX 3-4 ; src/dst, min, max, tmp | |||
| PMINSD_MMX %1, %3, %4 | |||
| PMAXSD_MMX %1, %2, %4 | |||
| %endmacro | |||
| %macro CLIPD_SSE2 3-4 ; src/dst, min (float), max (float), unused | |||
| cvtdq2ps %1, %1 | |||
| minps %1, %3 | |||
| maxps %1, %2 | |||
| cvtps2dq %1, %1 | |||
| %endmacro | |||
| %macro CLIPD_SSE41 3-4 ; src/dst, min, max, unused | |||
| pminsd %1, %3 | |||
| pmaxsd %1, %2 | |||
| %endmacro | |||
| %macro SPLATD_MMX 1 | |||
| punpckldq %1, %1 | |||
| %endmacro | |||
| %macro SPLATD_SSE2 1 | |||
| pshufd %1, %1, 0 | |||
| %endmacro | |||
| %macro VECTOR_CLIP_INT32 4 | |||
| cglobal vector_clip_int32_%1, 5,5,%2, dst, src, min, max, len | |||
| %ifidn %1, sse2 | |||
| cvtsi2ss m4, minm | |||
| cvtsi2ss m5, maxm | |||
| %else | |||
| movd m4, minm | |||
| movd m5, maxm | |||
| %endif | |||
| SPLATD m4 | |||
| SPLATD m5 | |||
| .loop: | |||
| %assign %%i 1 | |||
| %rep %3 | |||
| mova m0, [srcq+mmsize*0*%%i] | |||
| mova m1, [srcq+mmsize*1*%%i] | |||
| mova m2, [srcq+mmsize*2*%%i] | |||
| mova m3, [srcq+mmsize*3*%%i] | |||
| %if %4 | |||
| mova m7, [srcq+mmsize*4*%%i] | |||
| mova m8, [srcq+mmsize*5*%%i] | |||
| mova m9, [srcq+mmsize*6*%%i] | |||
| mova m10, [srcq+mmsize*7*%%i] | |||
| %endif | |||
| CLIPD m0, m4, m5, m6 | |||
| CLIPD m1, m4, m5, m6 | |||
| CLIPD m2, m4, m5, m6 | |||
| CLIPD m3, m4, m5, m6 | |||
| %if %4 | |||
| CLIPD m7, m4, m5, m6 | |||
| CLIPD m8, m4, m5, m6 | |||
| CLIPD m9, m4, m5, m6 | |||
| CLIPD m10, m4, m5, m6 | |||
| %endif | |||
| mova [dstq+mmsize*0*%%i], m0 | |||
| mova [dstq+mmsize*1*%%i], m1 | |||
| mova [dstq+mmsize*2*%%i], m2 | |||
| mova [dstq+mmsize*3*%%i], m3 | |||
| %if %4 | |||
| mova [dstq+mmsize*4*%%i], m7 | |||
| mova [dstq+mmsize*5*%%i], m8 | |||
| mova [dstq+mmsize*6*%%i], m9 | |||
| mova [dstq+mmsize*7*%%i], m10 | |||
| %endif | |||
| %assign %%i %%i+1 | |||
| %endrep | |||
| add srcq, mmsize*4*(%3+%4) | |||
| add dstq, mmsize*4*(%3+%4) | |||
| sub lend, mmsize*(%3+%4) | |||
| jg .loop | |||
| REP_RET | |||
| %endmacro | |||
| INIT_MMX | |||
| %define SPLATD SPLATD_MMX | |||
| %define CLIPD CLIPD_MMX | |||
| VECTOR_CLIP_INT32 mmx, 0, 1, 0 | |||
| INIT_XMM | |||
| %define SPLATD SPLATD_SSE2 | |||
| VECTOR_CLIP_INT32 sse2_int, 6, 1, 0 | |||
| %define CLIPD CLIPD_SSE2 | |||
| VECTOR_CLIP_INT32 sse2, 6, 2, 0 | |||
| %define CLIPD CLIPD_SSE41 | |||
| %ifdef m8 | |||
| VECTOR_CLIP_INT32 sse41, 11, 1, 1 | |||
| %else | |||
| VECTOR_CLIP_INT32 sse41, 6, 1, 0 | |||
| %endif | |||
| @@ -861,13 +861,137 @@ static int get_stream_idx(int *d){ | |||
| } | |||
| } | |||
| static int avi_read_packet(AVFormatContext *s, AVPacket *pkt) | |||
| static int avi_sync(AVFormatContext *s) | |||
| { | |||
| AVIContext *avi = s->priv_data; | |||
| AVIOContext *pb = s->pb; | |||
| int n, d[8]; | |||
| unsigned int size; | |||
| int64_t i, sync; | |||
| start_sync: | |||
| memset(d, -1, sizeof(int)*8); | |||
| for(i=sync=avio_tell(pb); !url_feof(pb); i++) { | |||
| int j; | |||
| for(j=0; j<7; j++) | |||
| d[j]= d[j+1]; | |||
| d[7]= avio_r8(pb); | |||
| size= d[4] + (d[5]<<8) + (d[6]<<16) + (d[7]<<24); | |||
| n= get_stream_idx(d+2); | |||
| //av_log(s, AV_LOG_DEBUG, "%X %X %X %X %X %X %X %X %"PRId64" %d %d\n", d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7], i, size, n); | |||
| if(i + (uint64_t)size > avi->fsize || d[0]<0) | |||
| continue; | |||
| //parse ix## | |||
| if( (d[0] == 'i' && d[1] == 'x' && n < s->nb_streams) | |||
| //parse JUNK | |||
| ||(d[0] == 'J' && d[1] == 'U' && d[2] == 'N' && d[3] == 'K') | |||
| ||(d[0] == 'i' && d[1] == 'd' && d[2] == 'x' && d[3] == '1')){ | |||
| avio_skip(pb, size); | |||
| //av_log(s, AV_LOG_DEBUG, "SKIP\n"); | |||
| goto start_sync; | |||
| } | |||
| //parse stray LIST | |||
| if(d[0] == 'L' && d[1] == 'I' && d[2] == 'S' && d[3] == 'T'){ | |||
| avio_skip(pb, 4); | |||
| goto start_sync; | |||
| } | |||
| n= get_stream_idx(d); | |||
| if(!((i-avi->last_pkt_pos)&1) && get_stream_idx(d+1) < s->nb_streams) | |||
| continue; | |||
| //detect ##ix chunk and skip | |||
| if(d[2] == 'i' && d[3] == 'x' && n < s->nb_streams){ | |||
| avio_skip(pb, size); | |||
| goto start_sync; | |||
| } | |||
| //parse ##dc/##wb | |||
| if(n < s->nb_streams){ | |||
| AVStream *st; | |||
| AVIStream *ast; | |||
| st = s->streams[n]; | |||
| ast = st->priv_data; | |||
| if(s->nb_streams>=2){ | |||
| AVStream *st1 = s->streams[1]; | |||
| AVIStream *ast1= st1->priv_data; | |||
| //workaround for broken small-file-bug402.avi | |||
| if( d[2] == 'w' && d[3] == 'b' | |||
| && n==0 | |||
| && st ->codec->codec_type == AVMEDIA_TYPE_VIDEO | |||
| && st1->codec->codec_type == AVMEDIA_TYPE_AUDIO | |||
| && ast->prefix == 'd'*256+'c' | |||
| && (d[2]*256+d[3] == ast1->prefix || !ast1->prefix_count) | |||
| ){ | |||
| n=1; | |||
| st = st1; | |||
| ast = ast1; | |||
| av_log(s, AV_LOG_WARNING, "Invalid stream + prefix combination, assuming audio.\n"); | |||
| } | |||
| } | |||
| if( (st->discard >= AVDISCARD_DEFAULT && size==0) | |||
| /*|| (st->discard >= AVDISCARD_NONKEY && !(pkt->flags & AV_PKT_FLAG_KEY))*/ //FIXME needs a little reordering | |||
| || st->discard >= AVDISCARD_ALL){ | |||
| ast->frame_offset += get_duration(ast, size); | |||
| avio_skip(pb, size); | |||
| goto start_sync; | |||
| } | |||
| if (d[2] == 'p' && d[3] == 'c' && size<=4*256+4) { | |||
| int k = avio_r8(pb); | |||
| int last = (k + avio_r8(pb) - 1) & 0xFF; | |||
| avio_rl16(pb); //flags | |||
| for (; k <= last; k++) | |||
| ast->pal[k] = avio_rb32(pb)>>8;// b + (g << 8) + (r << 16); | |||
| ast->has_pal= 1; | |||
| goto start_sync; | |||
| } else if( ((ast->prefix_count<5 || sync+9 > i) && d[2]<128 && d[3]<128) || | |||
| d[2]*256+d[3] == ast->prefix /*|| | |||
| (d[2] == 'd' && d[3] == 'c') || | |||
| (d[2] == 'w' && d[3] == 'b')*/) { | |||
| //av_log(s, AV_LOG_DEBUG, "OK\n"); | |||
| if(d[2]*256+d[3] == ast->prefix) | |||
| ast->prefix_count++; | |||
| else{ | |||
| ast->prefix= d[2]*256+d[3]; | |||
| ast->prefix_count= 0; | |||
| } | |||
| avi->stream_index= n; | |||
| ast->packet_size= size + 8; | |||
| ast->remaining= size; | |||
| if(size || !ast->sample_size){ | |||
| uint64_t pos= avio_tell(pb) - 8; | |||
| if(!st->index_entries || !st->nb_index_entries || st->index_entries[st->nb_index_entries - 1].pos < pos){ | |||
| av_add_index_entry(st, pos, ast->frame_offset, size, 0, AVINDEX_KEYFRAME); | |||
| } | |||
| } | |||
| return 0; | |||
| } | |||
| } | |||
| } | |||
| return AVERROR_EOF; | |||
| } | |||
| static int avi_read_packet(AVFormatContext *s, AVPacket *pkt) | |||
| { | |||
| AVIContext *avi = s->priv_data; | |||
| AVIOContext *pb = s->pb; | |||
| int err; | |||
| void* dstr; | |||
| if (CONFIG_DV_DEMUXER && avi->dv_demux) { | |||
| @@ -1041,121 +1165,9 @@ resync: | |||
| return size; | |||
| } | |||
| memset(d, -1, sizeof(int)*8); | |||
| for(i=sync=avio_tell(pb); !url_feof(pb); i++) { | |||
| int j; | |||
| for(j=0; j<7; j++) | |||
| d[j]= d[j+1]; | |||
| d[7]= avio_r8(pb); | |||
| size= d[4] + (d[5]<<8) + (d[6]<<16) + (d[7]<<24); | |||
| n= get_stream_idx(d+2); | |||
| //av_log(s, AV_LOG_DEBUG, "%X %X %X %X %X %X %X %X %"PRId64" %d %d\n", d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7], i, size, n); | |||
| if(i + (uint64_t)size > avi->fsize || d[0]<0) | |||
| continue; | |||
| //parse ix## | |||
| if( (d[0] == 'i' && d[1] == 'x' && n < s->nb_streams) | |||
| //parse JUNK | |||
| ||(d[0] == 'J' && d[1] == 'U' && d[2] == 'N' && d[3] == 'K') | |||
| ||(d[0] == 'i' && d[1] == 'd' && d[2] == 'x' && d[3] == '1')){ | |||
| avio_skip(pb, size); | |||
| //av_log(s, AV_LOG_DEBUG, "SKIP\n"); | |||
| goto resync; | |||
| } | |||
| //parse stray LIST | |||
| if(d[0] == 'L' && d[1] == 'I' && d[2] == 'S' && d[3] == 'T'){ | |||
| avio_skip(pb, 4); | |||
| goto resync; | |||
| } | |||
| n= get_stream_idx(d); | |||
| if(!((i-avi->last_pkt_pos)&1) && get_stream_idx(d+1) < s->nb_streams) | |||
| continue; | |||
| //detect ##ix chunk and skip | |||
| if(d[2] == 'i' && d[3] == 'x' && n < s->nb_streams){ | |||
| avio_skip(pb, size); | |||
| goto resync; | |||
| } | |||
| //parse ##dc/##wb | |||
| if(n < s->nb_streams){ | |||
| AVStream *st; | |||
| AVIStream *ast; | |||
| st = s->streams[n]; | |||
| ast = st->priv_data; | |||
| if(s->nb_streams>=2){ | |||
| AVStream *st1 = s->streams[1]; | |||
| AVIStream *ast1= st1->priv_data; | |||
| //workaround for broken small-file-bug402.avi | |||
| if( d[2] == 'w' && d[3] == 'b' | |||
| && n==0 | |||
| && st ->codec->codec_type == AVMEDIA_TYPE_VIDEO | |||
| && st1->codec->codec_type == AVMEDIA_TYPE_AUDIO | |||
| && ast->prefix == 'd'*256+'c' | |||
| && (d[2]*256+d[3] == ast1->prefix || !ast1->prefix_count) | |||
| ){ | |||
| n=1; | |||
| st = st1; | |||
| ast = ast1; | |||
| av_log(s, AV_LOG_WARNING, "Invalid stream + prefix combination, assuming audio.\n"); | |||
| } | |||
| } | |||
| if( (st->discard >= AVDISCARD_DEFAULT && size==0) | |||
| /*|| (st->discard >= AVDISCARD_NONKEY && !(pkt->flags & AV_PKT_FLAG_KEY))*/ //FIXME needs a little reordering | |||
| || st->discard >= AVDISCARD_ALL){ | |||
| ast->frame_offset += get_duration(ast, size); | |||
| avio_skip(pb, size); | |||
| goto resync; | |||
| } | |||
| if (d[2] == 'p' && d[3] == 'c' && size<=4*256+4) { | |||
| int k = avio_r8(pb); | |||
| int last = (k + avio_r8(pb) - 1) & 0xFF; | |||
| avio_rl16(pb); //flags | |||
| for (; k <= last; k++) | |||
| ast->pal[k] = avio_rb32(pb)>>8;// b + (g << 8) + (r << 16); | |||
| ast->has_pal= 1; | |||
| goto resync; | |||
| } else if( ((ast->prefix_count<5 || sync+9 > i) && d[2]<128 && d[3]<128) || | |||
| d[2]*256+d[3] == ast->prefix /*|| | |||
| (d[2] == 'd' && d[3] == 'c') || | |||
| (d[2] == 'w' && d[3] == 'b')*/) { | |||
| //av_log(s, AV_LOG_DEBUG, "OK\n"); | |||
| if(d[2]*256+d[3] == ast->prefix) | |||
| ast->prefix_count++; | |||
| else{ | |||
| ast->prefix= d[2]*256+d[3]; | |||
| ast->prefix_count= 0; | |||
| } | |||
| avi->stream_index= n; | |||
| ast->packet_size= size + 8; | |||
| ast->remaining= size; | |||
| if(size || !ast->sample_size){ | |||
| uint64_t pos= avio_tell(pb) - 8; | |||
| if(!st->index_entries || !st->nb_index_entries || st->index_entries[st->nb_index_entries - 1].pos < pos){ | |||
| av_add_index_entry(st, pos, ast->frame_offset, size, 0, AVINDEX_KEYFRAME); | |||
| } | |||
| } | |||
| goto resync; | |||
| } | |||
| } | |||
| } | |||
| return AVERROR_EOF; | |||
| if ((err = avi_sync(s)) < 0) | |||
| return err; | |||
| goto resync; | |||
| } | |||
| /* XXX: We make the implicit supposition that the positions are sorted | |||
| @@ -1960,6 +1960,7 @@ static int matroska_read_seek(AVFormatContext *s, int stream_index, | |||
| if ((index = av_index_search_timestamp(st, timestamp, flags)) < 0) { | |||
| avio_seek(s->pb, st->index_entries[st->nb_index_entries-1].pos, SEEK_SET); | |||
| matroska->current_id = 0; | |||
| while ((index = av_index_search_timestamp(st, timestamp, flags)) < 0) { | |||
| matroska_clear_queue(matroska); | |||
| if (matroska_parse_cluster(matroska) < 0) | |||
| @@ -1988,6 +1989,7 @@ static int matroska_read_seek(AVFormatContext *s, int stream_index, | |||
| } | |||
| avio_seek(s->pb, st->index_entries[index_min].pos, SEEK_SET); | |||
| matroska->current_id = 0; | |||
| matroska->skip_to_keyframe = !(flags & AVSEEK_FLAG_ANY); | |||
| matroska->skip_to_timecode = st->index_entries[index].timestamp; | |||
| matroska->done = 0; | |||
| @@ -18,7 +18,8 @@ | |||
| */ | |||
| /** | |||
| * @file Public dictionary API. | |||
| * @file | |||
| * Public dictionary API. | |||
| * @deprecated | |||
| * AVDictionary is provided for compatibility with libav. It is both in | |||
| * implementation as well as API inefficient. It does not scale and is | |||
| @@ -22,7 +22,8 @@ | |||
| #include "avutil.h" | |||
| /** | |||
| * @file misc file utilities | |||
| * @file | |||
| * Misc file utilities. | |||
| */ | |||
| /** | |||
| @@ -1783,53 +1783,6 @@ static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV, | |||
| #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos)) | |||
| // FIXME Maybe dither instead. | |||
| static av_always_inline void | |||
| yuv9_OR_10ToUV_c_template(uint8_t *dstU, uint8_t *dstV, | |||
| const uint8_t *_srcU, const uint8_t *_srcV, | |||
| int width, enum PixelFormat origin, int depth) | |||
| { | |||
| int i; | |||
| const uint16_t *srcU = (const uint16_t *) _srcU; | |||
| const uint16_t *srcV = (const uint16_t *) _srcV; | |||
| for (i = 0; i < width; i++) { | |||
| dstU[i] = input_pixel(&srcU[i]) >> (depth - 8); | |||
| dstV[i] = input_pixel(&srcV[i]) >> (depth - 8); | |||
| } | |||
| } | |||
| static av_always_inline void | |||
| yuv9_or_10ToY_c_template(uint8_t *dstY, const uint8_t *_srcY, | |||
| int width, enum PixelFormat origin, int depth) | |||
| { | |||
| int i; | |||
| const uint16_t *srcY = (const uint16_t*)_srcY; | |||
| for (i = 0; i < width; i++) | |||
| dstY[i] = input_pixel(&srcY[i]) >> (depth - 8); | |||
| } | |||
| #undef input_pixel | |||
| #define YUV_NBPS(depth, BE_LE, origin) \ | |||
| static void BE_LE ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \ | |||
| const uint8_t *srcU, const uint8_t *srcV, \ | |||
| int width, uint32_t *unused) \ | |||
| { \ | |||
| yuv9_OR_10ToUV_c_template(dstU, dstV, srcU, srcV, width, origin, depth); \ | |||
| } \ | |||
| static void BE_LE ## depth ## ToY_c(uint8_t *dstY, const uint8_t *srcY, \ | |||
| int width, uint32_t *unused) \ | |||
| { \ | |||
| yuv9_or_10ToY_c_template(dstY, srcY, width, origin, depth); \ | |||
| } | |||
| YUV_NBPS( 9, LE, PIX_FMT_YUV420P9LE); | |||
| YUV_NBPS( 9, BE, PIX_FMT_YUV420P9BE); | |||
| YUV_NBPS(10, LE, PIX_FMT_YUV420P10LE); | |||
| YUV_NBPS(10, BE, PIX_FMT_YUV420P10BE); | |||
| static void bgr24ToY_c(int16_t *dst, const uint8_t *src, | |||
| int width, uint32_t *unused) | |||
| { | |||
| @@ -2,10 +2,22 @@ FATE_AAC += fate-aac-al04_44 | |||
| fate-aac-al04_44: CMD = pcm -i $(SAMPLES)/aac/al04_44.mp4 | |||
| fate-aac-al04_44: REF = $(SAMPLES)/aac/al04_44.s16 | |||
| FATE_AAC += fate-aac-al05_44 | |||
| fate-aac-al05_44: CMD = pcm -i $(SAMPLES)/aac/al05_44.mp4 | |||
| fate-aac-al05_44: REF = $(SAMPLES)/aac/al05_44.s16 | |||
| FATE_AAC += fate-aac-al06_44 | |||
| fate-aac-al06_44: CMD = pcm -i $(SAMPLES)/aac/al06_44.mp4 | |||
| fate-aac-al06_44: REF = $(SAMPLES)/aac/al06_44.s16 | |||
| FATE_AAC += fate-aac-al07_96 | |||
| fate-aac-al07_96: CMD = pcm -i $(SAMPLES)/aac/al07_96.mp4 | |||
| fate-aac-al07_96: REF = $(SAMPLES)/aac/al07_96.s16 | |||
| FATE_AAC += fate-aac-al17_44 | |||
| fate-aac-al17_44: CMD = pcm -i $(SAMPLES)/aac/al17_44.mp4 | |||
| fate-aac-al17_44: REF = $(SAMPLES)/aac/al17_44.s16 | |||
| FATE_AAC += fate-aac-am00_88 | |||
| fate-aac-am00_88: CMD = pcm -i $(SAMPLES)/aac/am00_88.mp4 | |||
| fate-aac-am00_88: REF = $(SAMPLES)/aac/am00_88.s16 | |||