Originally committed as revision 11122 to svn://svn.ffmpeg.org/ffmpeg/trunk
@@ -325,15 +325,15 @@ static void predictor_decompress_fir_adapt(int32_t *error_buffer,

 #if 0
     /* 4 and 8 are very common cases (the only ones i've seen). these
-     * should be unrolled and optimised
+     * should be unrolled and optimized
      */
     if (predictor_coef_num == 4) {
-        /* FIXME: optimised general case */
+        /* FIXME: optimized general case */
        return;
     }
     if (predictor_coef_table == 8) {
-        /* FIXME: optimised general case */
+        /* FIXME: optimized general case */
        return;
     }
 #endif
@@ -490,7 +490,7 @@ static void init_entropy_decoder(APEContext * ctx)
     /* Keep a count of the blocks decoded in this frame */
     ctx->blocksdecoded = 0;

-    /* Initialise the rice structs */
+    /* Initialize the rice structs */
     ctx->riceX.k = 10;
     ctx->riceX.ksum = (1 << ctx->riceX.k) * 16;
     ctx->riceY.k = 10;
@@ -514,7 +514,7 @@ static void init_predictor_decoder(APEContext * ctx)
     memset(p->historybuffer, 0, PREDICTOR_SIZE * sizeof(int32_t));
     p->buf = p->historybuffer;

-    /* Initialise and zero the co-efficients */
+    /* Initialize and zero the co-efficients */
     memcpy(p->coeffsA[0], initial_coeffs, sizeof(initial_coeffs));
     memcpy(p->coeffsA[1], initial_coeffs, sizeof(initial_coeffs));
     memset(p->coeffsB, 0, sizeof(p->coeffsB));
@@ -28,7 +28,7 @@ extern void MPV_common_init_armv5te(MpegEncContext *s);
 void MPV_common_init_armv4l(MpegEncContext *s)
 {
     /* IWMMXT support is a superset of armv5te, so
-     * allow optimised functions for armv5te unless
+     * allow optimized functions for armv5te unless
      * a better iwmmxt function exists
      */
 #ifdef HAVE_ARMV5TE
@@ -702,7 +702,7 @@ assert(s->current_picture.pict_type == s->pict_type);
     }

     /* Return the Picture timestamp as the frame number */
-    /* we substract 1 because it is added on utils.c */
+    /* we subtract 1 because it is added on utils.c */
     avctx->frame_number = s->picture_number - 1;

 #ifdef PRINT_FRAME_TIME
@@ -7850,7 +7850,7 @@ static int decode_frame(AVCodecContext *avctx,
 #if 0 //?
     /* Return the Picture timestamp as the frame number */
-    /* we substract 1 because it is added on utils.c */
+    /* we subtract 1 because it is added on utils.c */
     avctx->frame_number = s->picture_number - 1;
 #endif

     return get_consumed_bytes(s, buf_index, buf_size);
@@ -2,7 +2,7 @@
  * Chinese AVS video (AVS1-P2, JiZhun profile) decoder.
  * Copyright (c) 2006 Stefan Gehrer <stefan.gehrer@gmx.de>
  *
- * MMX optimised DSP functions, based on H.264 optimisations by
+ * MMX optimized DSP functions, based on H.264 optimizations by
  * Michael Niedermayer and Loren Merritt
  *
  * This file is part of FFmpeg.
@@ -787,7 +787,7 @@ static int sse8_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int
       "movq (%1,%3),%%mm4\n"    /* mm4 = pix2[1][0-7] */

       /* todo: mm1-mm2, mm3-mm4 */
-      /* algo: substract mm1 from mm2 with saturation and vice versa */
+      /* algo: subtract mm1 from mm2 with saturation and vice versa */
       /* OR the results to get absolute difference */
       "movq %%mm1,%%mm5\n"
       "movq %%mm3,%%mm6\n"
@@ -847,7 +847,7 @@ static int sse16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int
       "movq 8(%1),%%mm4\n"      /* mm4 = pix2[8-15] */

       /* todo: mm1-mm2, mm3-mm4 */
-      /* algo: substract mm1 from mm2 with saturation and vice versa */
+      /* algo: subtract mm1 from mm2 with saturation and vice versa */
       /* OR the results to get absolute difference */
       "movq %%mm1,%%mm5\n"
       "movq %%mm3,%%mm6\n"
@@ -907,7 +907,7 @@ static int sse16_sse2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, in
     "movdqu (%1,%4),%%xmm4\n"   /* mm4 = pix2[1][0-15] */

     /* todo: mm1-mm2, mm3-mm4 */
-    /* algo: substract mm1 from mm2 with saturation and vice versa */
+    /* algo: subtract mm1 from mm2 with saturation and vice versa */
     /* OR the results to get absolute difference */
     "movdqa %%xmm1,%%xmm5\n"
     "movdqa %%xmm3,%%xmm6\n"
@@ -397,7 +397,7 @@ static inline void idct_col (int16_t * col, int offset)
     static const short _T3[] ATTR_ALIGN(8) = {T3,T3,T3,T3};
     static const short _C4[] ATTR_ALIGN(8) = {C4,C4,C4,C4};

-    /* column code adapted from peter gubanov */
+    /* column code adapted from Peter Gubanov */
     /* http://www.elecard.com/peter/idct.shtml */

     movq_m2r (*_T1, mm0);      // mm0 = T1
@@ -177,7 +177,7 @@ asm volatile(
   if (level < -2048 || level > 2047)
     fprintf(stderr, "unquant error %d %d\n", i, level);
 #endif
-  We can suppose that result of two multiplications can't be greate of 0xFFFF
+  We can suppose that result of two multiplications can't be greater than 0xFFFF
   i.e. is 16-bit, so we use here only PMULLW instruction and can avoid
   a complex multiplication.
 =====================================================
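The hunk above fixes the wording of a reasoning step worth spelling out: when the true product of two 16-bit factors is known to fit in 16 bits, the low-half multiply PMULLW alone already yields the exact result, and the extra high-half work (e.g. PMULHW) needed for a full 32-bit product can be skipped. A minimal scalar C sketch of that argument; the example values are hypothetical, not taken from the source:

    #include <assert.h>
    #include <stdint.h>

    /* pmullw keeps only the low 16 bits of each 16x16 product */
    static uint16_t pmullw_scalar(uint16_t a, uint16_t b)
    {
        return (uint16_t)((uint32_t)a * b);
    }

    int main(void)
    {
        /* hypothetical: a 12-bit |level| times a small quantizer step;
         * 2047 * 32 = 65504 <= 0xFFFF, so the low half is exact */
        uint16_t level = 2047, step = 32;
        uint32_t full = (uint32_t)level * step;
        assert(full <= 0xFFFF);
        assert(pmullw_scalar(level, step) == full);
        return 0;
    }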
@@ -1935,7 +1935,7 @@ typedef struct ConvertEntry {
    - PIX_FMT_422 must convert to and from PIX_FMT_422P.

-   The other conversion functions are just optimisations for common cases.
+   The other conversion functions are just optimizations for common cases.
 */
 static const ConvertEntry convert_table[PIX_FMT_NB][PIX_FMT_NB] = {
     [PIX_FMT_YUV420P] = {
@@ -414,7 +414,7 @@ static void glue(pal8_to_, RGB_NAME)(AVPicture *dst, const AVPicture *src,
     }
 }

-// RGB24 has optimised routines
+// RGB24 has optimized routines
 #if !defined(FMT_RGB32) && !defined(FMT_RGB24)

 /* alpha support */
@@ -285,7 +285,7 @@ static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src,
 }
 #endif /* HAVE_MMX */

-/* slow version to handle limit cases. Does not need optimisation */
+/* slow version to handle limit cases. Does not need optimization */
 static void h_resample_slow(uint8_t *dst, int dst_width,
                             const uint8_t *src, int src_width,
                             int src_start, int src_incr, int16_t *filters)
@@ -112,7 +112,7 @@ void test_motion(const char *name,
         }
     }
     emms_c();
-    dummy = d1; /* avoid optimisation */
+    dummy = d1; /* avoid optimization */
     ti = gettime() - ti;

     printf("  %0.0f kop/s\n",
@@ -94,7 +94,7 @@ typedef struct ScanTable{
     uint8_t permutated[64];
     uint8_t raster_end[64];
 #ifdef ARCH_POWERPC
-    /** Used by dct_quantise_alitvec to find last-non-zero */
+    /** Used by dct_quantize_alitvec to find last-non-zero */
     DECLARE_ALIGNED_8(uint8_t, inverse[64]);
 #endif
 } ScanTable;
@@ -1094,7 +1094,7 @@ POWERPC_PERF_START_COUNT(altivec_hadamard8_diff8x8_num, 1);
         dstV =                                                          \
             (vector signed short)vec_mergeh((vector signed char)vzero,  \
                                             (vector signed char)dstO);  \
-        /* substractions inside the first butterfly */                  \
+        /* subtractions inside the first butterfly */                   \
         but0 = vec_sub(srcV, dstV);                                     \
         op1 = vec_perm(but0, but0, perm1);                              \
         but1 = vec_mladd(but0, vprod1, op1);                            \
@@ -1175,7 +1175,7 @@ POWERPC_PERF_STOP_COUNT(altivec_hadamard8_diff8x8_num, 1);
   schedule for the 7450, and its code isn't much faster than
   gcc-3.3 on the 7450 (but uses 25% less instructions...)

-  On the 970, the hand-made RA is still a win (arount 690
+  On the 970, the hand-made RA is still a win (around 690
   vs. around 780), but xlc goes to around 660 on the
   regular C code...
 */
@@ -1267,7 +1267,7 @@ static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst,
         dstW =                                                          \
             (vector signed short)vec_mergel((vector signed char)vzero,  \
                                             (vector signed char)dstO);  \
-        /* substractions inside the first butterfly */                  \
+        /* subtractions inside the first butterfly */                   \
         but0 = vec_sub(srcV, dstV);                                     \
         but0S = vec_sub(srcW, dstW);                                    \
         op1 = vec_perm(but0, but0, perm1);                              \
@@ -29,7 +29,7 @@
 /*
   altivec-enhanced gmc1. ATM this code assume stride is a multiple of 8,
-  to preserve proper dst alignement.
+  to preserve proper dst alignment.
 */
 #define GMC1_PERF_COND (h==8)

 void gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */, int stride, int h, int x16, int y16, int rounder)
@@ -80,7 +80,7 @@ int dct_quantize_altivec(MpegEncContext* s,
     vector float row0, row1, row2, row3, row4, row5, row6, row7;
     vector float alt0, alt1, alt2, alt3, alt4, alt5, alt6, alt7;
     const vector float zero = (const vector float)FOUROF(0.);
-    // used after quantise step
+    // used after quantize step
     int oldBaseValue = 0;

     // Load the data into the row/alt vectors
@@ -258,7 +258,7 @@ int dct_quantize_altivec(MpegEncContext* s,
         }
     }

-    // perform the quantise step, using the floating point data
+    // perform the quantize step, using the floating point data
     // still in the row/alt registers
     {
         const int* biasAddr;
@@ -474,7 +474,7 @@ int dct_quantize_altivec(MpegEncContext* s,
         data[0] = (oldBaseValue + 4) >> 3;
     }

-    // We handled the tranpose permutation above and we don't
+    // We handled the transpose permutation above and we don't
     // need to permute the "no" permutation case.
     if ((lastNonZero > 0) &&
         (s->dsp.idct_permutation_type != FF_TRANSPOSE_IDCT_PERM) &&
@@ -3999,7 +3999,7 @@ static int vis_level ()
 /* libavcodec initialization code */
 void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx)
 {
-  /* VIS specific optimisations */
+  /* VIS specific optimizations */
   int accel = vis_level ();

   if (accel & ACCEL_SPARC_VIS) {
@@ -4102,7 +4102,7 @@ assert(s->current_picture.pict_type == s->pict_type);
     }

     /* Return the Picture timestamp as the frame number */
-    /* we substract 1 because it is added on utils.c */
+    /* we subtract 1 because it is added on utils.c */
     avctx->frame_number = s->picture_number - 1;

     av_free(buf2);
@@ -281,7 +281,7 @@ static int audio_read_packet(AVFormatContext *s1, AVPacket *pkt)
         if (ioctl(s->fd, SNDCTL_DSP_GETISPACE, &abufi) == 0) {
             bdelay += abufi.bytes;
         }
-        /* substract time represented by the number of bytes in the audio fifo */
+        /* subtract time represented by the number of bytes in the audio fifo */
         cur_time -= (bdelay * 1000000LL) / (s->sample_rate * s->channels);

         /* convert to wanted units */
@@ -56,7 +56,7 @@ typedef struct {
     uint64_t send_time;         /**< time to send file, in 100-nanosecond units
                                  *   invalid if broadcasting (could be ignored) */
     uint32_t preroll;           /**< timestamp of the first packet, in milliseconds
-                                 *   if nonzero - substract from time */
+                                 *   if nonzero - subtract from time */
     uint32_t ignore;            ///< preroll is 64bit - but let's just ignore it
     uint32_t flags;             /**< 0x01 - broadcast
                                  *   0x02 - seekable
@@ -194,7 +194,7 @@ static inline void doVertLowPass_altivec(uint8_t *src, int stride, PPContext *c)
       One could remove the recomputation of the perm
       vector by assuming (stride % 16) == 0, unfortunately
       this is not always true. Quite a lot of load/stores
-      can be removed by assuming proper alignement of
+      can be removed by assuming proper alignment of
       src & stride :-(
     */
     uint8_t *src2 = src;
@@ -382,7 +382,7 @@ static inline void doVertDefFilter_altivec(uint8_t src[], int stride, PPContext
       One could remove the recomputation of the perm
       vector by assuming (stride % 16) == 0, unfortunately
       this is not always true. Quite a lot of load/stores
-      can be removed by assuming proper alignement of
+      can be removed by assuming proper alignment of
       src & stride :-(
     */
     uint8_t *src2 = src;
@@ -469,7 +469,7 @@ static inline void doVertDefFilter_altivec(uint8_t src[], int stride, PPContext
     const vector signed short dornotd = vec_sel((vector signed short)zero,
                                                 dclampedfinal,
                                                 vec_cmplt(absmE, vqp));
-    /* add/substract to l4 and l5 */
+    /* add/subtract to l4 and l5 */
     const vector signed short vb4minusd = vec_sub(vb4, dornotd);
     const vector signed short vb5plusd  = vec_add(vb5, dornotd);
     /* finally, stores */
@@ -506,7 +506,7 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
       One could remove the recomputation of the perm
       vector by assuming (stride % 16) == 0, unfortunately
       this is not always true. Quite a lot of load/stores
-      can be removed by assuming proper alignement of
+      can be removed by assuming proper alignment of
       src & stride :-(
     */
     uint8_t *srcCopy = src;
@@ -506,7 +506,7 @@ void Process(void *ctx, AVPicture *picture, enum PixelFormat pix_fmt, int width,
         if (
             ( (c == '_') && (text == ci->text) ) || /* skip '_' (consider as space)
                                                        IF text was specified in cmd line
-                                                       (which doesn't like neasted quotes) */
+                                                       (which doesn't like nested quotes) */
             ( c == '\n' ) /* Skip new line char, just go to new line */
            )
             continue;
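For reference, the "subtract mm1 from mm2 with saturation and vice versa" comments corrected in the sse8_mmx/sse16_mmx/sse16_sse2 hunks describe a classic SIMD idiom: PSUBUSB computes max(a - b, 0), so ORing the two saturated differences yields |a - b| with no branches and no widening, because at least one operand of the OR is always zero. A minimal scalar C sketch of the same trick; the helper name is hypothetical:

    #include <stdint.h>
    #include <stdio.h>

    /* scalar model of (a psubusb b) | (b psubusb a) == |a - b| */
    static uint8_t abs_diff_u8(uint8_t a, uint8_t b)
    {
        uint8_t d1 = (a > b) ? (uint8_t)(a - b) : 0; /* a -sat- b */
        uint8_t d2 = (b > a) ? (uint8_t)(b - a) : 0; /* b -sat- a */
        return d1 | d2; /* at most one term is nonzero */
    }

    int main(void)
    {
        printf("%u %u\n", abs_diff_u8(200, 55), abs_diff_u8(55, 200)); /* 145 145 */
        return 0;
    }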