Originally committed as revision 14 to svn://svn.ffmpeg.org/ffmpeg/trunktags/v0.5
| @@ -0,0 +1,92 @@ | |||
| #ifndef __BSWAP_H__ | |||
| #define __BSWAP_H__ | |||
| #ifdef HAVE_CONFIG_H | |||
| #include "config.h" | |||
| #endif | |||
| #ifdef HAVE_BYTESWAP_H | |||
| #include <byteswap.h> | |||
| #else | |||
| #include <inttypes.h> | |||
| #ifdef ARCH_X86 | |||
/*
 * Swap the two bytes of a 16-bit value (0x1234 -> 0x3412).
 *
 * Implemented in plain C instead of an "xchgb %b0,%h0" inline asm: pairing
 * the "q" register constraint with the %h operand modifier is only valid on
 * 32-bit x86, because on x86-64 "q" may pick a register that has no
 * high-byte half.  Compilers reduce this expression to a single rotate.
 *
 * x:       value to swap.
 * returns: x with its low and high byte exchanged.
 */
static inline unsigned short ByteSwap16(unsigned short x)
{
    return (unsigned short)(((x & 0x00ffu) << 8) | ((x & 0xff00u) >> 8));
}
#define bswap_16(x) ByteSwap16(x)
/*
 * Reverse the byte order of a 32-bit value (0x12345678 -> 0x78563412).
 *
 * Implemented in plain C.  The inline-asm form chose between "bswap" and an
 * xchgb/rorl sequence with "#if __CPU__ > 386"; __CPU__ is not a
 * compiler-provided macro, so unless the build defines it the bswap path is
 * never selected.  The xchgb variant also relied on the "q" constraint
 * together with the %h modifier, which is only safe on 32-bit x86.
 * Compilers recognise this mask-and-shift pattern and emit bswap themselves
 * where the target supports it.
 *
 * x:       value to swap.
 * returns: x with its four bytes in reverse order.
 */
static inline unsigned int ByteSwap32(unsigned int x)
{
    return ((x & 0xff000000u) >> 24) |
           ((x & 0x00ff0000u) >>  8) |
           ((x & 0x0000ff00u) <<  8) |
           ((x & 0x000000ffu) << 24);
}
#define bswap_32(x) ByteSwap32(x)
/*
 * Reverse the byte order of a 64-bit value
 * (0x0102030405060708 -> 0x0807060504030201).
 *
 * Done with three mask-and-shift rounds (swap 32-bit halves, then 16-bit
 * pairs, then bytes).  This avoids treating "unsigned long" as a 32-bit
 * half inside a union/xchgl asm, which is only correct where long is
 * 32 bits wide, and it does not depend on any other swap helper.
 *
 * x:       value to swap.
 * returns: x with its eight bytes in reverse order.
 */
static inline unsigned long long int ByteSwap64(unsigned long long int x)
{
    x = ((x & 0x00000000ffffffffULL) << 32) | ((x & 0xffffffff00000000ULL) >> 32);
    x = ((x & 0x0000ffff0000ffffULL) << 16) | ((x & 0xffff0000ffff0000ULL) >> 16);
    x = ((x & 0x00ff00ff00ff00ffULL) <<  8) | ((x & 0xff00ff00ff00ff00ULL) >>  8);
    return x;
}
#define bswap_64(x) ByteSwap64(x)
| #else | |||
/*
 * Portable byte-swap fallbacks used when neither <byteswap.h> nor the x86
 * helpers are available.
 *
 * Each macro first converts its argument to the exact unsigned width it
 * operates on, so a signed argument is never left-shifted into the sign bit
 * and stray upper bits are discarded.  bswap_16 and bswap_32 evaluate their
 * argument more than once; do not pass expressions with side effects.
 */
#define bswap_16(x) \
    ((uint16_t)((((uint16_t)(x) & 0x00ffU) << 8) | \
                (((uint16_t)(x) & 0xff00U) >> 8)))

// code from bits/byteswap.h (C) 1997, 1998 Free Software Foundation, Inc.
#define bswap_32(x) \
    ((((uint32_t)(x) & 0xff000000U) >> 24) | (((uint32_t)(x) & 0x00ff0000U) >>  8) | \
     (((uint32_t)(x) & 0x0000ff00U) <<  8) | (((uint32_t)(x) & 0x000000ffU) << 24))

/*
 * 64-bit swap built from two 32-bit swaps on the value itself rather than on
 * a union of "unsigned long" halves: the result is independent of the width
 * of long and of host byte order.  The GNU statement expression keeps the
 * argument evaluated exactly once.
 */
#define bswap_64(x) \
    (__extension__ \
     ({ const unsigned long long int bswap_64_v_ = (x); \
        ((unsigned long long int)bswap_32(bswap_64_v_ & 0xffffffffULL) << 32) | \
         (unsigned long long int)bswap_32(bswap_64_v_ >> 32); }))
| #endif /* !ARCH_X86 */ | |||
| #endif /* !HAVE_BYTESWAP_H */ | |||
/*
 * Endianness conversion helpers.
 *   be2me_N(x): N-bit value stored big-endian    -> host byte order
 *   le2me_N(x): N-bit value stored little-endian -> host byte order
 * The conversion that matches the host order is a no-op; the other one is a
 * byte swap.  WORDS_BIGENDIAN selects the big-endian-host variant.
 */
#ifndef WORDS_BIGENDIAN
/* host is little-endian */
#define le2me_16(x) (x)
#define le2me_32(x) (x)
#define le2me_64(x) (x)
#define be2me_16(x) bswap_16(x)
#define be2me_32(x) bswap_32(x)
#define be2me_64(x) bswap_64(x)
#else
/* host is big-endian */
#define le2me_16(x) bswap_16(x)
#define le2me_32(x) bswap_32(x)
#define le2me_64(x) bswap_64(x)
#define be2me_16(x) (x)
#define be2me_32(x) (x)
#define be2me_64(x) (x)
#endif
| #endif | |||
| @@ -58,10 +58,15 @@ echo "Creating config.mak and config.h" | |||
| echo "# Automatically generated by configure - do not modify" > config.mak | |||
| echo "/* Automatically generated by configure - do not modify */" > config.h | |||
| # Checking for CFLAGS | |||
| if test -z "$CFLAGS"; then | |||
| CFLAGS="-O2" | |||
| fi | |||
| echo "prefix=$prefix" >> config.mak | |||
| echo "CC=$cc" >> config.mak | |||
| echo "AR=$ar" >> config.mak | |||
| echo "OPTFLAGS=-O2" >> config.mak | |||
| echo "OPTFLAGS=$CFLAGS" >> config.mak | |||
| if [ "$cpu" = "x86" ] ; then | |||
| echo "TARGET_ARCH_X86=yes" >> config.mak | |||
| echo "#define ARCH_X86 1" >> config.h | |||
| @@ -74,6 +79,7 @@ if [ "$gprof" = "yes" ] ; then | |||
| echo "TARGET_GPROF=yes" >> config.mak | |||
| echo "#define HAVE_GPROF 1" >> config.h | |||
| fi | |||
| echo "#define BIN_PORTABILITY 1 /*undefine it if you want to get maximal performance*/" >> config.h | |||
| # if you do not want to use encoders, disable that. | |||
| echo "#define CONFIG_ENCODERS 1" >> config.h | |||
| @@ -1,6 +1,6 @@ | |||
| include ../config.mak | |||
| CFLAGS= $(OPTFLAGS) -Wall -g | |||
| CFLAGS= $(OPTFLAGS) -Wall -g -DHAVE_CONFIG_H | |||
| LDFLAGS= -g | |||
| OBJS= common.o utils.o mpegvideo.o h263.o jrevdct.o jfdctfst.o \ | |||
| @@ -29,6 +29,8 @@ | |||
| #define NDEBUG | |||
| #include <assert.h> | |||
| #include "../bswap.h" | |||
| void init_put_bits(PutBitContext *s, | |||
| UINT8 *buffer, int buffer_size, | |||
| void *opaque, | |||
| @@ -222,10 +224,14 @@ unsigned int get_bits(GetBitContext *s, int n) | |||
| buf_ptr += 4; | |||
| /* handle common case: we can read everything */ | |||
| if (buf_ptr <= s->buf_end) { | |||
| bit_buf = (buf_ptr[-4] << 24) | | |||
| (buf_ptr[-3] << 16) | | |||
| #if ARCH_X86 | |||
| bit_buf = bswap_32(*((unsigned long*)(&buf_ptr[-4]))); | |||
| #else | |||
| bit_buf = (buf_ptr[-4] << 24) | | |||
| (buf_ptr[-3] << 16) | | |||
| (buf_ptr[-2] << 8) | | |||
| (buf_ptr[-1]); | |||
| (buf_ptr[-1]); | |||
| #endif | |||
| } else { | |||
| buf_ptr -= 4; | |||
| bit_buf = 0; | |||
| @@ -30,8 +30,10 @@ int pix_abs16x16_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h); | |||
| int pix_abs16x16_xy2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h); | |||
| /* pixel operations */ | |||
| static const unsigned short mm_wone[4] __attribute__ ((aligned(8))) = { 0x1, 0x1, 0x1, 0x1 }; | |||
| static const unsigned short mm_wtwo[4] __attribute__ ((aligned(8))) = { 0x2, 0x2, 0x2, 0x2 }; | |||
| static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001; | |||
| static const unsigned long long int mm_wtwo __attribute__ ((aligned(8))) = 0x0002000200020002; | |||
| //static const unsigned short mm_wone[4] __attribute__ ((aligned(8))) = { 0x1, 0x1, 0x1, 0x1 }; | |||
| //static const unsigned short mm_wtwo[4] __attribute__ ((aligned(8))) = { 0x2, 0x2, 0x2, 0x2 }; | |||
| /***********************************/ | |||
| /* 3Dnow specific */ | |||
| @@ -215,7 +217,7 @@ static void put_pixels_x2_mmx(UINT8 *block, const UINT8 *pixels, int line_size, | |||
| __asm __volatile( | |||
| "pxor %%mm7, %%mm7\n\t" | |||
| "movq %0, %%mm4\n\t" | |||
| ::"m"(mm_wone[0]):"memory"); | |||
| ::"m"(mm_wone):"memory"); | |||
| do { | |||
| __asm __volatile( | |||
| "movq %1, %%mm0\n\t" | |||
| @@ -250,7 +252,7 @@ static void put_pixels_y2_mmx(UINT8 *block, const UINT8 *pixels, int line_size, | |||
| __asm __volatile( | |||
| "pxor %%mm7, %%mm7\n\t" | |||
| "movq %0, %%mm4\n\t" | |||
| ::"m"(mm_wone[0]):"memory"); | |||
| ::"m"(mm_wone):"memory"); | |||
| do { | |||
| __asm __volatile( | |||
| "movq %1, %%mm0\n\t" | |||
| @@ -287,7 +289,7 @@ static void put_pixels_xy2_mmx(UINT8 *block, const UINT8 *pixels, int line_size, | |||
| __asm __volatile( | |||
| "pxor %%mm7, %%mm7\n\t" | |||
| "movq %0, %%mm6\n\t" | |||
| ::"m"(mm_wtwo[0]):"memory"); | |||
| ::"m"(mm_wtwo):"memory"); | |||
| do { | |||
| __asm __volatile( | |||
| "movq %1, %%mm0\n\t" | |||
| @@ -399,7 +401,7 @@ static void put_no_rnd_pixels_xy2_mmx( UINT8 *block, const UINT8 *pixels, int | |||
| __asm __volatile( | |||
| "pxor %%mm7, %%mm7\n\t" | |||
| "movq %0, %%mm6\n\t" | |||
| ::"m"(mm_wone[0]):"memory"); | |||
| ::"m"(mm_wone):"memory"); | |||
| do { | |||
| __asm __volatile( | |||
| "movq %1, %%mm0\n\t" | |||
| @@ -448,7 +450,7 @@ static void avg_pixels_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int | |||
| __asm __volatile( | |||
| "pxor %%mm7, %%mm7\n\t" | |||
| "movq %0, %%mm6\n\t" | |||
| ::"m"(mm_wone[0]):"memory"); | |||
| ::"m"(mm_wone):"memory"); | |||
| do { | |||
| __asm __volatile( | |||
| "movq %0, %%mm0\n\t" | |||
| @@ -485,7 +487,7 @@ static void avg_pixels_x2_mmx( UINT8 *block, const UINT8 *pixels, int line_si | |||
| __asm __volatile( | |||
| "pxor %%mm7, %%mm7\n\t" | |||
| "movq %0, %%mm6\n\t" | |||
| ::"m"(mm_wone[0]):"memory"); | |||
| ::"m"(mm_wone):"memory"); | |||
| do { | |||
| __asm __volatile( | |||
| "movq %1, %%mm1\n\t" | |||
| @@ -531,7 +533,7 @@ static void avg_pixels_y2_mmx( UINT8 *block, const UINT8 *pixels, int line_si | |||
| __asm __volatile( | |||
| "pxor %%mm7, %%mm7\n\t" | |||
| "movq %0, %%mm6\n\t" | |||
| ::"m"(mm_wone[0]):"memory"); | |||
| ::"m"(mm_wone):"memory"); | |||
| do { | |||
| __asm __volatile( | |||
| "movq %1, %%mm1\n\t" | |||
| @@ -577,7 +579,7 @@ static void avg_pixels_xy2_mmx( UINT8 *block, const UINT8 *pixels, int line_s | |||
| __asm __volatile( | |||
| "pxor %%mm7, %%mm7\n\t" | |||
| "movq %0, %%mm6\n\t" | |||
| ::"m"(mm_wtwo[0]):"memory"); | |||
| ::"m"(mm_wtwo):"memory"); | |||
| do { | |||
| __asm __volatile( | |||
| "movq %1, %%mm0\n\t" | |||
| @@ -621,7 +623,7 @@ static void avg_pixels_xy2_mmx( UINT8 *block, const UINT8 *pixels, int line_s | |||
| "movq %%mm0, %0\n\t" | |||
| :"=m"(*p) | |||
| :"m"(*pix), | |||
| "m"(*(pix+line_size)), "m"(mm_wone[0]) | |||
| "m"(*(pix+line_size)), "m"(mm_wone) | |||
| :"memory"); | |||
| pix += line_size; | |||
| p += line_size ; | |||
| @@ -748,7 +750,7 @@ static void avg_no_rnd_pixels_xy2_mmx( UINT8 *block, const UINT8 *pixels, int | |||
| __asm __volatile( | |||
| "pxor %%mm7, %%mm7\n\t" | |||
| "movq %0, %%mm6\n\t" | |||
| ::"m"(mm_wone[0]):"memory"); | |||
| ::"m"(mm_wone):"memory"); | |||
| do { | |||
| __asm __volatile( | |||
| "movq %1, %%mm0\n\t" | |||
| @@ -832,7 +834,7 @@ static void sub_pixels_x2_mmx( DCTELEM *block, const UINT8 *pixels, int line_si | |||
| __asm __volatile( | |||
| "pxor %%mm7, %%mm7\n\t" | |||
| "movq %0, %%mm6" | |||
| ::"m"(mm_wone[0]):"memory"); | |||
| ::"m"(mm_wone):"memory"); | |||
| do { | |||
| __asm __volatile( | |||
| "movq %0, %%mm0\n\t" | |||
| @@ -872,7 +874,7 @@ static void sub_pixels_y2_mmx( DCTELEM *block, const UINT8 *pixels, int line_si | |||
| __asm __volatile( | |||
| "pxor %%mm7, %%mm7\n\t" | |||
| "movq %0, %%mm6" | |||
| ::"m"(mm_wone[0]):"memory"); | |||
| ::"m"(mm_wone):"memory"); | |||
| do { | |||
| __asm __volatile( | |||
| "movq %0, %%mm0\n\t" | |||
| @@ -912,7 +914,7 @@ static void sub_pixels_xy2_mmx( DCTELEM *block, const UINT8 *pixels, int line | |||
| __asm __volatile( | |||
| "pxor %%mm7, %%mm7\n\t" | |||
| "movq %0, %%mm6\n\t" | |||
| ::"m"(mm_wtwo[0]):"memory"); | |||
| ::"m"(mm_wtwo):"memory"); | |||
| do { | |||
| __asm __volatile( | |||
| "movq %1, %%mm0\n\t" | |||
| @@ -243,7 +243,7 @@ static void DEF(avg_pixels_xy2)( UINT8 *block, const UINT8 *pixels, int line_si | |||
| __asm __volatile( | |||
| "pxor %%mm7, %%mm7\n\t" | |||
| "movq %0, %%mm6\n\t" | |||
| ::"m"(mm_wtwo[0]):"memory"); | |||
| ::"m"(mm_wtwo):"memory"); | |||
| do { | |||
| __asm __volatile( | |||
| "movq %1, %%mm0\n\t" | |||
| @@ -0,0 +1,239 @@ | |||
| /* | |||
| * The simplest mpeg encoder (well, it was the simplest!) | |||
| * Copyright (c) 2000,2001 Gerard Lantau. | |||
| * | |||
| * This program is free software; you can redistribute it and/or modify | |||
| * it under the terms of the GNU General Public License as published by | |||
| * the Free Software Foundation; either version 2 of the License, or | |||
| * (at your option) any later version. | |||
| * | |||
| * This program is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||
| * GNU General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU General Public License | |||
| * along with this program; if not, write to the Free Software | |||
| * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |||
| * | |||
| * Optimized for ia32 cpus by Nick Kurshev <nickols_k@mail.ru> | |||
| */ | |||
| void MPV_frame_start(MpegEncContext *s) | |||
| { | |||
| if (s->pict_type == B_TYPE) { | |||
| __asm __volatile( | |||
| "movl (%1), %%eax\n\t" | |||
| "movl 4(%1), %%edx\n\t" | |||
| "movl 8(%1), %%ecx\n\t" | |||
| "movl %%eax, (%0)\n\t" | |||
| "movl %%edx, 4(%0)\n\t" | |||
| "movl %%ecx, 8(%0)\n\t" | |||
| : | |||
| :"r"(s->current_picture), "r"(s->aux_picture) | |||
| :"eax","edx","ecx","memory"); | |||
| } else { | |||
| /* swap next and last */ | |||
| __asm __volatile( | |||
| "movl (%1), %%eax\n\t" | |||
| "movl 4(%1), %%edx\n\t" | |||
| "movl 8(%1), %%ecx\n\t" | |||
| "xchgl (%0), %%eax\n\t" | |||
| "xchgl 4(%0), %%edx\n\t" | |||
| "xchgl 8(%0), %%ecx\n\t" | |||
| "movl %%eax, (%1)\n\t" | |||
| "movl %%edx, 4(%1)\n\t" | |||
| "movl %%ecx, 8(%1)\n\t" | |||
| "movl %%eax, (%2)\n\t" | |||
| "movl %%edx, 4(%2)\n\t" | |||
| "movl %%ecx, 8(%2)\n\t" | |||
| : | |||
| :"r"(s->last_picture), "r"(s->next_picture), "r"(s->current_picture) | |||
| :"eax","edx","ecx","memory"); | |||
| } | |||
| } | |||
| static void dct_unquantize(MpegEncContext *s, DCTELEM *block, int n, int qscale); | |||
| #ifdef HAVE_MMX | |||
/* 8-byte aligned 64-bit constants passed as memory operands to the MMX code:
   mm_wabs has every bit set, mm_wone holds the value 1 in each 16-bit word. */
static const unsigned long long mm_wabs __attribute__((aligned(8))) = 0xFFFFFFFFFFFFFFFFULL;
static const unsigned long long mm_wone __attribute__((aligned(8))) = 0x0001000100010001ULL;
| /* | |||
| NK: | |||
| Note: looking at PARANOID: | |||
| "enable all paranoid tests for rounding, overflows, etc..." | |||
| #ifdef PARANOID | |||
| if (level < -2048 || level > 2047) | |||
| fprintf(stderr, "unquant error %d %d\n", i, level); | |||
| #endif | |||
| We can assume that the result of the two multiplications cannot be greater than 0xFFFF, | |||
| i.e. it fits in 16 bits, so only the PMULLW instruction is used here and | |||
| a complex multiplication can be avoided. | |||
| ===================================================== | |||
| Full formula for the multiplication of 2 integer numbers | |||
| that are represented as high:low words: | |||
| input: value1 = high1:low1 | |||
| value2 = high2:low2 | |||
| output: value3 = value1*value2 | |||
| value3=high3:low3 (on overflow: modulus 2^32 wrap-around) | |||
| this means that for 0x123456 * 0x123456 the correct result is 0x14b66cb0ce4 | |||
| but this algorithm will compute only 0x66cb0ce4 | |||
| this is limited by the 16-bit size of the operands | |||
| --------------------------------- | |||
| tlow1 = high1*low2 | |||
| tlow2 = high2*low1 | |||
| tlow1 = tlow1 + tlow2 | |||
| high3:low3 = low1*low2 | |||
| high3 += tlow1 | |||
| */ | |||
| #ifdef BIN_PORTABILITY | |||
| static void dct_unquantize_mmx | |||
| #else | |||
| #define HAVE_DCT_UNQUANTIZE 1 | |||
| static void dct_unquantize | |||
| #endif | |||
| (MpegEncContext *s,DCTELEM *block, int n, int qscale) | |||
| { | |||
| int i, level; | |||
| const UINT16 *quant_matrix; | |||
| if (s->mb_intra) { | |||
| if (n < 4) | |||
| block[0] = block[0] * s->y_dc_scale; | |||
| else | |||
| block[0] = block[0] * s->c_dc_scale; | |||
| if (s->out_format == FMT_H263) { | |||
| i = 1; | |||
| goto unquant_even; | |||
| } | |||
| /* XXX: only mpeg1 */ | |||
| quant_matrix = s->intra_matrix; | |||
| i=1; | |||
| /* Align on 4 elements boundary */ | |||
| while(i&3) | |||
| { | |||
| level = block[i]; | |||
| if (level) { | |||
| if (level < 0) level = -level; | |||
| level = (int)(level * qscale * quant_matrix[i]) >> 3; | |||
| level = (level - 1) | 1; | |||
| if (block[i] < 0) level = -level; | |||
| block[i] = level; | |||
| } | |||
| i++; | |||
| } | |||
| __asm __volatile( | |||
| "movd %0, %%mm6\n\t" /* mm6 = qscale | 0 */ | |||
| "punpckldq %%mm6, %%mm6\n\t" /* mm6 = qscale | qscale */ | |||
| "movq %2, %%mm4\n\t" | |||
| "movq %%mm6, %%mm7\n\t" | |||
| "movq %1, %%mm5\n\t" | |||
| "packssdw %%mm6, %%mm7\n\t" /* mm7 = qscale | qscale | qscale | qscale */ | |||
| "pxor %%mm6, %%mm6\n\t" | |||
| ::"g"(qscale),"m"(mm_wone),"m"(mm_wabs):"memory"); | |||
| for(;i<64;i+=4) { | |||
| __asm __volatile( | |||
| "movq %1, %%mm0\n\t" | |||
| "movq %%mm7, %%mm1\n\t" | |||
| "movq %%mm0, %%mm2\n\t" | |||
| "movq %%mm0, %%mm3\n\t" | |||
| "pcmpgtw %%mm6, %%mm2\n\t" | |||
| "pmullw %2, %%mm1\n\t" | |||
| "pandn %%mm4, %%mm2\n\t" | |||
| "por %%mm5, %%mm2\n\t" | |||
| "pmullw %%mm2, %%mm0\n\t" /* mm0 = abs(block[i]). */ | |||
| "pcmpeqw %%mm6, %%mm3\n\t" | |||
| "pmullw %%mm0, %%mm1\n\t" | |||
| "psraw $3, %%mm1\n\t" | |||
| "psubw %%mm5, %%mm1\n\t" /* block[i] --; */ | |||
| "pandn %%mm4, %%mm3\n\t" /* fake of pcmpneqw : mm0 != 0 then mm1 = -1 */ | |||
| "por %%mm5, %%mm1\n\t" /* block[i] |= 1 */ | |||
| "pmullw %%mm2, %%mm1\n\t" /* change signs again */ | |||
| "pand %%mm3, %%mm1\n\t" /* nullify if was zero */ | |||
| "movq %%mm1, %0" | |||
| :"=m"(block[i]) | |||
| :"m"(block[i]), "m"(quant_matrix[i]) | |||
| :"memory"); | |||
| } | |||
| } else { | |||
| i = 0; | |||
| unquant_even: | |||
| quant_matrix = s->non_intra_matrix; | |||
| /* Align on 4 elements boundary */ | |||
| while(i&3) | |||
| { | |||
| level = block[i]; | |||
| if (level) { | |||
| if (level < 0) level = -level; | |||
| level = (((level << 1) + 1) * qscale * | |||
| ((int) quant_matrix[i])) >> 4; | |||
| level = (level - 1) | 1; | |||
| if(block[i] < 0) level = -level; | |||
| block[i] = level; | |||
| } | |||
| i++; | |||
| } | |||
| __asm __volatile( | |||
| "movd %0, %%mm6\n\t" /* mm6 = qscale | 0 */ | |||
| "punpckldq %%mm6, %%mm6\n\t" /* mm6 = qscale | qscale */ | |||
| "movq %2, %%mm4\n\t" | |||
| "movq %%mm6, %%mm7\n\t" | |||
| "movq %1, %%mm5\n\t" | |||
| "packssdw %%mm6, %%mm7\n\t" /* mm7 = qscale | qscale | qscale | qscale */ | |||
| "pxor %%mm6, %%mm6\n\t" | |||
| ::"g"(qscale),"m"(mm_wone),"m"(mm_wabs):"memory"); | |||
| for(;i<64;i+=4) { | |||
| __asm __volatile( | |||
| "movq %1, %%mm0\n\t" | |||
| "movq %%mm7, %%mm1\n\t" | |||
| "movq %%mm0, %%mm2\n\t" | |||
| "movq %%mm0, %%mm3\n\t" | |||
| "pcmpgtw %%mm6, %%mm2\n\t" | |||
| "pmullw %2, %%mm1\n\t" | |||
| "pandn %%mm4, %%mm2\n\t" | |||
| "por %%mm5, %%mm2\n\t" | |||
| "pmullw %%mm2, %%mm0\n\t" /* mm0 = abs(block[i]). */ | |||
| "psllw $1, %%mm0\n\t" /* block[i] <<= 1 */ | |||
| "paddw %%mm5, %%mm0\n\t" /* block[i] ++ */ | |||
| "pmullw %%mm0, %%mm1\n\t" | |||
| "psraw $4, %%mm1\n\t" | |||
| "pcmpeqw %%mm6, %%mm3\n\t" | |||
| "psubw %%mm5, %%mm1\n\t" /* block[i] --; */ | |||
| "pandn %%mm4, %%mm3\n\t" /* fake of pcmpneqw : mm0 != 0 then mm1 = -1 */ | |||
| "por %%mm5, %%mm1\n\t" /* block[i] |= 1 */ | |||
| "pmullw %%mm2, %%mm1\n\t" /* change signs again */ | |||
| "pand %%mm3, %%mm1\n\t" /* nullify if was zero */ | |||
| "movq %%mm1, %0" | |||
| :"=m"(block[i]) | |||
| :"m"(block[i]), "m"(quant_matrix[i]) | |||
| :"memory"); | |||
| } | |||
| } | |||
| } | |||
| #ifdef BIN_PORTABILITY | |||
| static void (*dct_unquantize_ptr)(MpegEncContext *s, | |||
| DCTELEM *block, int n, int qscale); | |||
| void MPV_common_init_mmx(void) | |||
| { | |||
| int mm_flags; | |||
| mm_flags = mm_support(); | |||
| if (mm_flags & MM_MMX) { | |||
| dct_unquantize_ptr = dct_unquantize_mmx; | |||
| } | |||
| else { | |||
| dct_unquantize_ptr = dct_unquantize; | |||
| } | |||
| } | |||
| #define DCT_UNQUANTIZE(a,b,c,d) (*dct_unquantize_ptr)(a,b,c,d) | |||
| #else | |||
| #define DCT_UNQUANTIZE(a,b,c,d) dct_unquantize(a,b,c,d) | |||
| #endif /* BIN_PORTABILITY */ | |||
| #endif /* HAVE_MMX */ | |||
| @@ -24,6 +24,15 @@ | |||
| #include "dsputil.h" | |||
| #include "mpegvideo.h" | |||
| #include "../config.h" | |||
| #ifdef ARCH_X86 | |||
| #include "i386/mpegvideo.c" | |||
| #endif | |||
| #ifndef DCT_UNQUANTIZE | |||
| #define DCT_UNQUANTIZE(a,b,c,d) dct_unquantize(a,b,c,d) | |||
| #endif | |||
| #define EDGE_WIDTH 16 | |||
| /* enable all paranoid tests for rounding, overflows, etc... */ | |||
| @@ -89,6 +98,9 @@ int MPV_common_init(MpegEncContext *s) | |||
| int c_size, i; | |||
| UINT8 *pict; | |||
| #if defined ( HAVE_MMX ) && defined ( BIN_PORTABILITY ) | |||
| MPV_common_init_mmx(); | |||
| #endif | |||
| s->mb_width = (s->width + 15) / 16; | |||
| s->mb_height = (s->height + 15) / 16; | |||
| s->linesize = s->mb_width * 16 + 2 * EDGE_WIDTH; | |||
| @@ -345,8 +357,8 @@ static void draw_edges(UINT8 *buf, int wrap, int width, int height, int w) | |||
| } | |||
| } | |||
| /* generic function for encode/decode called before a frame is coded/decoded */ | |||
| #ifndef ARCH_X86 | |||
| void MPV_frame_start(MpegEncContext *s) | |||
| { | |||
| int i; | |||
| @@ -366,7 +378,7 @@ void MPV_frame_start(MpegEncContext *s) | |||
| } | |||
| } | |||
| } | |||
| #endif | |||
| /* generic function for encode/decode called after a frame has been coded/decoded */ | |||
| void MPV_frame_end(MpegEncContext *s) | |||
| { | |||
| @@ -621,7 +633,7 @@ static inline void put_dct(MpegEncContext *s, | |||
| DCTELEM *block, int i, UINT8 *dest, int line_size) | |||
| { | |||
| if (!s->mpeg2) | |||
| dct_unquantize(s, block, i, s->qscale); | |||
| DCT_UNQUANTIZE(s, block, i, s->qscale); | |||
| j_rev_dct (block); | |||
| put_pixels_clamped(block, dest, line_size); | |||
| } | |||
| @@ -632,7 +644,7 @@ static inline void add_dct(MpegEncContext *s, | |||
| { | |||
| if (s->block_last_index[i] >= 0) { | |||
| if (!s->mpeg2) | |||
| dct_unquantize(s, block, i, s->qscale); | |||
| DCT_UNQUANTIZE(s, block, i, s->qscale); | |||
| j_rev_dct (block); | |||
| add_pixels_clamped(block, dest, line_size); | |||
| } | |||
| @@ -1109,6 +1121,7 @@ static int dct_quantize_mmx(MpegEncContext *s, | |||
| return last_non_zero; | |||
| } | |||
| #ifndef HAVE_DCT_UNQUANTIZE | |||
| static void dct_unquantize(MpegEncContext *s, | |||
| DCTELEM *block, int n, int qscale) | |||
| { | |||
| @@ -1172,7 +1185,7 @@ static void dct_unquantize(MpegEncContext *s, | |||
| } | |||
| } | |||
| } | |||
| #endif | |||
| /* rate control */ | |||