Originally committed as revision 4764 to svn://svn.ffmpeg.org/ffmpeg/trunk
tags/v0.5
| @@ -96,7 +96,7 @@ unknown_opt: | |||||
| if(po->u.func2_arg(opt+1, arg)<0) | if(po->u.func2_arg(opt+1, arg)<0) | ||||
| goto unknown_opt; | goto unknown_opt; | ||||
| } else { | } else { | ||||
| po->u.func_arg(arg); | |||||
| po->u.func_arg(arg); | |||||
| } | } | ||||
| } else { | } else { | ||||
| parse_arg_file(opt); | parse_arg_file(opt); | ||||
| @@ -122,8 +122,8 @@ void print_error(const char *filename, int err) | |||||
| break; | break; | ||||
| case AVERROR_IO: | case AVERROR_IO: | ||||
| fprintf(stderr, "%s: I/O error occured\n" | fprintf(stderr, "%s: I/O error occured\n" | ||||
| "Usually that means that input file is truncated and/or corrupted.\n", | |||||
| filename); | |||||
| "Usually that means that input file is truncated and/or corrupted.\n", | |||||
| filename); | |||||
| break; | break; | ||||
| case AVERROR_NOMEM: | case AVERROR_NOMEM: | ||||
| fprintf(stderr, "%s: memory allocation error occured\n", filename); | fprintf(stderr, "%s: memory allocation error occured\n", filename); | ||||
| @@ -688,26 +688,26 @@ fi | |||||
| needmdynamicnopic="no" | needmdynamicnopic="no" | ||||
| if test $targetos = Darwin; then | if test $targetos = Darwin; then | ||||
| if test -n "`$cc -v 2>&1 | grep xlc`"; then | if test -n "`$cc -v 2>&1 | grep xlc`"; then | ||||
| CFLAGS="$CFLAGS -qpdf2 -qlanglvl=extc99 -qmaxmem=-1 -qarch=auto -qtune=auto" | |||||
| CFLAGS="$CFLAGS -qpdf2 -qlanglvl=extc99 -qmaxmem=-1 -qarch=auto -qtune=auto" | |||||
| else | else | ||||
| gcc_version="`$cc -v 2>&1 | grep version | cut -d ' ' -f3-`" | |||||
| case "$gcc_version" in | |||||
| *2.95*) | |||||
| CFLAGS="$CFLAGS -no-cpp-precomp -pipe -fomit-frame-pointer" | |||||
| ;; | |||||
| *[34].*) | |||||
| CFLAGS="$CFLAGS -no-cpp-precomp -pipe -fomit-frame-pointer -force_cpusubtype_ALL -Wno-sign-compare" | |||||
| if test "$lshared" = no; then | |||||
| needmdynamicnopic="yes" | |||||
| fi | |||||
| ;; | |||||
| *) | |||||
| CFLAGS="$CFLAGS -no-cpp-precomp -pipe -fomit-frame-pointer" | |||||
| if test "$lshared" = no; then | |||||
| needmdynamicnopic="yes" | |||||
| fi | |||||
| ;; | |||||
| esac | |||||
| gcc_version="`$cc -v 2>&1 | grep version | cut -d ' ' -f3-`" | |||||
| case "$gcc_version" in | |||||
| *2.95*) | |||||
| CFLAGS="$CFLAGS -no-cpp-precomp -pipe -fomit-frame-pointer" | |||||
| ;; | |||||
| *[34].*) | |||||
| CFLAGS="$CFLAGS -no-cpp-precomp -pipe -fomit-frame-pointer -force_cpusubtype_ALL -Wno-sign-compare" | |||||
| if test "$lshared" = no; then | |||||
| needmdynamicnopic="yes" | |||||
| fi | |||||
| ;; | |||||
| *) | |||||
| CFLAGS="$CFLAGS -no-cpp-precomp -pipe -fomit-frame-pointer" | |||||
| if test "$lshared" = no; then | |||||
| needmdynamicnopic="yes" | |||||
| fi | |||||
| ;; | |||||
| esac | |||||
| fi | fi | ||||
| fi | fi | ||||
| @@ -725,62 +725,62 @@ TUNECPU="generic" | |||||
| POWERPCMODE="32bits" | POWERPCMODE="32bits" | ||||
| if test $tune != "generic"; then | if test $tune != "generic"; then | ||||
| case $tune in | case $tune in | ||||
| 601|ppc601|PowerPC601) | |||||
| CFLAGS="$CFLAGS -mcpu=601" | |||||
| if test $altivec = "yes"; then | |||||
| echo "WARNING: Tuning for PPC601 but AltiVec enabled!"; | |||||
| fi | |||||
| TUNECPU=ppc601 | |||||
| ;; | |||||
| 603*|ppc603*|PowerPC603*) | |||||
| CFLAGS="$CFLAGS -mcpu=603" | |||||
| if test $altivec = "yes"; then | |||||
| echo "WARNING: Tuning for PPC603 but AltiVec enabled!"; | |||||
| fi | |||||
| TUNECPU=ppc603 | |||||
| ;; | |||||
| 604*|ppc604*|PowerPC604*) | |||||
| CFLAGS="$CFLAGS -mcpu=604" | |||||
| if test $altivec = "yes"; then | |||||
| echo "WARNING: Tuning for PPC604 but AltiVec enabled!"; | |||||
| fi | |||||
| TUNECPU=ppc604 | |||||
| ;; | |||||
| G3|g3|75*|ppc75*|PowerPC75*) | |||||
| CFLAGS="$CFLAGS -mcpu=750 -mtune=750 -mpowerpc-gfxopt" | |||||
| if test $altivec = "yes"; then | |||||
| echo "WARNING: Tuning for PPC75x but AltiVec enabled!"; | |||||
| fi | |||||
| TUNECPU=ppc750 | |||||
| ;; | |||||
| G4|g4|745*|ppc745*|PowerPC745*) | |||||
| CFLAGS="$CFLAGS -mcpu=7450 -mtune=7450 -mpowerpc-gfxopt" | |||||
| if test $altivec = "no"; then | |||||
| echo "WARNING: Tuning for PPC745x but AltiVec disabled!"; | |||||
| fi | |||||
| TUNECPU=ppc7450 | |||||
| ;; | |||||
| 74*|ppc74*|PowerPC74*) | |||||
| CFLAGS="$CFLAGS -mcpu=7400 -mtune=7400 -mpowerpc-gfxopt" | |||||
| if test $altivec = "no"; then | |||||
| echo "WARNING: Tuning for PPC74xx but AltiVec disabled!"; | |||||
| fi | |||||
| TUNECPU=ppc7400 | |||||
| ;; | |||||
| G5|g5|970|ppc970|PowerPC970|power4*|Power4*) | |||||
| CFLAGS="$CFLAGS -mcpu=970 -mtune=970 -mpowerpc-gfxopt -mpowerpc64" | |||||
| if test $altivec = "no"; then | |||||
| echo "WARNING: Tuning for PPC970 but AltiVec disabled!"; | |||||
| fi | |||||
| TUNECPU=ppc970 | |||||
| 601|ppc601|PowerPC601) | |||||
| CFLAGS="$CFLAGS -mcpu=601" | |||||
| if test $altivec = "yes"; then | |||||
| echo "WARNING: Tuning for PPC601 but AltiVec enabled!"; | |||||
| fi | |||||
| TUNECPU=ppc601 | |||||
| ;; | |||||
| 603*|ppc603*|PowerPC603*) | |||||
| CFLAGS="$CFLAGS -mcpu=603" | |||||
| if test $altivec = "yes"; then | |||||
| echo "WARNING: Tuning for PPC603 but AltiVec enabled!"; | |||||
| fi | |||||
| TUNECPU=ppc603 | |||||
| ;; | |||||
| 604*|ppc604*|PowerPC604*) | |||||
| CFLAGS="$CFLAGS -mcpu=604" | |||||
| if test $altivec = "yes"; then | |||||
| echo "WARNING: Tuning for PPC604 but AltiVec enabled!"; | |||||
| fi | |||||
| TUNECPU=ppc604 | |||||
| ;; | |||||
| G3|g3|75*|ppc75*|PowerPC75*) | |||||
| CFLAGS="$CFLAGS -mcpu=750 -mtune=750 -mpowerpc-gfxopt" | |||||
| if test $altivec = "yes"; then | |||||
| echo "WARNING: Tuning for PPC75x but AltiVec enabled!"; | |||||
| fi | |||||
| TUNECPU=ppc750 | |||||
| ;; | |||||
| G4|g4|745*|ppc745*|PowerPC745*) | |||||
| CFLAGS="$CFLAGS -mcpu=7450 -mtune=7450 -mpowerpc-gfxopt" | |||||
| if test $altivec = "no"; then | |||||
| echo "WARNING: Tuning for PPC745x but AltiVec disabled!"; | |||||
| fi | |||||
| TUNECPU=ppc7450 | |||||
| ;; | |||||
| 74*|ppc74*|PowerPC74*) | |||||
| CFLAGS="$CFLAGS -mcpu=7400 -mtune=7400 -mpowerpc-gfxopt" | |||||
| if test $altivec = "no"; then | |||||
| echo "WARNING: Tuning for PPC74xx but AltiVec disabled!"; | |||||
| fi | |||||
| TUNECPU=ppc7400 | |||||
| ;; | |||||
| G5|g5|970|ppc970|PowerPC970|power4*|Power4*) | |||||
| CFLAGS="$CFLAGS -mcpu=970 -mtune=970 -mpowerpc-gfxopt -mpowerpc64" | |||||
| if test $altivec = "no"; then | |||||
| echo "WARNING: Tuning for PPC970 but AltiVec disabled!"; | |||||
| fi | |||||
| TUNECPU=ppc970 | |||||
| POWERPCMODE="64bits" | POWERPCMODE="64bits" | ||||
| ;; | |||||
| i[3456]86|pentium|pentiumpro|pentium-mmx|pentium[234]|prescott|k6|k6-[23]|athlon|athlon-tbird|athlon-4|athlon-[mx]p|winchip-c6|winchip2|c3|nocona|athlon64|k8|opteron|athlon-fx) | |||||
| CFLAGS="$CFLAGS -march=$tune" | |||||
| ;; | |||||
| *) | |||||
| echo "WARNING: Unknown CPU \"$tune\", ignored." | |||||
| ;; | |||||
| ;; | |||||
| i[3456]86|pentium|pentiumpro|pentium-mmx|pentium[234]|prescott|k6|k6-[23]|athlon|athlon-tbird|athlon-4|athlon-[mx]p|winchip-c6|winchip2|c3|nocona|athlon64|k8|opteron|athlon-fx) | |||||
| CFLAGS="$CFLAGS -march=$tune" | |||||
| ;; | |||||
| *) | |||||
| echo "WARNING: Unknown CPU \"$tune\", ignored." | |||||
| ;; | |||||
| esac | esac | ||||
| fi | fi | ||||
| @@ -876,8 +876,8 @@ if test -z "$cross_prefix" ; then | |||||
| cat > $TMPC << EOF | cat > $TMPC << EOF | ||||
| #include <inttypes.h> | #include <inttypes.h> | ||||
| int main(int argc, char ** argv){ | int main(int argc, char ** argv){ | ||||
| volatile uint32_t i=0x01234567; | |||||
| return (*((uint8_t*)(&i))) == 0x67; | |||||
| volatile uint32_t i=0x01234567; | |||||
| return (*((uint8_t*)(&i))) == 0x67; | |||||
| } | } | ||||
| EOF | EOF | ||||
| @@ -912,8 +912,8 @@ $cc -o $TMPE $TMPC 2>/dev/null || inttypes="no" | |||||
| cat > $TMPC << EOF | cat > $TMPC << EOF | ||||
| #include <inttypes.h> | #include <inttypes.h> | ||||
| int main(int argc, char ** argv){ | int main(int argc, char ** argv){ | ||||
| volatile uint_fast64_t i=0x01234567; | |||||
| return 0; | |||||
| volatile uint_fast64_t i=0x01234567; | |||||
| return 0; | |||||
| } | } | ||||
| EOF | EOF | ||||
| @@ -1152,10 +1152,10 @@ fi | |||||
| case "`$cc -v 2>&1 | grep version`" in | case "`$cc -v 2>&1 | grep version`" in | ||||
| *gcc*) | *gcc*) | ||||
| CFLAGS="-Wall -Wno-switch $CFLAGS" | |||||
| ;; | |||||
| CFLAGS="-Wall -Wno-switch $CFLAGS" | |||||
| ;; | |||||
| *) | *) | ||||
| ;; | |||||
| ;; | |||||
| esac | esac | ||||
| if test "$sdl" = "no" ; then | if test "$sdl" = "no" ; then | ||||
| @@ -1163,7 +1163,7 @@ if test "$sdl" = "no" ; then | |||||
| fi | fi | ||||
| if test "$debug" = "yes"; then | if test "$debug" = "yes"; then | ||||
| CFLAGS="-g $CFLAGS" | |||||
| CFLAGS="-g $CFLAGS" | |||||
| fi | fi | ||||
| if test "$optimize" = "small"; then | if test "$optimize" = "small"; then | ||||
| @@ -1173,10 +1173,10 @@ fi | |||||
| if test "$optimize" = "yes"; then | if test "$optimize" = "yes"; then | ||||
| if test -n "`$cc -v 2>&1 | grep xlc`"; then | if test -n "`$cc -v 2>&1 | grep xlc`"; then | ||||
| CFLAGS="$CFLAGS -O5" | |||||
| LDFLAGS="$LDFLAGS -O5" | |||||
| CFLAGS="$CFLAGS -O5" | |||||
| LDFLAGS="$LDFLAGS -O5" | |||||
| else | else | ||||
| CFLAGS="-O3 $CFLAGS" | |||||
| CFLAGS="-O3 $CFLAGS" | |||||
| fi | fi | ||||
| fi | fi | ||||
| @@ -1793,9 +1793,9 @@ done | |||||
| diff $TMPH config.h >/dev/null 2>&1 | diff $TMPH config.h >/dev/null 2>&1 | ||||
| if test $? -ne 0 ; then | if test $? -ne 0 ; then | ||||
| mv -f $TMPH config.h | |||||
| mv -f $TMPH config.h | |||||
| else | else | ||||
| echo "config.h is unchanged" | |||||
| echo "config.h is unchanged" | |||||
| fi | fi | ||||
| rm -f $TMPO $TMPC $TMPE $TMPS $TMPH | rm -f $TMPO $TMPC $TMPE $TMPS $TMPH | ||||
| @@ -25,37 +25,37 @@ main(int argc, char *argv[]) | |||||
| if (argc < 3) | if (argc < 3) | ||||
| { | { | ||||
| printf("Usage: %s <infile.swf> <outfile.swf>\n", argv[0]); | |||||
| exit(1); | |||||
| printf("Usage: %s <infile.swf> <outfile.swf>\n", argv[0]); | |||||
| exit(1); | |||||
| } | } | ||||
| fd_in = open(argv[1], O_RDONLY); | fd_in = open(argv[1], O_RDONLY); | ||||
| if (fd_in < 0) | if (fd_in < 0) | ||||
| { | { | ||||
| perror("Error while opening: "); | |||||
| exit(1); | |||||
| perror("Error while opening: "); | |||||
| exit(1); | |||||
| } | } | ||||
| fd_out = open(argv[2], O_WRONLY|O_CREAT, 00644); | fd_out = open(argv[2], O_WRONLY|O_CREAT, 00644); | ||||
| if (fd_out < 0) | if (fd_out < 0) | ||||
| { | { | ||||
| perror("Error while opening: "); | |||||
| close(fd_in); | |||||
| exit(1); | |||||
| perror("Error while opening: "); | |||||
| close(fd_in); | |||||
| exit(1); | |||||
| } | } | ||||
| if (read(fd_in, &buf_in, 8) != 8) | if (read(fd_in, &buf_in, 8) != 8) | ||||
| { | { | ||||
| printf("Header error\n"); | |||||
| close(fd_in); | |||||
| close(fd_out); | |||||
| exit(1); | |||||
| printf("Header error\n"); | |||||
| close(fd_in); | |||||
| close(fd_out); | |||||
| exit(1); | |||||
| } | } | ||||
| if (buf_in[0] != 'C' || buf_in[1] != 'W' || buf_in[2] != 'S') | if (buf_in[0] != 'C' || buf_in[1] != 'W' || buf_in[2] != 'S') | ||||
| { | { | ||||
| printf("Not a compressed flash file\n"); | |||||
| exit(1); | |||||
| printf("Not a compressed flash file\n"); | |||||
| exit(1); | |||||
| } | } | ||||
| fstat(fd_in, &statbuf); | fstat(fd_in, &statbuf); | ||||
| @@ -75,48 +75,48 @@ main(int argc, char *argv[]) | |||||
| for (i = 0; i < comp_len-4;) | for (i = 0; i < comp_len-4;) | ||||
| { | { | ||||
| int ret, len = read(fd_in, &buf_in, 1024); | |||||
| int ret, len = read(fd_in, &buf_in, 1024); | |||||
| dbgprintf("read %d bytes\n", len); | |||||
| dbgprintf("read %d bytes\n", len); | |||||
| last_out = zstream.total_out; | |||||
| last_out = zstream.total_out; | |||||
| zstream.next_in = &buf_in[0]; | |||||
| zstream.avail_in = len; | |||||
| zstream.next_out = &buf_out[0]; | |||||
| zstream.avail_out = 1024; | |||||
| zstream.next_in = &buf_in[0]; | |||||
| zstream.avail_in = len; | |||||
| zstream.next_out = &buf_out[0]; | |||||
| zstream.avail_out = 1024; | |||||
| ret = inflate(&zstream, Z_SYNC_FLUSH); | |||||
| if (ret == Z_STREAM_END || ret == Z_BUF_ERROR) | |||||
| break; | |||||
| if (ret != Z_OK) | |||||
| { | |||||
| printf("Error while decompressing: %d\n", ret); | |||||
| inflateEnd(&zstream); | |||||
| exit(1); | |||||
| } | |||||
| ret = inflate(&zstream, Z_SYNC_FLUSH); | |||||
| if (ret == Z_STREAM_END || ret == Z_BUF_ERROR) | |||||
| break; | |||||
| if (ret != Z_OK) | |||||
| { | |||||
| printf("Error while decompressing: %d\n", ret); | |||||
| inflateEnd(&zstream); | |||||
| exit(1); | |||||
| } | |||||
| dbgprintf("a_in: %d t_in: %d a_out: %d t_out: %d -- %d out\n", | |||||
| zstream.avail_in, zstream.total_in, zstream.avail_out, zstream.total_out, | |||||
| zstream.total_out-last_out); | |||||
| dbgprintf("a_in: %d t_in: %d a_out: %d t_out: %d -- %d out\n", | |||||
| zstream.avail_in, zstream.total_in, zstream.avail_out, zstream.total_out, | |||||
| zstream.total_out-last_out); | |||||
| write(fd_out, &buf_out, zstream.total_out-last_out); | |||||
| write(fd_out, &buf_out, zstream.total_out-last_out); | |||||
| i += len; | |||||
| i += len; | |||||
| } | } | ||||
| if (zstream.total_out != uncomp_len-8) | if (zstream.total_out != uncomp_len-8) | ||||
| { | { | ||||
| printf("Size mismatch (%d != %d), updating header...\n", | |||||
| zstream.total_out, uncomp_len-8); | |||||
| printf("Size mismatch (%d != %d), updating header...\n", | |||||
| zstream.total_out, uncomp_len-8); | |||||
| buf_in[0] = (zstream.total_out+8) & 0xff; | |||||
| buf_in[1] = (zstream.total_out+8 >> 8) & 0xff; | |||||
| buf_in[2] = (zstream.total_out+8 >> 16) & 0xff; | |||||
| buf_in[3] = (zstream.total_out+8 >> 24) & 0xff; | |||||
| buf_in[0] = (zstream.total_out+8) & 0xff; | |||||
| buf_in[1] = (zstream.total_out+8 >> 8) & 0xff; | |||||
| buf_in[2] = (zstream.total_out+8 >> 16) & 0xff; | |||||
| buf_in[3] = (zstream.total_out+8 >> 24) & 0xff; | |||||
| lseek(fd_out, 4, SEEK_SET); | |||||
| write(fd_out, &buf_in, 4); | |||||
| lseek(fd_out, 4, SEEK_SET); | |||||
| write(fd_out, &buf_in, 4); | |||||
| } | } | ||||
| inflateEnd(&zstream); | inflateEnd(&zstream); | ||||
| @@ -39,24 +39,24 @@ $ibase = ""; | |||||
| while ($_ = shift) { | while ($_ = shift) { | ||||
| if (/^-D(.*)$/) { | if (/^-D(.*)$/) { | ||||
| if ($1 ne "") { | |||||
| $flag = $1; | |||||
| } else { | |||||
| $flag = shift; | |||||
| } | |||||
| $value = ""; | |||||
| ($flag, $value) = ($flag =~ /^([^=]+)(?:=(.+))?/); | |||||
| die "no flag specified for -D\n" | |||||
| unless $flag ne ""; | |||||
| die "flags may only contain letters, digits, hyphens, dashes and underscores\n" | |||||
| unless $flag =~ /^[a-zA-Z0-9_-]+$/; | |||||
| $defs{$flag} = $value; | |||||
| if ($1 ne "") { | |||||
| $flag = $1; | |||||
| } else { | |||||
| $flag = shift; | |||||
| } | |||||
| $value = ""; | |||||
| ($flag, $value) = ($flag =~ /^([^=]+)(?:=(.+))?/); | |||||
| die "no flag specified for -D\n" | |||||
| unless $flag ne ""; | |||||
| die "flags may only contain letters, digits, hyphens, dashes and underscores\n" | |||||
| unless $flag =~ /^[a-zA-Z0-9_-]+$/; | |||||
| $defs{$flag} = $value; | |||||
| } elsif (/^-/) { | } elsif (/^-/) { | ||||
| usage(); | |||||
| usage(); | |||||
| } else { | } else { | ||||
| $in = $_, next unless defined $in; | |||||
| $out = $_, next unless defined $out; | |||||
| usage(); | |||||
| $in = $_, next unless defined $in; | |||||
| $out = $_, next unless defined $out; | |||||
| usage(); | |||||
| } | } | ||||
| } | } | ||||
| @@ -76,13 +76,13 @@ while(defined $inf) { | |||||
| while(<$inf>) { | while(<$inf>) { | ||||
| # Certain commands are discarded without further processing. | # Certain commands are discarded without further processing. | ||||
| /^\@(?: | /^\@(?: | ||||
| [a-z]+index # @*index: useful only in complete manual | |||||
| |need # @need: useful only in printed manual | |||||
| |(?:end\s+)?group # @group .. @end group: ditto | |||||
| |page # @page: ditto | |||||
| |node # @node: useful only in .info file | |||||
| |(?:end\s+)?ifnottex # @ifnottex .. @end ifnottex: use contents | |||||
| )\b/x and next; | |||||
| [a-z]+index # @*index: useful only in complete manual | |||||
| |need # @need: useful only in printed manual | |||||
| |(?:end\s+)?group # @group .. @end group: ditto | |||||
| |page # @page: ditto | |||||
| |node # @node: useful only in .info file | |||||
| |(?:end\s+)?ifnottex # @ifnottex .. @end ifnottex: use contents | |||||
| )\b/x and next; | |||||
| chomp; | chomp; | ||||
| @@ -92,38 +92,38 @@ while(<$inf>) { | |||||
| # Identify a man title but keep only the one we are interested in. | # Identify a man title but keep only the one we are interested in. | ||||
| /^\@c\s+man\s+title\s+([A-Za-z0-9-]+)\s+(.+)/ and do { | /^\@c\s+man\s+title\s+([A-Za-z0-9-]+)\s+(.+)/ and do { | ||||
| if (exists $defs{$1}) { | |||||
| $fn = $1; | |||||
| $tl = postprocess($2); | |||||
| } | |||||
| next; | |||||
| if (exists $defs{$1}) { | |||||
| $fn = $1; | |||||
| $tl = postprocess($2); | |||||
| } | |||||
| next; | |||||
| }; | }; | ||||
| # Look for blocks surrounded by @c man begin SECTION ... @c man end. | # Look for blocks surrounded by @c man begin SECTION ... @c man end. | ||||
| # This really oughta be @ifman ... @end ifman and the like, but such | # This really oughta be @ifman ... @end ifman and the like, but such | ||||
| # would require rev'ing all other Texinfo translators. | # would require rev'ing all other Texinfo translators. | ||||
| /^\@c\s+man\s+begin\s+([A-Z]+)\s+([A-Za-z0-9-]+)/ and do { | /^\@c\s+man\s+begin\s+([A-Z]+)\s+([A-Za-z0-9-]+)/ and do { | ||||
| $output = 1 if exists $defs{$2}; | |||||
| $output = 1 if exists $defs{$2}; | |||||
| $sect = $1; | $sect = $1; | ||||
| next; | |||||
| next; | |||||
| }; | }; | ||||
| /^\@c\s+man\s+begin\s+([A-Z]+)/ and $sect = $1, $output = 1, next; | /^\@c\s+man\s+begin\s+([A-Z]+)/ and $sect = $1, $output = 1, next; | ||||
| /^\@c\s+man\s+end/ and do { | /^\@c\s+man\s+end/ and do { | ||||
| $sects{$sect} = "" unless exists $sects{$sect}; | |||||
| $sects{$sect} .= postprocess($section); | |||||
| $section = ""; | |||||
| $output = 0; | |||||
| next; | |||||
| $sects{$sect} = "" unless exists $sects{$sect}; | |||||
| $sects{$sect} .= postprocess($section); | |||||
| $section = ""; | |||||
| $output = 0; | |||||
| next; | |||||
| }; | }; | ||||
| # handle variables | # handle variables | ||||
| /^\@set\s+([a-zA-Z0-9_-]+)\s*(.*)$/ and do { | /^\@set\s+([a-zA-Z0-9_-]+)\s*(.*)$/ and do { | ||||
| $defs{$1} = $2; | |||||
| next; | |||||
| $defs{$1} = $2; | |||||
| next; | |||||
| }; | }; | ||||
| /^\@clear\s+([a-zA-Z0-9_-]+)/ and do { | /^\@clear\s+([a-zA-Z0-9_-]+)/ and do { | ||||
| delete $defs{$1}; | |||||
| next; | |||||
| delete $defs{$1}; | |||||
| next; | |||||
| }; | }; | ||||
| next unless $output; | next unless $output; | ||||
| @@ -135,55 +135,55 @@ while(<$inf>) { | |||||
| # End-block handler goes up here because it needs to operate even | # End-block handler goes up here because it needs to operate even | ||||
| # if we are skipping. | # if we are skipping. | ||||
| /^\@end\s+([a-z]+)/ and do { | /^\@end\s+([a-z]+)/ and do { | ||||
| # Ignore @end foo, where foo is not an operation which may | |||||
| # cause us to skip, if we are presently skipping. | |||||
| my $ended = $1; | |||||
| next if $skipping && $ended !~ /^(?:ifset|ifclear|ignore|menu|iftex)$/; | |||||
| die "\@end $ended without \@$ended at line $.\n" unless defined $endw; | |||||
| die "\@$endw ended by \@end $ended at line $.\n" unless $ended eq $endw; | |||||
| $endw = pop @endwstack; | |||||
| if ($ended =~ /^(?:ifset|ifclear|ignore|menu|iftex)$/) { | |||||
| $skipping = pop @skstack; | |||||
| next; | |||||
| } elsif ($ended =~ /^(?:example|smallexample|display)$/) { | |||||
| $shift = ""; | |||||
| $_ = ""; # need a paragraph break | |||||
| } elsif ($ended =~ /^(?:itemize|enumerate|[fv]?table)$/) { | |||||
| $_ = "\n=back\n"; | |||||
| $ic = pop @icstack; | |||||
| } else { | |||||
| die "unknown command \@end $ended at line $.\n"; | |||||
| } | |||||
| # Ignore @end foo, where foo is not an operation which may | |||||
| # cause us to skip, if we are presently skipping. | |||||
| my $ended = $1; | |||||
| next if $skipping && $ended !~ /^(?:ifset|ifclear|ignore|menu|iftex)$/; | |||||
| die "\@end $ended without \@$ended at line $.\n" unless defined $endw; | |||||
| die "\@$endw ended by \@end $ended at line $.\n" unless $ended eq $endw; | |||||
| $endw = pop @endwstack; | |||||
| if ($ended =~ /^(?:ifset|ifclear|ignore|menu|iftex)$/) { | |||||
| $skipping = pop @skstack; | |||||
| next; | |||||
| } elsif ($ended =~ /^(?:example|smallexample|display)$/) { | |||||
| $shift = ""; | |||||
| $_ = ""; # need a paragraph break | |||||
| } elsif ($ended =~ /^(?:itemize|enumerate|[fv]?table)$/) { | |||||
| $_ = "\n=back\n"; | |||||
| $ic = pop @icstack; | |||||
| } else { | |||||
| die "unknown command \@end $ended at line $.\n"; | |||||
| } | |||||
| }; | }; | ||||
| # We must handle commands which can cause skipping even while we | # We must handle commands which can cause skipping even while we | ||||
| # are skipping, otherwise we will not process nested conditionals | # are skipping, otherwise we will not process nested conditionals | ||||
| # correctly. | # correctly. | ||||
| /^\@ifset\s+([a-zA-Z0-9_-]+)/ and do { | /^\@ifset\s+([a-zA-Z0-9_-]+)/ and do { | ||||
| push @endwstack, $endw; | |||||
| push @skstack, $skipping; | |||||
| $endw = "ifset"; | |||||
| $skipping = 1 unless exists $defs{$1}; | |||||
| next; | |||||
| push @endwstack, $endw; | |||||
| push @skstack, $skipping; | |||||
| $endw = "ifset"; | |||||
| $skipping = 1 unless exists $defs{$1}; | |||||
| next; | |||||
| }; | }; | ||||
| /^\@ifclear\s+([a-zA-Z0-9_-]+)/ and do { | /^\@ifclear\s+([a-zA-Z0-9_-]+)/ and do { | ||||
| push @endwstack, $endw; | |||||
| push @skstack, $skipping; | |||||
| $endw = "ifclear"; | |||||
| $skipping = 1 if exists $defs{$1}; | |||||
| next; | |||||
| push @endwstack, $endw; | |||||
| push @skstack, $skipping; | |||||
| $endw = "ifclear"; | |||||
| $skipping = 1 if exists $defs{$1}; | |||||
| next; | |||||
| }; | }; | ||||
| /^\@(ignore|menu|iftex)\b/ and do { | /^\@(ignore|menu|iftex)\b/ and do { | ||||
| push @endwstack, $endw; | |||||
| push @skstack, $skipping; | |||||
| $endw = $1; | |||||
| $skipping = 1; | |||||
| next; | |||||
| push @endwstack, $endw; | |||||
| push @skstack, $skipping; | |||||
| $endw = $1; | |||||
| $skipping = 1; | |||||
| next; | |||||
| }; | }; | ||||
| next if $skipping; | next if $skipping; | ||||
| @@ -210,85 +210,85 @@ while(<$inf>) { | |||||
| # Inside a verbatim block, handle @var specially. | # Inside a verbatim block, handle @var specially. | ||||
| if ($shift ne "") { | if ($shift ne "") { | ||||
| s/\@var\{([^\}]*)\}/<$1>/g; | |||||
| s/\@var\{([^\}]*)\}/<$1>/g; | |||||
| } | } | ||||
| # POD doesn't interpret E<> inside a verbatim block. | # POD doesn't interpret E<> inside a verbatim block. | ||||
| if ($shift eq "") { | if ($shift eq "") { | ||||
| s/</</g; | |||||
| s/>/>/g; | |||||
| s/</</g; | |||||
| s/>/>/g; | |||||
| } else { | } else { | ||||
| s/</</g; | |||||
| s/>/>/g; | |||||
| s/</</g; | |||||
| s/>/>/g; | |||||
| } | } | ||||
| # Single line command handlers. | # Single line command handlers. | ||||
| /^\@include\s+(.+)$/ and do { | /^\@include\s+(.+)$/ and do { | ||||
| push @instack, $inf; | |||||
| $inf = gensym(); | |||||
| # Try cwd and $ibase. | |||||
| open($inf, "<" . $1) | |||||
| or open($inf, "<" . $ibase . "/" . $1) | |||||
| or die "cannot open $1 or $ibase/$1: $!\n"; | |||||
| next; | |||||
| push @instack, $inf; | |||||
| $inf = gensym(); | |||||
| # Try cwd and $ibase. | |||||
| open($inf, "<" . $1) | |||||
| or open($inf, "<" . $ibase . "/" . $1) | |||||
| or die "cannot open $1 or $ibase/$1: $!\n"; | |||||
| next; | |||||
| }; | }; | ||||
| /^\@(?:section|unnumbered|unnumberedsec|center)\s+(.+)$/ | /^\@(?:section|unnumbered|unnumberedsec|center)\s+(.+)$/ | ||||
| and $_ = "\n=head2 $1\n"; | |||||
| and $_ = "\n=head2 $1\n"; | |||||
| /^\@subsection\s+(.+)$/ | /^\@subsection\s+(.+)$/ | ||||
| and $_ = "\n=head3 $1\n"; | |||||
| and $_ = "\n=head3 $1\n"; | |||||
| # Block command handlers: | # Block command handlers: | ||||
| /^\@itemize\s+(\@[a-z]+|\*|-)/ and do { | /^\@itemize\s+(\@[a-z]+|\*|-)/ and do { | ||||
| push @endwstack, $endw; | |||||
| push @icstack, $ic; | |||||
| $ic = $1; | |||||
| $_ = "\n=over 4\n"; | |||||
| $endw = "itemize"; | |||||
| push @endwstack, $endw; | |||||
| push @icstack, $ic; | |||||
| $ic = $1; | |||||
| $_ = "\n=over 4\n"; | |||||
| $endw = "itemize"; | |||||
| }; | }; | ||||
| /^\@enumerate(?:\s+([a-zA-Z0-9]+))?/ and do { | /^\@enumerate(?:\s+([a-zA-Z0-9]+))?/ and do { | ||||
| push @endwstack, $endw; | |||||
| push @icstack, $ic; | |||||
| if (defined $1) { | |||||
| $ic = $1 . "."; | |||||
| } else { | |||||
| $ic = "1."; | |||||
| } | |||||
| $_ = "\n=over 4\n"; | |||||
| $endw = "enumerate"; | |||||
| push @endwstack, $endw; | |||||
| push @icstack, $ic; | |||||
| if (defined $1) { | |||||
| $ic = $1 . "."; | |||||
| } else { | |||||
| $ic = "1."; | |||||
| } | |||||
| $_ = "\n=over 4\n"; | |||||
| $endw = "enumerate"; | |||||
| }; | }; | ||||
| /^\@([fv]?table)\s+(\@[a-z]+)/ and do { | /^\@([fv]?table)\s+(\@[a-z]+)/ and do { | ||||
| push @endwstack, $endw; | |||||
| push @icstack, $ic; | |||||
| $endw = $1; | |||||
| $ic = $2; | |||||
| $ic =~ s/\@(?:samp|strong|key|gcctabopt|option|env)/B/; | |||||
| $ic =~ s/\@(?:code|kbd)/C/; | |||||
| $ic =~ s/\@(?:dfn|var|emph|cite|i)/I/; | |||||
| $ic =~ s/\@(?:file)/F/; | |||||
| $_ = "\n=over 4\n"; | |||||
| push @endwstack, $endw; | |||||
| push @icstack, $ic; | |||||
| $endw = $1; | |||||
| $ic = $2; | |||||
| $ic =~ s/\@(?:samp|strong|key|gcctabopt|option|env)/B/; | |||||
| $ic =~ s/\@(?:code|kbd)/C/; | |||||
| $ic =~ s/\@(?:dfn|var|emph|cite|i)/I/; | |||||
| $ic =~ s/\@(?:file)/F/; | |||||
| $_ = "\n=over 4\n"; | |||||
| }; | }; | ||||
| /^\@((?:small)?example|display)/ and do { | /^\@((?:small)?example|display)/ and do { | ||||
| push @endwstack, $endw; | |||||
| $endw = $1; | |||||
| $shift = "\t"; | |||||
| $_ = ""; # need a paragraph break | |||||
| push @endwstack, $endw; | |||||
| $endw = $1; | |||||
| $shift = "\t"; | |||||
| $_ = ""; # need a paragraph break | |||||
| }; | }; | ||||
| /^\@itemx?\s*(.+)?$/ and do { | /^\@itemx?\s*(.+)?$/ and do { | ||||
| if (defined $1) { | |||||
| # Entity escapes prevent munging by the <> processing below. | |||||
| $_ = "\n=item $ic\<$1\>\n"; | |||||
| } else { | |||||
| $_ = "\n=item $ic\n"; | |||||
| $ic =~ y/A-Ya-y/B-Zb-z/; | |||||
| $ic =~ s/(\d+)/$1 + 1/eg; | |||||
| } | |||||
| if (defined $1) { | |||||
| # Entity escapes prevent munging by the <> processing below. | |||||
| $_ = "\n=item $ic\<$1\>\n"; | |||||
| } else { | |||||
| $_ = "\n=item $ic\n"; | |||||
| $ic =~ y/A-Ya-y/B-Zb-z/; | |||||
| $ic =~ s/(\d+)/$1 + 1/eg; | |||||
| } | |||||
| }; | }; | ||||
| $section .= $shift.$_."\n"; | $section .= $shift.$_."\n"; | ||||
| @@ -304,13 +304,13 @@ $sects{NAME} = "$fn \- $tl\n"; | |||||
| $sects{FOOTNOTES} .= "=back\n" if exists $sects{FOOTNOTES}; | $sects{FOOTNOTES} .= "=back\n" if exists $sects{FOOTNOTES}; | ||||
| for $sect (qw(NAME SYNOPSIS DESCRIPTION OPTIONS EXAMPLES ENVIRONMENT FILES | for $sect (qw(NAME SYNOPSIS DESCRIPTION OPTIONS EXAMPLES ENVIRONMENT FILES | ||||
| BUGS NOTES FOOTNOTES SEEALSO AUTHOR COPYRIGHT)) { | |||||
| BUGS NOTES FOOTNOTES SEEALSO AUTHOR COPYRIGHT)) { | |||||
| if(exists $sects{$sect}) { | if(exists $sects{$sect}) { | ||||
| $head = $sect; | |||||
| $head =~ s/SEEALSO/SEE ALSO/; | |||||
| print "=head1 $head\n\n"; | |||||
| print scalar unmunge ($sects{$sect}); | |||||
| print "\n"; | |||||
| $head = $sect; | |||||
| $head =~ s/SEEALSO/SEE ALSO/; | |||||
| print "=head1 $head\n\n"; | |||||
| print scalar unmunge ($sects{$sect}); | |||||
| print "\n"; | |||||
| } | } | ||||
| } | } | ||||
| @@ -325,13 +325,13 @@ sub postprocess | |||||
| # @value{foo} is replaced by whatever 'foo' is defined as. | # @value{foo} is replaced by whatever 'foo' is defined as. | ||||
| while (m/(\@value\{([a-zA-Z0-9_-]+)\})/g) { | while (m/(\@value\{([a-zA-Z0-9_-]+)\})/g) { | ||||
| if (! exists $defs{$2}) { | |||||
| print STDERR "Option $2 not defined\n"; | |||||
| s/\Q$1\E//; | |||||
| } else { | |||||
| $value = $defs{$2}; | |||||
| s/\Q$1\E/$value/; | |||||
| } | |||||
| if (! exists $defs{$2}) { | |||||
| print STDERR "Option $2 not defined\n"; | |||||
| s/\Q$1\E//; | |||||
| } else { | |||||
| $value = $defs{$2}; | |||||
| s/\Q$1\E/$value/; | |||||
| } | |||||
| } | } | ||||
| # Formatting commands. | # Formatting commands. | ||||
| @@ -381,9 +381,9 @@ sub postprocess | |||||
| # processing because otherwise the regexp will choke on formatting | # processing because otherwise the regexp will choke on formatting | ||||
| # inside @footnote. | # inside @footnote. | ||||
| while (/\@footnote/g) { | while (/\@footnote/g) { | ||||
| s/\@footnote\{([^\}]+)\}/[$fnno]/; | |||||
| add_footnote($1, $fnno); | |||||
| $fnno++; | |||||
| s/\@footnote\{([^\}]+)\}/[$fnno]/; | |||||
| add_footnote($1, $fnno); | |||||
| $fnno++; | |||||
| } | } | ||||
| return $_; | return $_; | ||||
| @@ -406,7 +406,7 @@ sub unmunge | |||||
| sub add_footnote | sub add_footnote | ||||
| { | { | ||||
| unless (exists $sects{FOOTNOTES}) { | unless (exists $sects{FOOTNOTES}) { | ||||
| $sects{FOOTNOTES} = "\n=over 4\n\n"; | |||||
| $sects{FOOTNOTES} = "\n=over 4\n\n"; | |||||
| } | } | ||||
| $sects{FOOTNOTES} .= "=item $fnno.\n\n"; $fnno++; | $sects{FOOTNOTES} .= "=item $fnno.\n\n"; $fnno++; | ||||
| @@ -419,9 +419,9 @@ sub add_footnote | |||||
| my $genseq = 0; | my $genseq = 0; | ||||
| sub gensym | sub gensym | ||||
| { | { | ||||
| my $name = "GEN" . $genseq++; | |||||
| my $ref = \*{$name}; | |||||
| delete $::{$name}; | |||||
| return $ref; | |||||
| my $name = "GEN" . $genseq++; | |||||
| my $ref = \*{$name}; | |||||
| delete $::{$name}; | |||||
| return $ref; | |||||
| } | } | ||||
| } | } | ||||
| @@ -579,7 +579,7 @@ static void do_audio_out(AVFormatContext *s, | |||||
| break; | break; | ||||
| } | } | ||||
| ret = avcodec_encode_audio(enc, audio_out, size_out, | ret = avcodec_encode_audio(enc, audio_out, size_out, | ||||
| (short *)buftmp); | |||||
| (short *)buftmp); | |||||
| audio_size += ret; | audio_size += ret; | ||||
| pkt.stream_index= ost->index; | pkt.stream_index= ost->index; | ||||
| pkt.data= audio_out; | pkt.data= audio_out; | ||||
| @@ -821,10 +821,10 @@ static void do_video_out(AVFormatContext *s, | |||||
| padcolor); | padcolor); | ||||
| } | } | ||||
| if (enc->pix_fmt != PIX_FMT_YUV420P) { | |||||
| if (enc->pix_fmt != PIX_FMT_YUV420P) { | |||||
| int size; | int size; | ||||
| av_free(buf); | |||||
| av_free(buf); | |||||
| /* create temporary picture */ | /* create temporary picture */ | ||||
| size = avpicture_get_size(enc->pix_fmt, enc->width, enc->height); | size = avpicture_get_size(enc->pix_fmt, enc->width, enc->height); | ||||
| buf = av_malloc(size); | buf = av_malloc(size); | ||||
| @@ -842,7 +842,7 @@ static void do_video_out(AVFormatContext *s, | |||||
| goto the_end; | goto the_end; | ||||
| } | } | ||||
| } | |||||
| } | |||||
| } else if (ost->video_crop) { | } else if (ost->video_crop) { | ||||
| picture_crop_temp.data[0] = formatted_picture->data[0] + | picture_crop_temp.data[0] = formatted_picture->data[0] + | ||||
| (ost->topBand * formatted_picture->linesize[0]) + ost->leftBand; | (ost->topBand * formatted_picture->linesize[0]) + ost->leftBand; | ||||
| @@ -921,7 +921,7 @@ static void do_video_out(AVFormatContext *s, | |||||
| avoid any copies. We support temorarily the older | avoid any copies. We support temorarily the older | ||||
| method. */ | method. */ | ||||
| AVFrame* old_frame = enc->coded_frame; | AVFrame* old_frame = enc->coded_frame; | ||||
| enc->coded_frame = dec->coded_frame; //FIXME/XXX remove this hack | |||||
| enc->coded_frame = dec->coded_frame; //FIXME/XXX remove this hack | |||||
| pkt.data= (uint8_t *)final_picture; | pkt.data= (uint8_t *)final_picture; | ||||
| pkt.size= sizeof(AVPicture); | pkt.size= sizeof(AVPicture); | ||||
| if(dec->coded_frame && enc->coded_frame->pts != AV_NOPTS_VALUE) | if(dec->coded_frame && enc->coded_frame->pts != AV_NOPTS_VALUE) | ||||
| @@ -930,7 +930,7 @@ static void do_video_out(AVFormatContext *s, | |||||
| pkt.flags |= PKT_FLAG_KEY; | pkt.flags |= PKT_FLAG_KEY; | ||||
| av_interleaved_write_frame(s, &pkt); | av_interleaved_write_frame(s, &pkt); | ||||
| enc->coded_frame = old_frame; | |||||
| enc->coded_frame = old_frame; | |||||
| } else { | } else { | ||||
| AVFrame big_picture; | AVFrame big_picture; | ||||
| @@ -1044,8 +1044,8 @@ static void do_video_stats(AVFormatContext *os, AVOutputStream *ost, | |||||
| } | } | ||||
| static void print_report(AVFormatContext **output_files, | static void print_report(AVFormatContext **output_files, | ||||
| AVOutputStream **ost_table, int nb_ostreams, | |||||
| int is_last_report) | |||||
| AVOutputStream **ost_table, int nb_ostreams, | |||||
| int is_last_report) | |||||
| { | { | ||||
| char buf[1024]; | char buf[1024]; | ||||
| AVOutputStream *ost; | AVOutputStream *ost; | ||||
| @@ -1138,9 +1138,9 @@ static void print_report(AVFormatContext **output_files, | |||||
| "size=%8.0fkB time=%0.1f bitrate=%6.1fkbits/s", | "size=%8.0fkB time=%0.1f bitrate=%6.1fkbits/s", | ||||
| (double)total_size / 1024, ti1, bitrate); | (double)total_size / 1024, ti1, bitrate); | ||||
| if (verbose > 1) | |||||
| snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), " dup=%d drop=%d", | |||||
| nb_frames_dup, nb_frames_drop); | |||||
| if (verbose > 1) | |||||
| snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), " dup=%d drop=%d", | |||||
| nb_frames_dup, nb_frames_drop); | |||||
| if (verbose >= 0) | if (verbose >= 0) | ||||
| fprintf(stderr, "%s \r", buf); | fprintf(stderr, "%s \r", buf); | ||||
| @@ -1323,7 +1323,7 @@ static int output_packet(AVInputStream *ist, int ist_index, | |||||
| } | } | ||||
| #endif | #endif | ||||
| /* if output time reached then transcode raw format, | /* if output time reached then transcode raw format, | ||||
| encode packets and output them */ | |||||
| encode packets and output them */ | |||||
| if (start_time == 0 || ist->pts >= start_time) | if (start_time == 0 || ist->pts >= start_time) | ||||
| for(i=0;i<nb_ostreams;i++) { | for(i=0;i<nb_ostreams;i++) { | ||||
| int frame_size; | int frame_size; | ||||
| @@ -1898,7 +1898,7 @@ static int av_encode(AVFormatContext **output_files, | |||||
| /* init pts */ | /* init pts */ | ||||
| for(i=0;i<nb_istreams;i++) { | for(i=0;i<nb_istreams;i++) { | ||||
| ist = ist_table[i]; | ist = ist_table[i]; | ||||
| is = input_files[ist->file_index]; | |||||
| is = input_files[ist->file_index]; | |||||
| ist->pts = 0; | ist->pts = 0; | ||||
| ist->next_pts = av_rescale_q(ist->st->start_time, ist->st->time_base, AV_TIME_BASE_Q); | ist->next_pts = av_rescale_q(ist->st->start_time, ist->st->time_base, AV_TIME_BASE_Q); | ||||
| if(ist->st->start_time == AV_NOPTS_VALUE) | if(ist->st->start_time == AV_NOPTS_VALUE) | ||||
| @@ -2273,7 +2273,7 @@ static void opt_frame_rate(const char *arg) | |||||
| { | { | ||||
| if (parse_frame_rate(&frame_rate, &frame_rate_base, arg) < 0) { | if (parse_frame_rate(&frame_rate, &frame_rate_base, arg) < 0) { | ||||
| fprintf(stderr, "Incorrect frame rate\n"); | fprintf(stderr, "Incorrect frame rate\n"); | ||||
| exit(1); | |||||
| exit(1); | |||||
| } | } | ||||
| } | } | ||||
| @@ -2289,7 +2289,7 @@ static void opt_frame_crop_top(const char *arg) | |||||
| exit(1); | exit(1); | ||||
| } | } | ||||
| if ((frame_topBand) >= frame_height){ | if ((frame_topBand) >= frame_height){ | ||||
| fprintf(stderr, "Vertical crop dimensions are outside the range of the original image.\nRemember to crop first and scale second.\n"); | |||||
| fprintf(stderr, "Vertical crop dimensions are outside the range of the original image.\nRemember to crop first and scale second.\n"); | |||||
| exit(1); | exit(1); | ||||
| } | } | ||||
| frame_height -= frame_topBand; | frame_height -= frame_topBand; | ||||
| @@ -2307,7 +2307,7 @@ static void opt_frame_crop_bottom(const char *arg) | |||||
| exit(1); | exit(1); | ||||
| } | } | ||||
| if ((frame_bottomBand) >= frame_height){ | if ((frame_bottomBand) >= frame_height){ | ||||
| fprintf(stderr, "Vertical crop dimensions are outside the range of the original image.\nRemember to crop first and scale second.\n"); | |||||
| fprintf(stderr, "Vertical crop dimensions are outside the range of the original image.\nRemember to crop first and scale second.\n"); | |||||
| exit(1); | exit(1); | ||||
| } | } | ||||
| frame_height -= frame_bottomBand; | frame_height -= frame_bottomBand; | ||||
| @@ -2325,7 +2325,7 @@ static void opt_frame_crop_left(const char *arg) | |||||
| exit(1); | exit(1); | ||||
| } | } | ||||
| if ((frame_leftBand) >= frame_width){ | if ((frame_leftBand) >= frame_width){ | ||||
| fprintf(stderr, "Horizontal crop dimensions are outside the range of the original image.\nRemember to crop first and scale second.\n"); | |||||
| fprintf(stderr, "Horizontal crop dimensions are outside the range of the original image.\nRemember to crop first and scale second.\n"); | |||||
| exit(1); | exit(1); | ||||
| } | } | ||||
| frame_width -= frame_leftBand; | frame_width -= frame_leftBand; | ||||
| @@ -2343,7 +2343,7 @@ static void opt_frame_crop_right(const char *arg) | |||||
| exit(1); | exit(1); | ||||
| } | } | ||||
| if ((frame_rightBand) >= frame_width){ | if ((frame_rightBand) >= frame_width){ | ||||
| fprintf(stderr, "Horizontal crop dimensions are outside the range of the original image.\nRemember to crop first and scale second.\n"); | |||||
| fprintf(stderr, "Horizontal crop dimensions are outside the range of the original image.\nRemember to crop first and scale second.\n"); | |||||
| exit(1); | exit(1); | ||||
| } | } | ||||
| frame_width -= frame_rightBand; | frame_width -= frame_rightBand; | ||||
| @@ -2364,7 +2364,7 @@ static void opt_frame_size(const char *arg) | |||||
| #define SCALEBITS 10 | #define SCALEBITS 10 | ||||
| #define ONE_HALF (1 << (SCALEBITS - 1)) | #define ONE_HALF (1 << (SCALEBITS - 1)) | ||||
| #define FIX(x) ((int) ((x) * (1<<SCALEBITS) + 0.5)) | |||||
| #define FIX(x) ((int) ((x) * (1<<SCALEBITS) + 0.5)) | |||||
| #define RGB_TO_Y(r, g, b) \ | #define RGB_TO_Y(r, g, b) \ | ||||
| ((FIX(0.29900) * (r) + FIX(0.58700) * (g) + \ | ((FIX(0.29900) * (r) + FIX(0.58700) * (g) + \ | ||||
| @@ -2462,16 +2462,16 @@ static void opt_frame_aspect_ratio(const char *arg) | |||||
| p = strchr(arg, ':'); | p = strchr(arg, ':'); | ||||
| if (p) { | if (p) { | ||||
| x = strtol(arg, (char **)&arg, 10); | x = strtol(arg, (char **)&arg, 10); | ||||
| if (arg == p) | |||||
| y = strtol(arg+1, (char **)&arg, 10); | |||||
| if (x > 0 && y > 0) | |||||
| ar = (double)x / (double)y; | |||||
| if (arg == p) | |||||
| y = strtol(arg+1, (char **)&arg, 10); | |||||
| if (x > 0 && y > 0) | |||||
| ar = (double)x / (double)y; | |||||
| } else | } else | ||||
| ar = strtod(arg, (char **)&arg); | ar = strtod(arg, (char **)&arg); | ||||
| if (!ar) { | if (!ar) { | ||||
| fprintf(stderr, "Incorrect aspect ratio specification.\n"); | fprintf(stderr, "Incorrect aspect ratio specification.\n"); | ||||
| exit(1); | |||||
| exit(1); | |||||
| } | } | ||||
| frame_aspect_ratio = ar; | frame_aspect_ratio = ar; | ||||
| } | } | ||||
| @@ -2957,8 +2957,8 @@ static void opt_input_file(const char *filename) | |||||
| } | } | ||||
| frame_height = enc->height; | frame_height = enc->height; | ||||
| frame_width = enc->width; | frame_width = enc->width; | ||||
| frame_aspect_ratio = av_q2d(enc->sample_aspect_ratio) * enc->width / enc->height; | |||||
| frame_pix_fmt = enc->pix_fmt; | |||||
| frame_aspect_ratio = av_q2d(enc->sample_aspect_ratio) * enc->width / enc->height; | |||||
| frame_pix_fmt = enc->pix_fmt; | |||||
| rfps = ic->streams[i]->r_frame_rate.num; | rfps = ic->streams[i]->r_frame_rate.num; | ||||
| rfps_base = ic->streams[i]->r_frame_rate.den; | rfps_base = ic->streams[i]->r_frame_rate.den; | ||||
| enc->workaround_bugs = workaround_bugs; | enc->workaround_bugs = workaround_bugs; | ||||
| @@ -3454,7 +3454,7 @@ static void opt_output_file(const char *filename) | |||||
| oc->timestamp = rec_timestamp; | oc->timestamp = rec_timestamp; | ||||
| if (str_title) | |||||
| if (str_title) | |||||
| pstrcpy(oc->title, sizeof(oc->title), str_title); | pstrcpy(oc->title, sizeof(oc->title), str_title); | ||||
| if (str_author) | if (str_author) | ||||
| pstrcpy(oc->author, sizeof(oc->author), str_author); | pstrcpy(oc->author, sizeof(oc->author), str_author); | ||||
| @@ -3490,11 +3490,11 @@ static void opt_output_file(const char *filename) | |||||
| fprintf(stderr, "Not overwriting - exiting\n"); | fprintf(stderr, "Not overwriting - exiting\n"); | ||||
| exit(1); | exit(1); | ||||
| } | } | ||||
| } | |||||
| else { | |||||
| } | |||||
| else { | |||||
| fprintf(stderr,"File '%s' already exists. Exiting.\n", filename); | fprintf(stderr,"File '%s' already exists. Exiting.\n", filename); | ||||
| exit(1); | exit(1); | ||||
| } | |||||
| } | |||||
| } | } | ||||
| } | } | ||||
| @@ -3579,14 +3579,14 @@ static void prepare_grab(void) | |||||
| fmt1 = av_find_input_format(video_grab_format); | fmt1 = av_find_input_format(video_grab_format); | ||||
| vp->device = video_device; | vp->device = video_device; | ||||
| vp->channel = video_channel; | vp->channel = video_channel; | ||||
| vp->standard = video_standard; | |||||
| vp->standard = video_standard; | |||||
| if (av_open_input_file(&ic, "", fmt1, 0, vp) < 0) { | if (av_open_input_file(&ic, "", fmt1, 0, vp) < 0) { | ||||
| fprintf(stderr, "Could not find video grab device\n"); | fprintf(stderr, "Could not find video grab device\n"); | ||||
| exit(1); | exit(1); | ||||
| } | } | ||||
| /* If not enough info to get the stream parameters, we decode the | /* If not enough info to get the stream parameters, we decode the | ||||
| first frames to get it. */ | first frames to get it. */ | ||||
| if ((ic->ctx_flags & AVFMTCTX_NOHEADER) && av_find_stream_info(ic) < 0) { | |||||
| if ((ic->ctx_flags & AVFMTCTX_NOHEADER) && av_find_stream_info(ic) < 0) { | |||||
| fprintf(stderr, "Could not find video grab parameters\n"); | fprintf(stderr, "Could not find video grab parameters\n"); | ||||
| exit(1); | exit(1); | ||||
| } | } | ||||
| @@ -4276,11 +4276,11 @@ int main(int argc, char **argv) | |||||
| for(i=0;i<nb_output_files;i++) { | for(i=0;i<nb_output_files;i++) { | ||||
| /* maybe av_close_output_file ??? */ | /* maybe av_close_output_file ??? */ | ||||
| AVFormatContext *s = output_files[i]; | AVFormatContext *s = output_files[i]; | ||||
| int j; | |||||
| int j; | |||||
| if (!(s->oformat->flags & AVFMT_NOFILE)) | if (!(s->oformat->flags & AVFMT_NOFILE)) | ||||
| url_fclose(&s->pb); | |||||
| for(j=0;j<s->nb_streams;j++) | |||||
| av_free(s->streams[j]); | |||||
| url_fclose(&s->pb); | |||||
| for(j=0;j<s->nb_streams;j++) | |||||
| av_free(s->streams[j]); | |||||
| av_free(s); | av_free(s); | ||||
| } | } | ||||
| for(i=0;i<nb_input_files;i++) | for(i=0;i<nb_input_files;i++) | ||||
| @@ -1649,7 +1649,7 @@ static int stream_component_open(VideoState *is, int stream_index) | |||||
| memset(&is->audio_pkt, 0, sizeof(is->audio_pkt)); | memset(&is->audio_pkt, 0, sizeof(is->audio_pkt)); | ||||
| packet_queue_init(&is->audioq); | packet_queue_init(&is->audioq); | ||||
| SDL_PauseAudio(0); | |||||
| SDL_PauseAudio(0); | |||||
| break; | break; | ||||
| case CODEC_TYPE_VIDEO: | case CODEC_TYPE_VIDEO: | ||||
| is->video_stream = stream_index; | is->video_stream = stream_index; | ||||
| @@ -1937,11 +1937,11 @@ static int decode_thread(void *arg) | |||||
| } | } | ||||
| ret = av_read_frame(ic, pkt); | ret = av_read_frame(ic, pkt); | ||||
| if (ret < 0) { | if (ret < 0) { | ||||
| if (url_ferror(&ic->pb) == 0) { | |||||
| if (url_ferror(&ic->pb) == 0) { | |||||
| SDL_Delay(100); /* wait for user event */ | SDL_Delay(100); /* wait for user event */ | ||||
| continue; | |||||
| } else | |||||
| break; | |||||
| continue; | |||||
| } else | |||||
| break; | |||||
| } | } | ||||
| if (pkt->stream_index == is->audio_stream) { | if (pkt->stream_index == is->audio_stream) { | ||||
| packet_queue_put(&is->audioq, pkt); | packet_queue_put(&is->audioq, pkt); | ||||
| @@ -2224,23 +2224,23 @@ void event_loop(void) | |||||
| } | } | ||||
| break; | break; | ||||
| case SDL_MOUSEBUTTONDOWN: | case SDL_MOUSEBUTTONDOWN: | ||||
| if (cur_stream) { | |||||
| int ns, hh, mm, ss; | |||||
| int tns, thh, tmm, tss; | |||||
| tns = cur_stream->ic->duration/1000000LL; | |||||
| thh = tns/3600; | |||||
| tmm = (tns%3600)/60; | |||||
| tss = (tns%60); | |||||
| frac = (double)event.button.x/(double)cur_stream->width; | |||||
| ns = frac*tns; | |||||
| hh = ns/3600; | |||||
| mm = (ns%3600)/60; | |||||
| ss = (ns%60); | |||||
| fprintf(stderr, "Seek to %2.0f%% (%2d:%02d:%02d) of total duration (%2d:%02d:%02d) \n", frac*100, | |||||
| hh, mm, ss, thh, tmm, tss); | |||||
| stream_seek(cur_stream, (int64_t)(cur_stream->ic->start_time+frac*cur_stream->ic->duration), 0); | |||||
| } | |||||
| break; | |||||
| if (cur_stream) { | |||||
| int ns, hh, mm, ss; | |||||
| int tns, thh, tmm, tss; | |||||
| tns = cur_stream->ic->duration/1000000LL; | |||||
| thh = tns/3600; | |||||
| tmm = (tns%3600)/60; | |||||
| tss = (tns%60); | |||||
| frac = (double)event.button.x/(double)cur_stream->width; | |||||
| ns = frac*tns; | |||||
| hh = ns/3600; | |||||
| mm = (ns%3600)/60; | |||||
| ss = (ns%60); | |||||
| fprintf(stderr, "Seek to %2.0f%% (%2d:%02d:%02d) of total duration (%2d:%02d:%02d) \n", frac*100, | |||||
| hh, mm, ss, thh, tmm, tss); | |||||
| stream_seek(cur_stream, (int64_t)(cur_stream->ic->start_time+frac*cur_stream->ic->duration), 0); | |||||
| } | |||||
| break; | |||||
| case SDL_VIDEORESIZE: | case SDL_VIDEORESIZE: | ||||
| if (cur_stream) { | if (cur_stream) { | ||||
| screen = SDL_SetVideoMode(event.resize.w, event.resize.h, 0, | screen = SDL_SetVideoMode(event.resize.w, event.resize.h, 0, | ||||
| @@ -2452,7 +2452,7 @@ int main(int argc, char **argv) | |||||
| if (dpy) { | if (dpy) { | ||||
| fs_screen_width = DisplayWidth(dpy, DefaultScreen(dpy)); | fs_screen_width = DisplayWidth(dpy, DefaultScreen(dpy)); | ||||
| fs_screen_height = DisplayHeight(dpy, DefaultScreen(dpy)); | fs_screen_height = DisplayHeight(dpy, DefaultScreen(dpy)); | ||||
| XCloseDisplay(dpy); | |||||
| XCloseDisplay(dpy); | |||||
| } | } | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -1204,7 +1204,7 @@ static int http_parse_request(HTTPContext *c) | |||||
| pstrcpy(c->protocol, sizeof(c->protocol), protocol); | pstrcpy(c->protocol, sizeof(c->protocol), protocol); | ||||
| if (ffserver_debug) | if (ffserver_debug) | ||||
| http_log("New connection: %s %s\n", cmd, url); | |||||
| http_log("New connection: %s %s\n", cmd, url); | |||||
| /* find the filename and the optional info string in the request */ | /* find the filename and the optional info string in the request */ | ||||
| p = url; | p = url; | ||||
| @@ -2001,7 +2001,7 @@ static int http_prepare_data(HTTPContext *c) | |||||
| c->fmt_ctx.nb_streams = c->stream->nb_streams; | c->fmt_ctx.nb_streams = c->stream->nb_streams; | ||||
| for(i=0;i<c->fmt_ctx.nb_streams;i++) { | for(i=0;i<c->fmt_ctx.nb_streams;i++) { | ||||
| AVStream *st; | AVStream *st; | ||||
| AVStream *src; | |||||
| AVStream *src; | |||||
| st = av_mallocz(sizeof(AVStream)); | st = av_mallocz(sizeof(AVStream)); | ||||
| st->codec= avcodec_alloc_context(); | st->codec= avcodec_alloc_context(); | ||||
| c->fmt_ctx.streams[i] = st; | c->fmt_ctx.streams[i] = st; | ||||
| @@ -2012,8 +2012,8 @@ static int http_prepare_data(HTTPContext *c) | |||||
| else | else | ||||
| src = c->stream->feed->streams[c->stream->feed_streams[i]]; | src = c->stream->feed->streams[c->stream->feed_streams[i]]; | ||||
| *st = *src; | |||||
| st->priv_data = 0; | |||||
| *st = *src; | |||||
| st->priv_data = 0; | |||||
| st->codec->frame_number = 0; /* XXX: should be done in | st->codec->frame_number = 0; /* XXX: should be done in | ||||
| AVStream, not in codec */ | AVStream, not in codec */ | ||||
| /* I'm pretty sure that this is not correct... | /* I'm pretty sure that this is not correct... | ||||
| @@ -2452,8 +2452,8 @@ static int http_receive_data(HTTPContext *c) | |||||
| s.priv_data = av_mallocz(fmt_in->priv_data_size); | s.priv_data = av_mallocz(fmt_in->priv_data_size); | ||||
| if (!s.priv_data) | if (!s.priv_data) | ||||
| goto fail; | goto fail; | ||||
| } else | |||||
| s.priv_data = NULL; | |||||
| } else | |||||
| s.priv_data = NULL; | |||||
| if (fmt_in->read_header(&s, 0) < 0) { | if (fmt_in->read_header(&s, 0) < 0) { | ||||
| av_freep(&s.priv_data); | av_freep(&s.priv_data); | ||||
| @@ -3868,20 +3868,20 @@ static int parse_ffconfig(const char *filename) | |||||
| feed->child_argv[i] = av_malloc(30 + strlen(feed->filename)); | feed->child_argv[i] = av_malloc(30 + strlen(feed->filename)); | ||||
| snprintf(feed->child_argv[i], 30+strlen(feed->filename), | |||||
| "http://%s:%d/%s", | |||||
| (my_http_addr.sin_addr.s_addr == INADDR_ANY) ? "127.0.0.1" : | |||||
| inet_ntoa(my_http_addr.sin_addr), | |||||
| ntohs(my_http_addr.sin_port), feed->filename); | |||||
| if (ffserver_debug) | |||||
| { | |||||
| int j; | |||||
| fprintf(stdout, "Launch commandline: "); | |||||
| for (j = 0; j <= i; j++) | |||||
| fprintf(stdout, "%s ", feed->child_argv[j]); | |||||
| fprintf(stdout, "\n"); | |||||
| } | |||||
| snprintf(feed->child_argv[i], 30+strlen(feed->filename), | |||||
| "http://%s:%d/%s", | |||||
| (my_http_addr.sin_addr.s_addr == INADDR_ANY) ? "127.0.0.1" : | |||||
| inet_ntoa(my_http_addr.sin_addr), | |||||
| ntohs(my_http_addr.sin_port), feed->filename); | |||||
| if (ffserver_debug) | |||||
| { | |||||
| int j; | |||||
| fprintf(stdout, "Launch commandline: "); | |||||
| for (j = 0; j <= i; j++) | |||||
| fprintf(stdout, "%s ", feed->child_argv[j]); | |||||
| fprintf(stdout, "\n"); | |||||
| } | |||||
| } | } | ||||
| } else if (!strcasecmp(cmd, "ReadOnlyFile")) { | } else if (!strcasecmp(cmd, "ReadOnlyFile")) { | ||||
| if (feed) { | if (feed) { | ||||
| @@ -4074,8 +4074,8 @@ static int parse_ffconfig(const char *filename) | |||||
| if (stream) { | if (stream) { | ||||
| audio_enc.sample_rate = atoi(arg); | audio_enc.sample_rate = atoi(arg); | ||||
| } | } | ||||
| } else if (!strcasecmp(cmd, "AudioQuality")) { | |||||
| get_arg(arg, sizeof(arg), &p); | |||||
| } else if (!strcasecmp(cmd, "AudioQuality")) { | |||||
| get_arg(arg, sizeof(arg), &p); | |||||
| if (stream) { | if (stream) { | ||||
| // audio_enc.quality = atof(arg) * 1000; | // audio_enc.quality = atof(arg) * 1000; | ||||
| } | } | ||||
| @@ -44,11 +44,11 @@ const enum PixelFormat pixfmt_rgb24[] = {PIX_FMT_BGR24, PIX_FMT_RGBA32, -1}; | |||||
| */ | */ | ||||
| typedef struct EightBpsContext { | typedef struct EightBpsContext { | ||||
| AVCodecContext *avctx; | |||||
| AVFrame pic; | |||||
| AVCodecContext *avctx; | |||||
| AVFrame pic; | |||||
| unsigned char planes; | |||||
| unsigned char planemap[4]; | |||||
| unsigned char planes; | |||||
| unsigned char planemap[4]; | |||||
| } EightBpsContext; | } EightBpsContext; | ||||
| @@ -59,87 +59,87 @@ typedef struct EightBpsContext { | |||||
| */ | */ | ||||
| static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size) | static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size) | ||||
| { | { | ||||
| EightBpsContext * const c = (EightBpsContext *)avctx->priv_data; | |||||
| unsigned char *encoded = (unsigned char *)buf; | |||||
| unsigned char *pixptr, *pixptr_end; | |||||
| unsigned int height = avctx->height; // Real image height | |||||
| unsigned int dlen, p, row; | |||||
| unsigned char *lp, *dp; | |||||
| unsigned char count; | |||||
| unsigned int px_inc; | |||||
| unsigned int planes = c->planes; | |||||
| unsigned char *planemap = c->planemap; | |||||
| if(c->pic.data[0]) | |||||
| avctx->release_buffer(avctx, &c->pic); | |||||
| c->pic.reference = 0; | |||||
| c->pic.buffer_hints = FF_BUFFER_HINTS_VALID; | |||||
| if(avctx->get_buffer(avctx, &c->pic) < 0){ | |||||
| av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); | |||||
| return -1; | |||||
| } | |||||
| /* Set data pointer after line lengths */ | |||||
| dp = encoded + planes * (height << 1); | |||||
| /* Ignore alpha plane, don't know what to do with it */ | |||||
| if (planes == 4) | |||||
| planes--; | |||||
| px_inc = planes + (avctx->pix_fmt == PIX_FMT_RGBA32); | |||||
| for (p = 0; p < planes; p++) { | |||||
| /* Lines length pointer for this plane */ | |||||
| lp = encoded + p * (height << 1); | |||||
| /* Decode a plane */ | |||||
| for(row = 0; row < height; row++) { | |||||
| pixptr = c->pic.data[0] + row * c->pic.linesize[0] + planemap[p]; | |||||
| pixptr_end = pixptr + c->pic.linesize[0]; | |||||
| dlen = be2me_16(*(unsigned short *)(lp+row*2)); | |||||
| /* Decode a row of this plane */ | |||||
| while(dlen > 0) { | |||||
| if(dp + 1 >= buf+buf_size) return -1; | |||||
| if ((count = *dp++) <= 127) { | |||||
| count++; | |||||
| dlen -= count + 1; | |||||
| if (pixptr + count * px_inc > pixptr_end) | |||||
| break; | |||||
| if(dp + count > buf+buf_size) return -1; | |||||
| while(count--) { | |||||
| *pixptr = *dp++; | |||||
| pixptr += px_inc; | |||||
| } | |||||
| } else { | |||||
| count = 257 - count; | |||||
| if (pixptr + count * px_inc > pixptr_end) | |||||
| break; | |||||
| while(count--) { | |||||
| *pixptr = *dp; | |||||
| pixptr += px_inc; | |||||
| } | |||||
| dp++; | |||||
| dlen -= 2; | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| if (avctx->palctrl) { | |||||
| memcpy (c->pic.data[1], avctx->palctrl->palette, AVPALETTE_SIZE); | |||||
| if (avctx->palctrl->palette_changed) { | |||||
| c->pic.palette_has_changed = 1; | |||||
| avctx->palctrl->palette_changed = 0; | |||||
| } else | |||||
| c->pic.palette_has_changed = 0; | |||||
| } | |||||
| *data_size = sizeof(AVFrame); | |||||
| *(AVFrame*)data = c->pic; | |||||
| /* always report that the buffer was completely consumed */ | |||||
| return buf_size; | |||||
| EightBpsContext * const c = (EightBpsContext *)avctx->priv_data; | |||||
| unsigned char *encoded = (unsigned char *)buf; | |||||
| unsigned char *pixptr, *pixptr_end; | |||||
| unsigned int height = avctx->height; // Real image height | |||||
| unsigned int dlen, p, row; | |||||
| unsigned char *lp, *dp; | |||||
| unsigned char count; | |||||
| unsigned int px_inc; | |||||
| unsigned int planes = c->planes; | |||||
| unsigned char *planemap = c->planemap; | |||||
| if(c->pic.data[0]) | |||||
| avctx->release_buffer(avctx, &c->pic); | |||||
| c->pic.reference = 0; | |||||
| c->pic.buffer_hints = FF_BUFFER_HINTS_VALID; | |||||
| if(avctx->get_buffer(avctx, &c->pic) < 0){ | |||||
| av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); | |||||
| return -1; | |||||
| } | |||||
| /* Set data pointer after line lengths */ | |||||
| dp = encoded + planes * (height << 1); | |||||
| /* Ignore alpha plane, don't know what to do with it */ | |||||
| if (planes == 4) | |||||
| planes--; | |||||
| px_inc = planes + (avctx->pix_fmt == PIX_FMT_RGBA32); | |||||
| for (p = 0; p < planes; p++) { | |||||
| /* Lines length pointer for this plane */ | |||||
| lp = encoded + p * (height << 1); | |||||
| /* Decode a plane */ | |||||
| for(row = 0; row < height; row++) { | |||||
| pixptr = c->pic.data[0] + row * c->pic.linesize[0] + planemap[p]; | |||||
| pixptr_end = pixptr + c->pic.linesize[0]; | |||||
| dlen = be2me_16(*(unsigned short *)(lp+row*2)); | |||||
| /* Decode a row of this plane */ | |||||
| while(dlen > 0) { | |||||
| if(dp + 1 >= buf+buf_size) return -1; | |||||
| if ((count = *dp++) <= 127) { | |||||
| count++; | |||||
| dlen -= count + 1; | |||||
| if (pixptr + count * px_inc > pixptr_end) | |||||
| break; | |||||
| if(dp + count > buf+buf_size) return -1; | |||||
| while(count--) { | |||||
| *pixptr = *dp++; | |||||
| pixptr += px_inc; | |||||
| } | |||||
| } else { | |||||
| count = 257 - count; | |||||
| if (pixptr + count * px_inc > pixptr_end) | |||||
| break; | |||||
| while(count--) { | |||||
| *pixptr = *dp; | |||||
| pixptr += px_inc; | |||||
| } | |||||
| dp++; | |||||
| dlen -= 2; | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| if (avctx->palctrl) { | |||||
| memcpy (c->pic.data[1], avctx->palctrl->palette, AVPALETTE_SIZE); | |||||
| if (avctx->palctrl->palette_changed) { | |||||
| c->pic.palette_has_changed = 1; | |||||
| avctx->palctrl->palette_changed = 0; | |||||
| } else | |||||
| c->pic.palette_has_changed = 0; | |||||
| } | |||||
| *data_size = sizeof(AVFrame); | |||||
| *(AVFrame*)data = c->pic; | |||||
| /* always report that the buffer was completely consumed */ | |||||
| return buf_size; | |||||
| } | } | ||||
| @@ -150,53 +150,53 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8 | |||||
| */ | */ | ||||
| static int decode_init(AVCodecContext *avctx) | static int decode_init(AVCodecContext *avctx) | ||||
| { | { | ||||
| EightBpsContext * const c = (EightBpsContext *)avctx->priv_data; | |||||
| EightBpsContext * const c = (EightBpsContext *)avctx->priv_data; | |||||
| c->avctx = avctx; | |||||
| avctx->has_b_frames = 0; | |||||
| c->avctx = avctx; | |||||
| avctx->has_b_frames = 0; | |||||
| c->pic.data[0] = NULL; | |||||
| c->pic.data[0] = NULL; | |||||
| if (avcodec_check_dimensions(avctx, avctx->width, avctx->height) < 0) { | if (avcodec_check_dimensions(avctx, avctx->width, avctx->height) < 0) { | ||||
| return 1; | return 1; | ||||
| } | } | ||||
| switch (avctx->bits_per_sample) { | |||||
| case 8: | |||||
| avctx->pix_fmt = PIX_FMT_PAL8; | |||||
| c->planes = 1; | |||||
| c->planemap[0] = 0; // 1st plane is palette indexes | |||||
| if (avctx->palctrl == NULL) { | |||||
| av_log(avctx, AV_LOG_ERROR, "Error: PAL8 format but no palette from demuxer.\n"); | |||||
| return -1; | |||||
| } | |||||
| break; | |||||
| case 24: | |||||
| avctx->pix_fmt = avctx->get_format(avctx, pixfmt_rgb24); | |||||
| c->planes = 3; | |||||
| c->planemap[0] = 2; // 1st plane is red | |||||
| c->planemap[1] = 1; // 2nd plane is green | |||||
| c->planemap[2] = 0; // 3rd plane is blue | |||||
| break; | |||||
| case 32: | |||||
| avctx->pix_fmt = PIX_FMT_RGBA32; | |||||
| c->planes = 4; | |||||
| switch (avctx->bits_per_sample) { | |||||
| case 8: | |||||
| avctx->pix_fmt = PIX_FMT_PAL8; | |||||
| c->planes = 1; | |||||
| c->planemap[0] = 0; // 1st plane is palette indexes | |||||
| if (avctx->palctrl == NULL) { | |||||
| av_log(avctx, AV_LOG_ERROR, "Error: PAL8 format but no palette from demuxer.\n"); | |||||
| return -1; | |||||
| } | |||||
| break; | |||||
| case 24: | |||||
| avctx->pix_fmt = avctx->get_format(avctx, pixfmt_rgb24); | |||||
| c->planes = 3; | |||||
| c->planemap[0] = 2; // 1st plane is red | |||||
| c->planemap[1] = 1; // 2nd plane is green | |||||
| c->planemap[2] = 0; // 3rd plane is blue | |||||
| break; | |||||
| case 32: | |||||
| avctx->pix_fmt = PIX_FMT_RGBA32; | |||||
| c->planes = 4; | |||||
| #ifdef WORDS_BIGENDIAN | #ifdef WORDS_BIGENDIAN | ||||
| c->planemap[0] = 1; // 1st plane is red | |||||
| c->planemap[1] = 2; // 2nd plane is green | |||||
| c->planemap[2] = 3; // 3rd plane is blue | |||||
| c->planemap[3] = 0; // 4th plane is alpha??? | |||||
| c->planemap[0] = 1; // 1st plane is red | |||||
| c->planemap[1] = 2; // 2nd plane is green | |||||
| c->planemap[2] = 3; // 3rd plane is blue | |||||
| c->planemap[3] = 0; // 4th plane is alpha??? | |||||
| #else | #else | ||||
| c->planemap[0] = 2; // 1st plane is red | |||||
| c->planemap[1] = 1; // 2nd plane is green | |||||
| c->planemap[2] = 0; // 3rd plane is blue | |||||
| c->planemap[3] = 3; // 4th plane is alpha??? | |||||
| c->planemap[0] = 2; // 1st plane is red | |||||
| c->planemap[1] = 1; // 2nd plane is green | |||||
| c->planemap[2] = 0; // 3rd plane is blue | |||||
| c->planemap[3] = 3; // 4th plane is alpha??? | |||||
| #endif | #endif | ||||
| break; | |||||
| default: | |||||
| av_log(avctx, AV_LOG_ERROR, "Error: Unsupported color depth: %u.\n", avctx->bits_per_sample); | |||||
| return -1; | |||||
| } | |||||
| break; | |||||
| default: | |||||
| av_log(avctx, AV_LOG_ERROR, "Error: Unsupported color depth: %u.\n", avctx->bits_per_sample); | |||||
| return -1; | |||||
| } | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| @@ -211,24 +211,24 @@ static int decode_init(AVCodecContext *avctx) | |||||
| */ | */ | ||||
| static int decode_end(AVCodecContext *avctx) | static int decode_end(AVCodecContext *avctx) | ||||
| { | { | ||||
| EightBpsContext * const c = (EightBpsContext *)avctx->priv_data; | |||||
| EightBpsContext * const c = (EightBpsContext *)avctx->priv_data; | |||||
| if (c->pic.data[0]) | |||||
| avctx->release_buffer(avctx, &c->pic); | |||||
| if (c->pic.data[0]) | |||||
| avctx->release_buffer(avctx, &c->pic); | |||||
| return 0; | |||||
| return 0; | |||||
| } | } | ||||
| AVCodec eightbps_decoder = { | AVCodec eightbps_decoder = { | ||||
| "8bps", | |||||
| CODEC_TYPE_VIDEO, | |||||
| CODEC_ID_8BPS, | |||||
| sizeof(EightBpsContext), | |||||
| decode_init, | |||||
| NULL, | |||||
| decode_end, | |||||
| decode_frame, | |||||
| CODEC_CAP_DR1, | |||||
| "8bps", | |||||
| CODEC_TYPE_VIDEO, | |||||
| CODEC_ID_8BPS, | |||||
| sizeof(EightBpsContext), | |||||
| decode_init, | |||||
| NULL, | |||||
| decode_end, | |||||
| decode_frame, | |||||
| CODEC_CAP_DR1, | |||||
| }; | }; | ||||
| @@ -58,11 +58,11 @@ typedef struct AC3DecodeState { | |||||
| a52_state_t* (*a52_init)(uint32_t mm_accel); | a52_state_t* (*a52_init)(uint32_t mm_accel); | ||||
| sample_t* (*a52_samples)(a52_state_t * state); | sample_t* (*a52_samples)(a52_state_t * state); | ||||
| int (*a52_syncinfo)(uint8_t * buf, int * flags, | int (*a52_syncinfo)(uint8_t * buf, int * flags, | ||||
| int * sample_rate, int * bit_rate); | |||||
| int * sample_rate, int * bit_rate); | |||||
| int (*a52_frame)(a52_state_t * state, uint8_t * buf, int * flags, | int (*a52_frame)(a52_state_t * state, uint8_t * buf, int * flags, | ||||
| sample_t * level, sample_t bias); | |||||
| sample_t * level, sample_t bias); | |||||
| void (*a52_dynrng)(a52_state_t * state, | void (*a52_dynrng)(a52_state_t * state, | ||||
| sample_t (* call) (sample_t, void *), void * data); | |||||
| sample_t (* call) (sample_t, void *), void * data); | |||||
| int (*a52_block)(a52_state_t * state); | int (*a52_block)(a52_state_t * state); | ||||
| void (*a52_free)(a52_state_t * state); | void (*a52_free)(a52_state_t * state); | ||||
| @@ -105,7 +105,7 @@ static int a52_decode_init(AVCodecContext *avctx) | |||||
| if (!s->a52_init || !s->a52_samples || !s->a52_syncinfo | if (!s->a52_init || !s->a52_samples || !s->a52_syncinfo | ||||
| || !s->a52_frame || !s->a52_block || !s->a52_free) | || !s->a52_frame || !s->a52_block || !s->a52_free) | ||||
| { | { | ||||
| dlclose(s->handle); | |||||
| dlclose(s->handle); | |||||
| return -1; | return -1; | ||||
| } | } | ||||
| #else | #else | ||||
| @@ -130,22 +130,22 @@ static int a52_decode_init(AVCodecContext *avctx) | |||||
| static inline int blah (int32_t i) | static inline int blah (int32_t i) | ||||
| { | { | ||||
| if (i > 0x43c07fff) | if (i > 0x43c07fff) | ||||
| return 32767; | |||||
| return 32767; | |||||
| else if (i < 0x43bf8000) | else if (i < 0x43bf8000) | ||||
| return -32768; | |||||
| return -32768; | |||||
| return i - 0x43c00000; | return i - 0x43c00000; | ||||
| } | } | ||||
| static inline void float_to_int (float * _f, int16_t * s16, int nchannels) | static inline void float_to_int (float * _f, int16_t * s16, int nchannels) | ||||
| { | { | ||||
| int i, j, c; | int i, j, c; | ||||
| int32_t * f = (int32_t *) _f; // XXX assumes IEEE float format | |||||
| int32_t * f = (int32_t *) _f; // XXX assumes IEEE float format | |||||
| j = 0; | j = 0; | ||||
| nchannels *= 256; | nchannels *= 256; | ||||
| for (i = 0; i < 256; i++) { | for (i = 0; i < 256; i++) { | ||||
| for (c = 0; c < nchannels; c += 256) | |||||
| s16[j++] = blah (f[i + c]); | |||||
| for (c = 0; c < nchannels; c += 256) | |||||
| s16[j++] = blah (f[i + c]); | |||||
| } | } | ||||
| } | } | ||||
| @@ -164,7 +164,7 @@ static int a52_decode_frame(AVCodecContext *avctx, | |||||
| short *out_samples = data; | short *out_samples = data; | ||||
| float level; | float level; | ||||
| static const int ac3_channels[8] = { | static const int ac3_channels[8] = { | ||||
| 2, 1, 2, 3, 3, 4, 4, 5 | |||||
| 2, 1, 2, 3, 3, 4, 4, 5 | |||||
| }; | }; | ||||
| buf_ptr = buf; | buf_ptr = buf; | ||||
| @@ -186,20 +186,20 @@ static int a52_decode_frame(AVCodecContext *avctx, | |||||
| memcpy(s->inbuf, s->inbuf + 1, HEADER_SIZE - 1); | memcpy(s->inbuf, s->inbuf + 1, HEADER_SIZE - 1); | ||||
| s->inbuf_ptr--; | s->inbuf_ptr--; | ||||
| } else { | } else { | ||||
| s->frame_size = len; | |||||
| s->frame_size = len; | |||||
| /* update codec info */ | /* update codec info */ | ||||
| avctx->sample_rate = sample_rate; | avctx->sample_rate = sample_rate; | ||||
| s->channels = ac3_channels[s->flags & 7]; | s->channels = ac3_channels[s->flags & 7]; | ||||
| if (s->flags & A52_LFE) | if (s->flags & A52_LFE) | ||||
| s->channels++; | |||||
| if (avctx->channels == 0) | |||||
| /* No specific number of channel requested */ | |||||
| avctx->channels = s->channels; | |||||
| else if (s->channels < avctx->channels) { | |||||
| av_log(avctx, AV_LOG_ERROR, "ac3dec: AC3 Source channels are less than specified: output to %d channels.. (frmsize: %d)\n", s->channels, len); | |||||
| avctx->channels = s->channels; | |||||
| } | |||||
| avctx->bit_rate = bit_rate; | |||||
| s->channels++; | |||||
| if (avctx->channels == 0) | |||||
| /* No specific number of channel requested */ | |||||
| avctx->channels = s->channels; | |||||
| else if (s->channels < avctx->channels) { | |||||
| av_log(avctx, AV_LOG_ERROR, "ac3dec: AC3 Source channels are less than specified: output to %d channels.. (frmsize: %d)\n", s->channels, len); | |||||
| avctx->channels = s->channels; | |||||
| } | |||||
| avctx->bit_rate = bit_rate; | |||||
| } | } | ||||
| } | } | ||||
| } else if (len < s->frame_size) { | } else if (len < s->frame_size) { | ||||
| @@ -54,23 +54,23 @@ stream_samples_t samples; | |||||
| static inline int blah (int32_t i) | static inline int blah (int32_t i) | ||||
| { | { | ||||
| if (i > 0x43c07fff) | if (i > 0x43c07fff) | ||||
| return 32767; | |||||
| return 32767; | |||||
| else if (i < 0x43bf8000) | else if (i < 0x43bf8000) | ||||
| return -32768; | |||||
| return -32768; | |||||
| else | else | ||||
| return i - 0x43c00000; | |||||
| return i - 0x43c00000; | |||||
| } | } | ||||
| static inline void float_to_int (float * _f, int16_t * s16, int nchannels) | static inline void float_to_int (float * _f, int16_t * s16, int nchannels) | ||||
| { | { | ||||
| int i, j, c; | int i, j, c; | ||||
| int32_t * f = (int32_t *) _f; // XXX assumes IEEE float format | |||||
| int32_t * f = (int32_t *) _f; // XXX assumes IEEE float format | |||||
| j = 0; | j = 0; | ||||
| nchannels *= 256; | nchannels *= 256; | ||||
| for (i = 0; i < 256; i++) { | for (i = 0; i < 256; i++) { | ||||
| for (c = 0; c < nchannels; c += 256) | |||||
| s16[j++] = blah (f[i + c]); | |||||
| for (c = 0; c < nchannels; c += 256) | |||||
| s16[j++] = blah (f[i + c]); | |||||
| } | } | ||||
| } | } | ||||
| @@ -89,7 +89,7 @@ static int ac3_decode_frame(AVCodecContext *avctx, | |||||
| short *out_samples = data; | short *out_samples = data; | ||||
| float level; | float level; | ||||
| static const int ac3_channels[8] = { | static const int ac3_channels[8] = { | ||||
| 2, 1, 2, 3, 3, 4, 4, 5 | |||||
| 2, 1, 2, 3, 3, 4, 4, 5 | |||||
| }; | }; | ||||
| buf_ptr = buf; | buf_ptr = buf; | ||||
| @@ -111,20 +111,20 @@ static int ac3_decode_frame(AVCodecContext *avctx, | |||||
| memcpy(s->inbuf, s->inbuf + 1, HEADER_SIZE - 1); | memcpy(s->inbuf, s->inbuf + 1, HEADER_SIZE - 1); | ||||
| s->inbuf_ptr--; | s->inbuf_ptr--; | ||||
| } else { | } else { | ||||
| s->frame_size = len; | |||||
| s->frame_size = len; | |||||
| /* update codec info */ | /* update codec info */ | ||||
| avctx->sample_rate = sample_rate; | avctx->sample_rate = sample_rate; | ||||
| s->channels = ac3_channels[s->flags & 7]; | s->channels = ac3_channels[s->flags & 7]; | ||||
| if (s->flags & AC3_LFE) | if (s->flags & AC3_LFE) | ||||
| s->channels++; | |||||
| if (avctx->channels == 0) | |||||
| /* No specific number of channel requested */ | |||||
| avctx->channels = s->channels; | |||||
| else if (s->channels < avctx->channels) { | |||||
| s->channels++; | |||||
| if (avctx->channels == 0) | |||||
| /* No specific number of channel requested */ | |||||
| avctx->channels = s->channels; | |||||
| else if (s->channels < avctx->channels) { | |||||
| av_log( avctx, AV_LOG_INFO, "ac3dec: AC3 Source channels are less than specified: output to %d channels.. (frmsize: %d)\n", s->channels, len); | av_log( avctx, AV_LOG_INFO, "ac3dec: AC3 Source channels are less than specified: output to %d channels.. (frmsize: %d)\n", s->channels, len); | ||||
| avctx->channels = s->channels; | |||||
| } | |||||
| avctx->bit_rate = bit_rate; | |||||
| avctx->channels = s->channels; | |||||
| } | |||||
| avctx->bit_rate = bit_rate; | |||||
| } | } | ||||
| } | } | ||||
| } else if (len < s->frame_size) { | } else if (len < s->frame_size) { | ||||
| @@ -337,8 +337,8 @@ static void fft_init(int ln) | |||||
| /* do a 2^n point complex fft on 2^ln points. */ | /* do a 2^n point complex fft on 2^ln points. */ | ||||
| static void fft(IComplex *z, int ln) | static void fft(IComplex *z, int ln) | ||||
| { | { | ||||
| int j, l, np, np2; | |||||
| int nblocks, nloops; | |||||
| int j, l, np, np2; | |||||
| int nblocks, nloops; | |||||
| register IComplex *p,*q; | register IComplex *p,*q; | ||||
| int tmp_re, tmp_im; | int tmp_re, tmp_im; | ||||
| @@ -472,7 +472,7 @@ static void compute_exp_strategy(uint8_t exp_strategy[NB_BLOCKS][AC3_MAX_CHANNEL | |||||
| exp_strategy[i][ch] = EXP_REUSE; | exp_strategy[i][ch] = EXP_REUSE; | ||||
| } | } | ||||
| if (is_lfe) | if (is_lfe) | ||||
| return; | |||||
| return; | |||||
| /* now select the encoding strategy type : if exponents are often | /* now select the encoding strategy type : if exponents are often | ||||
| recoded, we use a coarse encoding */ | recoded, we use a coarse encoding */ | ||||
| @@ -493,7 +493,7 @@ static void compute_exp_strategy(uint8_t exp_strategy[NB_BLOCKS][AC3_MAX_CHANNEL | |||||
| exp_strategy[i][ch] = EXP_D15; | exp_strategy[i][ch] = EXP_D15; | ||||
| break; | break; | ||||
| } | } | ||||
| i = j; | |||||
| i = j; | |||||
| } | } | ||||
| } | } | ||||
| @@ -553,9 +553,9 @@ static int encode_exp(uint8_t encoded_exp[N/2], | |||||
| /* Decrease the delta between each groups to within 2 | /* Decrease the delta between each groups to within 2 | ||||
| * so that they can be differentially encoded */ | * so that they can be differentially encoded */ | ||||
| for (i=1;i<=nb_groups;i++) | for (i=1;i<=nb_groups;i++) | ||||
| exp1[i] = FFMIN(exp1[i], exp1[i-1] + 2); | |||||
| exp1[i] = FFMIN(exp1[i], exp1[i-1] + 2); | |||||
| for (i=nb_groups-1;i>=0;i--) | for (i=nb_groups-1;i>=0;i--) | ||||
| exp1[i] = FFMIN(exp1[i], exp1[i+1] + 2); | |||||
| exp1[i] = FFMIN(exp1[i], exp1[i+1] + 2); | |||||
| /* now we have the exponent values the decoder will see */ | /* now we have the exponent values the decoder will see */ | ||||
| encoded_exp[0] = exp1[0]; | encoded_exp[0] = exp1[0]; | ||||
| @@ -708,8 +708,8 @@ static int compute_bit_allocation(AC3EncodeContext *s, | |||||
| if(i==0) frame_bits += 4; | if(i==0) frame_bits += 4; | ||||
| } | } | ||||
| frame_bits += 2 * s->nb_channels; /* chexpstr[2] * c */ | frame_bits += 2 * s->nb_channels; /* chexpstr[2] * c */ | ||||
| if (s->lfe) | |||||
| frame_bits++; /* lfeexpstr */ | |||||
| if (s->lfe) | |||||
| frame_bits++; /* lfeexpstr */ | |||||
| for(ch=0;ch<s->nb_channels;ch++) { | for(ch=0;ch<s->nb_channels;ch++) { | ||||
| if (exp_strategy[i][ch] != EXP_REUSE) | if (exp_strategy[i][ch] != EXP_REUSE) | ||||
| frame_bits += 6 + 2; /* chbwcod[6], gainrng[2] */ | frame_bits += 6 + 2; /* chbwcod[6], gainrng[2] */ | ||||
| @@ -736,11 +736,11 @@ static int compute_bit_allocation(AC3EncodeContext *s, | |||||
| csnroffst = s->csnroffst; | csnroffst = s->csnroffst; | ||||
| while (csnroffst >= 0 && | while (csnroffst >= 0 && | ||||
| bit_alloc(s, bap, encoded_exp, exp_strategy, frame_bits, csnroffst, 0) < 0) | |||||
| csnroffst -= SNR_INC1; | |||||
| bit_alloc(s, bap, encoded_exp, exp_strategy, frame_bits, csnroffst, 0) < 0) | |||||
| csnroffst -= SNR_INC1; | |||||
| if (csnroffst < 0) { | if (csnroffst < 0) { | ||||
| av_log(NULL, AV_LOG_ERROR, "Yack, Error !!!\n"); | |||||
| return -1; | |||||
| av_log(NULL, AV_LOG_ERROR, "Yack, Error !!!\n"); | |||||
| return -1; | |||||
| } | } | ||||
| while ((csnroffst + SNR_INC1) <= 63 && | while ((csnroffst + SNR_INC1) <= 63 && | ||||
| bit_alloc(s, bap1, encoded_exp, exp_strategy, frame_bits, | bit_alloc(s, bap1, encoded_exp, exp_strategy, frame_bits, | ||||
| @@ -815,19 +815,19 @@ static int AC3_encode_init(AVCodecContext *avctx) | |||||
| int i, j, ch; | int i, j, ch; | ||||
| float alpha; | float alpha; | ||||
| static const uint8_t acmod_defs[6] = { | static const uint8_t acmod_defs[6] = { | ||||
| 0x01, /* C */ | |||||
| 0x02, /* L R */ | |||||
| 0x03, /* L C R */ | |||||
| 0x06, /* L R SL SR */ | |||||
| 0x07, /* L C R SL SR */ | |||||
| 0x07, /* L C R SL SR (+LFE) */ | |||||
| 0x01, /* C */ | |||||
| 0x02, /* L R */ | |||||
| 0x03, /* L C R */ | |||||
| 0x06, /* L R SL SR */ | |||||
| 0x07, /* L C R SL SR */ | |||||
| 0x07, /* L C R SL SR (+LFE) */ | |||||
| }; | }; | ||||
| avctx->frame_size = AC3_FRAME_SIZE; | avctx->frame_size = AC3_FRAME_SIZE; | ||||
| /* number of channels */ | /* number of channels */ | ||||
| if (channels < 1 || channels > 6) | if (channels < 1 || channels > 6) | ||||
| return -1; | |||||
| return -1; | |||||
| s->acmod = acmod_defs[channels - 1]; | s->acmod = acmod_defs[channels - 1]; | ||||
| s->lfe = (channels == 6) ? 1 : 0; | s->lfe = (channels == 6) ? 1 : 0; | ||||
| s->nb_all_channels = channels; | s->nb_all_channels = channels; | ||||
| @@ -871,7 +871,7 @@ static int AC3_encode_init(AVCodecContext *avctx) | |||||
| s->nb_coefs[ch] = ((s->chbwcod[ch] + 12) * 3) + 37; | s->nb_coefs[ch] = ((s->chbwcod[ch] + 12) * 3) + 37; | ||||
| } | } | ||||
| if (s->lfe) { | if (s->lfe) { | ||||
| s->nb_coefs[s->lfe_channel] = 7; /* fixed */ | |||||
| s->nb_coefs[s->lfe_channel] = 7; /* fixed */ | |||||
| } | } | ||||
| /* initial snr offset */ | /* initial snr offset */ | ||||
| s->csnroffst = 40; | s->csnroffst = 40; | ||||
| @@ -907,9 +907,9 @@ static void output_frame_header(AC3EncodeContext *s, unsigned char *frame) | |||||
| put_bits(&s->pb, 3, s->bsmod); | put_bits(&s->pb, 3, s->bsmod); | ||||
| put_bits(&s->pb, 3, s->acmod); | put_bits(&s->pb, 3, s->acmod); | ||||
| if ((s->acmod & 0x01) && s->acmod != 0x01) | if ((s->acmod & 0x01) && s->acmod != 0x01) | ||||
| put_bits(&s->pb, 2, 1); /* XXX -4.5 dB */ | |||||
| put_bits(&s->pb, 2, 1); /* XXX -4.5 dB */ | |||||
| if (s->acmod & 0x04) | if (s->acmod & 0x04) | ||||
| put_bits(&s->pb, 2, 1); /* XXX -6 dB */ | |||||
| put_bits(&s->pb, 2, 1); /* XXX -6 dB */ | |||||
| if (s->acmod == 0x02) | if (s->acmod == 0x02) | ||||
| put_bits(&s->pb, 2, 0); /* surround not indicated */ | put_bits(&s->pb, 2, 0); /* surround not indicated */ | ||||
| put_bits(&s->pb, 1, s->lfe); /* LFE */ | put_bits(&s->pb, 1, s->lfe); /* LFE */ | ||||
| @@ -995,20 +995,20 @@ static void output_audio_block(AC3EncodeContext *s, | |||||
| if (s->acmod == 2) | if (s->acmod == 2) | ||||
| { | { | ||||
| if(block_num==0) | |||||
| { | |||||
| /* first block must define rematrixing (rematstr) */ | |||||
| put_bits(&s->pb, 1, 1); | |||||
| /* dummy rematrixing rematflg(1:4)=0 */ | |||||
| for (rbnd=0;rbnd<4;rbnd++) | |||||
| put_bits(&s->pb, 1, 0); | |||||
| } | |||||
| else | |||||
| { | |||||
| /* no matrixing (but should be used in the future) */ | |||||
| put_bits(&s->pb, 1, 0); | |||||
| } | |||||
| if(block_num==0) | |||||
| { | |||||
| /* first block must define rematrixing (rematstr) */ | |||||
| put_bits(&s->pb, 1, 1); | |||||
| /* dummy rematrixing rematflg(1:4)=0 */ | |||||
| for (rbnd=0;rbnd<4;rbnd++) | |||||
| put_bits(&s->pb, 1, 0); | |||||
| } | |||||
| else | |||||
| { | |||||
| /* no matrixing (but should be used in the future) */ | |||||
| put_bits(&s->pb, 1, 0); | |||||
| } | |||||
| } | } | ||||
| #if defined(DEBUG) | #if defined(DEBUG) | ||||
| @@ -1023,7 +1023,7 @@ static void output_audio_block(AC3EncodeContext *s, | |||||
| } | } | ||||
| if (s->lfe) { | if (s->lfe) { | ||||
| put_bits(&s->pb, 1, exp_strategy[s->lfe_channel]); | |||||
| put_bits(&s->pb, 1, exp_strategy[s->lfe_channel]); | |||||
| } | } | ||||
| for(ch=0;ch<s->nb_channels;ch++) { | for(ch=0;ch<s->nb_channels;ch++) { | ||||
| @@ -1047,7 +1047,7 @@ static void output_audio_block(AC3EncodeContext *s, | |||||
| group_size = 4; | group_size = 4; | ||||
| break; | break; | ||||
| } | } | ||||
| nb_groups = (s->nb_coefs[ch] + (group_size * 3) - 4) / (3 * group_size); | |||||
| nb_groups = (s->nb_coefs[ch] + (group_size * 3) - 4) / (3 * group_size); | |||||
| p = encoded_exp[ch]; | p = encoded_exp[ch]; | ||||
| /* first exponent */ | /* first exponent */ | ||||
| @@ -1075,8 +1075,8 @@ static void output_audio_block(AC3EncodeContext *s, | |||||
| put_bits(&s->pb, 7, ((delta0 * 5 + delta1) * 5) + delta2); | put_bits(&s->pb, 7, ((delta0 * 5 + delta1) * 5) + delta2); | ||||
| } | } | ||||
| if (ch != s->lfe_channel) | |||||
| put_bits(&s->pb, 2, 0); /* no gain range info */ | |||||
| if (ch != s->lfe_channel) | |||||
| put_bits(&s->pb, 2, 0); /* no gain range info */ | |||||
| } | } | ||||
| /* bit allocation info */ | /* bit allocation info */ | ||||
| @@ -300,7 +300,7 @@ static inline unsigned char adpcm_yamaha_compress_sample(ADPCMChannelStatus *c, | |||||
| } | } | ||||
| static int adpcm_encode_frame(AVCodecContext *avctx, | static int adpcm_encode_frame(AVCodecContext *avctx, | ||||
| unsigned char *frame, int buf_size, void *data) | |||||
| unsigned char *frame, int buf_size, void *data) | |||||
| { | { | ||||
| int n, i, st; | int n, i, st; | ||||
| short *samples; | short *samples; | ||||
| @@ -431,8 +431,8 @@ static int adpcm_decode_init(AVCodecContext * avctx) | |||||
| switch(avctx->codec->id) { | switch(avctx->codec->id) { | ||||
| case CODEC_ID_ADPCM_CT: | case CODEC_ID_ADPCM_CT: | ||||
| c->status[0].step = c->status[1].step = 511; | |||||
| break; | |||||
| c->status[0].step = c->status[1].step = 511; | |||||
| break; | |||||
| default: | default: | ||||
| break; | break; | ||||
| } | } | ||||
| @@ -498,16 +498,16 @@ static inline short adpcm_ct_expand_nibble(ADPCMChannelStatus *c, char nibble) | |||||
| predictor = c->predictor; | predictor = c->predictor; | ||||
| /* predictor update is not so trivial: predictor is multiplied on 254/256 before updating */ | /* predictor update is not so trivial: predictor is multiplied on 254/256 before updating */ | ||||
| if(sign) | if(sign) | ||||
| predictor = ((predictor * 254) >> 8) - diff; | |||||
| predictor = ((predictor * 254) >> 8) - diff; | |||||
| else | else | ||||
| predictor = ((predictor * 254) >> 8) + diff; | |||||
| predictor = ((predictor * 254) >> 8) + diff; | |||||
| /* calculate new step and clamp it to range 511..32767 */ | /* calculate new step and clamp it to range 511..32767 */ | ||||
| new_step = (ct_adpcm_table[nibble & 7] * c->step) >> 8; | new_step = (ct_adpcm_table[nibble & 7] * c->step) >> 8; | ||||
| c->step = new_step; | c->step = new_step; | ||||
| if(c->step < 511) | if(c->step < 511) | ||||
| c->step = 511; | |||||
| c->step = 511; | |||||
| if(c->step > 32767) | if(c->step > 32767) | ||||
| c->step = 32767; | |||||
| c->step = 32767; | |||||
| CLAMP_TO_SHORT(predictor); | CLAMP_TO_SHORT(predictor); | ||||
| c->predictor = predictor; | c->predictor = predictor; | ||||
| @@ -612,8 +612,8 @@ static void xa_decode(short *out, const unsigned char *in, | |||||
| } | } | ||||
| static int adpcm_decode_frame(AVCodecContext *avctx, | static int adpcm_decode_frame(AVCodecContext *avctx, | ||||
| void *data, int *data_size, | |||||
| uint8_t *buf, int buf_size) | |||||
| void *data, int *data_size, | |||||
| uint8_t *buf, int buf_size) | |||||
| { | { | ||||
| ADPCMContext *c = avctx->priv_data; | ADPCMContext *c = avctx->priv_data; | ||||
| ADPCMChannelStatus *cs; | ADPCMChannelStatus *cs; | ||||
| @@ -701,7 +701,7 @@ static int adpcm_decode_frame(AVCodecContext *avctx, | |||||
| cs->predictor -= 0x10000; | cs->predictor -= 0x10000; | ||||
| CLAMP_TO_SHORT(cs->predictor); | CLAMP_TO_SHORT(cs->predictor); | ||||
| // XXX: is this correct ??: *samples++ = cs->predictor; | |||||
| // XXX: is this correct ??: *samples++ = cs->predictor; | |||||
| cs->step_index = *src++; | cs->step_index = *src++; | ||||
| if (cs->step_index < 0) cs->step_index = 0; | if (cs->step_index < 0) cs->step_index = 0; | ||||
| @@ -710,19 +710,19 @@ static int adpcm_decode_frame(AVCodecContext *avctx, | |||||
| } | } | ||||
| for(m=4; src < (buf + buf_size);) { | for(m=4; src < (buf + buf_size);) { | ||||
| *samples++ = adpcm_ima_expand_nibble(&c->status[0], src[0] & 0x0F, 3); | |||||
| *samples++ = adpcm_ima_expand_nibble(&c->status[0], src[0] & 0x0F, 3); | |||||
| if (st) | if (st) | ||||
| *samples++ = adpcm_ima_expand_nibble(&c->status[1], src[4] & 0x0F, 3); | *samples++ = adpcm_ima_expand_nibble(&c->status[1], src[4] & 0x0F, 3); | ||||
| *samples++ = adpcm_ima_expand_nibble(&c->status[0], (src[0] >> 4) & 0x0F, 3); | *samples++ = adpcm_ima_expand_nibble(&c->status[0], (src[0] >> 4) & 0x0F, 3); | ||||
| if (st) { | |||||
| if (st) { | |||||
| *samples++ = adpcm_ima_expand_nibble(&c->status[1], (src[4] >> 4) & 0x0F, 3); | *samples++ = adpcm_ima_expand_nibble(&c->status[1], (src[4] >> 4) & 0x0F, 3); | ||||
| if (!--m) { | |||||
| m=4; | |||||
| src+=4; | |||||
| } | |||||
| } | |||||
| src++; | |||||
| } | |||||
| if (!--m) { | |||||
| m=4; | |||||
| src+=4; | |||||
| } | |||||
| } | |||||
| src++; | |||||
| } | |||||
| break; | break; | ||||
| case CODEC_ID_ADPCM_4XM: | case CODEC_ID_ADPCM_4XM: | ||||
| cs = &(c->status[0]); | cs = &(c->status[0]); | ||||
| @@ -739,13 +739,13 @@ static int adpcm_decode_frame(AVCodecContext *avctx, | |||||
| m= (buf_size - (src - buf))>>st; | m= (buf_size - (src - buf))>>st; | ||||
| for(i=0; i<m; i++) { | for(i=0; i<m; i++) { | ||||
| *samples++ = adpcm_ima_expand_nibble(&c->status[0], src[i] & 0x0F, 4); | |||||
| *samples++ = adpcm_ima_expand_nibble(&c->status[0], src[i] & 0x0F, 4); | |||||
| if (st) | if (st) | ||||
| *samples++ = adpcm_ima_expand_nibble(&c->status[1], src[i+m] & 0x0F, 4); | *samples++ = adpcm_ima_expand_nibble(&c->status[1], src[i+m] & 0x0F, 4); | ||||
| *samples++ = adpcm_ima_expand_nibble(&c->status[0], src[i] >> 4, 4); | *samples++ = adpcm_ima_expand_nibble(&c->status[0], src[i] >> 4, 4); | ||||
| if (st) | |||||
| if (st) | |||||
| *samples++ = adpcm_ima_expand_nibble(&c->status[1], src[i+m] >> 4, 4); | *samples++ = adpcm_ima_expand_nibble(&c->status[1], src[i+m] >> 4, 4); | ||||
| } | |||||
| } | |||||
| src += m<<st; | src += m<<st; | ||||
| @@ -958,7 +958,7 @@ static int adpcm_decode_frame(AVCodecContext *avctx, | |||||
| } | } | ||||
| break; | break; | ||||
| case CODEC_ID_ADPCM_CT: | case CODEC_ID_ADPCM_CT: | ||||
| while (src < buf + buf_size) { | |||||
| while (src < buf + buf_size) { | |||||
| if (st) { | if (st) { | ||||
| *samples++ = adpcm_ct_expand_nibble(&c->status[0], | *samples++ = adpcm_ct_expand_nibble(&c->status[0], | ||||
| (src[0] >> 4) & 0x0F); | (src[0] >> 4) & 0x0F); | ||||
| @@ -970,78 +970,78 @@ static int adpcm_decode_frame(AVCodecContext *avctx, | |||||
| *samples++ = adpcm_ct_expand_nibble(&c->status[0], | *samples++ = adpcm_ct_expand_nibble(&c->status[0], | ||||
| src[0] & 0x0F); | src[0] & 0x0F); | ||||
| } | } | ||||
| src++; | |||||
| src++; | |||||
| } | } | ||||
| break; | break; | ||||
| case CODEC_ID_ADPCM_SWF: | case CODEC_ID_ADPCM_SWF: | ||||
| { | { | ||||
| GetBitContext gb; | |||||
| const int *table; | |||||
| int k0, signmask; | |||||
| int size = buf_size*8; | |||||
| init_get_bits(&gb, buf, size); | |||||
| // first frame, read bits & inital values | |||||
| if (!c->nb_bits) | |||||
| { | |||||
| c->nb_bits = get_bits(&gb, 2)+2; | |||||
| // av_log(NULL,AV_LOG_INFO,"nb_bits: %d\n", c->nb_bits); | |||||
| } | |||||
| table = swf_index_tables[c->nb_bits-2]; | |||||
| k0 = 1 << (c->nb_bits-2); | |||||
| signmask = 1 << (c->nb_bits-1); | |||||
| while (get_bits_count(&gb) <= size) | |||||
| { | |||||
| int i; | |||||
| c->nb_samples++; | |||||
| // wrap around at every 4096 samples... | |||||
| if ((c->nb_samples & 0xfff) == 1) | |||||
| { | |||||
| for (i = 0; i <= st; i++) | |||||
| { | |||||
| *samples++ = c->status[i].predictor = get_sbits(&gb, 16); | |||||
| c->status[i].step_index = get_bits(&gb, 6); | |||||
| } | |||||
| } | |||||
| // similar to IMA adpcm | |||||
| for (i = 0; i <= st; i++) | |||||
| { | |||||
| int delta = get_bits(&gb, c->nb_bits); | |||||
| int step = step_table[c->status[i].step_index]; | |||||
| long vpdiff = 0; // vpdiff = (delta+0.5)*step/4 | |||||
| int k = k0; | |||||
| do { | |||||
| if (delta & k) | |||||
| vpdiff += step; | |||||
| step >>= 1; | |||||
| k >>= 1; | |||||
| } while(k); | |||||
| vpdiff += step; | |||||
| if (delta & signmask) | |||||
| c->status[i].predictor -= vpdiff; | |||||
| else | |||||
| c->status[i].predictor += vpdiff; | |||||
| c->status[i].step_index += table[delta & (~signmask)]; | |||||
| c->status[i].step_index = clip(c->status[i].step_index, 0, 88); | |||||
| c->status[i].predictor = clip(c->status[i].predictor, -32768, 32767); | |||||
| *samples++ = c->status[i].predictor; | |||||
| } | |||||
| } | |||||
| // src += get_bits_count(&gb)*8; | |||||
| src += size; | |||||
| break; | |||||
| GetBitContext gb; | |||||
| const int *table; | |||||
| int k0, signmask; | |||||
| int size = buf_size*8; | |||||
| init_get_bits(&gb, buf, size); | |||||
| // first frame, read bits & inital values | |||||
| if (!c->nb_bits) | |||||
| { | |||||
| c->nb_bits = get_bits(&gb, 2)+2; | |||||
| // av_log(NULL,AV_LOG_INFO,"nb_bits: %d\n", c->nb_bits); | |||||
| } | |||||
| table = swf_index_tables[c->nb_bits-2]; | |||||
| k0 = 1 << (c->nb_bits-2); | |||||
| signmask = 1 << (c->nb_bits-1); | |||||
| while (get_bits_count(&gb) <= size) | |||||
| { | |||||
| int i; | |||||
| c->nb_samples++; | |||||
| // wrap around at every 4096 samples... | |||||
| if ((c->nb_samples & 0xfff) == 1) | |||||
| { | |||||
| for (i = 0; i <= st; i++) | |||||
| { | |||||
| *samples++ = c->status[i].predictor = get_sbits(&gb, 16); | |||||
| c->status[i].step_index = get_bits(&gb, 6); | |||||
| } | |||||
| } | |||||
| // similar to IMA adpcm | |||||
| for (i = 0; i <= st; i++) | |||||
| { | |||||
| int delta = get_bits(&gb, c->nb_bits); | |||||
| int step = step_table[c->status[i].step_index]; | |||||
| long vpdiff = 0; // vpdiff = (delta+0.5)*step/4 | |||||
| int k = k0; | |||||
| do { | |||||
| if (delta & k) | |||||
| vpdiff += step; | |||||
| step >>= 1; | |||||
| k >>= 1; | |||||
| } while(k); | |||||
| vpdiff += step; | |||||
| if (delta & signmask) | |||||
| c->status[i].predictor -= vpdiff; | |||||
| else | |||||
| c->status[i].predictor += vpdiff; | |||||
| c->status[i].step_index += table[delta & (~signmask)]; | |||||
| c->status[i].step_index = clip(c->status[i].step_index, 0, 88); | |||||
| c->status[i].predictor = clip(c->status[i].predictor, -32768, 32767); | |||||
| *samples++ = c->status[i].predictor; | |||||
| } | |||||
| } | |||||
| // src += get_bits_count(&gb)*8; | |||||
| src += size; | |||||
| break; | |||||
| } | } | ||||
| case CODEC_ID_ADPCM_YAMAHA: | case CODEC_ID_ADPCM_YAMAHA: | ||||
| while (src < buf + buf_size) { | while (src < buf + buf_size) { | ||||
| @@ -35,7 +35,7 @@ void avcodec_register_all(void) | |||||
| static int inited = 0; | static int inited = 0; | ||||
| if (inited != 0) | if (inited != 0) | ||||
| return; | |||||
| return; | |||||
| inited = 1; | inited = 1; | ||||
| /* encoders */ | /* encoders */ | ||||
| @@ -84,24 +84,24 @@ static inline uint64_t WORD_VEC(uint64_t x) | |||||
| } *) (p))->__l) = l; \ | } *) (p))->__l) = l; \ | ||||
| } while (0) | } while (0) | ||||
| struct unaligned_long { uint64_t l; } __attribute__((packed)); | struct unaligned_long { uint64_t l; } __attribute__((packed)); | ||||
| #define ldq_u(p) (*(const uint64_t *) (((uint64_t) (p)) & ~7ul)) | |||||
| #define uldq(a) (((const struct unaligned_long *) (a))->l) | |||||
| #define ldq_u(p) (*(const uint64_t *) (((uint64_t) (p)) & ~7ul)) | |||||
| #define uldq(a) (((const struct unaligned_long *) (a))->l) | |||||
| #if GNUC_PREREQ(3,3) | #if GNUC_PREREQ(3,3) | ||||
| #define prefetch(p) __builtin_prefetch((p), 0, 1) | #define prefetch(p) __builtin_prefetch((p), 0, 1) | ||||
| #define prefetch_en(p) __builtin_prefetch((p), 0, 0) | #define prefetch_en(p) __builtin_prefetch((p), 0, 0) | ||||
| #define prefetch_m(p) __builtin_prefetch((p), 1, 1) | #define prefetch_m(p) __builtin_prefetch((p), 1, 1) | ||||
| #define prefetch_men(p) __builtin_prefetch((p), 1, 0) | #define prefetch_men(p) __builtin_prefetch((p), 1, 0) | ||||
| #define cmpbge __builtin_alpha_cmpbge | |||||
| #define cmpbge __builtin_alpha_cmpbge | |||||
| /* Avoid warnings. */ | /* Avoid warnings. */ | ||||
| #define extql(a, b) __builtin_alpha_extql(a, (uint64_t) (b)) | |||||
| #define extwl(a, b) __builtin_alpha_extwl(a, (uint64_t) (b)) | |||||
| #define extqh(a, b) __builtin_alpha_extqh(a, (uint64_t) (b)) | |||||
| #define zap __builtin_alpha_zap | |||||
| #define zapnot __builtin_alpha_zapnot | |||||
| #define amask __builtin_alpha_amask | |||||
| #define implver __builtin_alpha_implver | |||||
| #define rpcc __builtin_alpha_rpcc | |||||
| #define extql(a, b) __builtin_alpha_extql(a, (uint64_t) (b)) | |||||
| #define extwl(a, b) __builtin_alpha_extwl(a, (uint64_t) (b)) | |||||
| #define extqh(a, b) __builtin_alpha_extqh(a, (uint64_t) (b)) | |||||
| #define zap __builtin_alpha_zap | |||||
| #define zapnot __builtin_alpha_zapnot | |||||
| #define amask __builtin_alpha_amask | |||||
| #define implver __builtin_alpha_implver | |||||
| #define rpcc __builtin_alpha_rpcc | |||||
| #else | #else | ||||
| #define prefetch(p) asm volatile("ldl $31,%0" : : "m"(*(const char *) (p)) : "memory") | #define prefetch(p) asm volatile("ldl $31,%0" : : "m"(*(const char *) (p)) : "memory") | ||||
| #define prefetch_en(p) asm volatile("ldq $31,%0" : : "m"(*(const char *) (p)) : "memory") | #define prefetch_en(p) asm volatile("ldq $31,%0" : : "m"(*(const char *) (p)) : "memory") | ||||
| @@ -113,26 +113,26 @@ struct unaligned_long { uint64_t l; } __attribute__((packed)); | |||||
| #define extqh(a, b) ({ uint64_t __r; asm ("extqh %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) | #define extqh(a, b) ({ uint64_t __r; asm ("extqh %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) | ||||
| #define zap(a, b) ({ uint64_t __r; asm ("zap %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) | #define zap(a, b) ({ uint64_t __r; asm ("zap %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) | ||||
| #define zapnot(a, b) ({ uint64_t __r; asm ("zapnot %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) | #define zapnot(a, b) ({ uint64_t __r; asm ("zapnot %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) | ||||
| #define amask(a) ({ uint64_t __r; asm ("amask %1,%0" : "=r" (__r) : "rI" (a)); __r; }) | |||||
| #define implver() ({ uint64_t __r; asm ("implver %0" : "=r" (__r)); __r; }) | |||||
| #define rpcc() ({ uint64_t __r; asm volatile ("rpcc %0" : "=r" (__r)); __r; }) | |||||
| #define amask(a) ({ uint64_t __r; asm ("amask %1,%0" : "=r" (__r) : "rI" (a)); __r; }) | |||||
| #define implver() ({ uint64_t __r; asm ("implver %0" : "=r" (__r)); __r; }) | |||||
| #define rpcc() ({ uint64_t __r; asm volatile ("rpcc %0" : "=r" (__r)); __r; }) | |||||
| #endif | #endif | ||||
| #define wh64(p) asm volatile("wh64 (%0)" : : "r"(p) : "memory") | #define wh64(p) asm volatile("wh64 (%0)" : : "r"(p) : "memory") | ||||
| #if GNUC_PREREQ(3,3) && defined(__alpha_max__) | #if GNUC_PREREQ(3,3) && defined(__alpha_max__) | ||||
| #define minub8 __builtin_alpha_minub8 | |||||
| #define minsb8 __builtin_alpha_minsb8 | |||||
| #define minuw4 __builtin_alpha_minuw4 | |||||
| #define minsw4 __builtin_alpha_minsw4 | |||||
| #define maxub8 __builtin_alpha_maxub8 | |||||
| #define maxsb8 __builtin_alpha_maxsb8 | |||||
| #define maxuw4 __builtin_alpha_maxuw4 | |||||
| #define maxsw4 __builtin_alpha_maxsw4 | |||||
| #define perr __builtin_alpha_perr | |||||
| #define pklb __builtin_alpha_pklb | |||||
| #define pkwb __builtin_alpha_pkwb | |||||
| #define unpkbl __builtin_alpha_unpkbl | |||||
| #define unpkbw __builtin_alpha_unpkbw | |||||
| #define minub8 __builtin_alpha_minub8 | |||||
| #define minsb8 __builtin_alpha_minsb8 | |||||
| #define minuw4 __builtin_alpha_minuw4 | |||||
| #define minsw4 __builtin_alpha_minsw4 | |||||
| #define maxub8 __builtin_alpha_maxub8 | |||||
| #define maxsb8 __builtin_alpha_maxsb8 | |||||
| #define maxuw4 __builtin_alpha_maxuw4 | |||||
| #define maxsw4 __builtin_alpha_maxsw4 | |||||
| #define perr __builtin_alpha_perr | |||||
| #define pklb __builtin_alpha_pklb | |||||
| #define pkwb __builtin_alpha_pkwb | |||||
| #define unpkbl __builtin_alpha_unpkbl | |||||
| #define unpkbw __builtin_alpha_unpkbw | |||||
| #else | #else | ||||
| #define minub8(a, b) ({ uint64_t __r; asm (".arch ev6; minub8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) | #define minub8(a, b) ({ uint64_t __r; asm (".arch ev6; minub8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) | ||||
| #define minsb8(a, b) ({ uint64_t __r; asm (".arch ev6; minsb8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) | #define minsb8(a, b) ({ uint64_t __r; asm (".arch ev6; minsb8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) | ||||
| @@ -143,13 +143,13 @@ struct unaligned_long { uint64_t l; } __attribute__((packed)); | |||||
| #define maxuw4(a, b) ({ uint64_t __r; asm (".arch ev6; maxuw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) | #define maxuw4(a, b) ({ uint64_t __r; asm (".arch ev6; maxuw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) | ||||
| #define maxsw4(a, b) ({ uint64_t __r; asm (".arch ev6; maxsw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) | #define maxsw4(a, b) ({ uint64_t __r; asm (".arch ev6; maxsw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) | ||||
| #define perr(a, b) ({ uint64_t __r; asm (".arch ev6; perr %r1,%r2,%0" : "=r" (__r) : "%rJ" (a), "rJ" (b)); __r; }) | #define perr(a, b) ({ uint64_t __r; asm (".arch ev6; perr %r1,%r2,%0" : "=r" (__r) : "%rJ" (a), "rJ" (b)); __r; }) | ||||
| #define pklb(a) ({ uint64_t __r; asm (".arch ev6; pklb %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) | |||||
| #define pkwb(a) ({ uint64_t __r; asm (".arch ev6; pkwb %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) | |||||
| #define unpkbl(a) ({ uint64_t __r; asm (".arch ev6; unpkbl %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) | |||||
| #define unpkbw(a) ({ uint64_t __r; asm (".arch ev6; unpkbw %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) | |||||
| #define pklb(a) ({ uint64_t __r; asm (".arch ev6; pklb %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) | |||||
| #define pkwb(a) ({ uint64_t __r; asm (".arch ev6; pkwb %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) | |||||
| #define unpkbl(a) ({ uint64_t __r; asm (".arch ev6; unpkbl %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) | |||||
| #define unpkbw(a) ({ uint64_t __r; asm (".arch ev6; unpkbw %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) | |||||
| #endif | #endif | ||||
| #elif defined(__DECC) /* Digital/Compaq/hp "ccc" compiler */ | |||||
| #elif defined(__DECC) /* Digital/Compaq/hp "ccc" compiler */ | |||||
| #include <c_asm.h> | #include <c_asm.h> | ||||
| #define ldq(p) (*(const uint64_t *) (p)) | #define ldq(p) (*(const uint64_t *) (p)) | ||||
| @@ -157,7 +157,7 @@ struct unaligned_long { uint64_t l; } __attribute__((packed)); | |||||
| #define stq(l, p) do { *(uint64_t *) (p) = (l); } while (0) | #define stq(l, p) do { *(uint64_t *) (p) = (l); } while (0) | ||||
| #define stl(l, p) do { *(int32_t *) (p) = (l); } while (0) | #define stl(l, p) do { *(int32_t *) (p) = (l); } while (0) | ||||
| #define ldq_u(a) asm ("ldq_u %v0,0(%a0)", a) | #define ldq_u(a) asm ("ldq_u %v0,0(%a0)", a) | ||||
| #define uldq(a) (*(const __unaligned uint64_t *) (a)) | |||||
| #define uldq(a) (*(const __unaligned uint64_t *) (a)) | |||||
| #define cmpbge(a, b) asm ("cmpbge %a0,%a1,%v0", a, b) | #define cmpbge(a, b) asm ("cmpbge %a0,%a1,%v0", a, b) | ||||
| #define extql(a, b) asm ("extql %a0,%a1,%v0", a, b) | #define extql(a, b) asm ("extql %a0,%a1,%v0", a, b) | ||||
| #define extwl(a, b) asm ("extwl %a0,%a1,%v0", a, b) | #define extwl(a, b) asm ("extwl %a0,%a1,%v0", a, b) | ||||
| @@ -166,7 +166,7 @@ struct unaligned_long { uint64_t l; } __attribute__((packed)); | |||||
| #define zapnot(a, b) asm ("zapnot %a0,%a1,%v0", a, b) | #define zapnot(a, b) asm ("zapnot %a0,%a1,%v0", a, b) | ||||
| #define amask(a) asm ("amask %a0,%v0", a) | #define amask(a) asm ("amask %a0,%v0", a) | ||||
| #define implver() asm ("implver %v0") | #define implver() asm ("implver %v0") | ||||
| #define rpcc() asm ("rpcc %v0") | |||||
| #define rpcc() asm ("rpcc %v0") | |||||
| #define minub8(a, b) asm ("minub8 %a0,%a1,%v0", a, b) | #define minub8(a, b) asm ("minub8 %a0,%a1,%v0", a, b) | ||||
| #define minsb8(a, b) asm ("minsb8 %a0,%a1,%v0", a, b) | #define minsb8(a, b) asm ("minsb8 %a0,%a1,%v0", a, b) | ||||
| #define minuw4(a, b) asm ("minuw4 %a0,%a1,%v0", a, b) | #define minuw4(a, b) asm ("minuw4 %a0,%a1,%v0", a, b) | ||||
| @@ -71,7 +71,7 @@ $unaligned: | |||||
| addq a1, a2, a1 | addq a1, a2, a1 | ||||
| nop | nop | ||||
| ldq_u t4, 0(a1) | |||||
| ldq_u t4, 0(a1) | |||||
| ldq_u t5, 8(a1) | ldq_u t5, 8(a1) | ||||
| addq a1, a2, a1 | addq a1, a2, a1 | ||||
| nop | nop | ||||
| @@ -120,20 +120,20 @@ $aligned: | |||||
| addq a1, a2, a1 | addq a1, a2, a1 | ||||
| ldq t3, 0(a1) | ldq t3, 0(a1) | ||||
| addq a0, a2, t4 | |||||
| addq a1, a2, a1 | |||||
| addq t4, a2, t5 | |||||
| subq a3, 4, a3 | |||||
| addq a0, a2, t4 | |||||
| addq a1, a2, a1 | |||||
| addq t4, a2, t5 | |||||
| subq a3, 4, a3 | |||||
| stq t0, 0(a0) | |||||
| addq t5, a2, t6 | |||||
| stq t1, 0(t4) | |||||
| addq t6, a2, a0 | |||||
| stq t0, 0(a0) | |||||
| addq t5, a2, t6 | |||||
| stq t1, 0(t4) | |||||
| addq t6, a2, a0 | |||||
| stq t2, 0(t5) | |||||
| stq t3, 0(t6) | |||||
| stq t2, 0(t5) | |||||
| stq t3, 0(t6) | |||||
| bne a3, $aligned | |||||
| bne a3, $aligned | |||||
| ret | ret | ||||
| .end put_pixels_axp_asm | .end put_pixels_axp_asm | ||||
| @@ -116,7 +116,7 @@ int pix_abs8x8_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | |||||
| return result; | return result; | ||||
| } | } | ||||
| #if 0 /* now done in assembly */ | |||||
| #if 0 /* now done in assembly */ | |||||
| int pix_abs16x16_mvi(uint8_t *pix1, uint8_t *pix2, int line_size) | int pix_abs16x16_mvi(uint8_t *pix1, uint8_t *pix2, int line_size) | ||||
| { | { | ||||
| int result = 0; | int result = 0; | ||||
| @@ -285,7 +285,7 @@ void simple_idct_axp(DCTELEM *block) | |||||
| stq(v, block + 1 * 4); | stq(v, block + 1 * 4); | ||||
| stq(w, block + 2 * 4); | stq(w, block + 2 * 4); | ||||
| stq(w, block + 3 * 4); | stq(w, block + 3 * 4); | ||||
| block += 4 * 4; | |||||
| block += 4 * 4; | |||||
| } | } | ||||
| } else { | } else { | ||||
| for (i = 0; i < 8; i++) | for (i = 0; i < 8; i++) | ||||
| @@ -301,7 +301,7 @@ static int amr_nb_decode_frame(AVCodecContext * avctx, | |||||
| static int amr_nb_encode_frame(AVCodecContext *avctx, | static int amr_nb_encode_frame(AVCodecContext *avctx, | ||||
| unsigned char *frame/*out*/, int buf_size, void *data/*in*/) | |||||
| unsigned char *frame/*out*/, int buf_size, void *data/*in*/) | |||||
| { | { | ||||
| short serial_data[250] = {0}; | short serial_data[250] = {0}; | ||||
| @@ -440,7 +440,7 @@ static int amr_nb_decode_frame(AVCodecContext * avctx, | |||||
| } | } | ||||
| static int amr_nb_encode_frame(AVCodecContext *avctx, | static int amr_nb_encode_frame(AVCodecContext *avctx, | ||||
| unsigned char *frame/*out*/, int buf_size, void *data/*in*/) | |||||
| unsigned char *frame/*out*/, int buf_size, void *data/*in*/) | |||||
| { | { | ||||
| AMRContext *s = (AMRContext*)avctx->priv_data; | AMRContext *s = (AMRContext*)avctx->priv_data; | ||||
| int written; | int written; | ||||
| @@ -584,7 +584,7 @@ static int amr_wb_encode_close(AVCodecContext * avctx) | |||||
| } | } | ||||
| static int amr_wb_encode_frame(AVCodecContext *avctx, | static int amr_wb_encode_frame(AVCodecContext *avctx, | ||||
| unsigned char *frame/*out*/, int buf_size, void *data/*in*/) | |||||
| unsigned char *frame/*out*/, int buf_size, void *data/*in*/) | |||||
| { | { | ||||
| AMRWBContext *s = (AMRWBContext*) avctx->priv_data; | AMRWBContext *s = (AMRWBContext*) avctx->priv_data; | ||||
| int size = E_IF_encode(s->state, s->mode, data, frame, s->allow_dtx); | int size = E_IF_encode(s->state, s->mode, data, frame, s->allow_dtx); | ||||
| @@ -205,13 +205,13 @@ void dsputil_init_armv4l(DSPContext* c, AVCodecContext *avctx) | |||||
| #endif | #endif | ||||
| c->idct_put= j_rev_dct_ARM_put; | c->idct_put= j_rev_dct_ARM_put; | ||||
| c->idct_add= j_rev_dct_ARM_add; | c->idct_add= j_rev_dct_ARM_add; | ||||
| c->idct = j_rev_dct_ARM; | |||||
| c->idct = j_rev_dct_ARM; | |||||
| c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;/* FF_NO_IDCT_PERM */ | c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;/* FF_NO_IDCT_PERM */ | ||||
| } else if (idct_algo==FF_IDCT_SIMPLEARM){ | } else if (idct_algo==FF_IDCT_SIMPLEARM){ | ||||
| c->idct_put= simple_idct_ARM_put; | |||||
| c->idct_add= simple_idct_ARM_add; | |||||
| c->idct = simple_idct_ARM; | |||||
| c->idct_permutation_type= FF_NO_IDCT_PERM; | |||||
| c->idct_put= simple_idct_ARM_put; | |||||
| c->idct_add= simple_idct_ARM_add; | |||||
| c->idct = simple_idct_ARM; | |||||
| c->idct_permutation_type= FF_NO_IDCT_PERM; | |||||
| #ifdef HAVE_IPP | #ifdef HAVE_IPP | ||||
| } else if (idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_IPP){ | } else if (idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_IPP){ | ||||
| #else | #else | ||||
| @@ -138,10 +138,10 @@ void dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx) | |||||
| mm_flags = mm_support(); | mm_flags = mm_support(); | ||||
| if (avctx->dsp_mask) { | if (avctx->dsp_mask) { | ||||
| if (avctx->dsp_mask & FF_MM_FORCE) | |||||
| mm_flags |= (avctx->dsp_mask & 0xffff); | |||||
| else | |||||
| mm_flags &= ~(avctx->dsp_mask & 0xffff); | |||||
| if (avctx->dsp_mask & FF_MM_FORCE) | |||||
| mm_flags |= (avctx->dsp_mask & 0xffff); | |||||
| else | |||||
| mm_flags &= ~(avctx->dsp_mask & 0xffff); | |||||
| } | } | ||||
| if (!(mm_flags & MM_IWMMXT)) return; | if (!(mm_flags & MM_IWMMXT)) return; | ||||
| @@ -1,6 +1,6 @@ | |||||
| /* | /* | ||||
| C-like prototype : | C-like prototype : | ||||
| void j_rev_dct_ARM(DCTBLOCK data) | |||||
| void j_rev_dct_ARM(DCTBLOCK data) | |||||
| With DCTBLOCK being a pointer to an array of 64 'signed shorts' | With DCTBLOCK being a pointer to an array of 64 'signed shorts' | ||||
| @@ -51,336 +51,336 @@ | |||||
| #define FIX_M_1_961570560_ID 40 | #define FIX_M_1_961570560_ID 40 | ||||
| #define FIX_M_2_562915447_ID 44 | #define FIX_M_2_562915447_ID 44 | ||||
| #define FIX_0xFFFF_ID 48 | #define FIX_0xFFFF_ID 48 | ||||
| .text | |||||
| .align | |||||
| .text | |||||
| .align | |||||
| .global j_rev_dct_ARM | |||||
| .global j_rev_dct_ARM | |||||
| j_rev_dct_ARM: | j_rev_dct_ARM: | ||||
| stmdb sp!, { r4 - r12, lr } @ all callee saved regs | |||||
| stmdb sp!, { r4 - r12, lr } @ all callee saved regs | |||||
| sub sp, sp, #4 @ reserve some space on the stack | |||||
| str r0, [ sp ] @ save the DCT pointer to the stack | |||||
| sub sp, sp, #4 @ reserve some space on the stack | |||||
| str r0, [ sp ] @ save the DCT pointer to the stack | |||||
| mov lr, r0 @ lr = pointer to the current row | |||||
| mov r12, #8 @ r12 = row-counter | |||||
| add r11, pc, #(const_array-.-8) @ r11 = base pointer to the constants array | |||||
| mov lr, r0 @ lr = pointer to the current row | |||||
| mov r12, #8 @ r12 = row-counter | |||||
| add r11, pc, #(const_array-.-8) @ r11 = base pointer to the constants array | |||||
| row_loop: | row_loop: | ||||
| ldrsh r0, [lr, # 0] @ r0 = 'd0' | |||||
| ldrsh r1, [lr, # 8] @ r1 = 'd1' | |||||
| @ Optimization for row that have all items except the first set to 0 | |||||
| @ (this works as the DCTELEMS are always 4-byte aligned) | |||||
| ldr r5, [lr, # 0] | |||||
| ldr r2, [lr, # 4] | |||||
| ldr r3, [lr, # 8] | |||||
| ldr r4, [lr, #12] | |||||
| orr r3, r3, r4 | |||||
| orr r3, r3, r2 | |||||
| orrs r5, r3, r5 | |||||
| beq end_of_row_loop @ nothing to be done as ALL of them are '0' | |||||
| orrs r2, r3, r1 | |||||
| beq empty_row | |||||
| ldrsh r2, [lr, # 2] @ r2 = 'd2' | |||||
| ldrsh r4, [lr, # 4] @ r4 = 'd4' | |||||
| ldrsh r6, [lr, # 6] @ r6 = 'd6' | |||||
| ldr r3, [r11, #FIX_0_541196100_ID] | |||||
| add r7, r2, r6 | |||||
| ldr r5, [r11, #FIX_M_1_847759065_ID] | |||||
| mul r7, r3, r7 @ r7 = z1 | |||||
| ldr r3, [r11, #FIX_0_765366865_ID] | |||||
| mla r6, r5, r6, r7 @ r6 = tmp2 | |||||
| add r5, r0, r4 @ r5 = tmp0 | |||||
| mla r2, r3, r2, r7 @ r2 = tmp3 | |||||
| sub r3, r0, r4 @ r3 = tmp1 | |||||
| add r0, r2, r5, lsl #13 @ r0 = tmp10 | |||||
| rsb r2, r2, r5, lsl #13 @ r2 = tmp13 | |||||
| add r4, r6, r3, lsl #13 @ r4 = tmp11 | |||||
| rsb r3, r6, r3, lsl #13 @ r3 = tmp12 | |||||
| stmdb sp!, { r0, r2, r3, r4 } @ save on the stack tmp10, tmp13, tmp12, tmp11 | |||||
| ldrsh r3, [lr, #10] @ r3 = 'd3' | |||||
| ldrsh r5, [lr, #12] @ r5 = 'd5' | |||||
| ldrsh r7, [lr, #14] @ r7 = 'd7' | |||||
| add r0, r3, r5 @ r0 = 'z2' | |||||
| add r2, r1, r7 @ r2 = 'z1' | |||||
| add r4, r3, r7 @ r4 = 'z3' | |||||
| add r6, r1, r5 @ r6 = 'z4' | |||||
| ldr r9, [r11, #FIX_1_175875602_ID] | |||||
| add r8, r4, r6 @ r8 = z3 + z4 | |||||
| ldr r10, [r11, #FIX_M_0_899976223_ID] | |||||
| mul r8, r9, r8 @ r8 = 'z5' | |||||
| ldr r9, [r11, #FIX_M_2_562915447_ID] | |||||
| mul r2, r10, r2 @ r2 = 'z1' | |||||
| ldr r10, [r11, #FIX_M_1_961570560_ID] | |||||
| mul r0, r9, r0 @ r0 = 'z2' | |||||
| ldr r9, [r11, #FIX_M_0_390180644_ID] | |||||
| mla r4, r10, r4, r8 @ r4 = 'z3' | |||||
| ldr r10, [r11, #FIX_0_298631336_ID] | |||||
| mla r6, r9, r6, r8 @ r6 = 'z4' | |||||
| ldr r9, [r11, #FIX_2_053119869_ID] | |||||
| mla r7, r10, r7, r2 @ r7 = tmp0 + z1 | |||||
| ldr r10, [r11, #FIX_3_072711026_ID] | |||||
| mla r5, r9, r5, r0 @ r5 = tmp1 + z2 | |||||
| ldr r9, [r11, #FIX_1_501321110_ID] | |||||
| mla r3, r10, r3, r0 @ r3 = tmp2 + z2 | |||||
| add r7, r7, r4 @ r7 = tmp0 | |||||
| mla r1, r9, r1, r2 @ r1 = tmp3 + z1 | |||||
| add r5, r5, r6 @ r5 = tmp1 | |||||
| add r3, r3, r4 @ r3 = tmp2 | |||||
| add r1, r1, r6 @ r1 = tmp3 | |||||
| ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp12 / r6 = tmp11 | |||||
| @ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0 | |||||
| @ Compute DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS) | |||||
| add r8, r0, r1 | |||||
| add r8, r8, #(1<<10) | |||||
| mov r8, r8, asr #11 | |||||
| strh r8, [lr, # 0] | |||||
| @ Compute DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS) | |||||
| sub r8, r0, r1 | |||||
| add r8, r8, #(1<<10) | |||||
| mov r8, r8, asr #11 | |||||
| strh r8, [lr, #14] | |||||
| @ Compute DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS) | |||||
| add r8, r6, r3 | |||||
| add r8, r8, #(1<<10) | |||||
| mov r8, r8, asr #11 | |||||
| strh r8, [lr, # 2] | |||||
| @ Compute DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS) | |||||
| sub r8, r6, r3 | |||||
| add r8, r8, #(1<<10) | |||||
| mov r8, r8, asr #11 | |||||
| strh r8, [lr, #12] | |||||
| @ Compute DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS) | |||||
| add r8, r4, r5 | |||||
| add r8, r8, #(1<<10) | |||||
| mov r8, r8, asr #11 | |||||
| strh r8, [lr, # 4] | |||||
| @ Compute DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS) | |||||
| sub r8, r4, r5 | |||||
| add r8, r8, #(1<<10) | |||||
| mov r8, r8, asr #11 | |||||
| strh r8, [lr, #10] | |||||
| @ Compute DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS) | |||||
| add r8, r2, r7 | |||||
| add r8, r8, #(1<<10) | |||||
| mov r8, r8, asr #11 | |||||
| strh r8, [lr, # 6] | |||||
| @ Compute DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS) | |||||
| sub r8, r2, r7 | |||||
| add r8, r8, #(1<<10) | |||||
| mov r8, r8, asr #11 | |||||
| strh r8, [lr, # 8] | |||||
| @ End of row loop | |||||
| add lr, lr, #16 | |||||
| subs r12, r12, #1 | |||||
| bne row_loop | |||||
| beq start_column_loop | |||||
| ldrsh r0, [lr, # 0] @ r0 = 'd0' | |||||
| ldrsh r1, [lr, # 8] @ r1 = 'd1' | |||||
| @ Optimization for row that have all items except the first set to 0 | |||||
| @ (this works as the DCTELEMS are always 4-byte aligned) | |||||
| ldr r5, [lr, # 0] | |||||
| ldr r2, [lr, # 4] | |||||
| ldr r3, [lr, # 8] | |||||
| ldr r4, [lr, #12] | |||||
| orr r3, r3, r4 | |||||
| orr r3, r3, r2 | |||||
| orrs r5, r3, r5 | |||||
| beq end_of_row_loop @ nothing to be done as ALL of them are '0' | |||||
| orrs r2, r3, r1 | |||||
| beq empty_row | |||||
| ldrsh r2, [lr, # 2] @ r2 = 'd2' | |||||
| ldrsh r4, [lr, # 4] @ r4 = 'd4' | |||||
| ldrsh r6, [lr, # 6] @ r6 = 'd6' | |||||
| ldr r3, [r11, #FIX_0_541196100_ID] | |||||
| add r7, r2, r6 | |||||
| ldr r5, [r11, #FIX_M_1_847759065_ID] | |||||
| mul r7, r3, r7 @ r7 = z1 | |||||
| ldr r3, [r11, #FIX_0_765366865_ID] | |||||
| mla r6, r5, r6, r7 @ r6 = tmp2 | |||||
| add r5, r0, r4 @ r5 = tmp0 | |||||
| mla r2, r3, r2, r7 @ r2 = tmp3 | |||||
| sub r3, r0, r4 @ r3 = tmp1 | |||||
| add r0, r2, r5, lsl #13 @ r0 = tmp10 | |||||
| rsb r2, r2, r5, lsl #13 @ r2 = tmp13 | |||||
| add r4, r6, r3, lsl #13 @ r4 = tmp11 | |||||
| rsb r3, r6, r3, lsl #13 @ r3 = tmp12 | |||||
| stmdb sp!, { r0, r2, r3, r4 } @ save on the stack tmp10, tmp13, tmp12, tmp11 | |||||
| ldrsh r3, [lr, #10] @ r3 = 'd3' | |||||
| ldrsh r5, [lr, #12] @ r5 = 'd5' | |||||
| ldrsh r7, [lr, #14] @ r7 = 'd7' | |||||
| add r0, r3, r5 @ r0 = 'z2' | |||||
| add r2, r1, r7 @ r2 = 'z1' | |||||
| add r4, r3, r7 @ r4 = 'z3' | |||||
| add r6, r1, r5 @ r6 = 'z4' | |||||
| ldr r9, [r11, #FIX_1_175875602_ID] | |||||
| add r8, r4, r6 @ r8 = z3 + z4 | |||||
| ldr r10, [r11, #FIX_M_0_899976223_ID] | |||||
| mul r8, r9, r8 @ r8 = 'z5' | |||||
| ldr r9, [r11, #FIX_M_2_562915447_ID] | |||||
| mul r2, r10, r2 @ r2 = 'z1' | |||||
| ldr r10, [r11, #FIX_M_1_961570560_ID] | |||||
| mul r0, r9, r0 @ r0 = 'z2' | |||||
| ldr r9, [r11, #FIX_M_0_390180644_ID] | |||||
| mla r4, r10, r4, r8 @ r4 = 'z3' | |||||
| ldr r10, [r11, #FIX_0_298631336_ID] | |||||
| mla r6, r9, r6, r8 @ r6 = 'z4' | |||||
| ldr r9, [r11, #FIX_2_053119869_ID] | |||||
| mla r7, r10, r7, r2 @ r7 = tmp0 + z1 | |||||
| ldr r10, [r11, #FIX_3_072711026_ID] | |||||
| mla r5, r9, r5, r0 @ r5 = tmp1 + z2 | |||||
| ldr r9, [r11, #FIX_1_501321110_ID] | |||||
| mla r3, r10, r3, r0 @ r3 = tmp2 + z2 | |||||
| add r7, r7, r4 @ r7 = tmp0 | |||||
| mla r1, r9, r1, r2 @ r1 = tmp3 + z1 | |||||
| add r5, r5, r6 @ r5 = tmp1 | |||||
| add r3, r3, r4 @ r3 = tmp2 | |||||
| add r1, r1, r6 @ r1 = tmp3 | |||||
| ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp12 / r6 = tmp11 | |||||
| @ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0 | |||||
| @ Compute DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS) | |||||
| add r8, r0, r1 | |||||
| add r8, r8, #(1<<10) | |||||
| mov r8, r8, asr #11 | |||||
| strh r8, [lr, # 0] | |||||
| @ Compute DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS) | |||||
| sub r8, r0, r1 | |||||
| add r8, r8, #(1<<10) | |||||
| mov r8, r8, asr #11 | |||||
| strh r8, [lr, #14] | |||||
| @ Compute DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS) | |||||
| add r8, r6, r3 | |||||
| add r8, r8, #(1<<10) | |||||
| mov r8, r8, asr #11 | |||||
| strh r8, [lr, # 2] | |||||
| @ Compute DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS) | |||||
| sub r8, r6, r3 | |||||
| add r8, r8, #(1<<10) | |||||
| mov r8, r8, asr #11 | |||||
| strh r8, [lr, #12] | |||||
| @ Compute DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS) | |||||
| add r8, r4, r5 | |||||
| add r8, r8, #(1<<10) | |||||
| mov r8, r8, asr #11 | |||||
| strh r8, [lr, # 4] | |||||
| @ Compute DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS) | |||||
| sub r8, r4, r5 | |||||
| add r8, r8, #(1<<10) | |||||
| mov r8, r8, asr #11 | |||||
| strh r8, [lr, #10] | |||||
| @ Compute DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS) | |||||
| add r8, r2, r7 | |||||
| add r8, r8, #(1<<10) | |||||
| mov r8, r8, asr #11 | |||||
| strh r8, [lr, # 6] | |||||
| @ Compute DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS) | |||||
| sub r8, r2, r7 | |||||
| add r8, r8, #(1<<10) | |||||
| mov r8, r8, asr #11 | |||||
| strh r8, [lr, # 8] | |||||
| @ End of row loop | |||||
| add lr, lr, #16 | |||||
| subs r12, r12, #1 | |||||
| bne row_loop | |||||
| beq start_column_loop | |||||
| empty_row: | empty_row: | ||||
| ldr r1, [r11, #FIX_0xFFFF_ID] | |||||
| mov r0, r0, lsl #2 | |||||
| and r0, r0, r1 | |||||
| add r0, r0, r0, lsl #16 | |||||
| str r0, [lr, # 0] | |||||
| str r0, [lr, # 4] | |||||
| str r0, [lr, # 8] | |||||
| str r0, [lr, #12] | |||||
| ldr r1, [r11, #FIX_0xFFFF_ID] | |||||
| mov r0, r0, lsl #2 | |||||
| and r0, r0, r1 | |||||
| add r0, r0, r0, lsl #16 | |||||
| str r0, [lr, # 0] | |||||
| str r0, [lr, # 4] | |||||
| str r0, [lr, # 8] | |||||
| str r0, [lr, #12] | |||||
| end_of_row_loop: | end_of_row_loop: | ||||
| @ End of loop | |||||
| add lr, lr, #16 | |||||
| subs r12, r12, #1 | |||||
| bne row_loop | |||||
| @ End of loop | |||||
| add lr, lr, #16 | |||||
| subs r12, r12, #1 | |||||
| bne row_loop | |||||
| start_column_loop: | start_column_loop: | ||||
| @ Start of column loop | |||||
| ldr lr, [ sp ] | |||||
| mov r12, #8 | |||||
| @ Start of column loop | |||||
| ldr lr, [ sp ] | |||||
| mov r12, #8 | |||||
| column_loop: | column_loop: | ||||
| ldrsh r0, [lr, #( 0*8)] @ r0 = 'd0' | |||||
| ldrsh r2, [lr, #( 4*8)] @ r2 = 'd2' | |||||
| ldrsh r4, [lr, #( 8*8)] @ r4 = 'd4' | |||||
| ldrsh r6, [lr, #(12*8)] @ r6 = 'd6' | |||||
| ldr r3, [r11, #FIX_0_541196100_ID] | |||||
| add r1, r2, r6 | |||||
| ldr r5, [r11, #FIX_M_1_847759065_ID] | |||||
| mul r1, r3, r1 @ r1 = z1 | |||||
| ldr r3, [r11, #FIX_0_765366865_ID] | |||||
| mla r6, r5, r6, r1 @ r6 = tmp2 | |||||
| add r5, r0, r4 @ r5 = tmp0 | |||||
| mla r2, r3, r2, r1 @ r2 = tmp3 | |||||
| sub r3, r0, r4 @ r3 = tmp1 | |||||
| add r0, r2, r5, lsl #13 @ r0 = tmp10 | |||||
| rsb r2, r2, r5, lsl #13 @ r2 = tmp13 | |||||
| add r4, r6, r3, lsl #13 @ r4 = tmp11 | |||||
| rsb r6, r6, r3, lsl #13 @ r6 = tmp12 | |||||
| ldrsh r1, [lr, #( 2*8)] @ r1 = 'd1' | |||||
| ldrsh r3, [lr, #( 6*8)] @ r3 = 'd3' | |||||
| ldrsh r5, [lr, #(10*8)] @ r5 = 'd5' | |||||
| ldrsh r7, [lr, #(14*8)] @ r7 = 'd7' | |||||
| @ Check for empty odd column (happens about 20 to 25 % of the time according to my stats) | |||||
| orr r9, r1, r3 | |||||
| orr r10, r5, r7 | |||||
| orrs r10, r9, r10 | |||||
| beq empty_odd_column | |||||
| stmdb sp!, { r0, r2, r4, r6 } @ save on the stack tmp10, tmp13, tmp12, tmp11 | |||||
| add r0, r3, r5 @ r0 = 'z2' | |||||
| add r2, r1, r7 @ r2 = 'z1' | |||||
| add r4, r3, r7 @ r4 = 'z3' | |||||
| add r6, r1, r5 @ r6 = 'z4' | |||||
| ldr r9, [r11, #FIX_1_175875602_ID] | |||||
| add r8, r4, r6 | |||||
| ldr r10, [r11, #FIX_M_0_899976223_ID] | |||||
| mul r8, r9, r8 @ r8 = 'z5' | |||||
| ldr r9, [r11, #FIX_M_2_562915447_ID] | |||||
| mul r2, r10, r2 @ r2 = 'z1' | |||||
| ldr r10, [r11, #FIX_M_1_961570560_ID] | |||||
| mul r0, r9, r0 @ r0 = 'z2' | |||||
| ldr r9, [r11, #FIX_M_0_390180644_ID] | |||||
| mla r4, r10, r4, r8 @ r4 = 'z3' | |||||
| ldr r10, [r11, #FIX_0_298631336_ID] | |||||
| mla r6, r9, r6, r8 @ r6 = 'z4' | |||||
| ldr r9, [r11, #FIX_2_053119869_ID] | |||||
| mla r7, r10, r7, r2 @ r7 = tmp0 + z1 | |||||
| ldr r10, [r11, #FIX_3_072711026_ID] | |||||
| mla r5, r9, r5, r0 @ r5 = tmp1 + z2 | |||||
| ldr r9, [r11, #FIX_1_501321110_ID] | |||||
| mla r3, r10, r3, r0 @ r3 = tmp2 + z2 | |||||
| add r7, r7, r4 @ r7 = tmp0 | |||||
| mla r1, r9, r1, r2 @ r1 = tmp3 + z1 | |||||
| add r5, r5, r6 @ r5 = tmp1 | |||||
| add r3, r3, r4 @ r3 = tmp2 | |||||
| add r1, r1, r6 @ r1 = tmp3 | |||||
| ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp11 / r6 = tmp12 | |||||
| @ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0 | |||||
| @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3) | |||||
| add r8, r0, r1 | |||||
| add r8, r8, #(1<<17) | |||||
| mov r8, r8, asr #18 | |||||
| strh r8, [lr, #( 0*8)] | |||||
| @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3) | |||||
| sub r8, r0, r1 | |||||
| add r8, r8, #(1<<17) | |||||
| mov r8, r8, asr #18 | |||||
| strh r8, [lr, #(14*8)] | |||||
| @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3) | |||||
| add r8, r4, r3 | |||||
| add r8, r8, #(1<<17) | |||||
| mov r8, r8, asr #18 | |||||
| strh r8, [lr, #( 2*8)] | |||||
| @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3) | |||||
| sub r8, r4, r3 | |||||
| add r8, r8, #(1<<17) | |||||
| mov r8, r8, asr #18 | |||||
| strh r8, [lr, #(12*8)] | |||||
| @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3) | |||||
| add r8, r6, r5 | |||||
| add r8, r8, #(1<<17) | |||||
| mov r8, r8, asr #18 | |||||
| strh r8, [lr, #( 4*8)] | |||||
| @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3) | |||||
| sub r8, r6, r5 | |||||
| add r8, r8, #(1<<17) | |||||
| mov r8, r8, asr #18 | |||||
| strh r8, [lr, #(10*8)] | |||||
| @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3) | |||||
| add r8, r2, r7 | |||||
| add r8, r8, #(1<<17) | |||||
| mov r8, r8, asr #18 | |||||
| strh r8, [lr, #( 6*8)] | |||||
| @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3) | |||||
| sub r8, r2, r7 | |||||
| add r8, r8, #(1<<17) | |||||
| mov r8, r8, asr #18 | |||||
| strh r8, [lr, #( 8*8)] | |||||
| @ End of row loop | |||||
| add lr, lr, #2 | |||||
| subs r12, r12, #1 | |||||
| bne column_loop | |||||
| beq the_end | |||||
| ldrsh r0, [lr, #( 0*8)] @ r0 = 'd0' | |||||
| ldrsh r2, [lr, #( 4*8)] @ r2 = 'd2' | |||||
| ldrsh r4, [lr, #( 8*8)] @ r4 = 'd4' | |||||
| ldrsh r6, [lr, #(12*8)] @ r6 = 'd6' | |||||
| ldr r3, [r11, #FIX_0_541196100_ID] | |||||
| add r1, r2, r6 | |||||
| ldr r5, [r11, #FIX_M_1_847759065_ID] | |||||
| mul r1, r3, r1 @ r1 = z1 | |||||
| ldr r3, [r11, #FIX_0_765366865_ID] | |||||
| mla r6, r5, r6, r1 @ r6 = tmp2 | |||||
| add r5, r0, r4 @ r5 = tmp0 | |||||
| mla r2, r3, r2, r1 @ r2 = tmp3 | |||||
| sub r3, r0, r4 @ r3 = tmp1 | |||||
| add r0, r2, r5, lsl #13 @ r0 = tmp10 | |||||
| rsb r2, r2, r5, lsl #13 @ r2 = tmp13 | |||||
| add r4, r6, r3, lsl #13 @ r4 = tmp11 | |||||
| rsb r6, r6, r3, lsl #13 @ r6 = tmp12 | |||||
| ldrsh r1, [lr, #( 2*8)] @ r1 = 'd1' | |||||
| ldrsh r3, [lr, #( 6*8)] @ r3 = 'd3' | |||||
| ldrsh r5, [lr, #(10*8)] @ r5 = 'd5' | |||||
| ldrsh r7, [lr, #(14*8)] @ r7 = 'd7' | |||||
| @ Check for empty odd column (happens about 20 to 25 % of the time according to my stats) | |||||
| orr r9, r1, r3 | |||||
| orr r10, r5, r7 | |||||
| orrs r10, r9, r10 | |||||
| beq empty_odd_column | |||||
| stmdb sp!, { r0, r2, r4, r6 } @ save on the stack tmp10, tmp13, tmp12, tmp11 | |||||
| add r0, r3, r5 @ r0 = 'z2' | |||||
| add r2, r1, r7 @ r2 = 'z1' | |||||
| add r4, r3, r7 @ r4 = 'z3' | |||||
| add r6, r1, r5 @ r6 = 'z4' | |||||
| ldr r9, [r11, #FIX_1_175875602_ID] | |||||
| add r8, r4, r6 | |||||
| ldr r10, [r11, #FIX_M_0_899976223_ID] | |||||
| mul r8, r9, r8 @ r8 = 'z5' | |||||
| ldr r9, [r11, #FIX_M_2_562915447_ID] | |||||
| mul r2, r10, r2 @ r2 = 'z1' | |||||
| ldr r10, [r11, #FIX_M_1_961570560_ID] | |||||
| mul r0, r9, r0 @ r0 = 'z2' | |||||
| ldr r9, [r11, #FIX_M_0_390180644_ID] | |||||
| mla r4, r10, r4, r8 @ r4 = 'z3' | |||||
| ldr r10, [r11, #FIX_0_298631336_ID] | |||||
| mla r6, r9, r6, r8 @ r6 = 'z4' | |||||
| ldr r9, [r11, #FIX_2_053119869_ID] | |||||
| mla r7, r10, r7, r2 @ r7 = tmp0 + z1 | |||||
| ldr r10, [r11, #FIX_3_072711026_ID] | |||||
| mla r5, r9, r5, r0 @ r5 = tmp1 + z2 | |||||
| ldr r9, [r11, #FIX_1_501321110_ID] | |||||
| mla r3, r10, r3, r0 @ r3 = tmp2 + z2 | |||||
| add r7, r7, r4 @ r7 = tmp0 | |||||
| mla r1, r9, r1, r2 @ r1 = tmp3 + z1 | |||||
| add r5, r5, r6 @ r5 = tmp1 | |||||
| add r3, r3, r4 @ r3 = tmp2 | |||||
| add r1, r1, r6 @ r1 = tmp3 | |||||
| ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp11 / r6 = tmp12 | |||||
| @ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0 | |||||
| @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3) | |||||
| add r8, r0, r1 | |||||
| add r8, r8, #(1<<17) | |||||
| mov r8, r8, asr #18 | |||||
| strh r8, [lr, #( 0*8)] | |||||
| @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3) | |||||
| sub r8, r0, r1 | |||||
| add r8, r8, #(1<<17) | |||||
| mov r8, r8, asr #18 | |||||
| strh r8, [lr, #(14*8)] | |||||
| @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3) | |||||
| add r8, r4, r3 | |||||
| add r8, r8, #(1<<17) | |||||
| mov r8, r8, asr #18 | |||||
| strh r8, [lr, #( 2*8)] | |||||
| @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3) | |||||
| sub r8, r4, r3 | |||||
| add r8, r8, #(1<<17) | |||||
| mov r8, r8, asr #18 | |||||
| strh r8, [lr, #(12*8)] | |||||
| @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3) | |||||
| add r8, r6, r5 | |||||
| add r8, r8, #(1<<17) | |||||
| mov r8, r8, asr #18 | |||||
| strh r8, [lr, #( 4*8)] | |||||
| @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3) | |||||
| sub r8, r6, r5 | |||||
| add r8, r8, #(1<<17) | |||||
| mov r8, r8, asr #18 | |||||
| strh r8, [lr, #(10*8)] | |||||
| @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3) | |||||
| add r8, r2, r7 | |||||
| add r8, r8, #(1<<17) | |||||
| mov r8, r8, asr #18 | |||||
| strh r8, [lr, #( 6*8)] | |||||
| @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3) | |||||
| sub r8, r2, r7 | |||||
| add r8, r8, #(1<<17) | |||||
| mov r8, r8, asr #18 | |||||
| strh r8, [lr, #( 8*8)] | |||||
| @ End of row loop | |||||
| add lr, lr, #2 | |||||
| subs r12, r12, #1 | |||||
| bne column_loop | |||||
| beq the_end | |||||
| empty_odd_column: | empty_odd_column: | ||||
| @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3) | |||||
| @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3) | |||||
| add r0, r0, #(1<<17) | |||||
| mov r0, r0, asr #18 | |||||
| strh r0, [lr, #( 0*8)] | |||||
| strh r0, [lr, #(14*8)] | |||||
| @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3) | |||||
| @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3) | |||||
| add r4, r4, #(1<<17) | |||||
| mov r4, r4, asr #18 | |||||
| strh r4, [lr, #( 2*8)] | |||||
| strh r4, [lr, #(12*8)] | |||||
| @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3) | |||||
| @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3) | |||||
| add r6, r6, #(1<<17) | |||||
| mov r6, r6, asr #18 | |||||
| strh r6, [lr, #( 4*8)] | |||||
| strh r6, [lr, #(10*8)] | |||||
| @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3) | |||||
| @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3) | |||||
| add r2, r2, #(1<<17) | |||||
| mov r2, r2, asr #18 | |||||
| strh r2, [lr, #( 6*8)] | |||||
| strh r2, [lr, #( 8*8)] | |||||
| @ End of row loop | |||||
| add lr, lr, #2 | |||||
| subs r12, r12, #1 | |||||
| bne column_loop | |||||
| @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3) | |||||
| @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3) | |||||
| add r0, r0, #(1<<17) | |||||
| mov r0, r0, asr #18 | |||||
| strh r0, [lr, #( 0*8)] | |||||
| strh r0, [lr, #(14*8)] | |||||
| @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3) | |||||
| @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3) | |||||
| add r4, r4, #(1<<17) | |||||
| mov r4, r4, asr #18 | |||||
| strh r4, [lr, #( 2*8)] | |||||
| strh r4, [lr, #(12*8)] | |||||
| @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3) | |||||
| @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3) | |||||
| add r6, r6, #(1<<17) | |||||
| mov r6, r6, asr #18 | |||||
| strh r6, [lr, #( 4*8)] | |||||
| strh r6, [lr, #(10*8)] | |||||
| @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3) | |||||
| @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3) | |||||
| add r2, r2, #(1<<17) | |||||
| mov r2, r2, asr #18 | |||||
| strh r2, [lr, #( 6*8)] | |||||
| strh r2, [lr, #( 8*8)] | |||||
| @ End of row loop | |||||
| add lr, lr, #2 | |||||
| subs r12, r12, #1 | |||||
| bne column_loop | |||||
| the_end: | the_end: | ||||
| @ The end.... | |||||
| add sp, sp, #4 | |||||
| ldmia sp!, { r4 - r12, pc } @ restore callee saved regs and return | |||||
| @ The end.... | |||||
| add sp, sp, #4 | |||||
| ldmia sp!, { r4 - r12, pc } @ restore callee saved regs and return | |||||
| const_array: | const_array: | ||||
| .align | |||||
| .word FIX_0_298631336 | |||||
| .word FIX_0_541196100 | |||||
| .word FIX_0_765366865 | |||||
| .word FIX_1_175875602 | |||||
| .word FIX_1_501321110 | |||||
| .word FIX_2_053119869 | |||||
| .word FIX_3_072711026 | |||||
| .word FIX_M_0_390180644 | |||||
| .word FIX_M_0_899976223 | |||||
| .word FIX_M_1_847759065 | |||||
| .word FIX_M_1_961570560 | |||||
| .word FIX_M_2_562915447 | |||||
| .word FIX_0xFFFF | |||||
| .align | |||||
| .word FIX_0_298631336 | |||||
| .word FIX_0_541196100 | |||||
| .word FIX_0_765366865 | |||||
| .word FIX_1_175875602 | |||||
| .word FIX_1_501321110 | |||||
| .word FIX_2_053119869 | |||||
| .word FIX_3_072711026 | |||||
| .word FIX_M_0_390180644 | |||||
| .word FIX_M_0_899976223 | |||||
| .word FIX_M_1_847759065 | |||||
| .word FIX_M_1_961570560 | |||||
| .word FIX_M_2_562915447 | |||||
| .word FIX_0xFFFF | |||||
| @@ -51,9 +51,9 @@ | |||||
| #define COL_SHIFTED_1 524288 /* 1<< (COL_SHIFT-1) */ | #define COL_SHIFTED_1 524288 /* 1<< (COL_SHIFT-1) */ | ||||
| .text | |||||
| .align | |||||
| .global simple_idct_ARM | |||||
| .text | |||||
| .align | |||||
| .global simple_idct_ARM | |||||
| simple_idct_ARM: | simple_idct_ARM: | ||||
| @@ void simple_idct_ARM(int16_t *block) | @@ void simple_idct_ARM(int16_t *block) | ||||
| @@ -120,8 +120,8 @@ __b_evaluation: | |||||
| ldr r11, [r12, #offW7] @ R11=W7 | ldr r11, [r12, #offW7] @ R11=W7 | ||||
| mul r5, r10, r7 @ R5=W5*ROWr16[1]=b2 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle) | mul r5, r10, r7 @ R5=W5*ROWr16[1]=b2 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle) | ||||
| mul r7, r11, r7 @ R7=W7*ROWr16[1]=b3 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle) | mul r7, r11, r7 @ R7=W7*ROWr16[1]=b3 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle) | ||||
| teq r2, #0 @ if null avoid muls | |||||
| mlane r0, r9, r2, r0 @ R0+=W3*ROWr16[3]=b0 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle) | |||||
| teq r2, #0 @ if null avoid muls | |||||
| mlane r0, r9, r2, r0 @ R0+=W3*ROWr16[3]=b0 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle) | |||||
| rsbne r2, r2, #0 @ R2=-ROWr16[3] | rsbne r2, r2, #0 @ R2=-ROWr16[3] | ||||
| mlane r1, r11, r2, r1 @ R1-=W7*ROWr16[3]=b1 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle) | mlane r1, r11, r2, r1 @ R1-=W7*ROWr16[3]=b1 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle) | ||||
| mlane r5, r8, r2, r5 @ R5-=W1*ROWr16[3]=b2 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle) | mlane r5, r8, r2, r5 @ R5-=W1*ROWr16[3]=b2 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle) | ||||
| @@ -147,7 +147,7 @@ __b_evaluation: | |||||
| @@ MAC16(b3, -W1, row[7]); | @@ MAC16(b3, -W1, row[7]); | ||||
| @@ MAC16(b1, -W5, row[7]); | @@ MAC16(b1, -W5, row[7]); | ||||
| mov r3, r3, asr #16 @ R3=ROWr16[5] | mov r3, r3, asr #16 @ R3=ROWr16[5] | ||||
| teq r3, #0 @ if null avoid muls | |||||
| teq r3, #0 @ if null avoid muls | |||||
| mlane r0, r10, r3, r0 @ R0+=W5*ROWr16[5]=b0 | mlane r0, r10, r3, r0 @ R0+=W5*ROWr16[5]=b0 | ||||
| mov r4, r4, asr #16 @ R4=ROWr16[7] | mov r4, r4, asr #16 @ R4=ROWr16[7] | ||||
| mlane r5, r11, r3, r5 @ R5+=W7*ROWr16[5]=b2 | mlane r5, r11, r3, r5 @ R5+=W7*ROWr16[5]=b2 | ||||
| @@ -155,7 +155,7 @@ __b_evaluation: | |||||
| rsbne r3, r3, #0 @ R3=-ROWr16[5] | rsbne r3, r3, #0 @ R3=-ROWr16[5] | ||||
| mlane r1, r8, r3, r1 @ R7-=W1*ROWr16[5]=b1 | mlane r1, r8, r3, r1 @ R7-=W1*ROWr16[5]=b1 | ||||
| @@ R3 is free now | @@ R3 is free now | ||||
| teq r4, #0 @ if null avoid muls | |||||
| teq r4, #0 @ if null avoid muls | |||||
| mlane r0, r11, r4, r0 @ R0+=W7*ROWr16[7]=b0 | mlane r0, r11, r4, r0 @ R0+=W7*ROWr16[7]=b0 | ||||
| mlane r5, r9, r4, r5 @ R5+=W3*ROWr16[7]=b2 | mlane r5, r9, r4, r5 @ R5+=W3*ROWr16[7]=b2 | ||||
| rsbne r4, r4, #0 @ R4=-ROWr16[7] | rsbne r4, r4, #0 @ R4=-ROWr16[7] | ||||
| @@ -187,7 +187,7 @@ __a_evaluation: | |||||
| teq r2, #0 | teq r2, #0 | ||||
| beq __end_bef_a_evaluation | beq __end_bef_a_evaluation | ||||
| add r2, r6, r11 @ R2=a0+W6*ROWr16[2] (a1) | |||||
| add r2, r6, r11 @ R2=a0+W6*ROWr16[2] (a1) | |||||
| mul r11, r8, r4 @ R11=W2*ROWr16[2] | mul r11, r8, r4 @ R11=W2*ROWr16[2] | ||||
| sub r4, r6, r11 @ R4=a0-W2*ROWr16[2] (a3) | sub r4, r6, r11 @ R4=a0-W2*ROWr16[2] (a3) | ||||
| add r6, r6, r11 @ R6=a0+W2*ROWr16[2] (a0) | add r6, r6, r11 @ R6=a0+W2*ROWr16[2] (a0) | ||||
| @@ -203,7 +203,7 @@ __a_evaluation: | |||||
| @@ a2 -= W4*row[4] | @@ a2 -= W4*row[4] | ||||
| @@ a3 += W4*row[4] | @@ a3 += W4*row[4] | ||||
| ldrsh r11, [r14, #8] @ R11=ROWr16[4] | ldrsh r11, [r14, #8] @ R11=ROWr16[4] | ||||
| teq r11, #0 @ if null avoid muls | |||||
| teq r11, #0 @ if null avoid muls | |||||
| mulne r11, r9, r11 @ R11=W4*ROWr16[4] | mulne r11, r9, r11 @ R11=W4*ROWr16[4] | ||||
| @@ R9 is free now | @@ R9 is free now | ||||
| ldrsh r9, [r14, #12] @ R9=ROWr16[6] | ldrsh r9, [r14, #12] @ R9=ROWr16[6] | ||||
| @@ -212,7 +212,7 @@ __a_evaluation: | |||||
| subne r3, r3, r11 @ R3-=W4*ROWr16[4] (a2) | subne r3, r3, r11 @ R3-=W4*ROWr16[4] (a2) | ||||
| addne r4, r4, r11 @ R4+=W4*ROWr16[4] (a3) | addne r4, r4, r11 @ R4+=W4*ROWr16[4] (a3) | ||||
| @@ W6 alone is no more useful, save W2*ROWr16[6] in it instead | @@ W6 alone is no more useful, save W2*ROWr16[6] in it instead | ||||
| teq r9, #0 @ if null avoid muls | |||||
| teq r9, #0 @ if null avoid muls | |||||
| mulne r11, r10, r9 @ R11=W6*ROWr16[6] | mulne r11, r10, r9 @ R11=W6*ROWr16[6] | ||||
| addne r6, r6, r11 @ R6+=W6*ROWr16[6] (a0) | addne r6, r6, r11 @ R6+=W6*ROWr16[6] (a0) | ||||
| mulne r10, r8, r9 @ R10=W2*ROWr16[6] | mulne r10, r8, r9 @ R10=W2*ROWr16[6] | ||||
| @@ -294,165 +294,165 @@ __end_row_loop: | |||||
| @@ at this point, R0=block, R1-R11 (free) | |||||
| @@ R12=__const_ptr_, R14=&block[n] | |||||
| add r14, r0, #14 @ R14=&block[7], better start from the last col, and decrease the value until col=0, i.e. R14=block. | |||||
| @@ at this point, R0=block, R1-R11 (free) | |||||
| @@ R12=__const_ptr_, R14=&block[n] | |||||
| add r14, r0, #14 @ R14=&block[7], better start from the last col, and decrease the value until col=0, i.e. R14=block. | |||||
| __col_loop: | __col_loop: | ||||
| __b_evaluation2: | __b_evaluation2: | ||||
| @@ at this point, R0=block (temp), R1-R11 (free) | |||||
| @@ R12=__const_ptr_, R14=&block[n] | |||||
| @@ proceed with b0-b3 first, followed by a0-a3 | |||||
| @@ MUL16(b0, W1, col[8x1]); | |||||
| @@ MUL16(b1, W3, col[8x1]); | |||||
| @@ MUL16(b2, W5, col[8x1]); | |||||
| @@ MUL16(b3, W7, col[8x1]); | |||||
| @@ MAC16(b0, W3, col[8x3]); | |||||
| @@ MAC16(b1, -W7, col[8x3]); | |||||
| @@ MAC16(b2, -W1, col[8x3]); | |||||
| @@ MAC16(b3, -W5, col[8x3]); | |||||
| ldr r8, [r12, #offW1] @ R8=W1 | |||||
| ldrsh r7, [r14, #16] | |||||
| mul r0, r8, r7 @ R0=W1*ROWr16[1]=b0 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle) | |||||
| ldr r9, [r12, #offW3] @ R9=W3 | |||||
| ldr r10, [r12, #offW5] @ R10=W5 | |||||
| mul r1, r9, r7 @ R1=W3*ROWr16[1]=b1 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle) | |||||
| ldr r11, [r12, #offW7] @ R11=W7 | |||||
| mul r5, r10, r7 @ R5=W5*ROWr16[1]=b2 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle) | |||||
| ldrsh r2, [r14, #48] | |||||
| mul r7, r11, r7 @ R7=W7*ROWr16[1]=b3 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle) | |||||
| teq r2, #0 @ if 0, then avoid muls | |||||
| mlane r0, r9, r2, r0 @ R0+=W3*ROWr16[3]=b0 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle) | |||||
| rsbne r2, r2, #0 @ R2=-ROWr16[3] | |||||
| mlane r1, r11, r2, r1 @ R1-=W7*ROWr16[3]=b1 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle) | |||||
| mlane r5, r8, r2, r5 @ R5-=W1*ROWr16[3]=b2 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle) | |||||
| mlane r7, r10, r2, r7 @ R7-=W5*ROWr16[3]=b3 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle) | |||||
| @@ at this point, R0=b0, R1=b1, R2 (free), R3 (free), R4 (free), | |||||
| @@ R5=b2, R6 (free), R7=b3, R8=W1, R9=W3, R10=W5, R11=W7, | |||||
| @@ R12=__const_ptr_, R14=&block[n] | |||||
| @@ MAC16(b0, W5, col[5x8]); | |||||
| @@ MAC16(b2, W7, col[5x8]); | |||||
| @@ MAC16(b3, W3, col[5x8]); | |||||
| @@ MAC16(b1, -W1, col[5x8]); | |||||
| @@ MAC16(b0, W7, col[7x8]); | |||||
| @@ MAC16(b2, W3, col[7x8]); | |||||
| @@ MAC16(b3, -W1, col[7x8]); | |||||
| @@ MAC16(b1, -W5, col[7x8]); | |||||
| ldrsh r3, [r14, #80] @ R3=COLr16[5x8] | |||||
| teq r3, #0 @ if 0 then avoid muls | |||||
| mlane r0, r10, r3, r0 @ R0+=W5*ROWr16[5x8]=b0 | |||||
| mlane r5, r11, r3, r5 @ R5+=W7*ROWr16[5x8]=b2 | |||||
| mlane r7, r9, r3, r7 @ R7+=W3*ROWr16[5x8]=b3 | |||||
| rsbne r3, r3, #0 @ R3=-ROWr16[5x8] | |||||
| ldrsh r4, [r14, #112] @ R4=COLr16[7x8] | |||||
| mlane r1, r8, r3, r1 @ R7-=W1*ROWr16[5x8]=b1 | |||||
| @@ R3 is free now | |||||
| teq r4, #0 @ if 0 then avoid muls | |||||
| mlane r0, r11, r4, r0 @ R0+=W7*ROWr16[7x8]=b0 | |||||
| mlane r5, r9, r4, r5 @ R5+=W3*ROWr16[7x8]=b2 | |||||
| rsbne r4, r4, #0 @ R4=-ROWr16[7x8] | |||||
| mlane r7, r8, r4, r7 @ R7-=W1*ROWr16[7x8]=b3 | |||||
| mlane r1, r10, r4, r1 @ R1-=W5*ROWr16[7x8]=b1 | |||||
| @@ R4 is free now | |||||
| @@ at this point, R0=block (temp), R1-R11 (free) | |||||
| @@ R12=__const_ptr_, R14=&block[n] | |||||
| @@ proceed with b0-b3 first, followed by a0-a3 | |||||
| @@ MUL16(b0, W1, col[8x1]); | |||||
| @@ MUL16(b1, W3, col[8x1]); | |||||
| @@ MUL16(b2, W5, col[8x1]); | |||||
| @@ MUL16(b3, W7, col[8x1]); | |||||
| @@ MAC16(b0, W3, col[8x3]); | |||||
| @@ MAC16(b1, -W7, col[8x3]); | |||||
| @@ MAC16(b2, -W1, col[8x3]); | |||||
| @@ MAC16(b3, -W5, col[8x3]); | |||||
| ldr r8, [r12, #offW1] @ R8=W1 | |||||
| ldrsh r7, [r14, #16] | |||||
| mul r0, r8, r7 @ R0=W1*ROWr16[1]=b0 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle) | |||||
| ldr r9, [r12, #offW3] @ R9=W3 | |||||
| ldr r10, [r12, #offW5] @ R10=W5 | |||||
| mul r1, r9, r7 @ R1=W3*ROWr16[1]=b1 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle) | |||||
| ldr r11, [r12, #offW7] @ R11=W7 | |||||
| mul r5, r10, r7 @ R5=W5*ROWr16[1]=b2 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle) | |||||
| ldrsh r2, [r14, #48] | |||||
| mul r7, r11, r7 @ R7=W7*ROWr16[1]=b3 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle) | |||||
| teq r2, #0 @ if 0, then avoid muls | |||||
| mlane r0, r9, r2, r0 @ R0+=W3*ROWr16[3]=b0 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle) | |||||
| rsbne r2, r2, #0 @ R2=-ROWr16[3] | |||||
| mlane r1, r11, r2, r1 @ R1-=W7*ROWr16[3]=b1 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle) | |||||
| mlane r5, r8, r2, r5 @ R5-=W1*ROWr16[3]=b2 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle) | |||||
| mlane r7, r10, r2, r7 @ R7-=W5*ROWr16[3]=b3 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle) | |||||
| @@ at this point, R0=b0, R1=b1, R2 (free), R3 (free), R4 (free), | |||||
| @@ R5=b2, R6 (free), R7=b3, R8=W1, R9=W3, R10=W5, R11=W7, | |||||
| @@ R12=__const_ptr_, R14=&block[n] | |||||
| @@ MAC16(b0, W5, col[5x8]); | |||||
| @@ MAC16(b2, W7, col[5x8]); | |||||
| @@ MAC16(b3, W3, col[5x8]); | |||||
| @@ MAC16(b1, -W1, col[5x8]); | |||||
| @@ MAC16(b0, W7, col[7x8]); | |||||
| @@ MAC16(b2, W3, col[7x8]); | |||||
| @@ MAC16(b3, -W1, col[7x8]); | |||||
| @@ MAC16(b1, -W5, col[7x8]); | |||||
| ldrsh r3, [r14, #80] @ R3=COLr16[5x8] | |||||
| teq r3, #0 @ if 0 then avoid muls | |||||
| mlane r0, r10, r3, r0 @ R0+=W5*ROWr16[5x8]=b0 | |||||
| mlane r5, r11, r3, r5 @ R5+=W7*ROWr16[5x8]=b2 | |||||
| mlane r7, r9, r3, r7 @ R7+=W3*ROWr16[5x8]=b3 | |||||
| rsbne r3, r3, #0 @ R3=-ROWr16[5x8] | |||||
| ldrsh r4, [r14, #112] @ R4=COLr16[7x8] | |||||
| mlane r1, r8, r3, r1 @ R7-=W1*ROWr16[5x8]=b1 | |||||
| @@ R3 is free now | |||||
| teq r4, #0 @ if 0 then avoid muls | |||||
| mlane r0, r11, r4, r0 @ R0+=W7*ROWr16[7x8]=b0 | |||||
| mlane r5, r9, r4, r5 @ R5+=W3*ROWr16[7x8]=b2 | |||||
| rsbne r4, r4, #0 @ R4=-ROWr16[7x8] | |||||
| mlane r7, r8, r4, r7 @ R7-=W1*ROWr16[7x8]=b3 | |||||
| mlane r1, r10, r4, r1 @ R1-=W5*ROWr16[7x8]=b1 | |||||
| @@ R4 is free now | |||||
| __end_b_evaluation2: | __end_b_evaluation2: | ||||
| @@ at this point, R0=b0, R1=b1, R2 (free), R3 (free), R4 (free), | |||||
| @@ R5=b2, R6 (free), R7=b3, R8 (free), R9 (free), R10 (free), R11 (free), | |||||
| @@ R12=__const_ptr_, R14=&block[n] | |||||
| @@ at this point, R0=b0, R1=b1, R2 (free), R3 (free), R4 (free), | |||||
| @@ R5=b2, R6 (free), R7=b3, R8 (free), R9 (free), R10 (free), R11 (free), | |||||
| @@ R12=__const_ptr_, R14=&block[n] | |||||
| __a_evaluation2: | __a_evaluation2: | ||||
| @@ a0 = (W4 * col[8x0]) + (1 << (COL_SHIFT - 1)); | |||||
| @@ a1 = a0 + W6 * row[2]; | |||||
| @@ a2 = a0 - W6 * row[2]; | |||||
| @@ a3 = a0 - W2 * row[2]; | |||||
| @@ a0 = a0 + W2 * row[2]; | |||||
| ldrsh r6, [r14, #0] | |||||
| ldr r9, [r12, #offW4] @ R9=W4 | |||||
| mul r6, r9, r6 @ R6=W4*ROWr16[0] | |||||
| ldr r10, [r12, #offW6] @ R10=W6 | |||||
| ldrsh r4, [r14, #32] @ R4=ROWr16[2] (a3 not defined yet) | |||||
| add r6, r6, #COL_SHIFTED_1 @ R6=W4*ROWr16[0] + 1<<(COL_SHIFT-1) (a0) | |||||
| mul r11, r10, r4 @ R11=W6*ROWr16[2] | |||||
| ldr r8, [r12, #offW2] @ R8=W2 | |||||
| add r2, r6, r11 @ R2=a0+W6*ROWr16[2] (a1) | |||||
| sub r3, r6, r11 @ R3=a0-W6*ROWr16[2] (a2) | |||||
| mul r11, r8, r4 @ R11=W2*ROWr16[2] | |||||
| sub r4, r6, r11 @ R4=a0-W2*ROWr16[2] (a3) | |||||
| add r6, r6, r11 @ R6=a0+W2*ROWr16[2] (a0) | |||||
| @@ at this point, R0=b0, R1=b1, R2=a1, R3=a2, R4=a3, | |||||
| @@ R5=b2, R6=a0, R7=b3, R8=W2, R9=W4, R10=W6, R11 (free), | |||||
| @@ R12=__const_ptr_, R14=&block[n] | |||||
| @@ a0 += W4*row[4] | |||||
| @@ a1 -= W4*row[4] | |||||
| @@ a2 -= W4*row[4] | |||||
| @@ a3 += W4*row[4] | |||||
| ldrsh r11, [r14, #64] @ R11=ROWr16[4] | |||||
| teq r11, #0 @ if null avoid muls | |||||
| mulne r11, r9, r11 @ R11=W4*ROWr16[4] | |||||
| @@ R9 is free now | |||||
| addne r6, r6, r11 @ R6+=W4*ROWr16[4] (a0) | |||||
| subne r2, r2, r11 @ R2-=W4*ROWr16[4] (a1) | |||||
| subne r3, r3, r11 @ R3-=W4*ROWr16[4] (a2) | |||||
| ldrsh r9, [r14, #96] @ R9=ROWr16[6] | |||||
| addne r4, r4, r11 @ R4+=W4*ROWr16[4] (a3) | |||||
| @@ W6 alone is no more useful, save W2*ROWr16[6] in it instead | |||||
| teq r9, #0 @ if null avoid muls | |||||
| mulne r11, r10, r9 @ R11=W6*ROWr16[6] | |||||
| addne r6, r6, r11 @ R6+=W6*ROWr16[6] (a0) | |||||
| mulne r10, r8, r9 @ R10=W2*ROWr16[6] | |||||
| @@ a0 += W6*row[6]; | |||||
| @@ a3 -= W6*row[6]; | |||||
| @@ a1 -= W2*row[6]; | |||||
| @@ a2 += W2*row[6]; | |||||
| subne r4, r4, r11 @ R4-=W6*ROWr16[6] (a3) | |||||
| subne r2, r2, r10 @ R2-=W2*ROWr16[6] (a1) | |||||
| addne r3, r3, r10 @ R3+=W2*ROWr16[6] (a2) | |||||
| @@ a0 = (W4 * col[8x0]) + (1 << (COL_SHIFT - 1)); | |||||
| @@ a1 = a0 + W6 * row[2]; | |||||
| @@ a2 = a0 - W6 * row[2]; | |||||
| @@ a3 = a0 - W2 * row[2]; | |||||
| @@ a0 = a0 + W2 * row[2]; | |||||
| ldrsh r6, [r14, #0] | |||||
| ldr r9, [r12, #offW4] @ R9=W4 | |||||
| mul r6, r9, r6 @ R6=W4*ROWr16[0] | |||||
| ldr r10, [r12, #offW6] @ R10=W6 | |||||
| ldrsh r4, [r14, #32] @ R4=ROWr16[2] (a3 not defined yet) | |||||
| add r6, r6, #COL_SHIFTED_1 @ R6=W4*ROWr16[0] + 1<<(COL_SHIFT-1) (a0) | |||||
| mul r11, r10, r4 @ R11=W6*ROWr16[2] | |||||
| ldr r8, [r12, #offW2] @ R8=W2 | |||||
| add r2, r6, r11 @ R2=a0+W6*ROWr16[2] (a1) | |||||
| sub r3, r6, r11 @ R3=a0-W6*ROWr16[2] (a2) | |||||
| mul r11, r8, r4 @ R11=W2*ROWr16[2] | |||||
| sub r4, r6, r11 @ R4=a0-W2*ROWr16[2] (a3) | |||||
| add r6, r6, r11 @ R6=a0+W2*ROWr16[2] (a0) | |||||
| @@ at this point, R0=b0, R1=b1, R2=a1, R3=a2, R4=a3, | |||||
| @@ R5=b2, R6=a0, R7=b3, R8=W2, R9=W4, R10=W6, R11 (free), | |||||
| @@ R12=__const_ptr_, R14=&block[n] | |||||
| @@ a0 += W4*row[4] | |||||
| @@ a1 -= W4*row[4] | |||||
| @@ a2 -= W4*row[4] | |||||
| @@ a3 += W4*row[4] | |||||
| ldrsh r11, [r14, #64] @ R11=ROWr16[4] | |||||
| teq r11, #0 @ if null avoid muls | |||||
| mulne r11, r9, r11 @ R11=W4*ROWr16[4] | |||||
| @@ R9 is free now | |||||
| addne r6, r6, r11 @ R6+=W4*ROWr16[4] (a0) | |||||
| subne r2, r2, r11 @ R2-=W4*ROWr16[4] (a1) | |||||
| subne r3, r3, r11 @ R3-=W4*ROWr16[4] (a2) | |||||
| ldrsh r9, [r14, #96] @ R9=ROWr16[6] | |||||
| addne r4, r4, r11 @ R4+=W4*ROWr16[4] (a3) | |||||
| @@ W6 alone is no more useful, save W2*ROWr16[6] in it instead | |||||
| teq r9, #0 @ if null avoid muls | |||||
| mulne r11, r10, r9 @ R11=W6*ROWr16[6] | |||||
| addne r6, r6, r11 @ R6+=W6*ROWr16[6] (a0) | |||||
| mulne r10, r8, r9 @ R10=W2*ROWr16[6] | |||||
| @@ a0 += W6*row[6]; | |||||
| @@ a3 -= W6*row[6]; | |||||
| @@ a1 -= W2*row[6]; | |||||
| @@ a2 += W2*row[6]; | |||||
| subne r4, r4, r11 @ R4-=W6*ROWr16[6] (a3) | |||||
| subne r2, r2, r10 @ R2-=W2*ROWr16[6] (a1) | |||||
| addne r3, r3, r10 @ R3+=W2*ROWr16[6] (a2) | |||||
| __end_a_evaluation2: | __end_a_evaluation2: | ||||
| @@ at this point, R0=b0, R1=b1, R2=a1, R3=a2, R4=a3, | |||||
| @@ R5=b2, R6=a0, R7=b3, R8 (free), R9 (free), R10 (free), R11 (free), | |||||
| @@ R12=__const_ptr_, R14=&block[n] | |||||
| @@ col[0 ] = ((a0 + b0) >> COL_SHIFT); | |||||
| @@ col[8 ] = ((a1 + b1) >> COL_SHIFT); | |||||
| @@ col[16] = ((a2 + b2) >> COL_SHIFT); | |||||
| @@ col[24] = ((a3 + b3) >> COL_SHIFT); | |||||
| @@ col[32] = ((a3 - b3) >> COL_SHIFT); | |||||
| @@ col[40] = ((a2 - b2) >> COL_SHIFT); | |||||
| @@ col[48] = ((a1 - b1) >> COL_SHIFT); | |||||
| @@ col[56] = ((a0 - b0) >> COL_SHIFT); | |||||
| @@@@@ no optimisation here @@@@@ | |||||
| add r8, r6, r0 @ R8=a0+b0 | |||||
| add r9, r2, r1 @ R9=a1+b1 | |||||
| mov r8, r8, asr #COL_SHIFT | |||||
| mov r9, r9, asr #COL_SHIFT | |||||
| strh r8, [r14, #0] | |||||
| strh r9, [r14, #16] | |||||
| add r8, r3, r5 @ R8=a2+b2 | |||||
| add r9, r4, r7 @ R9=a3+b3 | |||||
| mov r8, r8, asr #COL_SHIFT | |||||
| mov r9, r9, asr #COL_SHIFT | |||||
| strh r8, [r14, #32] | |||||
| strh r9, [r14, #48] | |||||
| sub r8, r4, r7 @ R8=a3-b3 | |||||
| sub r9, r3, r5 @ R9=a2-b2 | |||||
| mov r8, r8, asr #COL_SHIFT | |||||
| mov r9, r9, asr #COL_SHIFT | |||||
| strh r8, [r14, #64] | |||||
| strh r9, [r14, #80] | |||||
| sub r8, r2, r1 @ R8=a1-b1 | |||||
| sub r9, r6, r0 @ R9=a0-b0 | |||||
| mov r8, r8, asr #COL_SHIFT | |||||
| mov r9, r9, asr #COL_SHIFT | |||||
| strh r8, [r14, #96] | |||||
| strh r9, [r14, #112] | |||||
| @@ at this point, R0=b0, R1=b1, R2=a1, R3=a2, R4=a3, | |||||
| @@ R5=b2, R6=a0, R7=b3, R8 (free), R9 (free), R10 (free), R11 (free), | |||||
| @@ R12=__const_ptr_, R14=&block[n] | |||||
| @@ col[0 ] = ((a0 + b0) >> COL_SHIFT); | |||||
| @@ col[8 ] = ((a1 + b1) >> COL_SHIFT); | |||||
| @@ col[16] = ((a2 + b2) >> COL_SHIFT); | |||||
| @@ col[24] = ((a3 + b3) >> COL_SHIFT); | |||||
| @@ col[32] = ((a3 - b3) >> COL_SHIFT); | |||||
| @@ col[40] = ((a2 - b2) >> COL_SHIFT); | |||||
| @@ col[48] = ((a1 - b1) >> COL_SHIFT); | |||||
| @@ col[56] = ((a0 - b0) >> COL_SHIFT); | |||||
| @@@@@ no optimisation here @@@@@ | |||||
| add r8, r6, r0 @ R8=a0+b0 | |||||
| add r9, r2, r1 @ R9=a1+b1 | |||||
| mov r8, r8, asr #COL_SHIFT | |||||
| mov r9, r9, asr #COL_SHIFT | |||||
| strh r8, [r14, #0] | |||||
| strh r9, [r14, #16] | |||||
| add r8, r3, r5 @ R8=a2+b2 | |||||
| add r9, r4, r7 @ R9=a3+b3 | |||||
| mov r8, r8, asr #COL_SHIFT | |||||
| mov r9, r9, asr #COL_SHIFT | |||||
| strh r8, [r14, #32] | |||||
| strh r9, [r14, #48] | |||||
| sub r8, r4, r7 @ R8=a3-b3 | |||||
| sub r9, r3, r5 @ R9=a2-b2 | |||||
| mov r8, r8, asr #COL_SHIFT | |||||
| mov r9, r9, asr #COL_SHIFT | |||||
| strh r8, [r14, #64] | |||||
| strh r9, [r14, #80] | |||||
| sub r8, r2, r1 @ R8=a1-b1 | |||||
| sub r9, r6, r0 @ R9=a0-b0 | |||||
| mov r8, r8, asr #COL_SHIFT | |||||
| mov r9, r9, asr #COL_SHIFT | |||||
| strh r8, [r14, #96] | |||||
| strh r9, [r14, #112] | |||||
| __end_col_loop: | __end_col_loop: | ||||
| @@ at this point, R0-R11 (free) | |||||
| @@ R12=__const_ptr_, R14=&block[n] | |||||
| ldr r0, [sp, #0] @ R0=block | |||||
| teq r0, r14 @ compare current &block[n] to block, when block is reached, the loop is finished. | |||||
| sub r14, r14, #2 | |||||
| bne __col_loop | |||||
| @@ at this point, R0-R11 (free) | |||||
| @@ R12=__const_ptr_, R14=&block[n] | |||||
| ldr r0, [sp, #0] @ R0=block | |||||
| teq r0, r14 @ compare current &block[n] to block, when block is reached, the loop is finished. | |||||
| sub r14, r14, #2 | |||||
| bne __col_loop | |||||
| @@ -466,15 +466,15 @@ __end_simple_idct_ARM: | |||||
| @@ kind of sub-function, here not to overload the common case. | @@ kind of sub-function, here not to overload the common case. | ||||
| __end_bef_a_evaluation: | __end_bef_a_evaluation: | ||||
| add r2, r6, r11 @ R2=a0+W6*ROWr16[2] (a1) | |||||
| add r2, r6, r11 @ R2=a0+W6*ROWr16[2] (a1) | |||||
| mul r11, r8, r4 @ R11=W2*ROWr16[2] | mul r11, r8, r4 @ R11=W2*ROWr16[2] | ||||
| sub r4, r6, r11 @ R4=a0-W2*ROWr16[2] (a3) | sub r4, r6, r11 @ R4=a0-W2*ROWr16[2] (a3) | ||||
| add r6, r6, r11 @ R6=a0+W2*ROWr16[2] (a0) | add r6, r6, r11 @ R6=a0+W2*ROWr16[2] (a0) | ||||
| bal __end_a_evaluation | |||||
| bal __end_a_evaluation | |||||
| __constant_ptr__: @@ see #defines at the beginning of the source code for values. | __constant_ptr__: @@ see #defines at the beginning of the source code for values. | ||||
| .align | |||||
| .align | |||||
| .word W1 | .word W1 | ||||
| .word W2 | .word W2 | ||||
| .word W3 | .word W3 | ||||
| @@ -15,21 +15,21 @@ extern "C" { | |||||
| #include <sys/types.h> /* size_t */ | #include <sys/types.h> /* size_t */ | ||||
| //FIXME the following 2 really dont belong in here | //FIXME the following 2 really dont belong in here | ||||
| #define FFMPEG_VERSION_INT 0x000409 | |||||
| #define FFMPEG_VERSION "CVS" | |||||
| #define FFMPEG_VERSION_INT 0x000409 | |||||
| #define FFMPEG_VERSION "CVS" | |||||
| #define AV_STRINGIFY(s) AV_TOSTRING(s) | |||||
| #define AV_STRINGIFY(s) AV_TOSTRING(s) | |||||
| #define AV_TOSTRING(s) #s | #define AV_TOSTRING(s) #s | ||||
| #define LIBAVCODEC_VERSION_INT ((51<<16)+(0<<8)+0) | |||||
| #define LIBAVCODEC_VERSION 51.0.0 | |||||
| #define LIBAVCODEC_BUILD LIBAVCODEC_VERSION_INT | |||||
| #define LIBAVCODEC_VERSION_INT ((51<<16)+(0<<8)+0) | |||||
| #define LIBAVCODEC_VERSION 51.0.0 | |||||
| #define LIBAVCODEC_BUILD LIBAVCODEC_VERSION_INT | |||||
| #define LIBAVCODEC_IDENT "Lavc" AV_STRINGIFY(LIBAVCODEC_VERSION) | |||||
| #define LIBAVCODEC_IDENT "Lavc" AV_STRINGIFY(LIBAVCODEC_VERSION) | |||||
| #define AV_NOPTS_VALUE int64_t_C(0x8000000000000000) | |||||
| #define AV_TIME_BASE 1000000 | |||||
| #define AV_TIME_BASE_Q (AVRational){1, AV_TIME_BASE} | |||||
| #define AV_NOPTS_VALUE int64_t_C(0x8000000000000000) | |||||
| #define AV_TIME_BASE 1000000 | |||||
| #define AV_TIME_BASE_Q (AVRational){1, AV_TIME_BASE} | |||||
| enum CodecID { | enum CodecID { | ||||
| CODEC_ID_NONE, | CODEC_ID_NONE, | ||||
| @@ -362,9 +362,9 @@ extern int motion_estimation_method; | |||||
| #define CODEC_FLAG2_LOCAL_HEADER 0x00000008 ///< place global headers at every keyframe instead of in extradata | #define CODEC_FLAG2_LOCAL_HEADER 0x00000008 ///< place global headers at every keyframe instead of in extradata | ||||
| /* Unsupported options : | /* Unsupported options : | ||||
| * Syntax Arithmetic coding (SAC) | |||||
| * Reference Picture Selection | |||||
| * Independant Segment Decoding */ | |||||
| * Syntax Arithmetic coding (SAC) | |||||
| * Reference Picture Selection | |||||
| * Independant Segment Decoding */ | |||||
| /* /Fx */ | /* /Fx */ | ||||
| /* codec capabilities */ | /* codec capabilities */ | ||||
| @@ -646,9 +646,9 @@ typedef struct AVPanScan{ | |||||
| */\ | */\ | ||||
| int8_t *ref_index[2]; | int8_t *ref_index[2]; | ||||
| #define FF_QSCALE_TYPE_MPEG1 0 | |||||
| #define FF_QSCALE_TYPE_MPEG2 1 | |||||
| #define FF_QSCALE_TYPE_H264 2 | |||||
| #define FF_QSCALE_TYPE_MPEG1 0 | |||||
| #define FF_QSCALE_TYPE_MPEG2 1 | |||||
| #define FF_QSCALE_TYPE_H264 2 | |||||
| #define FF_BUFFER_TYPE_INTERNAL 1 | #define FF_BUFFER_TYPE_INTERNAL 1 | ||||
| #define FF_BUFFER_TYPE_USER 2 ///< Direct rendering buffers (image is (de)allocated by user) | #define FF_BUFFER_TYPE_USER 2 ///< Direct rendering buffers (image is (de)allocated by user) | ||||
| @@ -684,9 +684,9 @@ typedef struct AVCLASS AVClass; | |||||
| struct AVCLASS { | struct AVCLASS { | ||||
| const char* class_name; | const char* class_name; | ||||
| const char* (*item_name)(void*); /* actually passing a pointer to an AVCodecContext | const char* (*item_name)(void*); /* actually passing a pointer to an AVCodecContext | ||||
| or AVFormatContext, which begin with an AVClass. | |||||
| Needed because av_log is in libavcodec and has no visibility | |||||
| of AVIn/OutputFormat */ | |||||
| or AVFormatContext, which begin with an AVClass. | |||||
| Needed because av_log is in libavcodec and has no visibility | |||||
| of AVIn/OutputFormat */ | |||||
| struct AVOption *option; | struct AVOption *option; | ||||
| }; | }; | ||||
| @@ -1252,18 +1252,18 @@ typedef struct AVCodecContext { | |||||
| * result into program crash) | * result into program crash) | ||||
| */ | */ | ||||
| unsigned dsp_mask; | unsigned dsp_mask; | ||||
| #define FF_MM_FORCE 0x80000000 /* force usage of selected flags (OR) */ | |||||
| #define FF_MM_FORCE 0x80000000 /* force usage of selected flags (OR) */ | |||||
| /* lower 16 bits - CPU features */ | /* lower 16 bits - CPU features */ | ||||
| #ifdef HAVE_MMX | #ifdef HAVE_MMX | ||||
| #define FF_MM_MMX 0x0001 /* standard MMX */ | |||||
| #define FF_MM_3DNOW 0x0004 /* AMD 3DNOW */ | |||||
| #define FF_MM_MMXEXT 0x0002 /* SSE integer functions or AMD MMX ext */ | |||||
| #define FF_MM_SSE 0x0008 /* SSE functions */ | |||||
| #define FF_MM_SSE2 0x0010 /* PIV SSE2 functions */ | |||||
| #define FF_MM_3DNOWEXT 0x0020 /* AMD 3DNowExt */ | |||||
| #define FF_MM_MMX 0x0001 /* standard MMX */ | |||||
| #define FF_MM_3DNOW 0x0004 /* AMD 3DNOW */ | |||||
| #define FF_MM_MMXEXT 0x0002 /* SSE integer functions or AMD MMX ext */ | |||||
| #define FF_MM_SSE 0x0008 /* SSE functions */ | |||||
| #define FF_MM_SSE2 0x0010 /* PIV SSE2 functions */ | |||||
| #define FF_MM_3DNOWEXT 0x0020 /* AMD 3DNowExt */ | |||||
| #endif /* HAVE_MMX */ | #endif /* HAVE_MMX */ | ||||
| #ifdef HAVE_IWMMXT | #ifdef HAVE_IWMMXT | ||||
| #define FF_MM_IWMMXT 0x0100 /* XScale IWMMXT */ | |||||
| #define FF_MM_IWMMXT 0x0100 /* XScale IWMMXT */ | |||||
| #endif /* HAVE_IWMMXT */ | #endif /* HAVE_IWMMXT */ | ||||
| /** | /** | ||||
| @@ -2223,7 +2223,7 @@ int avcodec_find_best_pix_fmt(int pix_fmt_mask, int src_pix_fmt, | |||||
| #define FF_ALPHA_TRANSP 0x0001 /* image has some totally transparent pixels */ | #define FF_ALPHA_TRANSP 0x0001 /* image has some totally transparent pixels */ | ||||
| #define FF_ALPHA_SEMI_TRANSP 0x0002 /* image has some transparent pixels */ | #define FF_ALPHA_SEMI_TRANSP 0x0002 /* image has some transparent pixels */ | ||||
| int img_get_alpha_info(const AVPicture *src, | int img_get_alpha_info(const AVPicture *src, | ||||
| int pix_fmt, int width, int height); | |||||
| int pix_fmt, int width, int height); | |||||
| /* convert among pixel formats */ | /* convert among pixel formats */ | ||||
| int img_convert(AVPicture *dst, int dst_pix_fmt, | int img_convert(AVPicture *dst, int dst_pix_fmt, | ||||
| @@ -35,20 +35,20 @@ typedef struct ThreadContext{ | |||||
| // it's odd Be never patented that :D | // it's odd Be never patented that :D | ||||
| struct benaphore { | struct benaphore { | ||||
| vint32 atom; | |||||
| sem_id sem; | |||||
| vint32 atom; | |||||
| sem_id sem; | |||||
| }; | }; | ||||
| static inline int lock_ben(struct benaphore *ben) | static inline int lock_ben(struct benaphore *ben) | ||||
| { | { | ||||
| if (atomic_add(&ben->atom, 1) > 0) | |||||
| return acquire_sem(ben->sem); | |||||
| return B_OK; | |||||
| if (atomic_add(&ben->atom, 1) > 0) | |||||
| return acquire_sem(ben->sem); | |||||
| return B_OK; | |||||
| } | } | ||||
| static inline int unlock_ben(struct benaphore *ben) | static inline int unlock_ben(struct benaphore *ben) | ||||
| { | { | ||||
| if (atomic_add(&ben->atom, -1) > 1) | |||||
| return release_sem(ben->sem); | |||||
| return B_OK; | |||||
| if (atomic_add(&ben->atom, -1) > 1) | |||||
| return release_sem(ben->sem); | |||||
| return B_OK; | |||||
| } | } | ||||
| static struct benaphore av_thread_lib_ben; | static struct benaphore av_thread_lib_ben; | ||||
| @@ -155,25 +155,25 @@ fail: | |||||
| int avcodec_thread_lock_lib(void) | int avcodec_thread_lock_lib(void) | ||||
| { | { | ||||
| return lock_ben(&av_thread_lib_ben); | |||||
| return lock_ben(&av_thread_lib_ben); | |||||
| } | } | ||||
| int avcodec_thread_unlock_lib(void) | int avcodec_thread_unlock_lib(void) | ||||
| { | { | ||||
| return unlock_ben(&av_thread_lib_ben); | |||||
| return unlock_ben(&av_thread_lib_ben); | |||||
| } | } | ||||
| /* our versions of _init and _fini (which are called by those actually from crt.o) */ | /* our versions of _init and _fini (which are called by those actually from crt.o) */ | ||||
| void initialize_after(void) | void initialize_after(void) | ||||
| { | { | ||||
| av_thread_lib_ben.atom = 0; | |||||
| av_thread_lib_ben.sem = create_sem(0, "libavcodec benaphore"); | |||||
| av_thread_lib_ben.atom = 0; | |||||
| av_thread_lib_ben.sem = create_sem(0, "libavcodec benaphore"); | |||||
| } | } | ||||
| void uninitialize_before(void) | void uninitialize_before(void) | ||||
| { | { | ||||
| delete_sem(av_thread_lib_ben.sem); | |||||
| delete_sem(av_thread_lib_ben.sem); | |||||
| } | } | ||||
| @@ -83,7 +83,7 @@ int check_marker(GetBitContext *s, const char *msg) | |||||
| { | { | ||||
| int bit= get_bits1(s); | int bit= get_bits1(s); | ||||
| if(!bit) | if(!bit) | ||||
| av_log(NULL, AV_LOG_INFO, "Marker bit missing %s\n", msg); | |||||
| av_log(NULL, AV_LOG_INFO, "Marker bit missing %s\n", msg); | |||||
| return bit; | return bit; | ||||
| } | } | ||||
| @@ -146,7 +146,7 @@ typedef struct RL_VLC_ELEM { | |||||
| # ifdef __GNUC__ | # ifdef __GNUC__ | ||||
| static inline uint32_t unaligned32(const void *v) { | static inline uint32_t unaligned32(const void *v) { | ||||
| struct Unaligned { | struct Unaligned { | ||||
| uint32_t i; | |||||
| uint32_t i; | |||||
| } __attribute__((packed)); | } __attribute__((packed)); | ||||
| return ((const struct Unaligned *) v)->i; | return ((const struct Unaligned *) v)->i; | ||||
| @@ -183,7 +183,7 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value) | |||||
| bit_buf = (bit_buf<<n) | value; | bit_buf = (bit_buf<<n) | value; | ||||
| bit_left-=n; | bit_left-=n; | ||||
| } else { | } else { | ||||
| bit_buf<<=bit_left; | |||||
| bit_buf<<=bit_left; | |||||
| bit_buf |= value >> (n - bit_left); | bit_buf |= value >> (n - bit_left); | ||||
| #ifdef UNALIGNED_STORES_ARE_BAD | #ifdef UNALIGNED_STORES_ARE_BAD | ||||
| if (3 & (intptr_t) s->buf_ptr) { | if (3 & (intptr_t) s->buf_ptr) { | ||||
| @@ -196,7 +196,7 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value) | |||||
| *(uint32_t *)s->buf_ptr = be2me_32(bit_buf); | *(uint32_t *)s->buf_ptr = be2me_32(bit_buf); | ||||
| //printf("bitbuf = %08x\n", bit_buf); | //printf("bitbuf = %08x\n", bit_buf); | ||||
| s->buf_ptr+=4; | s->buf_ptr+=4; | ||||
| bit_left+=32 - n; | |||||
| bit_left+=32 - n; | |||||
| bit_buf = value; | bit_buf = value; | ||||
| } | } | ||||
| @@ -212,21 +212,21 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value) | |||||
| # ifdef ALIGNED_BITSTREAM_WRITER | # ifdef ALIGNED_BITSTREAM_WRITER | ||||
| # if defined(ARCH_X86) || defined(ARCH_X86_64) | # if defined(ARCH_X86) || defined(ARCH_X86_64) | ||||
| asm volatile( | asm volatile( | ||||
| "movl %0, %%ecx \n\t" | |||||
| "xorl %%eax, %%eax \n\t" | |||||
| "shrdl %%cl, %1, %%eax \n\t" | |||||
| "shrl %%cl, %1 \n\t" | |||||
| "movl %0, %%ecx \n\t" | |||||
| "shrl $3, %%ecx \n\t" | |||||
| "andl $0xFFFFFFFC, %%ecx \n\t" | |||||
| "bswapl %1 \n\t" | |||||
| "orl %1, (%2, %%ecx) \n\t" | |||||
| "bswapl %%eax \n\t" | |||||
| "addl %3, %0 \n\t" | |||||
| "movl %%eax, 4(%2, %%ecx) \n\t" | |||||
| : "=&r" (s->index), "=&r" (value) | |||||
| : "r" (s->buf), "r" (n), "0" (s->index), "1" (value<<(-n)) | |||||
| : "%eax", "%ecx" | |||||
| "movl %0, %%ecx \n\t" | |||||
| "xorl %%eax, %%eax \n\t" | |||||
| "shrdl %%cl, %1, %%eax \n\t" | |||||
| "shrl %%cl, %1 \n\t" | |||||
| "movl %0, %%ecx \n\t" | |||||
| "shrl $3, %%ecx \n\t" | |||||
| "andl $0xFFFFFFFC, %%ecx \n\t" | |||||
| "bswapl %1 \n\t" | |||||
| "orl %1, (%2, %%ecx) \n\t" | |||||
| "bswapl %%eax \n\t" | |||||
| "addl %3, %0 \n\t" | |||||
| "movl %%eax, 4(%2, %%ecx) \n\t" | |||||
| : "=&r" (s->index), "=&r" (value) | |||||
| : "r" (s->buf), "r" (n), "0" (s->index), "1" (value<<(-n)) | |||||
| : "%eax", "%ecx" | |||||
| ); | ); | ||||
| # else | # else | ||||
| int index= s->index; | int index= s->index; | ||||
| @@ -243,20 +243,20 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value) | |||||
| # else //ALIGNED_BITSTREAM_WRITER | # else //ALIGNED_BITSTREAM_WRITER | ||||
| # if defined(ARCH_X86) || defined(ARCH_X86_64) | # if defined(ARCH_X86) || defined(ARCH_X86_64) | ||||
| asm volatile( | asm volatile( | ||||
| "movl $7, %%ecx \n\t" | |||||
| "andl %0, %%ecx \n\t" | |||||
| "addl %3, %%ecx \n\t" | |||||
| "negl %%ecx \n\t" | |||||
| "shll %%cl, %1 \n\t" | |||||
| "bswapl %1 \n\t" | |||||
| "movl %0, %%ecx \n\t" | |||||
| "shrl $3, %%ecx \n\t" | |||||
| "orl %1, (%%ecx, %2) \n\t" | |||||
| "addl %3, %0 \n\t" | |||||
| "movl $0, 4(%%ecx, %2) \n\t" | |||||
| : "=&r" (s->index), "=&r" (value) | |||||
| : "r" (s->buf), "r" (n), "0" (s->index), "1" (value) | |||||
| : "%ecx" | |||||
| "movl $7, %%ecx \n\t" | |||||
| "andl %0, %%ecx \n\t" | |||||
| "addl %3, %%ecx \n\t" | |||||
| "negl %%ecx \n\t" | |||||
| "shll %%cl, %1 \n\t" | |||||
| "bswapl %1 \n\t" | |||||
| "movl %0, %%ecx \n\t" | |||||
| "shrl $3, %%ecx \n\t" | |||||
| "orl %1, (%%ecx, %2) \n\t" | |||||
| "addl %3, %0 \n\t" | |||||
| "movl $0, 4(%%ecx, %2) \n\t" | |||||
| : "=&r" (s->index), "=&r" (value) | |||||
| : "r" (s->buf), "r" (n), "0" (s->index), "1" (value) | |||||
| : "%ecx" | |||||
| ); | ); | ||||
| # else | # else | ||||
| int index= s->index; | int index= s->index; | ||||
| @@ -276,9 +276,9 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value) | |||||
| static inline uint8_t* pbBufPtr(PutBitContext *s) | static inline uint8_t* pbBufPtr(PutBitContext *s) | ||||
| { | { | ||||
| #ifdef ALT_BITSTREAM_WRITER | #ifdef ALT_BITSTREAM_WRITER | ||||
| return s->buf + (s->index>>3); | |||||
| return s->buf + (s->index>>3); | |||||
| #else | #else | ||||
| return s->buf_ptr; | |||||
| return s->buf_ptr; | |||||
| #endif | #endif | ||||
| } | } | ||||
| @@ -290,10 +290,10 @@ static inline void skip_put_bytes(PutBitContext *s, int n){ | |||||
| assert((put_bits_count(s)&7)==0); | assert((put_bits_count(s)&7)==0); | ||||
| #ifdef ALT_BITSTREAM_WRITER | #ifdef ALT_BITSTREAM_WRITER | ||||
| FIXME may need some cleaning of the buffer | FIXME may need some cleaning of the buffer | ||||
| s->index += n<<3; | |||||
| s->index += n<<3; | |||||
| #else | #else | ||||
| assert(s->bit_left==32); | assert(s->bit_left==32); | ||||
| s->buf_ptr += n; | |||||
| s->buf_ptr += n; | |||||
| #endif | #endif | ||||
| } | } | ||||
| @@ -366,10 +366,10 @@ for examples see get_bits, show_bits, skip_bits, get_vlc | |||||
| static inline int unaligned32_be(const void *v) | static inline int unaligned32_be(const void *v) | ||||
| { | { | ||||
| #ifdef CONFIG_ALIGN | #ifdef CONFIG_ALIGN | ||||
| const uint8_t *p=v; | |||||
| return (((p[0]<<8) | p[1])<<16) | (p[2]<<8) | (p[3]); | |||||
| const uint8_t *p=v; | |||||
| return (((p[0]<<8) | p[1])<<16) | (p[2]<<8) | (p[3]); | |||||
| #else | #else | ||||
| return be2me_32( unaligned32(v)); //original | |||||
| return be2me_32( unaligned32(v)); //original | |||||
| #endif | #endif | ||||
| } | } | ||||
| @@ -528,8 +528,8 @@ static inline int get_bits_count(GetBitContext *s){ | |||||
| #if defined(ARCH_X86) || defined(ARCH_X86_64) | #if defined(ARCH_X86) || defined(ARCH_X86_64) | ||||
| # define SKIP_CACHE(name, gb, num)\ | # define SKIP_CACHE(name, gb, num)\ | ||||
| asm(\ | asm(\ | ||||
| "shldl %2, %1, %0 \n\t"\ | |||||
| "shll %2, %1 \n\t"\ | |||||
| "shldl %2, %1, %0 \n\t"\ | |||||
| "shll %2, %1 \n\t"\ | |||||
| : "+r" (name##_cache0), "+r" (name##_cache1)\ | : "+r" (name##_cache0), "+r" (name##_cache1)\ | ||||
| : "Ic" ((uint8_t)num)\ | : "Ic" ((uint8_t)num)\ | ||||
| ); | ); | ||||
| @@ -61,13 +61,13 @@ static int decode_frame(AVCodecContext *avctx, | |||||
| uint8_t *cb= &a->picture.data[1][ y*a->picture.linesize[1] ]; | uint8_t *cb= &a->picture.data[1][ y*a->picture.linesize[1] ]; | ||||
| uint8_t *cr= &a->picture.data[2][ y*a->picture.linesize[2] ]; | uint8_t *cr= &a->picture.data[2][ y*a->picture.linesize[2] ]; | ||||
| for(x=0; x<avctx->width; x+=4){ | for(x=0; x<avctx->width; x+=4){ | ||||
| luma[3] = get_bits(&a->gb, 5) << 3; | |||||
| luma[2] = get_bits(&a->gb, 5) << 3; | |||||
| luma[1] = get_bits(&a->gb, 5) << 3; | |||||
| luma[0] = get_bits(&a->gb, 5) << 3; | |||||
| luma+= 4; | |||||
| *(cb++) = get_bits(&a->gb, 6) << 2; | |||||
| *(cr++) = get_bits(&a->gb, 6) << 2; | |||||
| luma[3] = get_bits(&a->gb, 5) << 3; | |||||
| luma[2] = get_bits(&a->gb, 5) << 3; | |||||
| luma[1] = get_bits(&a->gb, 5) << 3; | |||||
| luma[0] = get_bits(&a->gb, 5) << 3; | |||||
| luma+= 4; | |||||
| *(cb++) = get_bits(&a->gb, 6) << 2; | |||||
| *(cr++) = get_bits(&a->gb, 6) << 2; | |||||
| } | } | ||||
| } | } | ||||
| @@ -65,14 +65,14 @@ int64_t gettime(void) | |||||
| static short idct_mmx_perm[64]; | static short idct_mmx_perm[64]; | ||||
| static short idct_simple_mmx_perm[64]={ | static short idct_simple_mmx_perm[64]={ | ||||
| 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, | |||||
| 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, | |||||
| 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, | |||||
| 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, | |||||
| 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, | |||||
| 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, | |||||
| 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, | |||||
| 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F, | |||||
| 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, | |||||
| 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, | |||||
| 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, | |||||
| 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, | |||||
| 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, | |||||
| 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, | |||||
| 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, | |||||
| 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F, | |||||
| }; | }; | ||||
| void idct_mmx_init(void) | void idct_mmx_init(void) | ||||
| @@ -81,8 +81,8 @@ void idct_mmx_init(void) | |||||
| /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */ | /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */ | ||||
| for (i = 0; i < 64; i++) { | for (i = 0; i < 64; i++) { | ||||
| idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2); | |||||
| // idct_simple_mmx_perm[i] = simple_block_permute_op(i); | |||||
| idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2); | |||||
| // idct_simple_mmx_perm[i] = simple_block_permute_op(i); | |||||
| } | } | ||||
| } | } | ||||
| @@ -151,7 +151,7 @@ void dct_error(const char *name, int is_idct, | |||||
| for(i=0;i<64;i++) | for(i=0;i<64;i++) | ||||
| block[idct_simple_mmx_perm[i]] = block1[i]; | block[idct_simple_mmx_perm[i]] = block1[i]; | ||||
| } else { | |||||
| } else { | |||||
| for(i=0; i<64; i++) | for(i=0; i<64; i++) | ||||
| block[i]= block1[i]; | block[i]= block1[i]; | ||||
| } | } | ||||
| @@ -186,9 +186,9 @@ void dct_error(const char *name, int is_idct, | |||||
| if (v > err_inf) | if (v > err_inf) | ||||
| err_inf = v; | err_inf = v; | ||||
| err2 += v * v; | err2 += v * v; | ||||
| sysErr[i] += block[i] - block1[i]; | |||||
| blockSumErr += v; | |||||
| if( abs(block[i])>maxout) maxout=abs(block[i]); | |||||
| sysErr[i] += block[i] - block1[i]; | |||||
| blockSumErr += v; | |||||
| if( abs(block[i])>maxout) maxout=abs(block[i]); | |||||
| } | } | ||||
| if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr; | if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr; | ||||
| #if 0 // print different matrix pairs | #if 0 // print different matrix pairs | ||||
| @@ -209,7 +209,7 @@ void dct_error(const char *name, int is_idct, | |||||
| #if 1 // dump systematic errors | #if 1 // dump systematic errors | ||||
| for(i=0; i<64; i++){ | for(i=0; i<64; i++){ | ||||
| if(i%8==0) printf("\n"); | |||||
| if(i%8==0) printf("\n"); | |||||
| printf("%5d ", (int)sysErr[i]); | printf("%5d ", (int)sysErr[i]); | ||||
| } | } | ||||
| printf("\n"); | printf("\n"); | ||||
| @@ -503,7 +503,7 @@ int main(int argc, char **argv) | |||||
| dct_error("XVID-MMX2", 1, ff_idct_xvid_mmx2, idct, test); | dct_error("XVID-MMX2", 1, ff_idct_xvid_mmx2, idct, test); | ||||
| // dct_error("ODIVX-C", 1, odivx_idct_c, idct); | // dct_error("ODIVX-C", 1, odivx_idct_c, idct); | ||||
| //printf(" test against odivx idct\n"); | //printf(" test against odivx idct\n"); | ||||
| // dct_error("REF", 1, idct, odivx_idct_c); | |||||
| // dct_error("REF", 1, idct, odivx_idct_c); | |||||
| // dct_error("INT", 1, j_rev_dct, odivx_idct_c); | // dct_error("INT", 1, j_rev_dct, odivx_idct_c); | ||||
| // dct_error("MMX", 1, ff_mmx_idct, odivx_idct_c); | // dct_error("MMX", 1, ff_mmx_idct, odivx_idct_c); | ||||
| // dct_error("MMXEXT", 1, ff_mmxext_idct, odivx_idct_c); | // dct_error("MMXEXT", 1, ff_mmxext_idct, odivx_idct_c); | ||||
| @@ -124,14 +124,14 @@ const uint32_t inverse[256]={ | |||||
| /* Input permutation for the simple_idct_mmx */ | /* Input permutation for the simple_idct_mmx */ | ||||
| static const uint8_t simple_mmx_permutation[64]={ | static const uint8_t simple_mmx_permutation[64]={ | ||||
| 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, | |||||
| 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, | |||||
| 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, | |||||
| 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, | |||||
| 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, | |||||
| 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, | |||||
| 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, | |||||
| 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F, | |||||
| 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, | |||||
| 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, | |||||
| 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, | |||||
| 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, | |||||
| 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, | |||||
| 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, | |||||
| 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, | |||||
| 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F, | |||||
| }; | }; | ||||
| static int pix_sum_c(uint8_t * pix, int line_size) | static int pix_sum_c(uint8_t * pix, int line_size) | ||||
| @@ -140,18 +140,18 @@ static int pix_sum_c(uint8_t * pix, int line_size) | |||||
| s = 0; | s = 0; | ||||
| for (i = 0; i < 16; i++) { | for (i = 0; i < 16; i++) { | ||||
| for (j = 0; j < 16; j += 8) { | |||||
| s += pix[0]; | |||||
| s += pix[1]; | |||||
| s += pix[2]; | |||||
| s += pix[3]; | |||||
| s += pix[4]; | |||||
| s += pix[5]; | |||||
| s += pix[6]; | |||||
| s += pix[7]; | |||||
| pix += 8; | |||||
| } | |||||
| pix += line_size - 16; | |||||
| for (j = 0; j < 16; j += 8) { | |||||
| s += pix[0]; | |||||
| s += pix[1]; | |||||
| s += pix[2]; | |||||
| s += pix[3]; | |||||
| s += pix[4]; | |||||
| s += pix[5]; | |||||
| s += pix[6]; | |||||
| s += pix[7]; | |||||
| pix += 8; | |||||
| } | |||||
| pix += line_size - 16; | |||||
| } | } | ||||
| return s; | return s; | ||||
| } | } | ||||
| @@ -163,33 +163,33 @@ static int pix_norm1_c(uint8_t * pix, int line_size) | |||||
| s = 0; | s = 0; | ||||
| for (i = 0; i < 16; i++) { | for (i = 0; i < 16; i++) { | ||||
| for (j = 0; j < 16; j += 8) { | |||||
| for (j = 0; j < 16; j += 8) { | |||||
| #if 0 | #if 0 | ||||
| s += sq[pix[0]]; | |||||
| s += sq[pix[1]]; | |||||
| s += sq[pix[2]]; | |||||
| s += sq[pix[3]]; | |||||
| s += sq[pix[4]]; | |||||
| s += sq[pix[5]]; | |||||
| s += sq[pix[6]]; | |||||
| s += sq[pix[7]]; | |||||
| s += sq[pix[0]]; | |||||
| s += sq[pix[1]]; | |||||
| s += sq[pix[2]]; | |||||
| s += sq[pix[3]]; | |||||
| s += sq[pix[4]]; | |||||
| s += sq[pix[5]]; | |||||
| s += sq[pix[6]]; | |||||
| s += sq[pix[7]]; | |||||
| #else | #else | ||||
| #if LONG_MAX > 2147483647 | #if LONG_MAX > 2147483647 | ||||
| register uint64_t x=*(uint64_t*)pix; | |||||
| s += sq[x&0xff]; | |||||
| s += sq[(x>>8)&0xff]; | |||||
| s += sq[(x>>16)&0xff]; | |||||
| s += sq[(x>>24)&0xff]; | |||||
| register uint64_t x=*(uint64_t*)pix; | |||||
| s += sq[x&0xff]; | |||||
| s += sq[(x>>8)&0xff]; | |||||
| s += sq[(x>>16)&0xff]; | |||||
| s += sq[(x>>24)&0xff]; | |||||
| s += sq[(x>>32)&0xff]; | s += sq[(x>>32)&0xff]; | ||||
| s += sq[(x>>40)&0xff]; | s += sq[(x>>40)&0xff]; | ||||
| s += sq[(x>>48)&0xff]; | s += sq[(x>>48)&0xff]; | ||||
| s += sq[(x>>56)&0xff]; | s += sq[(x>>56)&0xff]; | ||||
| #else | #else | ||||
| register uint32_t x=*(uint32_t*)pix; | |||||
| s += sq[x&0xff]; | |||||
| s += sq[(x>>8)&0xff]; | |||||
| s += sq[(x>>16)&0xff]; | |||||
| s += sq[(x>>24)&0xff]; | |||||
| register uint32_t x=*(uint32_t*)pix; | |||||
| s += sq[x&0xff]; | |||||
| s += sq[(x>>8)&0xff]; | |||||
| s += sq[(x>>16)&0xff]; | |||||
| s += sq[(x>>24)&0xff]; | |||||
| x=*(uint32_t*)(pix+4); | x=*(uint32_t*)(pix+4); | ||||
| s += sq[x&0xff]; | s += sq[x&0xff]; | ||||
| s += sq[(x>>8)&0xff]; | s += sq[(x>>8)&0xff]; | ||||
| @@ -197,9 +197,9 @@ static int pix_norm1_c(uint8_t * pix, int line_size) | |||||
| s += sq[(x>>24)&0xff]; | s += sq[(x>>24)&0xff]; | ||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| pix += 8; | |||||
| } | |||||
| pix += line_size - 16; | |||||
| pix += 8; | |||||
| } | |||||
| pix += line_size - 16; | |||||
| } | } | ||||
| return s; | return s; | ||||
| } | } | ||||
| @@ -410,7 +410,7 @@ static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int lin | |||||
| } | } | ||||
| static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1, | static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1, | ||||
| const uint8_t *s2, int stride){ | |||||
| const uint8_t *s2, int stride){ | |||||
| int i; | int i; | ||||
| /* read the pixels */ | /* read the pixels */ | ||||
| @@ -431,7 +431,7 @@ static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1, | |||||
| static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, | static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, | ||||
| int line_size) | |||||
| int line_size) | |||||
| { | { | ||||
| int i; | int i; | ||||
| uint8_t *cm = cropTbl + MAX_NEG_CROP; | uint8_t *cm = cropTbl + MAX_NEG_CROP; | ||||
| @@ -453,7 +453,7 @@ static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, | |||||
| } | } | ||||
| static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels, | static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels, | ||||
| int line_size) | |||||
| int line_size) | |||||
| { | { | ||||
| int i; | int i; | ||||
| uint8_t *cm = cropTbl + MAX_NEG_CROP; | uint8_t *cm = cropTbl + MAX_NEG_CROP; | ||||
| @@ -471,7 +471,7 @@ static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels | |||||
| } | } | ||||
| static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels, | static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels, | ||||
| int line_size) | |||||
| int line_size) | |||||
| { | { | ||||
| int i; | int i; | ||||
| uint8_t *cm = cropTbl + MAX_NEG_CROP; | uint8_t *cm = cropTbl + MAX_NEG_CROP; | ||||
| @@ -1214,7 +1214,7 @@ static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int | |||||
| int i,j; | int i,j; | ||||
| for (i=0; i < height; i++) { | for (i=0; i < height; i++) { | ||||
| for (j=0; j < width; j++) { | for (j=0; j < width; j++) { | ||||
| dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11; | |||||
| dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11; | |||||
| } | } | ||||
| src += stride; | src += stride; | ||||
| dst += stride; | dst += stride; | ||||
| @@ -1225,7 +1225,7 @@ static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int | |||||
| int i,j; | int i,j; | ||||
| for (i=0; i < height; i++) { | for (i=0; i < height; i++) { | ||||
| for (j=0; j < width; j++) { | for (j=0; j < width; j++) { | ||||
| dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11; | |||||
| dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11; | |||||
| } | } | ||||
| src += stride; | src += stride; | ||||
| dst += stride; | dst += stride; | ||||
| @@ -1236,7 +1236,7 @@ static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int | |||||
| int i,j; | int i,j; | ||||
| for (i=0; i < height; i++) { | for (i=0; i < height; i++) { | ||||
| for (j=0; j < width; j++) { | for (j=0; j < width; j++) { | ||||
| dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11; | |||||
| dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11; | |||||
| } | } | ||||
| src += stride; | src += stride; | ||||
| dst += stride; | dst += stride; | ||||
| @@ -1247,7 +1247,7 @@ static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int | |||||
| int i,j; | int i,j; | ||||
| for (i=0; i < height; i++) { | for (i=0; i < height; i++) { | ||||
| for (j=0; j < width; j++) { | for (j=0; j < width; j++) { | ||||
| dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15; | |||||
| dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15; | |||||
| } | } | ||||
| src += stride; | src += stride; | ||||
| dst += stride; | dst += stride; | ||||
| @@ -1258,7 +1258,7 @@ static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int | |||||
| int i,j; | int i,j; | ||||
| for (i=0; i < height; i++) { | for (i=0; i < height; i++) { | ||||
| for (j=0; j < width; j++) { | for (j=0; j < width; j++) { | ||||
| dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15; | |||||
| dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15; | |||||
| } | } | ||||
| src += stride; | src += stride; | ||||
| dst += stride; | dst += stride; | ||||
| @@ -1269,7 +1269,7 @@ static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int | |||||
| int i,j; | int i,j; | ||||
| for (i=0; i < height; i++) { | for (i=0; i < height; i++) { | ||||
| for (j=0; j < width; j++) { | for (j=0; j < width; j++) { | ||||
| dst[j] = (683*(src[j] + 2*src[j+stride] + 1)) >> 11; | |||||
| dst[j] = (683*(src[j] + 2*src[j+stride] + 1)) >> 11; | |||||
| } | } | ||||
| src += stride; | src += stride; | ||||
| dst += stride; | dst += stride; | ||||
| @@ -1280,7 +1280,7 @@ static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int | |||||
| int i,j; | int i,j; | ||||
| for (i=0; i < height; i++) { | for (i=0; i < height; i++) { | ||||
| for (j=0; j < width; j++) { | for (j=0; j < width; j++) { | ||||
| dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15; | |||||
| dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15; | |||||
| } | } | ||||
| src += stride; | src += stride; | ||||
| dst += stride; | dst += stride; | ||||
| @@ -1291,7 +1291,7 @@ static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int | |||||
| int i,j; | int i,j; | ||||
| for (i=0; i < height; i++) { | for (i=0; i < height; i++) { | ||||
| for (j=0; j < width; j++) { | for (j=0; j < width; j++) { | ||||
| dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15; | |||||
| dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15; | |||||
| } | } | ||||
| src += stride; | src += stride; | ||||
| dst += stride; | dst += stride; | ||||
| @@ -1311,7 +1311,7 @@ static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int | |||||
| int i,j; | int i,j; | ||||
| for (i=0; i < height; i++) { | for (i=0; i < height; i++) { | ||||
| for (j=0; j < width; j++) { | for (j=0; j < width; j++) { | ||||
| dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1; | |||||
| dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1; | |||||
| } | } | ||||
| src += stride; | src += stride; | ||||
| dst += stride; | dst += stride; | ||||
| @@ -1322,7 +1322,7 @@ static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int | |||||
| int i,j; | int i,j; | ||||
| for (i=0; i < height; i++) { | for (i=0; i < height; i++) { | ||||
| for (j=0; j < width; j++) { | for (j=0; j < width; j++) { | ||||
| dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1; | |||||
| dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1; | |||||
| } | } | ||||
| src += stride; | src += stride; | ||||
| dst += stride; | dst += stride; | ||||
| @@ -1333,7 +1333,7 @@ static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int | |||||
| int i,j; | int i,j; | ||||
| for (i=0; i < height; i++) { | for (i=0; i < height; i++) { | ||||
| for (j=0; j < width; j++) { | for (j=0; j < width; j++) { | ||||
| dst[j] = (dst[j] + ((683*(2*src[j] + src[j+stride] + 1)) >> 11) + 1) >> 1; | |||||
| dst[j] = (dst[j] + ((683*(2*src[j] + src[j+stride] + 1)) >> 11) + 1) >> 1; | |||||
| } | } | ||||
| src += stride; | src += stride; | ||||
| dst += stride; | dst += stride; | ||||
| @@ -1344,7 +1344,7 @@ static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int | |||||
| int i,j; | int i,j; | ||||
| for (i=0; i < height; i++) { | for (i=0; i < height; i++) { | ||||
| for (j=0; j < width; j++) { | for (j=0; j < width; j++) { | ||||
| dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1; | |||||
| dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1; | |||||
| } | } | ||||
| src += stride; | src += stride; | ||||
| dst += stride; | dst += stride; | ||||
| @@ -1355,7 +1355,7 @@ static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int | |||||
| int i,j; | int i,j; | ||||
| for (i=0; i < height; i++) { | for (i=0; i < height; i++) { | ||||
| for (j=0; j < width; j++) { | for (j=0; j < width; j++) { | ||||
| dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1; | |||||
| dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1; | |||||
| } | } | ||||
| src += stride; | src += stride; | ||||
| dst += stride; | dst += stride; | ||||
| @@ -1366,7 +1366,7 @@ static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int | |||||
| int i,j; | int i,j; | ||||
| for (i=0; i < height; i++) { | for (i=0; i < height; i++) { | ||||
| for (j=0; j < width; j++) { | for (j=0; j < width; j++) { | ||||
| dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+stride] + 1)) >> 11) + 1) >> 1; | |||||
| dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+stride] + 1)) >> 11) + 1) >> 1; | |||||
| } | } | ||||
| src += stride; | src += stride; | ||||
| dst += stride; | dst += stride; | ||||
| @@ -1377,7 +1377,7 @@ static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int | |||||
| int i,j; | int i,j; | ||||
| for (i=0; i < height; i++) { | for (i=0; i < height; i++) { | ||||
| for (j=0; j < width; j++) { | for (j=0; j < width; j++) { | ||||
| dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1; | |||||
| dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1; | |||||
| } | } | ||||
| src += stride; | src += stride; | ||||
| dst += stride; | dst += stride; | ||||
| @@ -1388,7 +1388,7 @@ static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int | |||||
| int i,j; | int i,j; | ||||
| for (i=0; i < height; i++) { | for (i=0; i < height; i++) { | ||||
| for (j=0; j < width; j++) { | for (j=0; j < width; j++) { | ||||
| dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1; | |||||
| dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1; | |||||
| } | } | ||||
| src += stride; | src += stride; | ||||
| dst += stride; | dst += stride; | ||||
| @@ -3666,15 +3666,15 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx) | |||||
| #ifdef CONFIG_ENCODERS | #ifdef CONFIG_ENCODERS | ||||
| if(avctx->dct_algo==FF_DCT_FASTINT) { | if(avctx->dct_algo==FF_DCT_FASTINT) { | ||||
| c->fdct = fdct_ifast; | c->fdct = fdct_ifast; | ||||
| c->fdct248 = fdct_ifast248; | |||||
| c->fdct248 = fdct_ifast248; | |||||
| } | } | ||||
| else if(avctx->dct_algo==FF_DCT_FAAN) { | else if(avctx->dct_algo==FF_DCT_FAAN) { | ||||
| c->fdct = ff_faandct; | c->fdct = ff_faandct; | ||||
| c->fdct248 = ff_faandct248; | |||||
| c->fdct248 = ff_faandct248; | |||||
| } | } | ||||
| else { | else { | ||||
| c->fdct = ff_jpeg_fdct_islow; //slow/accurate/default | c->fdct = ff_jpeg_fdct_islow; //slow/accurate/default | ||||
| c->fdct248 = ff_fdct248_islow; | |||||
| c->fdct248 = ff_fdct248_islow; | |||||
| } | } | ||||
| #endif //CONFIG_ENCODERS | #endif //CONFIG_ENCODERS | ||||
| @@ -151,7 +151,7 @@ typedef struct DSPContext { | |||||
| * global motion compensation. | * global motion compensation. | ||||
| */ | */ | ||||
| void (*gmc )(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int ox, int oy, | void (*gmc )(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int ox, int oy, | ||||
| int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height); | |||||
| int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height); | |||||
| void (*clear_blocks)(DCTELEM *blocks/*align 16*/); | void (*clear_blocks)(DCTELEM *blocks/*align 16*/); | ||||
| int (*pix_sum)(uint8_t * pix, int line_size); | int (*pix_sum)(uint8_t * pix, int line_size); | ||||
| int (*pix_norm1)(uint8_t * pix, int line_size); | int (*pix_norm1)(uint8_t * pix, int line_size); | ||||
| @@ -342,7 +342,7 @@ void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scant | |||||
| void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type); | void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type); | ||||
| #define BYTE_VEC32(c) ((c)*0x01010101UL) | |||||
| #define BYTE_VEC32(c) ((c)*0x01010101UL) | |||||
| static inline uint32_t rnd_avg32(uint32_t a, uint32_t b) | static inline uint32_t rnd_avg32(uint32_t a, uint32_t b) | ||||
| { | { | ||||
| @@ -194,7 +194,7 @@ channels_multi (int flags) | |||||
| { | { | ||||
| if (flags & DTS_LFE) | if (flags & DTS_LFE) | ||||
| return 6; | return 6; | ||||
| else if (flags & 1) /* center channel */ | |||||
| else if (flags & 1) /* center channel */ | |||||
| return 5; | return 5; | ||||
| else if ((flags & DTS_CHANNEL_MASK) == DTS_2F2R) | else if ((flags & DTS_CHANNEL_MASK) == DTS_2F2R) | ||||
| return 4; | return 4; | ||||
| @@ -84,7 +84,7 @@ static void dv_build_unquantize_tables(DVVideoContext *s, uint8_t* perm) | |||||
| j = perm[i]; | j = perm[i]; | ||||
| s->dv_idct_shift[0][0][q][j] = | s->dv_idct_shift[0][0][q][j] = | ||||
| dv_quant_shifts[q][dv_88_areas[i]] + 1; | dv_quant_shifts[q][dv_88_areas[i]] + 1; | ||||
| s->dv_idct_shift[1][0][q][j] = s->dv_idct_shift[0][0][q][j] + 1; | |||||
| s->dv_idct_shift[1][0][q][j] = s->dv_idct_shift[0][0][q][j] + 1; | |||||
| } | } | ||||
| /* 248DCT */ | /* 248DCT */ | ||||
| @@ -92,7 +92,7 @@ static void dv_build_unquantize_tables(DVVideoContext *s, uint8_t* perm) | |||||
| /* 248 table */ | /* 248 table */ | ||||
| s->dv_idct_shift[0][1][q][i] = | s->dv_idct_shift[0][1][q][i] = | ||||
| dv_quant_shifts[q][dv_248_areas[i]] + 1; | dv_quant_shifts[q][dv_248_areas[i]] + 1; | ||||
| s->dv_idct_shift[1][1][q][i] = s->dv_idct_shift[0][1][q][i] + 1; | |||||
| s->dv_idct_shift[1][1][q][i] = s->dv_idct_shift[0][1][q][i] + 1; | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| @@ -114,35 +114,35 @@ static int dvvideo_init(AVCodecContext *avctx) | |||||
| done = 1; | done = 1; | ||||
| dv_vlc_map = av_mallocz_static(DV_VLC_MAP_LEV_SIZE*DV_VLC_MAP_RUN_SIZE*sizeof(struct dv_vlc_pair)); | dv_vlc_map = av_mallocz_static(DV_VLC_MAP_LEV_SIZE*DV_VLC_MAP_RUN_SIZE*sizeof(struct dv_vlc_pair)); | ||||
| if (!dv_vlc_map) | |||||
| return -ENOMEM; | |||||
| /* dv_anchor lets each thread know its Id */ | |||||
| dv_anchor = av_malloc(12*27*sizeof(void*)); | |||||
| if (!dv_anchor) { | |||||
| return -ENOMEM; | |||||
| } | |||||
| for (i=0; i<12*27; i++) | |||||
| dv_anchor[i] = (void*)(size_t)i; | |||||
| /* it's faster to include sign bit in a generic VLC parsing scheme */ | |||||
| for (i=0, j=0; i<NB_DV_VLC; i++, j++) { | |||||
| new_dv_vlc_bits[j] = dv_vlc_bits[i]; | |||||
| new_dv_vlc_len[j] = dv_vlc_len[i]; | |||||
| new_dv_vlc_run[j] = dv_vlc_run[i]; | |||||
| new_dv_vlc_level[j] = dv_vlc_level[i]; | |||||
| if (dv_vlc_level[i]) { | |||||
| new_dv_vlc_bits[j] <<= 1; | |||||
| new_dv_vlc_len[j]++; | |||||
| j++; | |||||
| new_dv_vlc_bits[j] = (dv_vlc_bits[i] << 1) | 1; | |||||
| new_dv_vlc_len[j] = dv_vlc_len[i] + 1; | |||||
| new_dv_vlc_run[j] = dv_vlc_run[i]; | |||||
| new_dv_vlc_level[j] = -dv_vlc_level[i]; | |||||
| } | |||||
| } | |||||
| if (!dv_vlc_map) | |||||
| return -ENOMEM; | |||||
| /* dv_anchor lets each thread know its Id */ | |||||
| dv_anchor = av_malloc(12*27*sizeof(void*)); | |||||
| if (!dv_anchor) { | |||||
| return -ENOMEM; | |||||
| } | |||||
| for (i=0; i<12*27; i++) | |||||
| dv_anchor[i] = (void*)(size_t)i; | |||||
| /* it's faster to include sign bit in a generic VLC parsing scheme */ | |||||
| for (i=0, j=0; i<NB_DV_VLC; i++, j++) { | |||||
| new_dv_vlc_bits[j] = dv_vlc_bits[i]; | |||||
| new_dv_vlc_len[j] = dv_vlc_len[i]; | |||||
| new_dv_vlc_run[j] = dv_vlc_run[i]; | |||||
| new_dv_vlc_level[j] = dv_vlc_level[i]; | |||||
| if (dv_vlc_level[i]) { | |||||
| new_dv_vlc_bits[j] <<= 1; | |||||
| new_dv_vlc_len[j]++; | |||||
| j++; | |||||
| new_dv_vlc_bits[j] = (dv_vlc_bits[i] << 1) | 1; | |||||
| new_dv_vlc_len[j] = dv_vlc_len[i] + 1; | |||||
| new_dv_vlc_run[j] = dv_vlc_run[i]; | |||||
| new_dv_vlc_level[j] = -dv_vlc_level[i]; | |||||
| } | |||||
| } | |||||
| /* NOTE: as a trick, we use the fact the no codes are unused | /* NOTE: as a trick, we use the fact the no codes are unused | ||||
| to accelerate the parsing of partial codes */ | to accelerate the parsing of partial codes */ | ||||
| @@ -150,10 +150,10 @@ static int dvvideo_init(AVCodecContext *avctx) | |||||
| new_dv_vlc_len, 1, 1, new_dv_vlc_bits, 2, 2, 0); | new_dv_vlc_len, 1, 1, new_dv_vlc_bits, 2, 2, 0); | ||||
| dv_rl_vlc = av_malloc(dv_vlc.table_size * sizeof(RL_VLC_ELEM)); | dv_rl_vlc = av_malloc(dv_vlc.table_size * sizeof(RL_VLC_ELEM)); | ||||
| if (!dv_rl_vlc) { | |||||
| av_free(dv_anchor); | |||||
| return -ENOMEM; | |||||
| } | |||||
| if (!dv_rl_vlc) { | |||||
| av_free(dv_anchor); | |||||
| return -ENOMEM; | |||||
| } | |||||
| for(i = 0; i < dv_vlc.table_size; i++){ | for(i = 0; i < dv_vlc.table_size; i++){ | ||||
| int code= dv_vlc.table[i][0]; | int code= dv_vlc.table[i][0]; | ||||
| int len = dv_vlc.table[i][1]; | int len = dv_vlc.table[i][1]; | ||||
| @@ -170,49 +170,49 @@ static int dvvideo_init(AVCodecContext *avctx) | |||||
| dv_rl_vlc[i].level = level; | dv_rl_vlc[i].level = level; | ||||
| dv_rl_vlc[i].run = run; | dv_rl_vlc[i].run = run; | ||||
| } | } | ||||
| free_vlc(&dv_vlc); | |||||
| free_vlc(&dv_vlc); | |||||
| for (i = 0; i < NB_DV_VLC - 1; i++) { | |||||
| for (i = 0; i < NB_DV_VLC - 1; i++) { | |||||
| if (dv_vlc_run[i] >= DV_VLC_MAP_RUN_SIZE) | if (dv_vlc_run[i] >= DV_VLC_MAP_RUN_SIZE) | ||||
| continue; | |||||
| continue; | |||||
| #ifdef DV_CODEC_TINY_TARGET | #ifdef DV_CODEC_TINY_TARGET | ||||
| if (dv_vlc_level[i] >= DV_VLC_MAP_LEV_SIZE) | if (dv_vlc_level[i] >= DV_VLC_MAP_LEV_SIZE) | ||||
| continue; | |||||
| continue; | |||||
| #endif | #endif | ||||
| if (dv_vlc_map[dv_vlc_run[i]][dv_vlc_level[i]].size != 0) | |||||
| continue; | |||||
| if (dv_vlc_map[dv_vlc_run[i]][dv_vlc_level[i]].size != 0) | |||||
| continue; | |||||
| dv_vlc_map[dv_vlc_run[i]][dv_vlc_level[i]].vlc = dv_vlc_bits[i] << | |||||
| (!!dv_vlc_level[i]); | |||||
| dv_vlc_map[dv_vlc_run[i]][dv_vlc_level[i]].size = dv_vlc_len[i] + | |||||
| (!!dv_vlc_level[i]); | |||||
| } | |||||
| for (i = 0; i < DV_VLC_MAP_RUN_SIZE; i++) { | |||||
| dv_vlc_map[dv_vlc_run[i]][dv_vlc_level[i]].vlc = dv_vlc_bits[i] << | |||||
| (!!dv_vlc_level[i]); | |||||
| dv_vlc_map[dv_vlc_run[i]][dv_vlc_level[i]].size = dv_vlc_len[i] + | |||||
| (!!dv_vlc_level[i]); | |||||
| } | |||||
| for (i = 0; i < DV_VLC_MAP_RUN_SIZE; i++) { | |||||
| #ifdef DV_CODEC_TINY_TARGET | #ifdef DV_CODEC_TINY_TARGET | ||||
| for (j = 1; j < DV_VLC_MAP_LEV_SIZE; j++) { | |||||
| if (dv_vlc_map[i][j].size == 0) { | |||||
| dv_vlc_map[i][j].vlc = dv_vlc_map[0][j].vlc | | |||||
| (dv_vlc_map[i-1][0].vlc << (dv_vlc_map[0][j].size)); | |||||
| dv_vlc_map[i][j].size = dv_vlc_map[i-1][0].size + | |||||
| dv_vlc_map[0][j].size; | |||||
| } | |||||
| } | |||||
| for (j = 1; j < DV_VLC_MAP_LEV_SIZE; j++) { | |||||
| if (dv_vlc_map[i][j].size == 0) { | |||||
| dv_vlc_map[i][j].vlc = dv_vlc_map[0][j].vlc | | |||||
| (dv_vlc_map[i-1][0].vlc << (dv_vlc_map[0][j].size)); | |||||
| dv_vlc_map[i][j].size = dv_vlc_map[i-1][0].size + | |||||
| dv_vlc_map[0][j].size; | |||||
| } | |||||
| } | |||||
| #else | #else | ||||
| for (j = 1; j < DV_VLC_MAP_LEV_SIZE/2; j++) { | |||||
| if (dv_vlc_map[i][j].size == 0) { | |||||
| dv_vlc_map[i][j].vlc = dv_vlc_map[0][j].vlc | | |||||
| (dv_vlc_map[i-1][0].vlc << (dv_vlc_map[0][j].size)); | |||||
| dv_vlc_map[i][j].size = dv_vlc_map[i-1][0].size + | |||||
| dv_vlc_map[0][j].size; | |||||
| } | |||||
| dv_vlc_map[i][((uint16_t)(-j))&0x1ff].vlc = | |||||
| dv_vlc_map[i][j].vlc | 1; | |||||
| dv_vlc_map[i][((uint16_t)(-j))&0x1ff].size = | |||||
| dv_vlc_map[i][j].size; | |||||
| } | |||||
| for (j = 1; j < DV_VLC_MAP_LEV_SIZE/2; j++) { | |||||
| if (dv_vlc_map[i][j].size == 0) { | |||||
| dv_vlc_map[i][j].vlc = dv_vlc_map[0][j].vlc | | |||||
| (dv_vlc_map[i-1][0].vlc << (dv_vlc_map[0][j].size)); | |||||
| dv_vlc_map[i][j].size = dv_vlc_map[i-1][0].size + | |||||
| dv_vlc_map[0][j].size; | |||||
| } | |||||
| dv_vlc_map[i][((uint16_t)(-j))&0x1ff].vlc = | |||||
| dv_vlc_map[i][j].vlc | 1; | |||||
| dv_vlc_map[i][((uint16_t)(-j))&0x1ff].size = | |||||
| dv_vlc_map[i][j].size; | |||||
| } | |||||
| #endif | #endif | ||||
| } | |||||
| } | |||||
| } | } | ||||
| /* Generic DSP setup */ | /* Generic DSP setup */ | ||||
| @@ -241,7 +241,7 @@ static int dvvideo_init(AVCodecContext *avctx) | |||||
| /* FIXME: I really don't think this should be here */ | /* FIXME: I really don't think this should be here */ | ||||
| if (dv_codec_profile(avctx)) | if (dv_codec_profile(avctx)) | ||||
| avctx->pix_fmt = dv_codec_profile(avctx)->pix_fmt; | |||||
| avctx->pix_fmt = dv_codec_profile(avctx)->pix_fmt; | |||||
| avctx->coded_frame = &s->picture; | avctx->coded_frame = &s->picture; | ||||
| s->avctx= avctx; | s->avctx= avctx; | ||||
| @@ -306,9 +306,9 @@ static void dv_decode_ac(GetBitContext *gb, BlockInfo *mb, DCTELEM *block) | |||||
| /* if we must parse a partial vlc, we do it here */ | /* if we must parse a partial vlc, we do it here */ | ||||
| if (partial_bit_count > 0) { | if (partial_bit_count > 0) { | ||||
| re_cache = ((unsigned)re_cache >> partial_bit_count) | | re_cache = ((unsigned)re_cache >> partial_bit_count) | | ||||
| (mb->partial_bit_buffer << (sizeof(re_cache)*8 - partial_bit_count)); | |||||
| re_index -= partial_bit_count; | |||||
| mb->partial_bit_count = 0; | |||||
| (mb->partial_bit_buffer << (sizeof(re_cache)*8 - partial_bit_count)); | |||||
| re_index -= partial_bit_count; | |||||
| mb->partial_bit_count = 0; | |||||
| } | } | ||||
| /* get the AC coefficients until last_index is reached */ | /* get the AC coefficients until last_index is reached */ | ||||
| @@ -318,30 +318,30 @@ static void dv_decode_ac(GetBitContext *gb, BlockInfo *mb, DCTELEM *block) | |||||
| #endif | #endif | ||||
| /* our own optimized GET_RL_VLC */ | /* our own optimized GET_RL_VLC */ | ||||
| index = NEG_USR32(re_cache, TEX_VLC_BITS); | index = NEG_USR32(re_cache, TEX_VLC_BITS); | ||||
| vlc_len = dv_rl_vlc[index].len; | |||||
| vlc_len = dv_rl_vlc[index].len; | |||||
| if (vlc_len < 0) { | if (vlc_len < 0) { | ||||
| index = NEG_USR32((unsigned)re_cache << TEX_VLC_BITS, -vlc_len) + dv_rl_vlc[index].level; | index = NEG_USR32((unsigned)re_cache << TEX_VLC_BITS, -vlc_len) + dv_rl_vlc[index].level; | ||||
| vlc_len = TEX_VLC_BITS - vlc_len; | vlc_len = TEX_VLC_BITS - vlc_len; | ||||
| } | } | ||||
| level = dv_rl_vlc[index].level; | level = dv_rl_vlc[index].level; | ||||
| run = dv_rl_vlc[index].run; | |||||
| /* gotta check if we're still within gb boundaries */ | |||||
| if (re_index + vlc_len > last_index) { | |||||
| /* should be < 16 bits otherwise a codeword could have been parsed */ | |||||
| mb->partial_bit_count = last_index - re_index; | |||||
| mb->partial_bit_buffer = NEG_USR32(re_cache, mb->partial_bit_count); | |||||
| re_index = last_index; | |||||
| break; | |||||
| } | |||||
| re_index += vlc_len; | |||||
| run = dv_rl_vlc[index].run; | |||||
| /* gotta check if we're still within gb boundaries */ | |||||
| if (re_index + vlc_len > last_index) { | |||||
| /* should be < 16 bits otherwise a codeword could have been parsed */ | |||||
| mb->partial_bit_count = last_index - re_index; | |||||
| mb->partial_bit_buffer = NEG_USR32(re_cache, mb->partial_bit_count); | |||||
| re_index = last_index; | |||||
| break; | |||||
| } | |||||
| re_index += vlc_len; | |||||
| #ifdef VLC_DEBUG | #ifdef VLC_DEBUG | ||||
| printf("run=%d level=%d\n", run, level); | |||||
| printf("run=%d level=%d\n", run, level); | |||||
| #endif | #endif | ||||
| pos += run; | |||||
| if (pos >= 64) | |||||
| break; | |||||
| pos += run; | |||||
| if (pos >= 64) | |||||
| break; | |||||
| assert(level); | assert(level); | ||||
| pos1 = scan_table[pos]; | pos1 = scan_table[pos]; | ||||
| @@ -404,7 +404,7 @@ static inline void dv_decode_video_segment(DVVideoContext *s, | |||||
| block = block1; | block = block1; | ||||
| for(j = 0;j < 6; j++) { | for(j = 0;j < 6; j++) { | ||||
| last_index = block_sizes[j]; | last_index = block_sizes[j]; | ||||
| init_get_bits(&gb, buf_ptr, last_index); | |||||
| init_get_bits(&gb, buf_ptr, last_index); | |||||
| /* get the dc */ | /* get the dc */ | ||||
| dc = get_sbits(&gb, 9); | dc = get_sbits(&gb, 9); | ||||
| @@ -444,7 +444,7 @@ static inline void dv_decode_video_segment(DVVideoContext *s, | |||||
| block = block1; | block = block1; | ||||
| mb = mb1; | mb = mb1; | ||||
| init_get_bits(&gb, mb_bit_buffer, put_bits_count(&pb)); | init_get_bits(&gb, mb_bit_buffer, put_bits_count(&pb)); | ||||
| flush_put_bits(&pb); | |||||
| flush_put_bits(&pb); | |||||
| for(j = 0;j < 6; j++, block += 64, mb++) { | for(j = 0;j < 6; j++, block += 64, mb++) { | ||||
| if (mb->pos < 64 && get_bits_left(&gb) > 0) { | if (mb->pos < 64 && get_bits_left(&gb) > 0) { | ||||
| dv_decode_ac(&gb, mb, block); | dv_decode_ac(&gb, mb, block); | ||||
| @@ -456,7 +456,7 @@ static inline void dv_decode_video_segment(DVVideoContext *s, | |||||
| /* all blocks are finished, so the extra bytes can be used at | /* all blocks are finished, so the extra bytes can be used at | ||||
| the video segment level */ | the video segment level */ | ||||
| if (j >= 6) | if (j >= 6) | ||||
| bit_copy(&vs_pb, &gb); | |||||
| bit_copy(&vs_pb, &gb); | |||||
| } | } | ||||
| /* we need a pass other the whole video segment */ | /* we need a pass other the whole video segment */ | ||||
| @@ -475,8 +475,8 @@ static inline void dv_decode_video_segment(DVVideoContext *s, | |||||
| #endif | #endif | ||||
| dv_decode_ac(&gb, mb, block); | dv_decode_ac(&gb, mb, block); | ||||
| } | } | ||||
| if (mb->pos >= 64 && mb->pos < 127) | |||||
| av_log(NULL, AV_LOG_ERROR, "AC EOB marker is absent pos=%d\n", mb->pos); | |||||
| if (mb->pos >= 64 && mb->pos < 127) | |||||
| av_log(NULL, AV_LOG_ERROR, "AC EOB marker is absent pos=%d\n", mb->pos); | |||||
| block += 64; | block += 64; | ||||
| mb++; | mb++; | ||||
| } | } | ||||
| @@ -508,7 +508,7 @@ static inline void dv_decode_video_segment(DVVideoContext *s, | |||||
| if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8)) { | if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8)) { | ||||
| uint64_t aligned_pixels[64/8]; | uint64_t aligned_pixels[64/8]; | ||||
| uint8_t *pixels= (uint8_t*)aligned_pixels; | uint8_t *pixels= (uint8_t*)aligned_pixels; | ||||
| uint8_t *c_ptr, *c_ptr1, *ptr, *ptr1; | |||||
| uint8_t *c_ptr, *c_ptr1, *ptr, *ptr1; | |||||
| int x, y, linesize; | int x, y, linesize; | ||||
| /* NOTE: at end of line, the macroblock is handled as 420 */ | /* NOTE: at end of line, the macroblock is handled as 420 */ | ||||
| idct_put(pixels, 8, block); | idct_put(pixels, 8, block); | ||||
| @@ -543,21 +543,21 @@ static always_inline int dv_rl2vlc(int run, int level, int sign, uint32_t* vlc) | |||||
| int size; | int size; | ||||
| if (run < DV_VLC_MAP_RUN_SIZE && level < DV_VLC_MAP_LEV_SIZE) { | if (run < DV_VLC_MAP_RUN_SIZE && level < DV_VLC_MAP_LEV_SIZE) { | ||||
| *vlc = dv_vlc_map[run][level].vlc | sign; | *vlc = dv_vlc_map[run][level].vlc | sign; | ||||
| size = dv_vlc_map[run][level].size; | |||||
| size = dv_vlc_map[run][level].size; | |||||
| } | } | ||||
| else { | else { | ||||
| if (level < DV_VLC_MAP_LEV_SIZE) { | if (level < DV_VLC_MAP_LEV_SIZE) { | ||||
| *vlc = dv_vlc_map[0][level].vlc | sign; | |||||
| size = dv_vlc_map[0][level].size; | |||||
| } else { | |||||
| *vlc = dv_vlc_map[0][level].vlc | sign; | |||||
| size = dv_vlc_map[0][level].size; | |||||
| } else { | |||||
| *vlc = 0xfe00 | (level << 1) | sign; | *vlc = 0xfe00 | (level << 1) | sign; | ||||
| size = 16; | |||||
| } | |||||
| if (run) { | |||||
| *vlc |= ((run < 16) ? dv_vlc_map[run-1][0].vlc : | |||||
| (0x1f80 | (run - 1))) << size; | |||||
| size += (run < 16) ? dv_vlc_map[run-1][0].size : 13; | |||||
| } | |||||
| size = 16; | |||||
| } | |||||
| if (run) { | |||||
| *vlc |= ((run < 16) ? dv_vlc_map[run-1][0].vlc : | |||||
| (0x1f80 | (run - 1))) << size; | |||||
| size += (run < 16) ? dv_vlc_map[run-1][0].size : 13; | |||||
| } | |||||
| } | } | ||||
| return size; | return size; | ||||
| @@ -568,13 +568,13 @@ static always_inline int dv_rl2vlc_size(int run, int level) | |||||
| int size; | int size; | ||||
| if (run < DV_VLC_MAP_RUN_SIZE && level < DV_VLC_MAP_LEV_SIZE) { | if (run < DV_VLC_MAP_RUN_SIZE && level < DV_VLC_MAP_LEV_SIZE) { | ||||
| size = dv_vlc_map[run][level].size; | |||||
| size = dv_vlc_map[run][level].size; | |||||
| } | } | ||||
| else { | else { | ||||
| size = (level < DV_VLC_MAP_LEV_SIZE) ? dv_vlc_map[0][level].size : 16; | |||||
| if (run) { | |||||
| size += (run < 16) ? dv_vlc_map[run-1][0].size : 13; | |||||
| } | |||||
| size = (level < DV_VLC_MAP_LEV_SIZE) ? dv_vlc_map[0][level].size : 16; | |||||
| if (run) { | |||||
| size += (run < 16) ? dv_vlc_map[run-1][0].size : 13; | |||||
| } | |||||
| } | } | ||||
| return size; | return size; | ||||
| } | } | ||||
| @@ -620,14 +620,14 @@ static always_inline PutBitContext* dv_encode_ac(EncBlockInfo* bi, PutBitContext | |||||
| for (; size > (bits_left = put_bits_left(pb)); pb++) { | for (; size > (bits_left = put_bits_left(pb)); pb++) { | ||||
| if (bits_left) { | if (bits_left) { | ||||
| size -= bits_left; | size -= bits_left; | ||||
| put_bits(pb, bits_left, vlc >> size); | |||||
| vlc = vlc & ((1<<size)-1); | |||||
| } | |||||
| if (pb + 1 >= pb_end) { | |||||
| bi->partial_bit_count = size; | |||||
| bi->partial_bit_buffer = vlc; | |||||
| return pb; | |||||
| } | |||||
| put_bits(pb, bits_left, vlc >> size); | |||||
| vlc = vlc & ((1<<size)-1); | |||||
| } | |||||
| if (pb + 1 >= pb_end) { | |||||
| bi->partial_bit_count = size; | |||||
| bi->partial_bit_buffer = vlc; | |||||
| return pb; | |||||
| } | |||||
| } | } | ||||
| /* Store VLC */ | /* Store VLC */ | ||||
| @@ -712,14 +712,14 @@ static always_inline int dv_guess_dct_mode(DCTELEM *blk) { | |||||
| s = blk; | s = blk; | ||||
| for(i=0; i<7; i++) { | for(i=0; i<7; i++) { | ||||
| score88 += SC(0, 8) + SC(1, 9) + SC(2, 10) + SC(3, 11) + | score88 += SC(0, 8) + SC(1, 9) + SC(2, 10) + SC(3, 11) + | ||||
| SC(4, 12) + SC(5,13) + SC(6, 14) + SC(7, 15); | |||||
| SC(4, 12) + SC(5,13) + SC(6, 14) + SC(7, 15); | |||||
| s += 8; | s += 8; | ||||
| } | } | ||||
| /* Compute 2-4-8 score (small values give a better chance for 2-4-8 DCT) */ | /* Compute 2-4-8 score (small values give a better chance for 2-4-8 DCT) */ | ||||
| s = blk; | s = blk; | ||||
| for(i=0; i<6; i++) { | for(i=0; i<6; i++) { | ||||
| score248 += SC(0, 16) + SC(1,17) + SC(2, 18) + SC(3, 19) + | score248 += SC(0, 16) + SC(1,17) + SC(2, 18) + SC(3, 19) + | ||||
| SC(4, 20) + SC(5,21) + SC(6, 22) + SC(7, 23); | |||||
| SC(4, 20) + SC(5,21) + SC(6, 22) + SC(7, 23); | |||||
| s += 8; | s += 8; | ||||
| } | } | ||||
| @@ -736,30 +736,30 @@ static inline void dv_guess_qnos(EncBlockInfo* blks, int* qnos) | |||||
| b = blks; | b = blks; | ||||
| for (i=0; i<5; i++) { | for (i=0; i<5; i++) { | ||||
| if (!qnos[i]) | if (!qnos[i]) | ||||
| continue; | |||||
| continue; | |||||
| qnos[i]--; | |||||
| size[i] = 0; | |||||
| qnos[i]--; | |||||
| size[i] = 0; | |||||
| for (j=0; j<6; j++, b++) { | for (j=0; j<6; j++, b++) { | ||||
| for (a=0; a<4; a++) { | |||||
| if (b->area_q[a] != dv_quant_shifts[qnos[i] + dv_quant_offset[b->cno]][a]) { | |||||
| b->bit_size[a] = 1; // 4 areas 4 bits for EOB :) | |||||
| b->area_q[a]++; | |||||
| for (a=0; a<4; a++) { | |||||
| if (b->area_q[a] != dv_quant_shifts[qnos[i] + dv_quant_offset[b->cno]][a]) { | |||||
| b->bit_size[a] = 1; // 4 areas 4 bits for EOB :) | |||||
| b->area_q[a]++; | |||||
| prev= b->prev[a]; | prev= b->prev[a]; | ||||
| for (k= b->next[prev] ; k<mb_area_start[a+1]; k= b->next[k]) { | for (k= b->next[prev] ; k<mb_area_start[a+1]; k= b->next[k]) { | ||||
| b->mb[k] >>= 1; | |||||
| if (b->mb[k]) { | |||||
| b->mb[k] >>= 1; | |||||
| if (b->mb[k]) { | |||||
| b->bit_size[a] += dv_rl2vlc_size(k - prev - 1, b->mb[k]); | b->bit_size[a] += dv_rl2vlc_size(k - prev - 1, b->mb[k]); | ||||
| prev= k; | |||||
| prev= k; | |||||
| } else { | } else { | ||||
| b->next[prev] = b->next[k]; | b->next[prev] = b->next[k]; | ||||
| } | } | ||||
| } | |||||
| } | |||||
| b->prev[a+1]= prev; | b->prev[a+1]= prev; | ||||
| } | |||||
| size[i] += b->bit_size[a]; | |||||
| } | |||||
| } | |||||
| } | |||||
| size[i] += b->bit_size[a]; | |||||
| } | |||||
| } | |||||
| } | } | ||||
| } while ((vs_total_ac_bits < size[0] + size[1] + size[2] + size[3] + size[4]) && | } while ((vs_total_ac_bits < size[0] + size[1] + size[2] + size[3] + size[4]) && | ||||
| (qnos[0]|qnos[1]|qnos[2]|qnos[3]|qnos[4])); | (qnos[0]|qnos[1]|qnos[2]|qnos[3]|qnos[4])); | ||||
| @@ -797,68 +797,68 @@ static inline void dv_encode_video_segment(DVVideoContext *s, | |||||
| mb_x = v & 0xff; | mb_x = v & 0xff; | ||||
| mb_y = v >> 8; | mb_y = v >> 8; | ||||
| y_ptr = s->picture.data[0] + (mb_y * s->picture.linesize[0] * 8) + (mb_x * 8); | y_ptr = s->picture.data[0] + (mb_y * s->picture.linesize[0] * 8) + (mb_x * 8); | ||||
| c_offset = (s->sys->pix_fmt == PIX_FMT_YUV411P) ? | |||||
| ((mb_y * s->picture.linesize[1] * 8) + ((mb_x >> 2) * 8)) : | |||||
| (((mb_y >> 1) * s->picture.linesize[1] * 8) + ((mb_x >> 1) * 8)); | |||||
| do_edge_wrap = 0; | |||||
| qnos[mb_index] = 15; /* No quantization */ | |||||
| c_offset = (s->sys->pix_fmt == PIX_FMT_YUV411P) ? | |||||
| ((mb_y * s->picture.linesize[1] * 8) + ((mb_x >> 2) * 8)) : | |||||
| (((mb_y >> 1) * s->picture.linesize[1] * 8) + ((mb_x >> 1) * 8)); | |||||
| do_edge_wrap = 0; | |||||
| qnos[mb_index] = 15; /* No quantization */ | |||||
| ptr = dif + mb_index*80 + 4; | ptr = dif + mb_index*80 + 4; | ||||
| for(j = 0;j < 6; j++) { | for(j = 0;j < 6; j++) { | ||||
| if (j < 4) { /* Four Y blocks */ | if (j < 4) { /* Four Y blocks */ | ||||
| /* NOTE: at end of line, the macroblock is handled as 420 */ | |||||
| if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x < (704 / 8)) { | |||||
| /* NOTE: at end of line, the macroblock is handled as 420 */ | |||||
| if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x < (704 / 8)) { | |||||
| data = y_ptr + (j * 8); | data = y_ptr + (j * 8); | ||||
| } else { | } else { | ||||
| data = y_ptr + ((j & 1) * 8) + ((j >> 1) * 8 * s->picture.linesize[0]); | data = y_ptr + ((j & 1) * 8) + ((j >> 1) * 8 * s->picture.linesize[0]); | ||||
| } | } | ||||
| linesize = s->picture.linesize[0]; | |||||
| linesize = s->picture.linesize[0]; | |||||
| } else { /* Cr and Cb blocks */ | } else { /* Cr and Cb blocks */ | ||||
| /* don't ask Fabrice why they inverted Cb and Cr ! */ | |||||
| data = s->picture.data[6 - j] + c_offset; | |||||
| linesize = s->picture.linesize[6 - j]; | |||||
| if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8)) | |||||
| do_edge_wrap = 1; | |||||
| } | |||||
| /* Everything is set up -- now just copy data -> DCT block */ | |||||
| if (do_edge_wrap) { /* Edge wrap copy: 4x16 -> 8x8 */ | |||||
| uint8_t* d; | |||||
| DCTELEM *b = block; | |||||
| for (i=0;i<8;i++) { | |||||
| d = data + 8 * linesize; | |||||
| b[0] = data[0]; b[1] = data[1]; b[2] = data[2]; b[3] = data[3]; | |||||
| /* don't ask Fabrice why they inverted Cb and Cr ! */ | |||||
| data = s->picture.data[6 - j] + c_offset; | |||||
| linesize = s->picture.linesize[6 - j]; | |||||
| if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8)) | |||||
| do_edge_wrap = 1; | |||||
| } | |||||
| /* Everything is set up -- now just copy data -> DCT block */ | |||||
| if (do_edge_wrap) { /* Edge wrap copy: 4x16 -> 8x8 */ | |||||
| uint8_t* d; | |||||
| DCTELEM *b = block; | |||||
| for (i=0;i<8;i++) { | |||||
| d = data + 8 * linesize; | |||||
| b[0] = data[0]; b[1] = data[1]; b[2] = data[2]; b[3] = data[3]; | |||||
| b[4] = d[0]; b[5] = d[1]; b[6] = d[2]; b[7] = d[3]; | b[4] = d[0]; b[5] = d[1]; b[6] = d[2]; b[7] = d[3]; | ||||
| data += linesize; | |||||
| b += 8; | |||||
| } | |||||
| } else { /* Simple copy: 8x8 -> 8x8 */ | |||||
| s->get_pixels(block, data, linesize); | |||||
| } | |||||
| data += linesize; | |||||
| b += 8; | |||||
| } | |||||
| } else { /* Simple copy: 8x8 -> 8x8 */ | |||||
| s->get_pixels(block, data, linesize); | |||||
| } | |||||
| if(s->avctx->flags & CODEC_FLAG_INTERLACED_DCT) | if(s->avctx->flags & CODEC_FLAG_INTERLACED_DCT) | ||||
| enc_blk->dct_mode = dv_guess_dct_mode(block); | enc_blk->dct_mode = dv_guess_dct_mode(block); | ||||
| else | else | ||||
| enc_blk->dct_mode = 0; | enc_blk->dct_mode = 0; | ||||
| enc_blk->area_q[0] = enc_blk->area_q[1] = enc_blk->area_q[2] = enc_blk->area_q[3] = 0; | |||||
| enc_blk->partial_bit_count = 0; | |||||
| enc_blk->partial_bit_buffer = 0; | |||||
| enc_blk->cur_ac = 0; | |||||
| enc_blk->area_q[0] = enc_blk->area_q[1] = enc_blk->area_q[2] = enc_blk->area_q[3] = 0; | |||||
| enc_blk->partial_bit_count = 0; | |||||
| enc_blk->partial_bit_buffer = 0; | |||||
| enc_blk->cur_ac = 0; | |||||
| s->fdct[enc_blk->dct_mode](block); | |||||
| s->fdct[enc_blk->dct_mode](block); | |||||
| dv_set_class_number(block, enc_blk, | |||||
| enc_blk->dct_mode ? ff_zigzag248_direct : ff_zigzag_direct, j/4); | |||||
| dv_set_class_number(block, enc_blk, | |||||
| enc_blk->dct_mode ? ff_zigzag248_direct : ff_zigzag_direct, j/4); | |||||
| init_put_bits(pb, ptr, block_sizes[j]/8); | init_put_bits(pb, ptr, block_sizes[j]/8); | ||||
| put_bits(pb, 9, (uint16_t)(((enc_blk->mb[0] >> 3) - 1024 + 2) >> 2)); | |||||
| put_bits(pb, 1, enc_blk->dct_mode); | |||||
| put_bits(pb, 2, enc_blk->cno); | |||||
| vs_bit_size += enc_blk->bit_size[0] + enc_blk->bit_size[1] + | |||||
| enc_blk->bit_size[2] + enc_blk->bit_size[3]; | |||||
| ++enc_blk; | |||||
| ++pb; | |||||
| ptr += block_sizes[j]/8; | |||||
| put_bits(pb, 9, (uint16_t)(((enc_blk->mb[0] >> 3) - 1024 + 2) >> 2)); | |||||
| put_bits(pb, 1, enc_blk->dct_mode); | |||||
| put_bits(pb, 2, enc_blk->cno); | |||||
| vs_bit_size += enc_blk->bit_size[0] + enc_blk->bit_size[1] + | |||||
| enc_blk->bit_size[2] + enc_blk->bit_size[3]; | |||||
| ++enc_blk; | |||||
| ++pb; | |||||
| ptr += block_sizes[j]/8; | |||||
| } | } | ||||
| } | } | ||||
| @@ -898,7 +898,7 @@ static int dv_decode_mt(AVCodecContext *avctx, void* sl) | |||||
| DVVideoContext *s = avctx->priv_data; | DVVideoContext *s = avctx->priv_data; | ||||
| int slice = (size_t)sl; | int slice = (size_t)sl; | ||||
| dv_decode_video_segment(s, &s->buf[((slice/27)*6+(slice/3)+slice*5+7)*80], | dv_decode_video_segment(s, &s->buf[((slice/27)*6+(slice/3)+slice*5+7)*80], | ||||
| &s->sys->video_place[slice*5]); | |||||
| &s->sys->video_place[slice*5]); | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| @@ -907,7 +907,7 @@ static int dv_encode_mt(AVCodecContext *avctx, void* sl) | |||||
| DVVideoContext *s = avctx->priv_data; | DVVideoContext *s = avctx->priv_data; | ||||
| int slice = (size_t)sl; | int slice = (size_t)sl; | ||||
| dv_encode_video_segment(s, &s->buf[((slice/27)*6+(slice/3)+slice*5+7)*80], | dv_encode_video_segment(s, &s->buf[((slice/27)*6+(slice/3)+slice*5+7)*80], | ||||
| &s->sys->video_place[slice*5]); | |||||
| &s->sys->video_place[slice*5]); | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| @@ -940,7 +940,7 @@ static int dvvideo_decode_frame(AVCodecContext *avctx, | |||||
| s->buf = buf; | s->buf = buf; | ||||
| avctx->execute(avctx, dv_decode_mt, (void**)&dv_anchor[0], NULL, | avctx->execute(avctx, dv_decode_mt, (void**)&dv_anchor[0], NULL, | ||||
| s->sys->difseg_size * 27); | |||||
| s->sys->difseg_size * 27); | |||||
| emms_c(); | emms_c(); | ||||
| @@ -958,7 +958,7 @@ static int dvvideo_encode_frame(AVCodecContext *c, uint8_t *buf, int buf_size, | |||||
| s->sys = dv_codec_profile(c); | s->sys = dv_codec_profile(c); | ||||
| if (!s->sys) | if (!s->sys) | ||||
| return -1; | |||||
| return -1; | |||||
| if(buf_size < s->sys->frame_size) | if(buf_size < s->sys->frame_size) | ||||
| return -1; | return -1; | ||||
| @@ -969,7 +969,7 @@ static int dvvideo_encode_frame(AVCodecContext *c, uint8_t *buf, int buf_size, | |||||
| s->buf = buf; | s->buf = buf; | ||||
| c->execute(c, dv_encode_mt, (void**)&dv_anchor[0], NULL, | c->execute(c, dv_encode_mt, (void**)&dv_anchor[0], NULL, | ||||
| s->sys->difseg_size * 27); | |||||
| s->sys->difseg_size * 27); | |||||
| emms_c(); | emms_c(); | ||||
| return s->sys->frame_size; | return s->sys->frame_size; | ||||
| @@ -192,7 +192,7 @@ static void dvb_encode_rle4(uint8_t **pq, | |||||
| #define SCALEBITS 10 | #define SCALEBITS 10 | ||||
| #define ONE_HALF (1 << (SCALEBITS - 1)) | #define ONE_HALF (1 << (SCALEBITS - 1)) | ||||
| #define FIX(x) ((int) ((x) * (1<<SCALEBITS) + 0.5)) | |||||
| #define FIX(x) ((int) ((x) * (1<<SCALEBITS) + 0.5)) | |||||
| #define RGB_TO_Y_CCIR(r, g, b) \ | #define RGB_TO_Y_CCIR(r, g, b) \ | ||||
| ((FIX(0.29900*219.0/255.0) * (r) + FIX(0.58700*219.0/255.0) * (g) + \ | ((FIX(0.29900*219.0/255.0) * (r) + FIX(0.58700*219.0/255.0) * (g) + \ | ||||
| @@ -108,8 +108,8 @@ static void filter181(int16_t *data, int width, int height, int stride){ | |||||
| /** | /** | ||||
| * guess the dc of blocks which dont have a undamaged dc | * guess the dc of blocks which dont have a undamaged dc | ||||
| * @param w width in 8 pixel blocks | |||||
| * @param h height in 8 pixel blocks | |||||
| * @param w width in 8 pixel blocks | |||||
| * @param h height in 8 pixel blocks | |||||
| */ | */ | ||||
| static void guess_dc(MpegEncContext *s, int16_t *dc, int w, int h, int stride, int is_luma){ | static void guess_dc(MpegEncContext *s, int16_t *dc, int w, int h, int stride, int is_luma){ | ||||
| int b_x, b_y; | int b_x, b_y; | ||||
| @@ -192,8 +192,8 @@ static void guess_dc(MpegEncContext *s, int16_t *dc, int w, int h, int stride, i | |||||
| /** | /** | ||||
| * simple horizontal deblocking filter used for error resilience | * simple horizontal deblocking filter used for error resilience | ||||
| * @param w width in 8 pixel blocks | |||||
| * @param h height in 8 pixel blocks | |||||
| * @param w width in 8 pixel blocks | |||||
| * @param h height in 8 pixel blocks | |||||
| */ | */ | ||||
| static void h_block_filter(MpegEncContext *s, uint8_t *dst, int w, int h, int stride, int is_luma){ | static void h_block_filter(MpegEncContext *s, uint8_t *dst, int w, int h, int stride, int is_luma){ | ||||
| int b_x, b_y; | int b_x, b_y; | ||||
| @@ -252,8 +252,8 @@ static void h_block_filter(MpegEncContext *s, uint8_t *dst, int w, int h, int st | |||||
| /** | /** | ||||
| * simple vertical deblocking filter used for error resilience | * simple vertical deblocking filter used for error resilience | ||||
| * @param w width in 8 pixel blocks | |||||
| * @param h height in 8 pixel blocks | |||||
| * @param w width in 8 pixel blocks | |||||
| * @param h height in 8 pixel blocks | |||||
| */ | */ | ||||
| static void v_block_filter(MpegEncContext *s, uint8_t *dst, int w, int h, int stride, int is_luma){ | static void v_block_filter(MpegEncContext *s, uint8_t *dst, int w, int h, int stride, int is_luma){ | ||||
| int b_x, b_y; | int b_x, b_y; | ||||
| @@ -348,7 +348,7 @@ static void guess_mv(MpegEncContext *s){ | |||||
| s->mv_type = MV_TYPE_16X16; | s->mv_type = MV_TYPE_16X16; | ||||
| s->mb_skipped=0; | s->mb_skipped=0; | ||||
| s->dsp.clear_blocks(s->block[0]); | |||||
| s->dsp.clear_blocks(s->block[0]); | |||||
| s->mb_x= mb_x; | s->mb_x= mb_x; | ||||
| s->mb_y= mb_y; | s->mb_y= mb_y; | ||||
| @@ -476,7 +476,7 @@ int score_sum=0; | |||||
| s->mv_type = MV_TYPE_16X16; | s->mv_type = MV_TYPE_16X16; | ||||
| s->mb_skipped=0; | s->mb_skipped=0; | ||||
| s->dsp.clear_blocks(s->block[0]); | |||||
| s->dsp.clear_blocks(s->block[0]); | |||||
| s->mb_x= mb_x; | s->mb_x= mb_x; | ||||
| s->mb_y= mb_y; | s->mb_y= mb_y; | ||||
| @@ -582,7 +582,7 @@ static int is_intra_more_likely(MpegEncContext *s){ | |||||
| uint8_t *mb_ptr = s->current_picture.data[0] + mb_x*16 + mb_y*16*s->linesize; | uint8_t *mb_ptr = s->current_picture.data[0] + mb_x*16 + mb_y*16*s->linesize; | ||||
| uint8_t *last_mb_ptr= s->last_picture.data [0] + mb_x*16 + mb_y*16*s->linesize; | uint8_t *last_mb_ptr= s->last_picture.data [0] + mb_x*16 + mb_y*16*s->linesize; | ||||
| is_intra_likely += s->dsp.sad[0](NULL, last_mb_ptr, mb_ptr , s->linesize, 16); | |||||
| is_intra_likely += s->dsp.sad[0](NULL, last_mb_ptr, mb_ptr , s->linesize, 16); | |||||
| is_intra_likely -= s->dsp.sad[0](NULL, last_mb_ptr, last_mb_ptr+s->linesize*16, s->linesize, 16); | is_intra_likely -= s->dsp.sad[0](NULL, last_mb_ptr, last_mb_ptr+s->linesize*16, s->linesize, 16); | ||||
| }else{ | }else{ | ||||
| if(IS_INTRA(s->current_picture.mb_type[mb_xy])) | if(IS_INTRA(s->current_picture.mb_type[mb_xy])) | ||||
| @@ -873,7 +873,7 @@ void ff_er_frame_end(MpegEncContext *s){ | |||||
| s->mv[0][0][1] = s->current_picture.motion_val[0][ mb_x*2 + mb_y*2*s->b8_stride ][1]; | s->mv[0][0][1] = s->current_picture.motion_val[0][ mb_x*2 + mb_y*2*s->b8_stride ][1]; | ||||
| } | } | ||||
| s->dsp.clear_blocks(s->block[0]); | |||||
| s->dsp.clear_blocks(s->block[0]); | |||||
| s->mb_x= mb_x; | s->mb_x= mb_x; | ||||
| s->mb_y= mb_y; | s->mb_y= mb_y; | ||||
| @@ -46,7 +46,7 @@ static int Faac_encode_init(AVCodecContext *avctx) | |||||
| /* check faac version */ | /* check faac version */ | ||||
| faac_cfg = faacEncGetCurrentConfiguration(s->faac_handle); | faac_cfg = faacEncGetCurrentConfiguration(s->faac_handle); | ||||
| if (faac_cfg->version != FAAC_CFG_VERSION) { | if (faac_cfg->version != FAAC_CFG_VERSION) { | ||||
| av_log(avctx, AV_LOG_ERROR, "wrong libfaac version (compiled for: %d, using %d)\n", FAAC_CFG_VERSION, faac_cfg->version); | |||||
| av_log(avctx, AV_LOG_ERROR, "wrong libfaac version (compiled for: %d, using %d)\n", FAAC_CFG_VERSION, faac_cfg->version); | |||||
| faacEncClose(s->faac_handle); | faacEncClose(s->faac_handle); | ||||
| return -1; | return -1; | ||||
| } | } | ||||
| @@ -47,8 +47,8 @@ static const char* libfaadname = "libfaad.so.0"; | |||||
| #endif | #endif | ||||
| typedef struct { | typedef struct { | ||||
| void* handle; /* dlopen handle */ | |||||
| void* faac_handle; /* FAAD library handle */ | |||||
| void* handle; /* dlopen handle */ | |||||
| void* faac_handle; /* FAAD library handle */ | |||||
| int frame_size; | int frame_size; | ||||
| int sample_size; | int sample_size; | ||||
| int flags; | int flags; | ||||
| @@ -57,36 +57,36 @@ typedef struct { | |||||
| faacDecHandle FAADAPI (*faacDecOpen)(void); | faacDecHandle FAADAPI (*faacDecOpen)(void); | ||||
| faacDecConfigurationPtr FAADAPI (*faacDecGetCurrentConfiguration)(faacDecHandle hDecoder); | faacDecConfigurationPtr FAADAPI (*faacDecGetCurrentConfiguration)(faacDecHandle hDecoder); | ||||
| #ifndef FAAD2_VERSION | #ifndef FAAD2_VERSION | ||||
| int FAADAPI (*faacDecSetConfiguration)(faacDecHandle hDecoder, | |||||
| int FAADAPI (*faacDecSetConfiguration)(faacDecHandle hDecoder, | |||||
| faacDecConfigurationPtr config); | faacDecConfigurationPtr config); | ||||
| int FAADAPI (*faacDecInit)(faacDecHandle hDecoder, | |||||
| unsigned char *buffer, | |||||
| unsigned long *samplerate, | |||||
| unsigned long *channels); | |||||
| int FAADAPI (*faacDecInit2)(faacDecHandle hDecoder, unsigned char *pBuffer, | |||||
| int FAADAPI (*faacDecInit)(faacDecHandle hDecoder, | |||||
| unsigned char *buffer, | |||||
| unsigned long *samplerate, | |||||
| unsigned long *channels); | |||||
| int FAADAPI (*faacDecInit2)(faacDecHandle hDecoder, unsigned char *pBuffer, | |||||
| unsigned long SizeOfDecoderSpecificInfo, | unsigned long SizeOfDecoderSpecificInfo, | ||||
| unsigned long *samplerate, unsigned long *channels); | unsigned long *samplerate, unsigned long *channels); | ||||
| int FAADAPI (*faacDecDecode)(faacDecHandle hDecoder, | |||||
| unsigned char *buffer, | |||||
| unsigned long *bytesconsumed, | |||||
| short *sample_buffer, | |||||
| int FAADAPI (*faacDecDecode)(faacDecHandle hDecoder, | |||||
| unsigned char *buffer, | |||||
| unsigned long *bytesconsumed, | |||||
| short *sample_buffer, | |||||
| unsigned long *samples); | unsigned long *samples); | ||||
| #else | #else | ||||
| unsigned char FAADAPI (*faacDecSetConfiguration)(faacDecHandle hDecoder, | |||||
| unsigned char FAADAPI (*faacDecSetConfiguration)(faacDecHandle hDecoder, | |||||
| faacDecConfigurationPtr config); | faacDecConfigurationPtr config); | ||||
| long FAADAPI (*faacDecInit)(faacDecHandle hDecoder, | |||||
| unsigned char *buffer, | |||||
| unsigned long buffer_size, | |||||
| unsigned long *samplerate, | |||||
| unsigned char *channels); | |||||
| char FAADAPI (*faacDecInit2)(faacDecHandle hDecoder, unsigned char *pBuffer, | |||||
| long FAADAPI (*faacDecInit)(faacDecHandle hDecoder, | |||||
| unsigned char *buffer, | |||||
| unsigned long buffer_size, | |||||
| unsigned long *samplerate, | |||||
| unsigned char *channels); | |||||
| char FAADAPI (*faacDecInit2)(faacDecHandle hDecoder, unsigned char *pBuffer, | |||||
| unsigned long SizeOfDecoderSpecificInfo, | unsigned long SizeOfDecoderSpecificInfo, | ||||
| unsigned long *samplerate, unsigned char *channels); | unsigned long *samplerate, unsigned char *channels); | ||||
| void *FAADAPI (*faacDecDecode)(faacDecHandle hDecoder, | |||||
| faacDecFrameInfo *hInfo, | |||||
| unsigned char *buffer, | |||||
| unsigned long buffer_size); | |||||
| char* FAADAPI (*faacDecGetErrorMessage)(unsigned char errcode); | |||||
| void *FAADAPI (*faacDecDecode)(faacDecHandle hDecoder, | |||||
| faacDecFrameInfo *hInfo, | |||||
| unsigned char *buffer, | |||||
| unsigned long buffer_size); | |||||
| char* FAADAPI (*faacDecGetErrorMessage)(unsigned char errcode); | |||||
| #endif | #endif | ||||
| void FAADAPI (*faacDecClose)(faacDecHandle hDecoder); | void FAADAPI (*faacDecClose)(faacDecHandle hDecoder); | ||||
| @@ -112,14 +112,14 @@ static int faac_init_mp4(AVCodecContext *avctx) | |||||
| int r = 0; | int r = 0; | ||||
| if (avctx->extradata) | if (avctx->extradata) | ||||
| r = s->faacDecInit2(s->faac_handle, (uint8_t*) avctx->extradata, | |||||
| avctx->extradata_size, | |||||
| &samplerate, &channels); | |||||
| r = s->faacDecInit2(s->faac_handle, (uint8_t*) avctx->extradata, | |||||
| avctx->extradata_size, | |||||
| &samplerate, &channels); | |||||
| // else r = s->faacDecInit(s->faac_handle ... ); | // else r = s->faacDecInit(s->faac_handle ... ); | ||||
| if (r < 0) | if (r < 0) | ||||
| av_log(avctx, AV_LOG_ERROR, "faacDecInit2 failed r:%d sr:%ld ch:%ld s:%d\n", | |||||
| r, samplerate, (long)channels, avctx->extradata_size); | |||||
| av_log(avctx, AV_LOG_ERROR, "faacDecInit2 failed r:%d sr:%ld ch:%ld s:%d\n", | |||||
| r, samplerate, (long)channels, avctx->extradata_size); | |||||
| avctx->sample_rate = samplerate; | avctx->sample_rate = samplerate; | ||||
| avctx->channels = channels; | avctx->channels = channels; | ||||
| @@ -141,7 +141,7 @@ static int faac_decode_frame(AVCodecContext *avctx, | |||||
| void *out; | void *out; | ||||
| #endif | #endif | ||||
| if(buf_size == 0) | if(buf_size == 0) | ||||
| return 0; | |||||
| return 0; | |||||
| #ifndef FAAD2_VERSION | #ifndef FAAD2_VERSION | ||||
| out = s->faacDecDecode(s->faac_handle, | out = s->faacDecDecode(s->faac_handle, | ||||
| (unsigned char*)buf, | (unsigned char*)buf, | ||||
| @@ -150,16 +150,16 @@ static int faac_decode_frame(AVCodecContext *avctx, | |||||
| &samples); | &samples); | ||||
| samples *= s->sample_size; | samples *= s->sample_size; | ||||
| if (data_size) | if (data_size) | ||||
| *data_size = samples; | |||||
| *data_size = samples; | |||||
| return (buf_size < (int)bytesconsumed) | return (buf_size < (int)bytesconsumed) | ||||
| ? buf_size : (int)bytesconsumed; | |||||
| ? buf_size : (int)bytesconsumed; | |||||
| #else | #else | ||||
| out = s->faacDecDecode(s->faac_handle, &frame_info, (unsigned char*)buf, (unsigned long)buf_size); | out = s->faacDecDecode(s->faac_handle, &frame_info, (unsigned char*)buf, (unsigned long)buf_size); | ||||
| if (frame_info.error > 0) { | if (frame_info.error > 0) { | ||||
| av_log(avctx, AV_LOG_ERROR, "faac: frame decoding failed: %s\n", | |||||
| s->faacDecGetErrorMessage(frame_info.error)); | |||||
| av_log(avctx, AV_LOG_ERROR, "faac: frame decoding failed: %s\n", | |||||
| s->faacDecGetErrorMessage(frame_info.error)); | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| @@ -167,10 +167,10 @@ static int faac_decode_frame(AVCodecContext *avctx, | |||||
| memcpy(data, out, frame_info.samples); // CHECKME - can we cheat this one | memcpy(data, out, frame_info.samples); // CHECKME - can we cheat this one | ||||
| if (data_size) | if (data_size) | ||||
| *data_size = frame_info.samples; | |||||
| *data_size = frame_info.samples; | |||||
| return (buf_size < (int)frame_info.bytesconsumed) | return (buf_size < (int)frame_info.bytesconsumed) | ||||
| ? buf_size : (int)frame_info.bytesconsumed; | |||||
| ? buf_size : (int)frame_info.bytesconsumed; | |||||
| #endif | #endif | ||||
| } | } | ||||
| @@ -196,8 +196,8 @@ static int faac_decode_init(AVCodecContext *avctx) | |||||
| s->handle = dlopen(libfaadname, RTLD_LAZY); | s->handle = dlopen(libfaadname, RTLD_LAZY); | ||||
| if (!s->handle) | if (!s->handle) | ||||
| { | { | ||||
| av_log(avctx, AV_LOG_ERROR, "FAAD library: %s could not be opened! \n%s\n", | |||||
| libfaadname, dlerror()); | |||||
| av_log(avctx, AV_LOG_ERROR, "FAAD library: %s could not be opened! \n%s\n", | |||||
| libfaadname, dlerror()); | |||||
| return -1; | return -1; | ||||
| } | } | ||||
| #define dfaac(a, b) \ | #define dfaac(a, b) \ | ||||
| @@ -209,32 +209,32 @@ static int faac_decode_init(AVCodecContext *avctx) | |||||
| #endif /* CONFIG_FAADBIN */ | #endif /* CONFIG_FAADBIN */ | ||||
| // resolve all needed function calls | // resolve all needed function calls | ||||
| dfaac(Open, (faacDecHandle FAADAPI (*)(void))); | |||||
| dfaac(GetCurrentConfiguration, (faacDecConfigurationPtr | |||||
| FAADAPI (*)(faacDecHandle))); | |||||
| dfaac(Open, (faacDecHandle FAADAPI (*)(void))); | |||||
| dfaac(GetCurrentConfiguration, (faacDecConfigurationPtr | |||||
| FAADAPI (*)(faacDecHandle))); | |||||
| #ifndef FAAD2_VERSION | #ifndef FAAD2_VERSION | ||||
| dfaac(SetConfiguration, (int FAADAPI (*)(faacDecHandle, | |||||
| faacDecConfigurationPtr))); | |||||
| dfaac(SetConfiguration, (int FAADAPI (*)(faacDecHandle, | |||||
| faacDecConfigurationPtr))); | |||||
| dfaac(Init, (int FAADAPI (*)(faacDecHandle, unsigned char*, | |||||
| unsigned long*, unsigned long*))); | |||||
| dfaac(Init, (int FAADAPI (*)(faacDecHandle, unsigned char*, | |||||
| unsigned long*, unsigned long*))); | |||||
| dfaac(Init2, (int FAADAPI (*)(faacDecHandle, unsigned char*, | dfaac(Init2, (int FAADAPI (*)(faacDecHandle, unsigned char*, | ||||
| unsigned long, unsigned long*, | |||||
| unsigned long*))); | |||||
| unsigned long, unsigned long*, | |||||
| unsigned long*))); | |||||
| dfaac(Close, (void FAADAPI (*)(faacDecHandle hDecoder))); | dfaac(Close, (void FAADAPI (*)(faacDecHandle hDecoder))); | ||||
| dfaac(Decode, (int FAADAPI (*)(faacDecHandle, unsigned char*, | |||||
| unsigned long*, short*, unsigned long*))); | |||||
| dfaac(Decode, (int FAADAPI (*)(faacDecHandle, unsigned char*, | |||||
| unsigned long*, short*, unsigned long*))); | |||||
| #else | #else | ||||
| dfaac(SetConfiguration, (unsigned char FAADAPI (*)(faacDecHandle, | |||||
| faacDecConfigurationPtr))); | |||||
| dfaac(Init, (long FAADAPI (*)(faacDecHandle, unsigned char*, | |||||
| unsigned long, unsigned long*, unsigned char*))); | |||||
| dfaac(Init2, (char FAADAPI (*)(faacDecHandle, unsigned char*, | |||||
| unsigned long, unsigned long*, | |||||
| unsigned char*))); | |||||
| dfaac(Decode, (void *FAADAPI (*)(faacDecHandle, faacDecFrameInfo*, | |||||
| unsigned char*, unsigned long))); | |||||
| dfaac(GetErrorMessage, (char* FAADAPI (*)(unsigned char))); | |||||
| dfaac(SetConfiguration, (unsigned char FAADAPI (*)(faacDecHandle, | |||||
| faacDecConfigurationPtr))); | |||||
| dfaac(Init, (long FAADAPI (*)(faacDecHandle, unsigned char*, | |||||
| unsigned long, unsigned long*, unsigned char*))); | |||||
| dfaac(Init2, (char FAADAPI (*)(faacDecHandle, unsigned char*, | |||||
| unsigned long, unsigned long*, | |||||
| unsigned char*))); | |||||
| dfaac(Decode, (void *FAADAPI (*)(faacDecHandle, faacDecFrameInfo*, | |||||
| unsigned char*, unsigned long))); | |||||
| dfaac(GetErrorMessage, (char* FAADAPI (*)(unsigned char))); | |||||
| #endif | #endif | ||||
| #undef dfacc | #undef dfacc | ||||
| @@ -243,8 +243,8 @@ static int faac_decode_init(AVCodecContext *avctx) | |||||
| } | } | ||||
| if (err) { | if (err) { | ||||
| dlclose(s->handle); | dlclose(s->handle); | ||||
| av_log(avctx, AV_LOG_ERROR, "FAAD library: cannot resolve %s in %s!\n", | |||||
| err, libfaadname); | |||||
| av_log(avctx, AV_LOG_ERROR, "FAAD library: cannot resolve %s in %s!\n", | |||||
| err, libfaadname); | |||||
| return -1; | return -1; | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -260,31 +260,31 @@ static int faac_decode_init(AVCodecContext *avctx) | |||||
| faac_cfg = s->faacDecGetCurrentConfiguration(s->faac_handle); | faac_cfg = s->faacDecGetCurrentConfiguration(s->faac_handle); | ||||
| if (faac_cfg) { | if (faac_cfg) { | ||||
| switch (avctx->bits_per_sample) { | |||||
| case 8: av_log(avctx, AV_LOG_ERROR, "FAADlib unsupported bps %d\n", avctx->bits_per_sample); break; | |||||
| default: | |||||
| case 16: | |||||
| switch (avctx->bits_per_sample) { | |||||
| case 8: av_log(avctx, AV_LOG_ERROR, "FAADlib unsupported bps %d\n", avctx->bits_per_sample); break; | |||||
| default: | |||||
| case 16: | |||||
| #ifdef FAAD2_VERSION | #ifdef FAAD2_VERSION | ||||
| faac_cfg->outputFormat = FAAD_FMT_16BIT; | |||||
| faac_cfg->outputFormat = FAAD_FMT_16BIT; | |||||
| #endif | #endif | ||||
| s->sample_size = 2; | |||||
| break; | |||||
| case 24: | |||||
| s->sample_size = 2; | |||||
| break; | |||||
| case 24: | |||||
| #ifdef FAAD2_VERSION | #ifdef FAAD2_VERSION | ||||
| faac_cfg->outputFormat = FAAD_FMT_24BIT; | |||||
| faac_cfg->outputFormat = FAAD_FMT_24BIT; | |||||
| #endif | #endif | ||||
| s->sample_size = 3; | |||||
| break; | |||||
| case 32: | |||||
| s->sample_size = 3; | |||||
| break; | |||||
| case 32: | |||||
| #ifdef FAAD2_VERSION | #ifdef FAAD2_VERSION | ||||
| faac_cfg->outputFormat = FAAD_FMT_32BIT; | |||||
| faac_cfg->outputFormat = FAAD_FMT_32BIT; | |||||
| #endif | #endif | ||||
| s->sample_size = 4; | |||||
| break; | |||||
| } | |||||
| s->sample_size = 4; | |||||
| break; | |||||
| } | |||||
| faac_cfg->defSampleRate = (!avctx->sample_rate) ? 44100 : avctx->sample_rate; | |||||
| faac_cfg->defObjectType = LC; | |||||
| faac_cfg->defSampleRate = (!avctx->sample_rate) ? 44100 : avctx->sample_rate; | |||||
| faac_cfg->defObjectType = LC; | |||||
| } | } | ||||
| s->faacDecSetConfiguration(s->faac_handle, faac_cfg); | s->faacDecSetConfiguration(s->faac_handle, faac_cfg); | ||||
| @@ -204,15 +204,15 @@ void ff_faandct248(DCTELEM * data) | |||||
| data[8*6 + i] = lrintf(SCALE(8*6 + i) * (tmp13 - z1)); | data[8*6 + i] = lrintf(SCALE(8*6 + i) * (tmp13 - z1)); | ||||
| tmp10 = tmp4 + tmp7; | tmp10 = tmp4 + tmp7; | ||||
| tmp11 = tmp5 + tmp6; | |||||
| tmp12 = tmp5 - tmp6; | |||||
| tmp13 = tmp4 - tmp7; | |||||
| tmp11 = tmp5 + tmp6; | |||||
| tmp12 = tmp5 - tmp6; | |||||
| tmp13 = tmp4 - tmp7; | |||||
| data[8*1 + i] = lrintf(SCALE(8*0 + i) * (tmp10 + tmp11)); | |||||
| data[8*5 + i] = lrintf(SCALE(8*4 + i) * (tmp10 - tmp11)); | |||||
| data[8*1 + i] = lrintf(SCALE(8*0 + i) * (tmp10 + tmp11)); | |||||
| data[8*5 + i] = lrintf(SCALE(8*4 + i) * (tmp10 - tmp11)); | |||||
| z1 = (tmp12 + tmp13)* A1; | |||||
| data[8*3 + i] = lrintf(SCALE(8*2 + i) * (tmp13 + z1)); | |||||
| data[8*7 + i] = lrintf(SCALE(8*6 + i) * (tmp13 - z1)); | |||||
| z1 = (tmp12 + tmp13)* A1; | |||||
| data[8*3 + i] = lrintf(SCALE(8*2 + i) * (tmp13 + z1)); | |||||
| data[8*7 + i] = lrintf(SCALE(8*6 + i) * (tmp13 - z1)); | |||||
| } | } | ||||
| } | } | ||||
| @@ -64,51 +64,51 @@ void init_fdct() | |||||
| void fdct(block) | void fdct(block) | ||||
| short *block; | short *block; | ||||
| { | { | ||||
| register int i, j; | |||||
| double s; | |||||
| double tmp[64]; | |||||
| register int i, j; | |||||
| double s; | |||||
| double tmp[64]; | |||||
| for(i = 0; i < 8; i++) | |||||
| for(j = 0; j < 8; j++) | |||||
| { | |||||
| s = 0.0; | |||||
| for(i = 0; i < 8; i++) | |||||
| for(j = 0; j < 8; j++) | |||||
| { | |||||
| s = 0.0; | |||||
| /* | /* | ||||
| * for(k = 0; k < 8; k++) | |||||
| * s += c[j][k] * block[8 * i + k]; | |||||
| * for(k = 0; k < 8; k++) | |||||
| * s += c[j][k] * block[8 * i + k]; | |||||
| */ | */ | ||||
| s += c[j][0] * block[8 * i + 0]; | |||||
| s += c[j][1] * block[8 * i + 1]; | |||||
| s += c[j][2] * block[8 * i + 2]; | |||||
| s += c[j][3] * block[8 * i + 3]; | |||||
| s += c[j][4] * block[8 * i + 4]; | |||||
| s += c[j][5] * block[8 * i + 5]; | |||||
| s += c[j][6] * block[8 * i + 6]; | |||||
| s += c[j][7] * block[8 * i + 7]; | |||||
| tmp[8 * i + j] = s; | |||||
| } | |||||
| for(j = 0; j < 8; j++) | |||||
| for(i = 0; i < 8; i++) | |||||
| { | |||||
| s = 0.0; | |||||
| s += c[j][0] * block[8 * i + 0]; | |||||
| s += c[j][1] * block[8 * i + 1]; | |||||
| s += c[j][2] * block[8 * i + 2]; | |||||
| s += c[j][3] * block[8 * i + 3]; | |||||
| s += c[j][4] * block[8 * i + 4]; | |||||
| s += c[j][5] * block[8 * i + 5]; | |||||
| s += c[j][6] * block[8 * i + 6]; | |||||
| s += c[j][7] * block[8 * i + 7]; | |||||
| tmp[8 * i + j] = s; | |||||
| } | |||||
| for(j = 0; j < 8; j++) | |||||
| for(i = 0; i < 8; i++) | |||||
| { | |||||
| s = 0.0; | |||||
| /* | /* | ||||
| * for(k = 0; k < 8; k++) | |||||
| * s += c[i][k] * tmp[8 * k + j]; | |||||
| * for(k = 0; k < 8; k++) | |||||
| * s += c[i][k] * tmp[8 * k + j]; | |||||
| */ | */ | ||||
| s += c[i][0] * tmp[8 * 0 + j]; | |||||
| s += c[i][1] * tmp[8 * 1 + j]; | |||||
| s += c[i][2] * tmp[8 * 2 + j]; | |||||
| s += c[i][3] * tmp[8 * 3 + j]; | |||||
| s += c[i][4] * tmp[8 * 4 + j]; | |||||
| s += c[i][5] * tmp[8 * 5 + j]; | |||||
| s += c[i][6] * tmp[8 * 6 + j]; | |||||
| s += c[i][7] * tmp[8 * 7 + j]; | |||||
| s*=8.0; | |||||
| block[8 * i + j] = (short)floor(s + 0.499999); | |||||
| s += c[i][0] * tmp[8 * 0 + j]; | |||||
| s += c[i][1] * tmp[8 * 1 + j]; | |||||
| s += c[i][2] * tmp[8 * 2 + j]; | |||||
| s += c[i][3] * tmp[8 * 3 + j]; | |||||
| s += c[i][4] * tmp[8 * 4 + j]; | |||||
| s += c[i][5] * tmp[8 * 5 + j]; | |||||
| s += c[i][6] * tmp[8 * 6 + j]; | |||||
| s += c[i][7] * tmp[8 * 7 + j]; | |||||
| s*=8.0; | |||||
| block[8 * i + j] = (short)floor(s + 0.499999); | |||||
| /* | /* | ||||
| * reason for adding 0.499999 instead of 0.5: | * reason for adding 0.499999 instead of 0.5: | ||||
| * s is quite often x.5 (at least for i and/or j = 0 or 4) | * s is quite often x.5 (at least for i and/or j = 0 or 4) | ||||
| @@ -149,8 +149,8 @@ int ff_fft_init(FFTContext *s, int nbits, int inverse) | |||||
| void ff_fft_calc_c(FFTContext *s, FFTComplex *z) | void ff_fft_calc_c(FFTContext *s, FFTComplex *z) | ||||
| { | { | ||||
| int ln = s->nbits; | int ln = s->nbits; | ||||
| int j, np, np2; | |||||
| int nblocks, nloops; | |||||
| int j, np, np2; | |||||
| int nblocks, nloops; | |||||
| register FFTComplex *p, *q; | register FFTComplex *p, *q; | ||||
| FFTComplex *exptab = s->exptab; | FFTComplex *exptab = s->exptab; | ||||
| int l; | int l; | ||||
| @@ -31,30 +31,30 @@ | |||||
| * instead of simply using 32bit integer arithmetic. | * instead of simply using 32bit integer arithmetic. | ||||
| */ | */ | ||||
| typedef struct Float11 { | typedef struct Float11 { | ||||
| int sign; /**< 1bit sign */ | |||||
| int exp; /**< 4bit exponent */ | |||||
| int mant; /**< 6bit mantissa */ | |||||
| int sign; /**< 1bit sign */ | |||||
| int exp; /**< 4bit exponent */ | |||||
| int mant; /**< 6bit mantissa */ | |||||
| } Float11; | } Float11; | ||||
| static inline Float11* i2f(int16_t i, Float11* f) | static inline Float11* i2f(int16_t i, Float11* f) | ||||
| { | { | ||||
| f->sign = (i < 0); | |||||
| if (f->sign) | |||||
| i = -i; | |||||
| f->exp = av_log2_16bit(i) + !!i; | |||||
| f->mant = i? (i<<6) >> f->exp : | |||||
| 1<<5; | |||||
| return f; | |||||
| f->sign = (i < 0); | |||||
| if (f->sign) | |||||
| i = -i; | |||||
| f->exp = av_log2_16bit(i) + !!i; | |||||
| f->mant = i? (i<<6) >> f->exp : | |||||
| 1<<5; | |||||
| return f; | |||||
| } | } | ||||
| static inline int16_t mult(Float11* f1, Float11* f2) | static inline int16_t mult(Float11* f1, Float11* f2) | ||||
| { | { | ||||
| int res, exp; | |||||
| int res, exp; | |||||
| exp = f1->exp + f2->exp; | |||||
| res = (((f1->mant * f2->mant) + 0x30) >> 4) << 7; | |||||
| res = exp > 26 ? res << (exp - 26) : res >> (26 - exp); | |||||
| return (f1->sign ^ f2->sign) ? -res : res; | |||||
| exp = f1->exp + f2->exp; | |||||
| res = (((f1->mant * f2->mant) + 0x30) >> 4) << 7; | |||||
| res = exp > 26 ? res << (exp - 26) : res >> (26 - exp); | |||||
| return (f1->sign ^ f2->sign) ? -res : res; | |||||
| } | } | ||||
| static inline int sgn(int value) | static inline int sgn(int value) | ||||
| @@ -63,32 +63,32 @@ static inline int sgn(int value) | |||||
| } | } | ||||
| typedef struct G726Tables { | typedef struct G726Tables { | ||||
| int bits; /**< bits per sample */ | |||||
| int* quant; /**< quantization table */ | |||||
| int* iquant; /**< inverse quantization table */ | |||||
| int* W; /**< special table #1 ;-) */ | |||||
| int* F; /**< special table #2 */ | |||||
| int bits; /**< bits per sample */ | |||||
| int* quant; /**< quantization table */ | |||||
| int* iquant; /**< inverse quantization table */ | |||||
| int* W; /**< special table #1 ;-) */ | |||||
| int* F; /**< special table #2 */ | |||||
| } G726Tables; | } G726Tables; | ||||
| typedef struct G726Context { | typedef struct G726Context { | ||||
| G726Tables* tbls; /**< static tables needed for computation */ | |||||
| Float11 sr[2]; /**< prev. reconstructed samples */ | |||||
| Float11 dq[6]; /**< prev. difference */ | |||||
| int a[2]; /**< second order predictor coeffs */ | |||||
| int b[6]; /**< sixth order predictor coeffs */ | |||||
| int pk[2]; /**< signs of prev. 2 sez + dq */ | |||||
| int ap; /**< scale factor control */ | |||||
| int yu; /**< fast scale factor */ | |||||
| int yl; /**< slow scale factor */ | |||||
| int dms; /**< short average magnitude of F[i] */ | |||||
| int dml; /**< long average magnitude of F[i] */ | |||||
| int td; /**< tone detect */ | |||||
| int se; /**< estimated signal for the next iteration */ | |||||
| int sez; /**< estimated second order prediction */ | |||||
| int y; /**< quantizer scaling factor for the next iteration */ | |||||
| G726Tables* tbls; /**< static tables needed for computation */ | |||||
| Float11 sr[2]; /**< prev. reconstructed samples */ | |||||
| Float11 dq[6]; /**< prev. difference */ | |||||
| int a[2]; /**< second order predictor coeffs */ | |||||
| int b[6]; /**< sixth order predictor coeffs */ | |||||
| int pk[2]; /**< signs of prev. 2 sez + dq */ | |||||
| int ap; /**< scale factor control */ | |||||
| int yu; /**< fast scale factor */ | |||||
| int yl; /**< slow scale factor */ | |||||
| int dms; /**< short average magnitude of F[i] */ | |||||
| int dml; /**< long average magnitude of F[i] */ | |||||
| int td; /**< tone detect */ | |||||
| int se; /**< estimated signal for the next iteration */ | |||||
| int sez; /**< estimated second order prediction */ | |||||
| int y; /**< quantizer scaling factor for the next iteration */ | |||||
| } G726Context; | } G726Context; | ||||
| static int quant_tbl16[] = /**< 16kbit/s 2bits per sample */ | static int quant_tbl16[] = /**< 16kbit/s 2bits per sample */ | ||||
| @@ -113,34 +113,34 @@ static int quant_tbl32[] = /**< 32kbit/s 4bits per sample | |||||
| { -125, 79, 177, 245, 299, 348, 399, INT_MAX }; | { -125, 79, 177, 245, 299, 348, 399, INT_MAX }; | ||||
| static int iquant_tbl32[] = | static int iquant_tbl32[] = | ||||
| { INT_MIN, 4, 135, 213, 273, 323, 373, 425, | { INT_MIN, 4, 135, 213, 273, 323, 373, 425, | ||||
| 425, 373, 323, 273, 213, 135, 4, INT_MIN }; | |||||
| 425, 373, 323, 273, 213, 135, 4, INT_MIN }; | |||||
| static int W_tbl32[] = | static int W_tbl32[] = | ||||
| { -12, 18, 41, 64, 112, 198, 355, 1122, | { -12, 18, 41, 64, 112, 198, 355, 1122, | ||||
| 1122, 355, 198, 112, 64, 41, 18, -12}; | |||||
| 1122, 355, 198, 112, 64, 41, 18, -12}; | |||||
| static int F_tbl32[] = | static int F_tbl32[] = | ||||
| { 0, 0, 0, 1, 1, 1, 3, 7, 7, 3, 1, 1, 1, 0, 0, 0 }; | { 0, 0, 0, 1, 1, 1, 3, 7, 7, 3, 1, 1, 1, 0, 0, 0 }; | ||||
| static int quant_tbl40[] = /**< 40kbit/s 5bits per sample */ | static int quant_tbl40[] = /**< 40kbit/s 5bits per sample */ | ||||
| { -122, -16, 67, 138, 197, 249, 297, 338, | { -122, -16, 67, 138, 197, 249, 297, 338, | ||||
| 377, 412, 444, 474, 501, 527, 552, INT_MAX }; | |||||
| 377, 412, 444, 474, 501, 527, 552, INT_MAX }; | |||||
| static int iquant_tbl40[] = | static int iquant_tbl40[] = | ||||
| { INT_MIN, -66, 28, 104, 169, 224, 274, 318, | { INT_MIN, -66, 28, 104, 169, 224, 274, 318, | ||||
| 358, 395, 429, 459, 488, 514, 539, 566, | |||||
| 566, 539, 514, 488, 459, 429, 395, 358, | |||||
| 318, 274, 224, 169, 104, 28, -66, INT_MIN }; | |||||
| 358, 395, 429, 459, 488, 514, 539, 566, | |||||
| 566, 539, 514, 488, 459, 429, 395, 358, | |||||
| 318, 274, 224, 169, 104, 28, -66, INT_MIN }; | |||||
| static int W_tbl40[] = | static int W_tbl40[] = | ||||
| { 14, 14, 24, 39, 40, 41, 58, 100, | { 14, 14, 24, 39, 40, 41, 58, 100, | ||||
| 141, 179, 219, 280, 358, 440, 529, 696, | |||||
| 696, 529, 440, 358, 280, 219, 179, 141, | |||||
| 100, 58, 41, 40, 39, 24, 14, 14 }; | |||||
| 141, 179, 219, 280, 358, 440, 529, 696, | |||||
| 696, 529, 440, 358, 280, 219, 179, 141, | |||||
| 100, 58, 41, 40, 39, 24, 14, 14 }; | |||||
| static int F_tbl40[] = | static int F_tbl40[] = | ||||
| { 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 3, 4, 5, 6, 6, | { 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 3, 4, 5, 6, 6, | ||||
| 6, 6, 5, 4, 3, 2, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 }; | |||||
| 6, 6, 5, 4, 3, 2, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 }; | |||||
| static G726Tables G726Tables_pool[] = | static G726Tables G726Tables_pool[] = | ||||
| {{ 2, quant_tbl16, iquant_tbl16, W_tbl16, F_tbl16 }, | {{ 2, quant_tbl16, iquant_tbl16, W_tbl16, F_tbl16 }, | ||||
| { 3, quant_tbl24, iquant_tbl24, W_tbl24, F_tbl24 }, | |||||
| { 4, quant_tbl32, iquant_tbl32, W_tbl32, F_tbl32 }, | |||||
| { 3, quant_tbl24, iquant_tbl24, W_tbl24, F_tbl24 }, | |||||
| { 4, quant_tbl32, iquant_tbl32, W_tbl32, F_tbl32 }, | |||||
| { 5, quant_tbl40, iquant_tbl40, W_tbl40, F_tbl40 }}; | { 5, quant_tbl40, iquant_tbl40, W_tbl40, F_tbl40 }}; | ||||
| @@ -207,20 +207,20 @@ static inline int16_t g726_iterate(G726Context* c, int16_t I) | |||||
| dq0 = dq ? sgn(dq) : 0; | dq0 = dq ? sgn(dq) : 0; | ||||
| if (tr) { | if (tr) { | ||||
| c->a[0] = 0; | c->a[0] = 0; | ||||
| c->a[1] = 0; | |||||
| c->a[1] = 0; | |||||
| for (i=0; i<6; i++) | for (i=0; i<6; i++) | ||||
| c->b[i] = 0; | |||||
| c->b[i] = 0; | |||||
| } else { | } else { | ||||
| /* This is a bit crazy, but it really is +255 not +256 */ | |||||
| fa1 = clip((-c->a[0]*c->pk[0]*pk0)>>5, -256, 255); | |||||
| /* This is a bit crazy, but it really is +255 not +256 */ | |||||
| fa1 = clip((-c->a[0]*c->pk[0]*pk0)>>5, -256, 255); | |||||
| c->a[1] += 128*pk0*c->pk[1] + fa1 - (c->a[1]>>7); | |||||
| c->a[1] = clip(c->a[1], -12288, 12288); | |||||
| c->a[1] += 128*pk0*c->pk[1] + fa1 - (c->a[1]>>7); | |||||
| c->a[1] = clip(c->a[1], -12288, 12288); | |||||
| c->a[0] += 64*3*pk0*c->pk[0] - (c->a[0] >> 8); | c->a[0] += 64*3*pk0*c->pk[0] - (c->a[0] >> 8); | ||||
| c->a[0] = clip(c->a[0], -(15360 - c->a[1]), 15360 - c->a[1]); | |||||
| c->a[0] = clip(c->a[0], -(15360 - c->a[1]), 15360 - c->a[1]); | |||||
| for (i=0; i<6; i++) | for (i=0; i<6; i++) | ||||
| c->b[i] += 128*dq0*sgn(-c->dq[i].sign) - (c->b[i]>>8); | |||||
| c->b[i] += 128*dq0*sgn(-c->dq[i].sign) - (c->b[i]>>8); | |||||
| } | } | ||||
| /* Update Dq and Sr and Pk */ | /* Update Dq and Sr and Pk */ | ||||
| @@ -323,13 +323,13 @@ static int g726_init(AVCodecContext * avctx) | |||||
| if (avctx->channels != 1 || | if (avctx->channels != 1 || | ||||
| (avctx->bit_rate != 16000 && avctx->bit_rate != 24000 && | (avctx->bit_rate != 16000 && avctx->bit_rate != 24000 && | ||||
| avctx->bit_rate != 32000 && avctx->bit_rate != 40000)) { | |||||
| avctx->bit_rate != 32000 && avctx->bit_rate != 40000)) { | |||||
| av_log(avctx, AV_LOG_ERROR, "G726: unsupported audio format\n"); | av_log(avctx, AV_LOG_ERROR, "G726: unsupported audio format\n"); | ||||
| return -1; | |||||
| return -1; | |||||
| } | } | ||||
| if (avctx->sample_rate != 8000 && avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL) { | if (avctx->sample_rate != 8000 && avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL) { | ||||
| av_log(avctx, AV_LOG_ERROR, "G726: unsupported audio format\n"); | av_log(avctx, AV_LOG_ERROR, "G726: unsupported audio format\n"); | ||||
| return -1; | |||||
| return -1; | |||||
| } | } | ||||
| g726_reset(&c->c, avctx->bit_rate); | g726_reset(&c->c, avctx->bit_rate); | ||||
| c->code_size = c->c.tbls->bits; | c->code_size = c->c.tbls->bits; | ||||
| @@ -384,12 +384,12 @@ static int g726_decode_frame(AVCodecContext *avctx, | |||||
| init_get_bits(&gb, buf, buf_size * 8); | init_get_bits(&gb, buf, buf_size * 8); | ||||
| if (c->bits_left) { | if (c->bits_left) { | ||||
| int s = c->code_size - c->bits_left;; | int s = c->code_size - c->bits_left;; | ||||
| code = (c->bit_buffer << s) | get_bits(&gb, s); | |||||
| *samples++ = g726_decode(&c->c, code & mask); | |||||
| code = (c->bit_buffer << s) | get_bits(&gb, s); | |||||
| *samples++ = g726_decode(&c->c, code & mask); | |||||
| } | } | ||||
| while (get_bits_count(&gb) + c->code_size <= buf_size*8) | while (get_bits_count(&gb) + c->code_size <= buf_size*8) | ||||
| *samples++ = g726_decode(&c->c, get_bits(&gb, c->code_size) & mask); | |||||
| *samples++ = g726_decode(&c->c, get_bits(&gb, c->code_size) & mask); | |||||
| c->bits_left = buf_size*8 - get_bits_count(&gb); | c->bits_left = buf_size*8 - get_bits_count(&gb); | ||||
| c->bit_buffer = get_bits(&gb, c->bits_left); | c->bit_buffer = get_bits(&gb, c->bits_left); | ||||
| @@ -288,7 +288,7 @@ static inline int get_sr_golomb_flac(GetBitContext *gb, int k, int limit, int es | |||||
| * read unsigned golomb rice code (shorten). | * read unsigned golomb rice code (shorten). | ||||
| */ | */ | ||||
| static inline unsigned int get_ur_golomb_shorten(GetBitContext *gb, int k){ | static inline unsigned int get_ur_golomb_shorten(GetBitContext *gb, int k){ | ||||
| return get_ur_golomb_jpegls(gb, k, INT_MAX, 0); | |||||
| return get_ur_golomb_jpegls(gb, k, INT_MAX, 0); | |||||
| } | } | ||||
| /** | /** | ||||
| @@ -395,7 +395,7 @@ static inline void set_te_golomb(PutBitContext *pb, int i, int range){ | |||||
| */ | */ | ||||
| static inline void set_se_golomb(PutBitContext *pb, int i){ | static inline void set_se_golomb(PutBitContext *pb, int i){ | ||||
| // if (i>32767 || i<-32767) | // if (i>32767 || i<-32767) | ||||
| // av_log(NULL,AV_LOG_ERROR,"value out of range %d\n", i); | |||||
| // av_log(NULL,AV_LOG_ERROR,"value out of range %d\n", i); | |||||
| #if 0 | #if 0 | ||||
| if(i<=0) i= -2*i; | if(i<=0) i= -2*i; | ||||
| else i= 2*i-1; | else i= 2*i-1; | ||||
| @@ -231,11 +231,11 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number) | |||||
| (coded_frame_rate_base * (int64_t)s->avctx->time_base.den); | (coded_frame_rate_base * (int64_t)s->avctx->time_base.den); | ||||
| put_bits(&s->pb, 8, temp_ref & 0xff); /* TemporalReference */ | put_bits(&s->pb, 8, temp_ref & 0xff); /* TemporalReference */ | ||||
| put_bits(&s->pb, 1, 1); /* marker */ | |||||
| put_bits(&s->pb, 1, 0); /* h263 id */ | |||||
| put_bits(&s->pb, 1, 0); /* split screen off */ | |||||
| put_bits(&s->pb, 1, 0); /* camera off */ | |||||
| put_bits(&s->pb, 1, 0); /* freeze picture release off */ | |||||
| put_bits(&s->pb, 1, 1); /* marker */ | |||||
| put_bits(&s->pb, 1, 0); /* h263 id */ | |||||
| put_bits(&s->pb, 1, 0); /* split screen off */ | |||||
| put_bits(&s->pb, 1, 0); /* camera off */ | |||||
| put_bits(&s->pb, 1, 0); /* freeze picture release off */ | |||||
| format = h263_get_picture_format(s->width, s->height); | format = h263_get_picture_format(s->width, s->height); | ||||
| if (!s->h263_plus) { | if (!s->h263_plus) { | ||||
| @@ -245,12 +245,12 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number) | |||||
| /* By now UMV IS DISABLED ON H.263v1, since the restrictions | /* By now UMV IS DISABLED ON H.263v1, since the restrictions | ||||
| of H.263v1 UMV implies to check the predicted MV after | of H.263v1 UMV implies to check the predicted MV after | ||||
| calculation of the current MB to see if we're on the limits */ | calculation of the current MB to see if we're on the limits */ | ||||
| put_bits(&s->pb, 1, 0); /* Unrestricted Motion Vector: off */ | |||||
| put_bits(&s->pb, 1, 0); /* SAC: off */ | |||||
| put_bits(&s->pb, 1, s->obmc); /* Advanced Prediction */ | |||||
| put_bits(&s->pb, 1, 0); /* only I/P frames, no PB frame */ | |||||
| put_bits(&s->pb, 1, 0); /* Unrestricted Motion Vector: off */ | |||||
| put_bits(&s->pb, 1, 0); /* SAC: off */ | |||||
| put_bits(&s->pb, 1, s->obmc); /* Advanced Prediction */ | |||||
| put_bits(&s->pb, 1, 0); /* only I/P frames, no PB frame */ | |||||
| put_bits(&s->pb, 5, s->qscale); | put_bits(&s->pb, 5, s->qscale); | ||||
| put_bits(&s->pb, 1, 0); /* Continuous Presence Multipoint mode: off */ | |||||
| put_bits(&s->pb, 1, 0); /* Continuous Presence Multipoint mode: off */ | |||||
| } else { | } else { | ||||
| int ufep=1; | int ufep=1; | ||||
| /* H.263v2 */ | /* H.263v2 */ | ||||
| @@ -286,9 +286,9 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number) | |||||
| put_bits(&s->pb,1,1); /* "1" to prevent start code emulation */ | put_bits(&s->pb,1,1); /* "1" to prevent start code emulation */ | ||||
| /* This should be here if PLUSPTYPE */ | /* This should be here if PLUSPTYPE */ | ||||
| put_bits(&s->pb, 1, 0); /* Continuous Presence Multipoint mode: off */ | |||||
| put_bits(&s->pb, 1, 0); /* Continuous Presence Multipoint mode: off */ | |||||
| if (format == 7) { | |||||
| if (format == 7) { | |||||
| /* Custom Picture Format (CPFMT) */ | /* Custom Picture Format (CPFMT) */ | ||||
| aspect_to_info(s, s->avctx->sample_aspect_ratio); | aspect_to_info(s, s->avctx->sample_aspect_ratio); | ||||
| @@ -299,7 +299,7 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number) | |||||
| if (s->aspect_ratio_info == FF_ASPECT_EXTENDED){ | if (s->aspect_ratio_info == FF_ASPECT_EXTENDED){ | ||||
| put_bits(&s->pb, 8, s->avctx->sample_aspect_ratio.num); | put_bits(&s->pb, 8, s->avctx->sample_aspect_ratio.num); | ||||
| put_bits(&s->pb, 8, s->avctx->sample_aspect_ratio.den); | put_bits(&s->pb, 8, s->avctx->sample_aspect_ratio.den); | ||||
| } | |||||
| } | |||||
| } | } | ||||
| if(s->custom_pcf){ | if(s->custom_pcf){ | ||||
| if(ufep){ | if(ufep){ | ||||
| @@ -320,7 +320,7 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number) | |||||
| put_bits(&s->pb, 5, s->qscale); | put_bits(&s->pb, 5, s->qscale); | ||||
| } | } | ||||
| put_bits(&s->pb, 1, 0); /* no PEI */ | |||||
| put_bits(&s->pb, 1, 0); /* no PEI */ | |||||
| if(s->h263_slice_structured){ | if(s->h263_slice_structured){ | ||||
| put_bits(&s->pb, 1, 1); | put_bits(&s->pb, 1, 1); | ||||
| @@ -823,8 +823,8 @@ static inline void mpeg4_encode_blocks(MpegEncContext * s, DCTELEM block[6][64], | |||||
| } | } | ||||
| void mpeg4_encode_mb(MpegEncContext * s, | void mpeg4_encode_mb(MpegEncContext * s, | ||||
| DCTELEM block[6][64], | |||||
| int motion_x, int motion_y) | |||||
| DCTELEM block[6][64], | |||||
| int motion_x, int motion_y) | |||||
| { | { | ||||
| int cbpc, cbpy, pred_x, pred_y; | int cbpc, cbpy, pred_x, pred_y; | ||||
| PutBitContext * const pb2 = s->data_partitioning ? &s->pb2 : &s->pb; | PutBitContext * const pb2 = s->data_partitioning ? &s->pb2 : &s->pb; | ||||
| @@ -884,7 +884,7 @@ void mpeg4_encode_mb(MpegEncContext * s, | |||||
| return; | return; | ||||
| } | } | ||||
| put_bits(&s->pb, 1, 0); /* mb coded modb1=0 */ | |||||
| put_bits(&s->pb, 1, 0); /* mb coded modb1=0 */ | |||||
| put_bits(&s->pb, 1, cbp ? 0 : 1); /* modb2 */ //FIXME merge | put_bits(&s->pb, 1, cbp ? 0 : 1); /* modb2 */ //FIXME merge | ||||
| put_bits(&s->pb, mb_type+1, 1); // this table is so simple that we don't need it :) | put_bits(&s->pb, mb_type+1, 1); // this table is so simple that we don't need it :) | ||||
| if(cbp) put_bits(&s->pb, 6, cbp); | if(cbp) put_bits(&s->pb, 6, cbp); | ||||
| @@ -998,7 +998,7 @@ void mpeg4_encode_mb(MpegEncContext * s, | |||||
| if(pic==NULL || pic->pict_type!=B_TYPE) break; | if(pic==NULL || pic->pict_type!=B_TYPE) break; | ||||
| b_pic= pic->data[0] + offset + 16; //FIXME +16 | b_pic= pic->data[0] + offset + 16; //FIXME +16 | ||||
| diff= s->dsp.sad[0](NULL, p_pic, b_pic, s->linesize, 16); | |||||
| diff= s->dsp.sad[0](NULL, p_pic, b_pic, s->linesize, 16); | |||||
| if(diff>s->qscale*70){ //FIXME check that 70 is optimal | if(diff>s->qscale*70){ //FIXME check that 70 is optimal | ||||
| s->mb_skipped=0; | s->mb_skipped=0; | ||||
| break; | break; | ||||
| @@ -1021,7 +1021,7 @@ void mpeg4_encode_mb(MpegEncContext * s, | |||||
| } | } | ||||
| } | } | ||||
| put_bits(&s->pb, 1, 0); /* mb coded */ | |||||
| put_bits(&s->pb, 1, 0); /* mb coded */ | |||||
| cbpc = cbp & 3; | cbpc = cbp & 3; | ||||
| cbpy = cbp >> 2; | cbpy = cbp >> 2; | ||||
| cbpy ^= 0xf; | cbpy ^= 0xf; | ||||
| @@ -1121,7 +1121,7 @@ void mpeg4_encode_mb(MpegEncContext * s, | |||||
| int dc_diff[6]; //dc values with the dc prediction subtracted | int dc_diff[6]; //dc values with the dc prediction subtracted | ||||
| int dir[6]; //prediction direction | int dir[6]; //prediction direction | ||||
| int zigzag_last_index[6]; | int zigzag_last_index[6]; | ||||
| uint8_t *scan_table[6]; | |||||
| uint8_t *scan_table[6]; | |||||
| int i; | int i; | ||||
| for(i=0; i<6; i++){ | for(i=0; i<6; i++){ | ||||
| @@ -1152,7 +1152,7 @@ void mpeg4_encode_mb(MpegEncContext * s, | |||||
| intra_MCBPC_code[cbpc]); | intra_MCBPC_code[cbpc]); | ||||
| } else { | } else { | ||||
| if(s->dquant) cbpc+=8; | if(s->dquant) cbpc+=8; | ||||
| put_bits(&s->pb, 1, 0); /* mb coded */ | |||||
| put_bits(&s->pb, 1, 0); /* mb coded */ | |||||
| put_bits(&s->pb, | put_bits(&s->pb, | ||||
| inter_MCBPC_bits[cbpc + 4], | inter_MCBPC_bits[cbpc + 4], | ||||
| inter_MCBPC_code[cbpc + 4]); | inter_MCBPC_code[cbpc + 4]); | ||||
| @@ -1185,8 +1185,8 @@ void mpeg4_encode_mb(MpegEncContext * s, | |||||
| } | } | ||||
| void h263_encode_mb(MpegEncContext * s, | void h263_encode_mb(MpegEncContext * s, | ||||
| DCTELEM block[6][64], | |||||
| int motion_x, int motion_y) | |||||
| DCTELEM block[6][64], | |||||
| int motion_x, int motion_y) | |||||
| { | { | ||||
| int cbpc, cbpy, i, cbp, pred_x, pred_y; | int cbpc, cbpy, i, cbp, pred_x, pred_y; | ||||
| int16_t pred_dc; | int16_t pred_dc; | ||||
| @@ -1211,7 +1211,7 @@ void h263_encode_mb(MpegEncContext * s, | |||||
| return; | return; | ||||
| } | } | ||||
| put_bits(&s->pb, 1, 0); /* mb coded */ | |||||
| put_bits(&s->pb, 1, 0); /* mb coded */ | |||||
| cbpc = cbp & 3; | cbpc = cbp & 3; | ||||
| cbpy = cbp >> 2; | cbpy = cbp >> 2; | ||||
| @@ -1346,14 +1346,14 @@ void h263_encode_mb(MpegEncContext * s, | |||||
| intra_MCBPC_code[cbpc]); | intra_MCBPC_code[cbpc]); | ||||
| } else { | } else { | ||||
| if(s->dquant) cbpc+=8; | if(s->dquant) cbpc+=8; | ||||
| put_bits(&s->pb, 1, 0); /* mb coded */ | |||||
| put_bits(&s->pb, 1, 0); /* mb coded */ | |||||
| put_bits(&s->pb, | put_bits(&s->pb, | ||||
| inter_MCBPC_bits[cbpc + 4], | inter_MCBPC_bits[cbpc + 4], | ||||
| inter_MCBPC_code[cbpc + 4]); | inter_MCBPC_code[cbpc + 4]); | ||||
| } | } | ||||
| if (s->h263_aic) { | if (s->h263_aic) { | ||||
| /* XXX: currently, we do not try to use ac prediction */ | /* XXX: currently, we do not try to use ac prediction */ | ||||
| put_bits(&s->pb, 1, 0); /* no AC prediction */ | |||||
| put_bits(&s->pb, 1, 0); /* no AC prediction */ | |||||
| } | } | ||||
| cbpy = cbp >> 2; | cbpy = cbp >> 2; | ||||
| put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]); | put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]); | ||||
| @@ -1796,7 +1796,7 @@ static void init_uni_dc_tab(void) | |||||
| v = abs(level); | v = abs(level); | ||||
| while (v) { | while (v) { | ||||
| v >>= 1; | v >>= 1; | ||||
| size++; | |||||
| size++; | |||||
| } | } | ||||
| if (level < 0) | if (level < 0) | ||||
| @@ -2318,14 +2318,14 @@ static void mpeg4_encode_vol_header(MpegEncContext * s, int vo_number, int vol_n | |||||
| put_bits(&s->pb, 16, 0); | put_bits(&s->pb, 16, 0); | ||||
| put_bits(&s->pb, 16, 0x120 + vol_number); /* video obj layer */ | put_bits(&s->pb, 16, 0x120 + vol_number); /* video obj layer */ | ||||
| put_bits(&s->pb, 1, 0); /* random access vol */ | |||||
| put_bits(&s->pb, 8, s->vo_type); /* video obj type indication */ | |||||
| put_bits(&s->pb, 1, 0); /* random access vol */ | |||||
| put_bits(&s->pb, 8, s->vo_type); /* video obj type indication */ | |||||
| if(s->workaround_bugs & FF_BUG_MS) { | if(s->workaround_bugs & FF_BUG_MS) { | ||||
| put_bits(&s->pb, 1, 0); /* is obj layer id= no */ | |||||
| put_bits(&s->pb, 1, 0); /* is obj layer id= no */ | |||||
| } else { | } else { | ||||
| put_bits(&s->pb, 1, 1); /* is obj layer id= yes */ | |||||
| put_bits(&s->pb, 4, vo_ver_id); /* is obj layer ver id */ | |||||
| put_bits(&s->pb, 3, 1); /* is obj layer priority */ | |||||
| put_bits(&s->pb, 1, 1); /* is obj layer id= yes */ | |||||
| put_bits(&s->pb, 4, vo_ver_id); /* is obj layer ver id */ | |||||
| put_bits(&s->pb, 3, 1); /* is obj layer priority */ | |||||
| } | } | ||||
| aspect_to_info(s, s->avctx->sample_aspect_ratio); | aspect_to_info(s, s->avctx->sample_aspect_ratio); | ||||
| @@ -2337,37 +2337,37 @@ static void mpeg4_encode_vol_header(MpegEncContext * s, int vo_number, int vol_n | |||||
| } | } | ||||
| if(s->workaround_bugs & FF_BUG_MS) { // | if(s->workaround_bugs & FF_BUG_MS) { // | ||||
| put_bits(&s->pb, 1, 0); /* vol control parameters= no @@@ */ | |||||
| put_bits(&s->pb, 1, 0); /* vol control parameters= no @@@ */ | |||||
| } else { | } else { | ||||
| put_bits(&s->pb, 1, 1); /* vol control parameters= yes */ | |||||
| put_bits(&s->pb, 2, 1); /* chroma format YUV 420/YV12 */ | |||||
| put_bits(&s->pb, 1, 1); /* vol control parameters= yes */ | |||||
| put_bits(&s->pb, 2, 1); /* chroma format YUV 420/YV12 */ | |||||
| put_bits(&s->pb, 1, s->low_delay); | put_bits(&s->pb, 1, s->low_delay); | ||||
| put_bits(&s->pb, 1, 0); /* vbv parameters= no */ | |||||
| put_bits(&s->pb, 1, 0); /* vbv parameters= no */ | |||||
| } | } | ||||
| put_bits(&s->pb, 2, RECT_SHAPE); /* vol shape= rectangle */ | |||||
| put_bits(&s->pb, 1, 1); /* marker bit */ | |||||
| put_bits(&s->pb, 2, RECT_SHAPE); /* vol shape= rectangle */ | |||||
| put_bits(&s->pb, 1, 1); /* marker bit */ | |||||
| put_bits(&s->pb, 16, s->avctx->time_base.den); | put_bits(&s->pb, 16, s->avctx->time_base.den); | ||||
| if (s->time_increment_bits < 1) | if (s->time_increment_bits < 1) | ||||
| s->time_increment_bits = 1; | s->time_increment_bits = 1; | ||||
| put_bits(&s->pb, 1, 1); /* marker bit */ | |||||
| put_bits(&s->pb, 1, 0); /* fixed vop rate=no */ | |||||
| put_bits(&s->pb, 1, 1); /* marker bit */ | |||||
| put_bits(&s->pb, 13, s->width); /* vol width */ | |||||
| put_bits(&s->pb, 1, 1); /* marker bit */ | |||||
| put_bits(&s->pb, 13, s->height); /* vol height */ | |||||
| put_bits(&s->pb, 1, 1); /* marker bit */ | |||||
| put_bits(&s->pb, 1, 1); /* marker bit */ | |||||
| put_bits(&s->pb, 1, 0); /* fixed vop rate=no */ | |||||
| put_bits(&s->pb, 1, 1); /* marker bit */ | |||||
| put_bits(&s->pb, 13, s->width); /* vol width */ | |||||
| put_bits(&s->pb, 1, 1); /* marker bit */ | |||||
| put_bits(&s->pb, 13, s->height); /* vol height */ | |||||
| put_bits(&s->pb, 1, 1); /* marker bit */ | |||||
| put_bits(&s->pb, 1, s->progressive_sequence ? 0 : 1); | put_bits(&s->pb, 1, s->progressive_sequence ? 0 : 1); | ||||
| put_bits(&s->pb, 1, 1); /* obmc disable */ | |||||
| put_bits(&s->pb, 1, 1); /* obmc disable */ | |||||
| if (vo_ver_id == 1) { | if (vo_ver_id == 1) { | ||||
| put_bits(&s->pb, 1, s->vol_sprite_usage); /* sprite enable */ | |||||
| put_bits(&s->pb, 1, s->vol_sprite_usage); /* sprite enable */ | |||||
| }else{ | }else{ | ||||
| put_bits(&s->pb, 2, s->vol_sprite_usage); /* sprite enable */ | |||||
| put_bits(&s->pb, 2, s->vol_sprite_usage); /* sprite enable */ | |||||
| } | } | ||||
| put_bits(&s->pb, 1, 0); /* not 8 bit == false */ | |||||
| put_bits(&s->pb, 1, s->mpeg_quant); /* quant type= (0=h263 style)*/ | |||||
| put_bits(&s->pb, 1, 0); /* not 8 bit == false */ | |||||
| put_bits(&s->pb, 1, s->mpeg_quant); /* quant type= (0=h263 style)*/ | |||||
| if(s->mpeg_quant){ | if(s->mpeg_quant){ | ||||
| ff_write_quant_matrix(&s->pb, s->avctx->intra_matrix); | ff_write_quant_matrix(&s->pb, s->avctx->intra_matrix); | ||||
| @@ -2376,27 +2376,27 @@ static void mpeg4_encode_vol_header(MpegEncContext * s, int vo_number, int vol_n | |||||
| if (vo_ver_id != 1) | if (vo_ver_id != 1) | ||||
| put_bits(&s->pb, 1, s->quarter_sample); | put_bits(&s->pb, 1, s->quarter_sample); | ||||
| put_bits(&s->pb, 1, 1); /* complexity estimation disable */ | |||||
| put_bits(&s->pb, 1, 1); /* complexity estimation disable */ | |||||
| s->resync_marker= s->rtp_mode; | s->resync_marker= s->rtp_mode; | ||||
| put_bits(&s->pb, 1, s->resync_marker ? 0 : 1);/* resync marker disable */ | put_bits(&s->pb, 1, s->resync_marker ? 0 : 1);/* resync marker disable */ | ||||
| put_bits(&s->pb, 1, s->data_partitioning ? 1 : 0); | put_bits(&s->pb, 1, s->data_partitioning ? 1 : 0); | ||||
| if(s->data_partitioning){ | if(s->data_partitioning){ | ||||
| put_bits(&s->pb, 1, 0); /* no rvlc */ | |||||
| put_bits(&s->pb, 1, 0); /* no rvlc */ | |||||
| } | } | ||||
| if (vo_ver_id != 1){ | if (vo_ver_id != 1){ | ||||
| put_bits(&s->pb, 1, 0); /* newpred */ | |||||
| put_bits(&s->pb, 1, 0); /* reduced res vop */ | |||||
| put_bits(&s->pb, 1, 0); /* newpred */ | |||||
| put_bits(&s->pb, 1, 0); /* reduced res vop */ | |||||
| } | } | ||||
| put_bits(&s->pb, 1, 0); /* scalability */ | |||||
| put_bits(&s->pb, 1, 0); /* scalability */ | |||||
| ff_mpeg4_stuffing(&s->pb); | ff_mpeg4_stuffing(&s->pb); | ||||
| /* user data */ | /* user data */ | ||||
| if(!(s->flags & CODEC_FLAG_BITEXACT)){ | if(!(s->flags & CODEC_FLAG_BITEXACT)){ | ||||
| put_bits(&s->pb, 16, 0); | put_bits(&s->pb, 16, 0); | ||||
| put_bits(&s->pb, 16, 0x1B2); /* user_data */ | |||||
| ff_put_string(&s->pb, LIBAVCODEC_IDENT, 0); | |||||
| put_bits(&s->pb, 16, 0x1B2); /* user_data */ | |||||
| ff_put_string(&s->pb, LIBAVCODEC_IDENT, 0); | |||||
| } | } | ||||
| } | } | ||||
| @@ -2421,9 +2421,9 @@ void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number) | |||||
| //printf("num:%d rate:%d base:%d\n", s->picture_number, s->time_base.den, FRAME_RATE_BASE); | //printf("num:%d rate:%d base:%d\n", s->picture_number, s->time_base.den, FRAME_RATE_BASE); | ||||
| put_bits(&s->pb, 16, 0); /* vop header */ | |||||
| put_bits(&s->pb, 16, VOP_STARTCODE); /* vop header */ | |||||
| put_bits(&s->pb, 2, s->pict_type - 1); /* pict type: I = 0 , P = 1 */ | |||||
| put_bits(&s->pb, 16, 0); /* vop header */ | |||||
| put_bits(&s->pb, 16, VOP_STARTCODE); /* vop header */ | |||||
| put_bits(&s->pb, 2, s->pict_type - 1); /* pict type: I = 0 , P = 1 */ | |||||
| assert(s->time>=0); | assert(s->time>=0); | ||||
| time_div= s->time/s->avctx->time_base.den; | time_div= s->time/s->avctx->time_base.den; | ||||
| @@ -2435,15 +2435,15 @@ void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number) | |||||
| put_bits(&s->pb, 1, 0); | put_bits(&s->pb, 1, 0); | ||||
| put_bits(&s->pb, 1, 1); /* marker */ | |||||
| put_bits(&s->pb, s->time_increment_bits, time_mod); /* time increment */ | |||||
| put_bits(&s->pb, 1, 1); /* marker */ | |||||
| put_bits(&s->pb, 1, 1); /* vop coded */ | |||||
| put_bits(&s->pb, 1, 1); /* marker */ | |||||
| put_bits(&s->pb, s->time_increment_bits, time_mod); /* time increment */ | |||||
| put_bits(&s->pb, 1, 1); /* marker */ | |||||
| put_bits(&s->pb, 1, 1); /* vop coded */ | |||||
| if ( s->pict_type == P_TYPE | if ( s->pict_type == P_TYPE | ||||
| || (s->pict_type == S_TYPE && s->vol_sprite_usage==GMC_SPRITE)) { | || (s->pict_type == S_TYPE && s->vol_sprite_usage==GMC_SPRITE)) { | ||||
| put_bits(&s->pb, 1, s->no_rounding); /* rounding type */ | |||||
| put_bits(&s->pb, 1, s->no_rounding); /* rounding type */ | |||||
| } | } | ||||
| put_bits(&s->pb, 3, 0); /* intra dc VLC threshold */ | |||||
| put_bits(&s->pb, 3, 0); /* intra dc VLC threshold */ | |||||
| if(!s->progressive_sequence){ | if(!s->progressive_sequence){ | ||||
| put_bits(&s->pb, 1, s->current_picture_ptr->top_field_first); | put_bits(&s->pb, 1, s->current_picture_ptr->top_field_first); | ||||
| put_bits(&s->pb, 1, s->alternate_scan); | put_bits(&s->pb, 1, s->alternate_scan); | ||||
| @@ -2453,9 +2453,9 @@ void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number) | |||||
| put_bits(&s->pb, 5, s->qscale); | put_bits(&s->pb, 5, s->qscale); | ||||
| if (s->pict_type != I_TYPE) | if (s->pict_type != I_TYPE) | ||||
| put_bits(&s->pb, 3, s->f_code); /* fcode_for */ | |||||
| put_bits(&s->pb, 3, s->f_code); /* fcode_for */ | |||||
| if (s->pict_type == B_TYPE) | if (s->pict_type == B_TYPE) | ||||
| put_bits(&s->pb, 3, s->b_code); /* fcode_back */ | |||||
| put_bits(&s->pb, 3, s->b_code); /* fcode_back */ | |||||
| // printf("****frame %d\n", picture_number); | // printf("****frame %d\n", picture_number); | ||||
| } | } | ||||
| @@ -2492,9 +2492,9 @@ static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, int level, int *di | |||||
| /* find prediction */ | /* find prediction */ | ||||
| if (n < 4) { | if (n < 4) { | ||||
| scale = s->y_dc_scale; | |||||
| scale = s->y_dc_scale; | |||||
| } else { | } else { | ||||
| scale = s->c_dc_scale; | |||||
| scale = s->c_dc_scale; | |||||
| } | } | ||||
| if(IS_3IV1) | if(IS_3IV1) | ||||
| scale= 8; | scale= 8; | ||||
| @@ -2520,10 +2520,10 @@ static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, int level, int *di | |||||
| } | } | ||||
| if (abs(a - b) < abs(b - c)) { | if (abs(a - b) < abs(b - c)) { | ||||
| pred = c; | |||||
| pred = c; | |||||
| *dir_ptr = 1; /* top */ | *dir_ptr = 1; /* top */ | ||||
| } else { | } else { | ||||
| pred = a; | |||||
| pred = a; | |||||
| *dir_ptr = 0; /* left */ | *dir_ptr = 0; /* left */ | ||||
| } | } | ||||
| /* we assume pred is positive */ | /* we assume pred is positive */ | ||||
| @@ -2629,11 +2629,11 @@ static inline void mpeg4_encode_dc(PutBitContext * s, int level, int n) | |||||
| // if(level<-255 || level>255) printf("dc overflow\n"); | // if(level<-255 || level>255) printf("dc overflow\n"); | ||||
| level+=256; | level+=256; | ||||
| if (n < 4) { | if (n < 4) { | ||||
| /* luminance */ | |||||
| put_bits(s, uni_DCtab_lum_len[level], uni_DCtab_lum_bits[level]); | |||||
| /* luminance */ | |||||
| put_bits(s, uni_DCtab_lum_len[level], uni_DCtab_lum_bits[level]); | |||||
| } else { | } else { | ||||
| /* chrominance */ | |||||
| put_bits(s, uni_DCtab_chrom_len[level], uni_DCtab_chrom_bits[level]); | |||||
| /* chrominance */ | |||||
| put_bits(s, uni_DCtab_chrom_len[level], uni_DCtab_chrom_bits[level]); | |||||
| } | } | ||||
| #else | #else | ||||
| int size, v; | int size, v; | ||||
| @@ -2641,25 +2641,25 @@ static inline void mpeg4_encode_dc(PutBitContext * s, int level, int n) | |||||
| size = 0; | size = 0; | ||||
| v = abs(level); | v = abs(level); | ||||
| while (v) { | while (v) { | ||||
| v >>= 1; | |||||
| size++; | |||||
| v >>= 1; | |||||
| size++; | |||||
| } | } | ||||
| if (n < 4) { | if (n < 4) { | ||||
| /* luminance */ | |||||
| put_bits(&s->pb, DCtab_lum[size][1], DCtab_lum[size][0]); | |||||
| /* luminance */ | |||||
| put_bits(&s->pb, DCtab_lum[size][1], DCtab_lum[size][0]); | |||||
| } else { | } else { | ||||
| /* chrominance */ | |||||
| put_bits(&s->pb, DCtab_chrom[size][1], DCtab_chrom[size][0]); | |||||
| /* chrominance */ | |||||
| put_bits(&s->pb, DCtab_chrom[size][1], DCtab_chrom[size][0]); | |||||
| } | } | ||||
| /* encode remaining bits */ | /* encode remaining bits */ | ||||
| if (size > 0) { | if (size > 0) { | ||||
| if (level < 0) | |||||
| level = (-level) ^ ((1 << size) - 1); | |||||
| put_bits(&s->pb, size, level); | |||||
| if (size > 8) | |||||
| put_bits(&s->pb, 1, 1); | |||||
| if (level < 0) | |||||
| level = (-level) ^ ((1 << size) - 1); | |||||
| put_bits(&s->pb, size, level); | |||||
| if (size > 8) | |||||
| put_bits(&s->pb, 1, 1); | |||||
| } | } | ||||
| #endif | #endif | ||||
| } | } | ||||
| @@ -2689,16 +2689,16 @@ static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n | |||||
| const int last_index = s->block_last_index[n]; | const int last_index = s->block_last_index[n]; | ||||
| if (s->mb_intra) { //Note gcc (3.2.1 at least) will optimize this away | if (s->mb_intra) { //Note gcc (3.2.1 at least) will optimize this away | ||||
| /* mpeg4 based DC predictor */ | |||||
| mpeg4_encode_dc(dc_pb, intra_dc, n); | |||||
| /* mpeg4 based DC predictor */ | |||||
| mpeg4_encode_dc(dc_pb, intra_dc, n); | |||||
| if(last_index<1) return; | if(last_index<1) return; | ||||
| i = 1; | |||||
| i = 1; | |||||
| rl = &rl_intra; | rl = &rl_intra; | ||||
| bits_tab= uni_mpeg4_intra_rl_bits; | bits_tab= uni_mpeg4_intra_rl_bits; | ||||
| len_tab = uni_mpeg4_intra_rl_len; | len_tab = uni_mpeg4_intra_rl_len; | ||||
| } else { | } else { | ||||
| if(last_index<0) return; | if(last_index<0) return; | ||||
| i = 0; | |||||
| i = 0; | |||||
| rl = &rl_inter; | rl = &rl_inter; | ||||
| bits_tab= uni_mpeg4_inter_rl_bits; | bits_tab= uni_mpeg4_inter_rl_bits; | ||||
| len_tab = uni_mpeg4_inter_rl_len; | len_tab = uni_mpeg4_inter_rl_len; | ||||
| @@ -2708,9 +2708,9 @@ static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n | |||||
| last_non_zero = i - 1; | last_non_zero = i - 1; | ||||
| #if 1 | #if 1 | ||||
| for (; i < last_index; i++) { | for (; i < last_index; i++) { | ||||
| int level = block[ scan_table[i] ]; | |||||
| if (level) { | |||||
| int run = i - last_non_zero - 1; | |||||
| int level = block[ scan_table[i] ]; | |||||
| if (level) { | |||||
| int run = i - last_non_zero - 1; | |||||
| level+=64; | level+=64; | ||||
| if((level&(~127)) == 0){ | if((level&(~127)) == 0){ | ||||
| const int index= UNI_MPEG4_ENC_INDEX(0, run, level); | const int index= UNI_MPEG4_ENC_INDEX(0, run, level); | ||||
| @@ -2718,11 +2718,11 @@ static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n | |||||
| }else{ //ESC3 | }else{ //ESC3 | ||||
| put_bits(ac_pb, 7+2+1+6+1+12+1, (3<<23)+(3<<21)+(0<<20)+(run<<14)+(1<<13)+(((level-64)&0xfff)<<1)+1); | put_bits(ac_pb, 7+2+1+6+1+12+1, (3<<23)+(3<<21)+(0<<20)+(run<<14)+(1<<13)+(((level-64)&0xfff)<<1)+1); | ||||
| } | } | ||||
| last_non_zero = i; | |||||
| } | |||||
| last_non_zero = i; | |||||
| } | |||||
| } | } | ||||
| /*if(i<=last_index)*/{ | /*if(i<=last_index)*/{ | ||||
| int level = block[ scan_table[i] ]; | |||||
| int level = block[ scan_table[i] ]; | |||||
| int run = i - last_non_zero - 1; | int run = i - last_non_zero - 1; | ||||
| level+=64; | level+=64; | ||||
| if((level&(~127)) == 0){ | if((level&(~127)) == 0){ | ||||
| @@ -2734,17 +2734,17 @@ static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n | |||||
| } | } | ||||
| #else | #else | ||||
| for (; i <= last_index; i++) { | for (; i <= last_index; i++) { | ||||
| const int slevel = block[ scan_table[i] ]; | |||||
| if (slevel) { | |||||
| const int slevel = block[ scan_table[i] ]; | |||||
| if (slevel) { | |||||
| int level; | int level; | ||||
| int run = i - last_non_zero - 1; | |||||
| last = (i == last_index); | |||||
| sign = 0; | |||||
| level = slevel; | |||||
| if (level < 0) { | |||||
| sign = 1; | |||||
| level = -level; | |||||
| } | |||||
| int run = i - last_non_zero - 1; | |||||
| last = (i == last_index); | |||||
| sign = 0; | |||||
| level = slevel; | |||||
| if (level < 0) { | |||||
| sign = 1; | |||||
| level = -level; | |||||
| } | |||||
| code = get_rl_index(rl, last, run, level); | code = get_rl_index(rl, last, run, level); | ||||
| put_bits(ac_pb, rl->table_vlc[code][1], rl->table_vlc[code][0]); | put_bits(ac_pb, rl->table_vlc[code][1], rl->table_vlc[code][0]); | ||||
| if (code == rl->n) { | if (code == rl->n) { | ||||
| @@ -2786,8 +2786,8 @@ static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n | |||||
| } else { | } else { | ||||
| put_bits(ac_pb, 1, sign); | put_bits(ac_pb, 1, sign); | ||||
| } | } | ||||
| last_non_zero = i; | |||||
| } | |||||
| last_non_zero = i; | |||||
| } | |||||
| } | } | ||||
| #endif | #endif | ||||
| } | } | ||||
| @@ -2802,15 +2802,15 @@ static int mpeg4_get_block_length(MpegEncContext * s, DCTELEM * block, int n, in | |||||
| int len=0; | int len=0; | ||||
| if (s->mb_intra) { //Note gcc (3.2.1 at least) will optimize this away | if (s->mb_intra) { //Note gcc (3.2.1 at least) will optimize this away | ||||
| /* mpeg4 based DC predictor */ | |||||
| len += mpeg4_get_dc_length(intra_dc, n); | |||||
| /* mpeg4 based DC predictor */ | |||||
| len += mpeg4_get_dc_length(intra_dc, n); | |||||
| if(last_index<1) return len; | if(last_index<1) return len; | ||||
| i = 1; | |||||
| i = 1; | |||||
| rl = &rl_intra; | rl = &rl_intra; | ||||
| len_tab = uni_mpeg4_intra_rl_len; | len_tab = uni_mpeg4_intra_rl_len; | ||||
| } else { | } else { | ||||
| if(last_index<0) return 0; | if(last_index<0) return 0; | ||||
| i = 0; | |||||
| i = 0; | |||||
| rl = &rl_inter; | rl = &rl_inter; | ||||
| len_tab = uni_mpeg4_inter_rl_len; | len_tab = uni_mpeg4_inter_rl_len; | ||||
| } | } | ||||
| @@ -2818,9 +2818,9 @@ static int mpeg4_get_block_length(MpegEncContext * s, DCTELEM * block, int n, in | |||||
| /* AC coefs */ | /* AC coefs */ | ||||
| last_non_zero = i - 1; | last_non_zero = i - 1; | ||||
| for (; i < last_index; i++) { | for (; i < last_index; i++) { | ||||
| int level = block[ scan_table[i] ]; | |||||
| if (level) { | |||||
| int run = i - last_non_zero - 1; | |||||
| int level = block[ scan_table[i] ]; | |||||
| if (level) { | |||||
| int run = i - last_non_zero - 1; | |||||
| level+=64; | level+=64; | ||||
| if((level&(~127)) == 0){ | if((level&(~127)) == 0){ | ||||
| const int index= UNI_MPEG4_ENC_INDEX(0, run, level); | const int index= UNI_MPEG4_ENC_INDEX(0, run, level); | ||||
| @@ -2828,11 +2828,11 @@ static int mpeg4_get_block_length(MpegEncContext * s, DCTELEM * block, int n, in | |||||
| }else{ //ESC3 | }else{ //ESC3 | ||||
| len += 7+2+1+6+1+12+1; | len += 7+2+1+6+1+12+1; | ||||
| } | } | ||||
| last_non_zero = i; | |||||
| } | |||||
| last_non_zero = i; | |||||
| } | |||||
| } | } | ||||
| /*if(i<=last_index)*/{ | /*if(i<=last_index)*/{ | ||||
| int level = block[ scan_table[i] ]; | |||||
| int level = block[ scan_table[i] ]; | |||||
| int run = i - last_non_zero - 1; | int run = i - last_non_zero - 1; | ||||
| level+=64; | level+=64; | ||||
| if((level&(~127)) == 0){ | if((level&(~127)) == 0){ | ||||
| @@ -3251,7 +3251,7 @@ static int mpeg4_decode_video_packet_header(MpegEncContext *s) | |||||
| //FIXME reduced res stuff here | //FIXME reduced res stuff here | ||||
| if (s->pict_type != I_TYPE) { | if (s->pict_type != I_TYPE) { | ||||
| int f_code = get_bits(&s->gb, 3); /* fcode_for */ | |||||
| int f_code = get_bits(&s->gb, 3); /* fcode_for */ | |||||
| if(f_code==0){ | if(f_code==0){ | ||||
| av_log(s->avctx, AV_LOG_ERROR, "Error, video packet header damaged (f_code=0)\n"); | av_log(s->avctx, AV_LOG_ERROR, "Error, video packet header damaged (f_code=0)\n"); | ||||
| } | } | ||||
| @@ -4741,7 +4741,7 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block, | |||||
| if(intra) { | if(intra) { | ||||
| if(s->qscale < s->intra_dc_threshold){ | if(s->qscale < s->intra_dc_threshold){ | ||||
| /* DC coef */ | |||||
| /* DC coef */ | |||||
| if(s->partitioned_frame){ | if(s->partitioned_frame){ | ||||
| level = s->dc_val[0][ s->block_index[n] ]; | level = s->dc_val[0][ s->block_index[n] ]; | ||||
| if(n<4) level= FASTDIV((level + (s->y_dc_scale>>1)), s->y_dc_scale); | if(n<4) level= FASTDIV((level + (s->y_dc_scale>>1)), s->y_dc_scale); | ||||
| @@ -4898,7 +4898,7 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block, | |||||
| } | } | ||||
| } | } | ||||
| #endif | #endif | ||||
| if (level>0) level= level * qmul + qadd; | |||||
| if (level>0) level= level * qmul + qadd; | |||||
| else level= level * qmul - qadd; | else level= level * qmul - qadd; | ||||
| if((unsigned)(level + 2048) > 4095){ | if((unsigned)(level + 2048) > 4095){ | ||||
| @@ -5014,18 +5014,18 @@ int h263_decode_picture_header(MpegEncContext *s) | |||||
| } | } | ||||
| if (get_bits1(&s->gb) != 0) { | if (get_bits1(&s->gb) != 0) { | ||||
| av_log(s->avctx, AV_LOG_ERROR, "Bad H263 id\n"); | av_log(s->avctx, AV_LOG_ERROR, "Bad H263 id\n"); | ||||
| return -1; /* h263 id */ | |||||
| return -1; /* h263 id */ | |||||
| } | } | ||||
| skip_bits1(&s->gb); /* split screen off */ | |||||
| skip_bits1(&s->gb); /* camera off */ | |||||
| skip_bits1(&s->gb); /* freeze picture release off */ | |||||
| skip_bits1(&s->gb); /* split screen off */ | |||||
| skip_bits1(&s->gb); /* camera off */ | |||||
| skip_bits1(&s->gb); /* freeze picture release off */ | |||||
| format = get_bits(&s->gb, 3); | format = get_bits(&s->gb, 3); | ||||
| /* | /* | ||||
| 0 forbidden | 0 forbidden | ||||
| 1 sub-QCIF | 1 sub-QCIF | ||||
| 10 QCIF | 10 QCIF | ||||
| 7 extended PTYPE (PLUSPTYPE) | |||||
| 7 extended PTYPE (PLUSPTYPE) | |||||
| */ | */ | ||||
| if (format != 7 && format != 6) { | if (format != 7 && format != 6) { | ||||
| @@ -5042,17 +5042,17 @@ int h263_decode_picture_header(MpegEncContext *s) | |||||
| if (get_bits1(&s->gb) != 0) { | if (get_bits1(&s->gb) != 0) { | ||||
| av_log(s->avctx, AV_LOG_ERROR, "H263 SAC not supported\n"); | av_log(s->avctx, AV_LOG_ERROR, "H263 SAC not supported\n"); | ||||
| return -1; /* SAC: off */ | |||||
| return -1; /* SAC: off */ | |||||
| } | } | ||||
| s->obmc= get_bits1(&s->gb); /* Advanced prediction mode */ | s->obmc= get_bits1(&s->gb); /* Advanced prediction mode */ | ||||
| s->unrestricted_mv = s->h263_long_vectors || s->obmc; | s->unrestricted_mv = s->h263_long_vectors || s->obmc; | ||||
| if (get_bits1(&s->gb) != 0) { | if (get_bits1(&s->gb) != 0) { | ||||
| av_log(s->avctx, AV_LOG_ERROR, "H263 PB frame not supported\n"); | av_log(s->avctx, AV_LOG_ERROR, "H263 PB frame not supported\n"); | ||||
| return -1; /* not PB frame */ | |||||
| return -1; /* not PB frame */ | |||||
| } | } | ||||
| s->chroma_qscale= s->qscale = get_bits(&s->gb, 5); | s->chroma_qscale= s->qscale = get_bits(&s->gb, 5); | ||||
| skip_bits1(&s->gb); /* Continuous Presence Multipoint mode: off */ | |||||
| skip_bits1(&s->gb); /* Continuous Presence Multipoint mode: off */ | |||||
| s->width = width; | s->width = width; | ||||
| s->height = height; | s->height = height; | ||||
| @@ -5511,17 +5511,17 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){ | |||||
| } | } | ||||
| s->low_delay= get_bits1(gb); | s->low_delay= get_bits1(gb); | ||||
| if(get_bits1(gb)){ /* vbv parameters */ | if(get_bits1(gb)){ /* vbv parameters */ | ||||
| get_bits(gb, 15); /* first_half_bitrate */ | |||||
| skip_bits1(gb); /* marker */ | |||||
| get_bits(gb, 15); /* latter_half_bitrate */ | |||||
| skip_bits1(gb); /* marker */ | |||||
| get_bits(gb, 15); /* first_half_vbv_buffer_size */ | |||||
| skip_bits1(gb); /* marker */ | |||||
| get_bits(gb, 3); /* latter_half_vbv_buffer_size */ | |||||
| get_bits(gb, 11); /* first_half_vbv_occupancy */ | |||||
| skip_bits1(gb); /* marker */ | |||||
| get_bits(gb, 15); /* latter_half_vbv_occupancy */ | |||||
| skip_bits1(gb); /* marker */ | |||||
| get_bits(gb, 15); /* first_half_bitrate */ | |||||
| skip_bits1(gb); /* marker */ | |||||
| get_bits(gb, 15); /* latter_half_bitrate */ | |||||
| skip_bits1(gb); /* marker */ | |||||
| get_bits(gb, 15); /* first_half_vbv_buffer_size */ | |||||
| skip_bits1(gb); /* marker */ | |||||
| get_bits(gb, 3); /* latter_half_vbv_buffer_size */ | |||||
| get_bits(gb, 11); /* first_half_vbv_occupancy */ | |||||
| skip_bits1(gb); /* marker */ | |||||
| get_bits(gb, 15); /* latter_half_vbv_occupancy */ | |||||
| skip_bits1(gb); /* marker */ | |||||
| } | } | ||||
| }else{ | }else{ | ||||
| // set low delay flag only once the smartest? low delay detection won't be overriden | // set low delay flag only once the smartest? low delay detection won't be overriden | ||||
| @@ -5628,7 +5628,7 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){ | |||||
| /* load custom intra matrix */ | /* load custom intra matrix */ | ||||
| if(get_bits1(gb)){ | if(get_bits1(gb)){ | ||||
| int last=0; | int last=0; | ||||
| for(i=0; i<64; i++){ | |||||
| for(i=0; i<64; i++){ | |||||
| int j; | int j; | ||||
| v= get_bits(gb, 8); | v= get_bits(gb, 8); | ||||
| if(v==0) break; | if(v==0) break; | ||||
| @@ -5641,7 +5641,7 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){ | |||||
| /* replicate last value */ | /* replicate last value */ | ||||
| for(; i<64; i++){ | for(; i<64; i++){ | ||||
| int j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ]; | |||||
| int j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ]; | |||||
| s->intra_matrix[j]= last; | s->intra_matrix[j]= last; | ||||
| s->chroma_intra_matrix[j]= last; | s->chroma_intra_matrix[j]= last; | ||||
| } | } | ||||
| @@ -5650,7 +5650,7 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){ | |||||
| /* load custom non intra matrix */ | /* load custom non intra matrix */ | ||||
| if(get_bits1(gb)){ | if(get_bits1(gb)){ | ||||
| int last=0; | int last=0; | ||||
| for(i=0; i<64; i++){ | |||||
| for(i=0; i<64; i++){ | |||||
| int j; | int j; | ||||
| v= get_bits(gb, 8); | v= get_bits(gb, 8); | ||||
| if(v==0) break; | if(v==0) break; | ||||
| @@ -5663,7 +5663,7 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){ | |||||
| /* replicate last value */ | /* replicate last value */ | ||||
| for(; i<64; i++){ | for(; i<64; i++){ | ||||
| int j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ]; | |||||
| int j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ]; | |||||
| s->inter_matrix[j]= last; | s->inter_matrix[j]= last; | ||||
| s->chroma_inter_matrix[j]= last; | s->chroma_inter_matrix[j]= last; | ||||
| } | } | ||||
| @@ -5794,7 +5794,7 @@ static int decode_user_data(MpegEncContext *s, GetBitContext *gb){ | |||||
| static int decode_vop_header(MpegEncContext *s, GetBitContext *gb){ | static int decode_vop_header(MpegEncContext *s, GetBitContext *gb){ | ||||
| int time_incr, time_increment; | int time_incr, time_increment; | ||||
| s->pict_type = get_bits(gb, 2) + I_TYPE; /* pict type: I = 0 , P = 1 */ | |||||
| s->pict_type = get_bits(gb, 2) + I_TYPE; /* pict type: I = 0 , P = 1 */ | |||||
| if(s->pict_type==B_TYPE && s->low_delay && s->vol_control_parameters==0 && !(s->flags & CODEC_FLAG_LOW_DELAY)){ | if(s->pict_type==B_TYPE && s->low_delay && s->vol_control_parameters==0 && !(s->flags & CODEC_FLAG_LOW_DELAY)){ | ||||
| av_log(s->avctx, AV_LOG_ERROR, "low_delay flag incorrectly, clearing it\n"); | av_log(s->avctx, AV_LOG_ERROR, "low_delay flag incorrectly, clearing it\n"); | ||||
| s->low_delay=0; | s->low_delay=0; | ||||
| @@ -5877,9 +5877,9 @@ static int decode_vop_header(MpegEncContext *s, GetBitContext *gb){ | |||||
| if (s->shape != BIN_ONLY_SHAPE && ( s->pict_type == P_TYPE | if (s->shape != BIN_ONLY_SHAPE && ( s->pict_type == P_TYPE | ||||
| || (s->pict_type == S_TYPE && s->vol_sprite_usage==GMC_SPRITE))) { | || (s->pict_type == S_TYPE && s->vol_sprite_usage==GMC_SPRITE))) { | ||||
| /* rounding type for motion estimation */ | /* rounding type for motion estimation */ | ||||
| s->no_rounding = get_bits1(gb); | |||||
| s->no_rounding = get_bits1(gb); | |||||
| } else { | } else { | ||||
| s->no_rounding = 0; | |||||
| s->no_rounding = 0; | |||||
| } | } | ||||
| //FIXME reduced res stuff | //FIXME reduced res stuff | ||||
| @@ -5938,7 +5938,7 @@ static int decode_vop_header(MpegEncContext *s, GetBitContext *gb){ | |||||
| } | } | ||||
| if (s->pict_type != I_TYPE) { | if (s->pict_type != I_TYPE) { | ||||
| s->f_code = get_bits(gb, 3); /* fcode_for */ | |||||
| s->f_code = get_bits(gb, 3); /* fcode_for */ | |||||
| if(s->f_code==0){ | if(s->f_code==0){ | ||||
| av_log(s->avctx, AV_LOG_ERROR, "Error, header damaged or not MPEG4 header (f_code=0)\n"); | av_log(s->avctx, AV_LOG_ERROR, "Error, header damaged or not MPEG4 header (f_code=0)\n"); | ||||
| return -1; // makes no sense to continue, as the MV decoding will break very quickly | return -1; // makes no sense to continue, as the MV decoding will break very quickly | ||||
| @@ -6094,15 +6094,15 @@ int intel_h263_decode_picture_header(MpegEncContext *s) | |||||
| if (get_bits1(&s->gb) != 1) { | if (get_bits1(&s->gb) != 1) { | ||||
| av_log(s->avctx, AV_LOG_ERROR, "Bad marker\n"); | av_log(s->avctx, AV_LOG_ERROR, "Bad marker\n"); | ||||
| return -1; /* marker */ | |||||
| return -1; /* marker */ | |||||
| } | } | ||||
| if (get_bits1(&s->gb) != 0) { | if (get_bits1(&s->gb) != 0) { | ||||
| av_log(s->avctx, AV_LOG_ERROR, "Bad H263 id\n"); | av_log(s->avctx, AV_LOG_ERROR, "Bad H263 id\n"); | ||||
| return -1; /* h263 id */ | |||||
| return -1; /* h263 id */ | |||||
| } | } | ||||
| skip_bits1(&s->gb); /* split screen off */ | |||||
| skip_bits1(&s->gb); /* camera off */ | |||||
| skip_bits1(&s->gb); /* freeze picture release off */ | |||||
| skip_bits1(&s->gb); /* split screen off */ | |||||
| skip_bits1(&s->gb); /* camera off */ | |||||
| skip_bits1(&s->gb); /* freeze picture release off */ | |||||
| format = get_bits(&s->gb, 3); | format = get_bits(&s->gb, 3); | ||||
| if (format != 7) { | if (format != 7) { | ||||
| @@ -6118,23 +6118,23 @@ int intel_h263_decode_picture_header(MpegEncContext *s) | |||||
| if (get_bits1(&s->gb) != 0) { | if (get_bits1(&s->gb) != 0) { | ||||
| av_log(s->avctx, AV_LOG_ERROR, "SAC not supported\n"); | av_log(s->avctx, AV_LOG_ERROR, "SAC not supported\n"); | ||||
| return -1; /* SAC: off */ | |||||
| return -1; /* SAC: off */ | |||||
| } | } | ||||
| if (get_bits1(&s->gb) != 0) { | if (get_bits1(&s->gb) != 0) { | ||||
| s->obmc= 1; | s->obmc= 1; | ||||
| av_log(s->avctx, AV_LOG_ERROR, "Advanced Prediction Mode not supported\n"); | av_log(s->avctx, AV_LOG_ERROR, "Advanced Prediction Mode not supported\n"); | ||||
| // return -1; /* advanced prediction mode: off */ | |||||
| // return -1; /* advanced prediction mode: off */ | |||||
| } | } | ||||
| if (get_bits1(&s->gb) != 0) { | if (get_bits1(&s->gb) != 0) { | ||||
| av_log(s->avctx, AV_LOG_ERROR, "PB frame mode no supported\n"); | av_log(s->avctx, AV_LOG_ERROR, "PB frame mode no supported\n"); | ||||
| return -1; /* PB frame mode */ | |||||
| return -1; /* PB frame mode */ | |||||
| } | } | ||||
| /* skip unknown header garbage */ | /* skip unknown header garbage */ | ||||
| skip_bits(&s->gb, 41); | skip_bits(&s->gb, 41); | ||||
| s->chroma_qscale= s->qscale = get_bits(&s->gb, 5); | s->chroma_qscale= s->qscale = get_bits(&s->gb, 5); | ||||
| skip_bits1(&s->gb); /* Continuous Presence Multipoint mode: off */ | |||||
| skip_bits1(&s->gb); /* Continuous Presence Multipoint mode: off */ | |||||
| /* PEI */ | /* PEI */ | ||||
| while (get_bits1(&s->gb) != 0) { | while (get_bits1(&s->gb) != 0) { | ||||
| @@ -6208,7 +6208,7 @@ int flv_h263_decode_picture_header(MpegEncContext *s) | |||||
| if (s->dropable) | if (s->dropable) | ||||
| s->pict_type = P_TYPE; | s->pict_type = P_TYPE; | ||||
| skip_bits1(&s->gb); /* deblocking flag */ | |||||
| skip_bits1(&s->gb); /* deblocking flag */ | |||||
| s->chroma_qscale= s->qscale = get_bits(&s->gb, 5); | s->chroma_qscale= s->qscale = get_bits(&s->gb, 5); | ||||
| s->h263_plus = 0; | s->h263_plus = 0; | ||||
| @@ -147,15 +147,15 @@ typedef struct H264Context{ | |||||
| MpegEncContext s; | MpegEncContext s; | ||||
| int nal_ref_idc; | int nal_ref_idc; | ||||
| int nal_unit_type; | int nal_unit_type; | ||||
| #define NAL_SLICE 1 | |||||
| #define NAL_DPA 2 | |||||
| #define NAL_DPB 3 | |||||
| #define NAL_DPC 4 | |||||
| #define NAL_IDR_SLICE 5 | |||||
| #define NAL_SEI 6 | |||||
| #define NAL_SPS 7 | |||||
| #define NAL_PPS 8 | |||||
| #define NAL_AUD 9 | |||||
| #define NAL_SLICE 1 | |||||
| #define NAL_DPA 2 | |||||
| #define NAL_DPB 3 | |||||
| #define NAL_DPC 4 | |||||
| #define NAL_IDR_SLICE 5 | |||||
| #define NAL_SEI 6 | |||||
| #define NAL_SPS 7 | |||||
| #define NAL_PPS 8 | |||||
| #define NAL_AUD 9 | |||||
| #define NAL_END_SEQUENCE 10 | #define NAL_END_SEQUENCE 10 | ||||
| #define NAL_END_STREAM 11 | #define NAL_END_STREAM 11 | ||||
| #define NAL_FILLER_DATA 12 | #define NAL_FILLER_DATA 12 | ||||
| @@ -1461,7 +1461,7 @@ static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *c | |||||
| int i, si, di; | int i, si, di; | ||||
| uint8_t *dst; | uint8_t *dst; | ||||
| // src[0]&0x80; //forbidden bit | |||||
| // src[0]&0x80; //forbidden bit | |||||
| h->nal_ref_idc= src[0]>>5; | h->nal_ref_idc= src[0]>>5; | ||||
| h->nal_unit_type= src[0]&0x1F; | h->nal_unit_type= src[0]&0x1F; | ||||
| @@ -7545,8 +7545,8 @@ static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){ | |||||
| case NAL_SPS_EXT: | case NAL_SPS_EXT: | ||||
| case NAL_AUXILIARY_SLICE: | case NAL_AUXILIARY_SLICE: | ||||
| break; | break; | ||||
| default: | |||||
| av_log(avctx, AV_LOG_ERROR, "Unknown NAL code: %d\n", h->nal_unit_type); | |||||
| default: | |||||
| av_log(avctx, AV_LOG_ERROR, "Unknown NAL code: %d\n", h->nal_unit_type); | |||||
| } | } | ||||
| } | } | ||||
| @@ -15,7 +15,7 @@ | |||||
| /* ebx saving is necessary for PIC. gcc seems unable to see it alone */ | /* ebx saving is necessary for PIC. gcc seems unable to see it alone */ | ||||
| #define cpuid(index,eax,ebx,ecx,edx)\ | #define cpuid(index,eax,ebx,ecx,edx)\ | ||||
| __asm __volatile\ | __asm __volatile\ | ||||
| ("mov %%"REG_b", %%"REG_S"\n\t"\ | |||||
| ("mov %%"REG_b", %%"REG_S"\n\t"\ | |||||
| "cpuid\n\t"\ | "cpuid\n\t"\ | ||||
| "xchg %%"REG_b", %%"REG_S\ | "xchg %%"REG_b", %%"REG_S\ | ||||
| : "=a" (eax), "=S" (ebx),\ | : "=a" (eax), "=S" (ebx),\ | ||||
| @@ -89,8 +89,8 @@ int mm_support(void) | |||||
| edx == 0x48727561 && | edx == 0x48727561 && | ||||
| ecx == 0x736c7561) { /* "CentaurHauls" */ | ecx == 0x736c7561) { /* "CentaurHauls" */ | ||||
| /* VIA C3 */ | /* VIA C3 */ | ||||
| if(ext_caps & (1<<24)) | |||||
| rval |= MM_MMXEXT; | |||||
| if(ext_caps & (1<<24)) | |||||
| rval |= MM_MMXEXT; | |||||
| } else if (ebx == 0x69727943 && | } else if (ebx == 0x69727943 && | ||||
| edx == 0x736e4978 && | edx == 0x736e4978 && | ||||
| ecx == 0x64616574) { | ecx == 0x64616574) { | ||||
| @@ -27,206 +27,206 @@ static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line | |||||
| { | { | ||||
| MOVQ_BFE(mm6); | MOVQ_BFE(mm6); | ||||
| __asm __volatile( | __asm __volatile( | ||||
| "lea (%3, %3), %%"REG_a" \n\t" | |||||
| ".balign 8 \n\t" | |||||
| "1: \n\t" | |||||
| "movq (%1), %%mm0 \n\t" | |||||
| "movq 1(%1), %%mm1 \n\t" | |||||
| "movq (%1, %3), %%mm2 \n\t" | |||||
| "movq 1(%1, %3), %%mm3 \n\t" | |||||
| PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |||||
| "movq %%mm4, (%2) \n\t" | |||||
| "movq %%mm5, (%2, %3) \n\t" | |||||
| "add %%"REG_a", %1 \n\t" | |||||
| "add %%"REG_a", %2 \n\t" | |||||
| "movq (%1), %%mm0 \n\t" | |||||
| "movq 1(%1), %%mm1 \n\t" | |||||
| "movq (%1, %3), %%mm2 \n\t" | |||||
| "movq 1(%1, %3), %%mm3 \n\t" | |||||
| PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |||||
| "movq %%mm4, (%2) \n\t" | |||||
| "movq %%mm5, (%2, %3) \n\t" | |||||
| "add %%"REG_a", %1 \n\t" | |||||
| "add %%"REG_a", %2 \n\t" | |||||
| "subl $4, %0 \n\t" | |||||
| "jnz 1b \n\t" | |||||
| :"+g"(h), "+S"(pixels), "+D"(block) | |||||
| :"r"((long)line_size) | |||||
| :REG_a, "memory"); | |||||
| "lea (%3, %3), %%"REG_a" \n\t" | |||||
| ".balign 8 \n\t" | |||||
| "1: \n\t" | |||||
| "movq (%1), %%mm0 \n\t" | |||||
| "movq 1(%1), %%mm1 \n\t" | |||||
| "movq (%1, %3), %%mm2 \n\t" | |||||
| "movq 1(%1, %3), %%mm3 \n\t" | |||||
| PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |||||
| "movq %%mm4, (%2) \n\t" | |||||
| "movq %%mm5, (%2, %3) \n\t" | |||||
| "add %%"REG_a", %1 \n\t" | |||||
| "add %%"REG_a", %2 \n\t" | |||||
| "movq (%1), %%mm0 \n\t" | |||||
| "movq 1(%1), %%mm1 \n\t" | |||||
| "movq (%1, %3), %%mm2 \n\t" | |||||
| "movq 1(%1, %3), %%mm3 \n\t" | |||||
| PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |||||
| "movq %%mm4, (%2) \n\t" | |||||
| "movq %%mm5, (%2, %3) \n\t" | |||||
| "add %%"REG_a", %1 \n\t" | |||||
| "add %%"REG_a", %2 \n\t" | |||||
| "subl $4, %0 \n\t" | |||||
| "jnz 1b \n\t" | |||||
| :"+g"(h), "+S"(pixels), "+D"(block) | |||||
| :"r"((long)line_size) | |||||
| :REG_a, "memory"); | |||||
| } | } | ||||
| static void attribute_unused DEF(put, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) | static void attribute_unused DEF(put, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) | ||||
| { | { | ||||
| MOVQ_BFE(mm6); | MOVQ_BFE(mm6); | ||||
| __asm __volatile( | __asm __volatile( | ||||
| "testl $1, %0 \n\t" | |||||
| " jz 1f \n\t" | |||||
| "movq (%1), %%mm0 \n\t" | |||||
| "movq (%2), %%mm1 \n\t" | |||||
| "add %4, %1 \n\t" | |||||
| "add $8, %2 \n\t" | |||||
| PAVGB(%%mm0, %%mm1, %%mm4, %%mm6) | |||||
| "movq %%mm4, (%3) \n\t" | |||||
| "add %5, %3 \n\t" | |||||
| "decl %0 \n\t" | |||||
| ".balign 8 \n\t" | |||||
| "1: \n\t" | |||||
| "movq (%1), %%mm0 \n\t" | |||||
| "movq (%2), %%mm1 \n\t" | |||||
| "add %4, %1 \n\t" | |||||
| "movq (%1), %%mm2 \n\t" | |||||
| "movq 8(%2), %%mm3 \n\t" | |||||
| "add %4, %1 \n\t" | |||||
| PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |||||
| "movq %%mm4, (%3) \n\t" | |||||
| "add %5, %3 \n\t" | |||||
| "movq %%mm5, (%3) \n\t" | |||||
| "add %5, %3 \n\t" | |||||
| "movq (%1), %%mm0 \n\t" | |||||
| "movq 16(%2), %%mm1 \n\t" | |||||
| "add %4, %1 \n\t" | |||||
| "movq (%1), %%mm2 \n\t" | |||||
| "movq 24(%2), %%mm3 \n\t" | |||||
| "add %4, %1 \n\t" | |||||
| "add $32, %2 \n\t" | |||||
| PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |||||
| "movq %%mm4, (%3) \n\t" | |||||
| "add %5, %3 \n\t" | |||||
| "movq %%mm5, (%3) \n\t" | |||||
| "add %5, %3 \n\t" | |||||
| "subl $4, %0 \n\t" | |||||
| "jnz 1b \n\t" | |||||
| "testl $1, %0 \n\t" | |||||
| " jz 1f \n\t" | |||||
| "movq (%1), %%mm0 \n\t" | |||||
| "movq (%2), %%mm1 \n\t" | |||||
| "add %4, %1 \n\t" | |||||
| "add $8, %2 \n\t" | |||||
| PAVGB(%%mm0, %%mm1, %%mm4, %%mm6) | |||||
| "movq %%mm4, (%3) \n\t" | |||||
| "add %5, %3 \n\t" | |||||
| "decl %0 \n\t" | |||||
| ".balign 8 \n\t" | |||||
| "1: \n\t" | |||||
| "movq (%1), %%mm0 \n\t" | |||||
| "movq (%2), %%mm1 \n\t" | |||||
| "add %4, %1 \n\t" | |||||
| "movq (%1), %%mm2 \n\t" | |||||
| "movq 8(%2), %%mm3 \n\t" | |||||
| "add %4, %1 \n\t" | |||||
| PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |||||
| "movq %%mm4, (%3) \n\t" | |||||
| "add %5, %3 \n\t" | |||||
| "movq %%mm5, (%3) \n\t" | |||||
| "add %5, %3 \n\t" | |||||
| "movq (%1), %%mm0 \n\t" | |||||
| "movq 16(%2), %%mm1 \n\t" | |||||
| "add %4, %1 \n\t" | |||||
| "movq (%1), %%mm2 \n\t" | |||||
| "movq 24(%2), %%mm3 \n\t" | |||||
| "add %4, %1 \n\t" | |||||
| "add $32, %2 \n\t" | |||||
| PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |||||
| "movq %%mm4, (%3) \n\t" | |||||
| "add %5, %3 \n\t" | |||||
| "movq %%mm5, (%3) \n\t" | |||||
| "add %5, %3 \n\t" | |||||
| "subl $4, %0 \n\t" | |||||
| "jnz 1b \n\t" | |||||
| #ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used | #ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used | ||||
| :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst) | :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst) | ||||
| #else | #else | ||||
| :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst) | :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst) | ||||
| #endif | #endif | ||||
| :"S"((long)src1Stride), "D"((long)dstStride) | |||||
| :"memory"); | |||||
| :"S"((long)src1Stride), "D"((long)dstStride) | |||||
| :"memory"); | |||||
| } | } | ||||
| static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) | static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) | ||||
| { | { | ||||
| MOVQ_BFE(mm6); | MOVQ_BFE(mm6); | ||||
| __asm __volatile( | __asm __volatile( | ||||
| "lea (%3, %3), %%"REG_a" \n\t" | |||||
| ".balign 8 \n\t" | |||||
| "1: \n\t" | |||||
| "movq (%1), %%mm0 \n\t" | |||||
| "movq 1(%1), %%mm1 \n\t" | |||||
| "movq (%1, %3), %%mm2 \n\t" | |||||
| "movq 1(%1, %3), %%mm3 \n\t" | |||||
| PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |||||
| "movq %%mm4, (%2) \n\t" | |||||
| "movq %%mm5, (%2, %3) \n\t" | |||||
| "movq 8(%1), %%mm0 \n\t" | |||||
| "movq 9(%1), %%mm1 \n\t" | |||||
| "movq 8(%1, %3), %%mm2 \n\t" | |||||
| "movq 9(%1, %3), %%mm3 \n\t" | |||||
| PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |||||
| "movq %%mm4, 8(%2) \n\t" | |||||
| "movq %%mm5, 8(%2, %3) \n\t" | |||||
| "add %%"REG_a", %1 \n\t" | |||||
| "add %%"REG_a", %2 \n\t" | |||||
| "movq (%1), %%mm0 \n\t" | |||||
| "movq 1(%1), %%mm1 \n\t" | |||||
| "movq (%1, %3), %%mm2 \n\t" | |||||
| "movq 1(%1, %3), %%mm3 \n\t" | |||||
| PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |||||
| "movq %%mm4, (%2) \n\t" | |||||
| "movq %%mm5, (%2, %3) \n\t" | |||||
| "movq 8(%1), %%mm0 \n\t" | |||||
| "movq 9(%1), %%mm1 \n\t" | |||||
| "movq 8(%1, %3), %%mm2 \n\t" | |||||
| "movq 9(%1, %3), %%mm3 \n\t" | |||||
| PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |||||
| "movq %%mm4, 8(%2) \n\t" | |||||
| "movq %%mm5, 8(%2, %3) \n\t" | |||||
| "add %%"REG_a", %1 \n\t" | |||||
| "add %%"REG_a", %2 \n\t" | |||||
| "subl $4, %0 \n\t" | |||||
| "jnz 1b \n\t" | |||||
| :"+g"(h), "+S"(pixels), "+D"(block) | |||||
| :"r"((long)line_size) | |||||
| :REG_a, "memory"); | |||||
| "lea (%3, %3), %%"REG_a" \n\t" | |||||
| ".balign 8 \n\t" | |||||
| "1: \n\t" | |||||
| "movq (%1), %%mm0 \n\t" | |||||
| "movq 1(%1), %%mm1 \n\t" | |||||
| "movq (%1, %3), %%mm2 \n\t" | |||||
| "movq 1(%1, %3), %%mm3 \n\t" | |||||
| PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |||||
| "movq %%mm4, (%2) \n\t" | |||||
| "movq %%mm5, (%2, %3) \n\t" | |||||
| "movq 8(%1), %%mm0 \n\t" | |||||
| "movq 9(%1), %%mm1 \n\t" | |||||
| "movq 8(%1, %3), %%mm2 \n\t" | |||||
| "movq 9(%1, %3), %%mm3 \n\t" | |||||
| PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |||||
| "movq %%mm4, 8(%2) \n\t" | |||||
| "movq %%mm5, 8(%2, %3) \n\t" | |||||
| "add %%"REG_a", %1 \n\t" | |||||
| "add %%"REG_a", %2 \n\t" | |||||
| "movq (%1), %%mm0 \n\t" | |||||
| "movq 1(%1), %%mm1 \n\t" | |||||
| "movq (%1, %3), %%mm2 \n\t" | |||||
| "movq 1(%1, %3), %%mm3 \n\t" | |||||
| PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |||||
| "movq %%mm4, (%2) \n\t" | |||||
| "movq %%mm5, (%2, %3) \n\t" | |||||
| "movq 8(%1), %%mm0 \n\t" | |||||
| "movq 9(%1), %%mm1 \n\t" | |||||
| "movq 8(%1, %3), %%mm2 \n\t" | |||||
| "movq 9(%1, %3), %%mm3 \n\t" | |||||
| PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |||||
| "movq %%mm4, 8(%2) \n\t" | |||||
| "movq %%mm5, 8(%2, %3) \n\t" | |||||
| "add %%"REG_a", %1 \n\t" | |||||
| "add %%"REG_a", %2 \n\t" | |||||
| "subl $4, %0 \n\t" | |||||
| "jnz 1b \n\t" | |||||
| :"+g"(h), "+S"(pixels), "+D"(block) | |||||
| :"r"((long)line_size) | |||||
| :REG_a, "memory"); | |||||
| } | } | ||||
| static void attribute_unused DEF(put, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) | static void attribute_unused DEF(put, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) | ||||
| { | { | ||||
| MOVQ_BFE(mm6); | MOVQ_BFE(mm6); | ||||
| __asm __volatile( | __asm __volatile( | ||||
| "testl $1, %0 \n\t" | |||||
| " jz 1f \n\t" | |||||
| "movq (%1), %%mm0 \n\t" | |||||
| "movq (%2), %%mm1 \n\t" | |||||
| "movq 8(%1), %%mm2 \n\t" | |||||
| "movq 8(%2), %%mm3 \n\t" | |||||
| "add %4, %1 \n\t" | |||||
| "add $16, %2 \n\t" | |||||
| PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |||||
| "movq %%mm4, (%3) \n\t" | |||||
| "movq %%mm5, 8(%3) \n\t" | |||||
| "add %5, %3 \n\t" | |||||
| "decl %0 \n\t" | |||||
| ".balign 8 \n\t" | |||||
| "1: \n\t" | |||||
| "movq (%1), %%mm0 \n\t" | |||||
| "movq (%2), %%mm1 \n\t" | |||||
| "movq 8(%1), %%mm2 \n\t" | |||||
| "movq 8(%2), %%mm3 \n\t" | |||||
| "add %4, %1 \n\t" | |||||
| PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |||||
| "movq %%mm4, (%3) \n\t" | |||||
| "movq %%mm5, 8(%3) \n\t" | |||||
| "add %5, %3 \n\t" | |||||
| "movq (%1), %%mm0 \n\t" | |||||
| "movq 16(%2), %%mm1 \n\t" | |||||
| "movq 8(%1), %%mm2 \n\t" | |||||
| "movq 24(%2), %%mm3 \n\t" | |||||
| "add %4, %1 \n\t" | |||||
| PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |||||
| "movq %%mm4, (%3) \n\t" | |||||
| "movq %%mm5, 8(%3) \n\t" | |||||
| "add %5, %3 \n\t" | |||||
| "add $32, %2 \n\t" | |||||
| "subl $2, %0 \n\t" | |||||
| "jnz 1b \n\t" | |||||
| "testl $1, %0 \n\t" | |||||
| " jz 1f \n\t" | |||||
| "movq (%1), %%mm0 \n\t" | |||||
| "movq (%2), %%mm1 \n\t" | |||||
| "movq 8(%1), %%mm2 \n\t" | |||||
| "movq 8(%2), %%mm3 \n\t" | |||||
| "add %4, %1 \n\t" | |||||
| "add $16, %2 \n\t" | |||||
| PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |||||
| "movq %%mm4, (%3) \n\t" | |||||
| "movq %%mm5, 8(%3) \n\t" | |||||
| "add %5, %3 \n\t" | |||||
| "decl %0 \n\t" | |||||
| ".balign 8 \n\t" | |||||
| "1: \n\t" | |||||
| "movq (%1), %%mm0 \n\t" | |||||
| "movq (%2), %%mm1 \n\t" | |||||
| "movq 8(%1), %%mm2 \n\t" | |||||
| "movq 8(%2), %%mm3 \n\t" | |||||
| "add %4, %1 \n\t" | |||||
| PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |||||
| "movq %%mm4, (%3) \n\t" | |||||
| "movq %%mm5, 8(%3) \n\t" | |||||
| "add %5, %3 \n\t" | |||||
| "movq (%1), %%mm0 \n\t" | |||||
| "movq 16(%2), %%mm1 \n\t" | |||||
| "movq 8(%1), %%mm2 \n\t" | |||||
| "movq 24(%2), %%mm3 \n\t" | |||||
| "add %4, %1 \n\t" | |||||
| PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |||||
| "movq %%mm4, (%3) \n\t" | |||||
| "movq %%mm5, 8(%3) \n\t" | |||||
| "add %5, %3 \n\t" | |||||
| "add $32, %2 \n\t" | |||||
| "subl $2, %0 \n\t" | |||||
| "jnz 1b \n\t" | |||||
| #ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used | #ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used | ||||
| :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst) | |||||
| :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst) | |||||
| #else | #else | ||||
| :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst) | |||||
| :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst) | |||||
| #endif | #endif | ||||
| :"S"((long)src1Stride), "D"((long)dstStride) | |||||
| :"memory"); | |||||
| :"S"((long)src1Stride), "D"((long)dstStride) | |||||
| :"memory"); | |||||
| } | } | ||||
| static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) | static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) | ||||
| { | { | ||||
| MOVQ_BFE(mm6); | MOVQ_BFE(mm6); | ||||
| __asm __volatile( | __asm __volatile( | ||||
| "lea (%3, %3), %%"REG_a" \n\t" | |||||
| "movq (%1), %%mm0 \n\t" | |||||
| ".balign 8 \n\t" | |||||
| "1: \n\t" | |||||
| "movq (%1, %3), %%mm1 \n\t" | |||||
| "movq (%1, %%"REG_a"),%%mm2 \n\t" | |||||
| PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5) | |||||
| "movq %%mm4, (%2) \n\t" | |||||
| "movq %%mm5, (%2, %3) \n\t" | |||||
| "add %%"REG_a", %1 \n\t" | |||||
| "add %%"REG_a", %2 \n\t" | |||||
| "movq (%1, %3), %%mm1 \n\t" | |||||
| "movq (%1, %%"REG_a"),%%mm0 \n\t" | |||||
| PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5) | |||||
| "movq %%mm4, (%2) \n\t" | |||||
| "movq %%mm5, (%2, %3) \n\t" | |||||
| "add %%"REG_a", %1 \n\t" | |||||
| "add %%"REG_a", %2 \n\t" | |||||
| "subl $4, %0 \n\t" | |||||
| "jnz 1b \n\t" | |||||
| :"+g"(h), "+S"(pixels), "+D"(block) | |||||
| :"r"((long)line_size) | |||||
| :REG_a, "memory"); | |||||
| "lea (%3, %3), %%"REG_a" \n\t" | |||||
| "movq (%1), %%mm0 \n\t" | |||||
| ".balign 8 \n\t" | |||||
| "1: \n\t" | |||||
| "movq (%1, %3), %%mm1 \n\t" | |||||
| "movq (%1, %%"REG_a"),%%mm2 \n\t" | |||||
| PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5) | |||||
| "movq %%mm4, (%2) \n\t" | |||||
| "movq %%mm5, (%2, %3) \n\t" | |||||
| "add %%"REG_a", %1 \n\t" | |||||
| "add %%"REG_a", %2 \n\t" | |||||
| "movq (%1, %3), %%mm1 \n\t" | |||||
| "movq (%1, %%"REG_a"),%%mm0 \n\t" | |||||
| PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5) | |||||
| "movq %%mm4, (%2) \n\t" | |||||
| "movq %%mm5, (%2, %3) \n\t" | |||||
| "add %%"REG_a", %1 \n\t" | |||||
| "add %%"REG_a", %2 \n\t" | |||||
| "subl $4, %0 \n\t" | |||||
| "jnz 1b \n\t" | |||||
| :"+g"(h), "+S"(pixels), "+D"(block) | |||||
| :"r"((long)line_size) | |||||
| :REG_a, "memory"); | |||||
| } | } | ||||
| static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) | static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) | ||||
| @@ -234,65 +234,65 @@ static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin | |||||
| MOVQ_ZERO(mm7); | MOVQ_ZERO(mm7); | ||||
| SET_RND(mm6); // =2 for rnd and =1 for no_rnd version | SET_RND(mm6); // =2 for rnd and =1 for no_rnd version | ||||
| __asm __volatile( | __asm __volatile( | ||||
| "movq (%1), %%mm0 \n\t" | |||||
| "movq 1(%1), %%mm4 \n\t" | |||||
| "movq %%mm0, %%mm1 \n\t" | |||||
| "movq %%mm4, %%mm5 \n\t" | |||||
| "punpcklbw %%mm7, %%mm0 \n\t" | |||||
| "punpcklbw %%mm7, %%mm4 \n\t" | |||||
| "punpckhbw %%mm7, %%mm1 \n\t" | |||||
| "punpckhbw %%mm7, %%mm5 \n\t" | |||||
| "paddusw %%mm0, %%mm4 \n\t" | |||||
| "paddusw %%mm1, %%mm5 \n\t" | |||||
| "xor %%"REG_a", %%"REG_a" \n\t" | |||||
| "add %3, %1 \n\t" | |||||
| ".balign 8 \n\t" | |||||
| "1: \n\t" | |||||
| "movq (%1, %%"REG_a"), %%mm0 \n\t" | |||||
| "movq 1(%1, %%"REG_a"), %%mm2 \n\t" | |||||
| "movq %%mm0, %%mm1 \n\t" | |||||
| "movq %%mm2, %%mm3 \n\t" | |||||
| "punpcklbw %%mm7, %%mm0 \n\t" | |||||
| "punpcklbw %%mm7, %%mm2 \n\t" | |||||
| "punpckhbw %%mm7, %%mm1 \n\t" | |||||
| "punpckhbw %%mm7, %%mm3 \n\t" | |||||
| "paddusw %%mm2, %%mm0 \n\t" | |||||
| "paddusw %%mm3, %%mm1 \n\t" | |||||
| "paddusw %%mm6, %%mm4 \n\t" | |||||
| "paddusw %%mm6, %%mm5 \n\t" | |||||
| "paddusw %%mm0, %%mm4 \n\t" | |||||
| "paddusw %%mm1, %%mm5 \n\t" | |||||
| "psrlw $2, %%mm4 \n\t" | |||||
| "psrlw $2, %%mm5 \n\t" | |||||
| "packuswb %%mm5, %%mm4 \n\t" | |||||
| "movq %%mm4, (%2, %%"REG_a") \n\t" | |||||
| "add %3, %%"REG_a" \n\t" | |||||
| "movq (%1), %%mm0 \n\t" | |||||
| "movq 1(%1), %%mm4 \n\t" | |||||
| "movq %%mm0, %%mm1 \n\t" | |||||
| "movq %%mm4, %%mm5 \n\t" | |||||
| "punpcklbw %%mm7, %%mm0 \n\t" | |||||
| "punpcklbw %%mm7, %%mm4 \n\t" | |||||
| "punpckhbw %%mm7, %%mm1 \n\t" | |||||
| "punpckhbw %%mm7, %%mm5 \n\t" | |||||
| "paddusw %%mm0, %%mm4 \n\t" | |||||
| "paddusw %%mm1, %%mm5 \n\t" | |||||
| "xor %%"REG_a", %%"REG_a" \n\t" | |||||
| "add %3, %1 \n\t" | |||||
| ".balign 8 \n\t" | |||||
| "1: \n\t" | |||||
| "movq (%1, %%"REG_a"), %%mm0 \n\t" | |||||
| "movq 1(%1, %%"REG_a"), %%mm2 \n\t" | |||||
| "movq %%mm0, %%mm1 \n\t" | |||||
| "movq %%mm2, %%mm3 \n\t" | |||||
| "punpcklbw %%mm7, %%mm0 \n\t" | |||||
| "punpcklbw %%mm7, %%mm2 \n\t" | |||||
| "punpckhbw %%mm7, %%mm1 \n\t" | |||||
| "punpckhbw %%mm7, %%mm3 \n\t" | |||||
| "paddusw %%mm2, %%mm0 \n\t" | |||||
| "paddusw %%mm3, %%mm1 \n\t" | |||||
| "paddusw %%mm6, %%mm4 \n\t" | |||||
| "paddusw %%mm6, %%mm5 \n\t" | |||||
| "paddusw %%mm0, %%mm4 \n\t" | |||||
| "paddusw %%mm1, %%mm5 \n\t" | |||||
| "psrlw $2, %%mm4 \n\t" | |||||
| "psrlw $2, %%mm5 \n\t" | |||||
| "packuswb %%mm5, %%mm4 \n\t" | |||||
| "movq %%mm4, (%2, %%"REG_a") \n\t" | |||||
| "add %3, %%"REG_a" \n\t" | |||||
| "movq (%1, %%"REG_a"), %%mm2 \n\t" // 0 <-> 2 1 <-> 3 | |||||
| "movq 1(%1, %%"REG_a"), %%mm4 \n\t" | |||||
| "movq %%mm2, %%mm3 \n\t" | |||||
| "movq %%mm4, %%mm5 \n\t" | |||||
| "punpcklbw %%mm7, %%mm2 \n\t" | |||||
| "punpcklbw %%mm7, %%mm4 \n\t" | |||||
| "punpckhbw %%mm7, %%mm3 \n\t" | |||||
| "punpckhbw %%mm7, %%mm5 \n\t" | |||||
| "paddusw %%mm2, %%mm4 \n\t" | |||||
| "paddusw %%mm3, %%mm5 \n\t" | |||||
| "paddusw %%mm6, %%mm0 \n\t" | |||||
| "paddusw %%mm6, %%mm1 \n\t" | |||||
| "paddusw %%mm4, %%mm0 \n\t" | |||||
| "paddusw %%mm5, %%mm1 \n\t" | |||||
| "psrlw $2, %%mm0 \n\t" | |||||
| "psrlw $2, %%mm1 \n\t" | |||||
| "packuswb %%mm1, %%mm0 \n\t" | |||||
| "movq %%mm0, (%2, %%"REG_a") \n\t" | |||||
| "add %3, %%"REG_a" \n\t" | |||||
| "movq (%1, %%"REG_a"), %%mm2 \n\t" // 0 <-> 2 1 <-> 3 | |||||
| "movq 1(%1, %%"REG_a"), %%mm4 \n\t" | |||||
| "movq %%mm2, %%mm3 \n\t" | |||||
| "movq %%mm4, %%mm5 \n\t" | |||||
| "punpcklbw %%mm7, %%mm2 \n\t" | |||||
| "punpcklbw %%mm7, %%mm4 \n\t" | |||||
| "punpckhbw %%mm7, %%mm3 \n\t" | |||||
| "punpckhbw %%mm7, %%mm5 \n\t" | |||||
| "paddusw %%mm2, %%mm4 \n\t" | |||||
| "paddusw %%mm3, %%mm5 \n\t" | |||||
| "paddusw %%mm6, %%mm0 \n\t" | |||||
| "paddusw %%mm6, %%mm1 \n\t" | |||||
| "paddusw %%mm4, %%mm0 \n\t" | |||||
| "paddusw %%mm5, %%mm1 \n\t" | |||||
| "psrlw $2, %%mm0 \n\t" | |||||
| "psrlw $2, %%mm1 \n\t" | |||||
| "packuswb %%mm1, %%mm0 \n\t" | |||||
| "movq %%mm0, (%2, %%"REG_a") \n\t" | |||||
| "add %3, %%"REG_a" \n\t" | |||||
| "subl $2, %0 \n\t" | |||||
| "jnz 1b \n\t" | |||||
| :"+g"(h), "+S"(pixels) | |||||
| :"D"(block), "r"((long)line_size) | |||||
| :REG_a, "memory"); | |||||
| "subl $2, %0 \n\t" | |||||
| "jnz 1b \n\t" | |||||
| :"+g"(h), "+S"(pixels) | |||||
| :"D"(block), "r"((long)line_size) | |||||
| :REG_a, "memory"); | |||||
| } | } | ||||
| // avg_pixels | // avg_pixels | ||||
| @@ -301,16 +301,16 @@ static void attribute_unused DEF(avg, pixels4)(uint8_t *block, const uint8_t *pi | |||||
| MOVQ_BFE(mm6); | MOVQ_BFE(mm6); | ||||
| JUMPALIGN(); | JUMPALIGN(); | ||||
| do { | do { | ||||
| __asm __volatile( | |||||
| "movd %0, %%mm0 \n\t" | |||||
| "movd %1, %%mm1 \n\t" | |||||
| PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |||||
| "movd %%mm2, %0 \n\t" | |||||
| :"+m"(*block) | |||||
| :"m"(*pixels) | |||||
| :"memory"); | |||||
| pixels += line_size; | |||||
| block += line_size; | |||||
| __asm __volatile( | |||||
| "movd %0, %%mm0 \n\t" | |||||
| "movd %1, %%mm1 \n\t" | |||||
| PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |||||
| "movd %%mm2, %0 \n\t" | |||||
| :"+m"(*block) | |||||
| :"m"(*pixels) | |||||
| :"memory"); | |||||
| pixels += line_size; | |||||
| block += line_size; | |||||
| } | } | ||||
| while (--h); | while (--h); | ||||
| } | } | ||||
| @@ -321,16 +321,16 @@ static void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, int line_si | |||||
| MOVQ_BFE(mm6); | MOVQ_BFE(mm6); | ||||
| JUMPALIGN(); | JUMPALIGN(); | ||||
| do { | do { | ||||
| __asm __volatile( | |||||
| "movq %0, %%mm0 \n\t" | |||||
| "movq %1, %%mm1 \n\t" | |||||
| PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |||||
| "movq %%mm2, %0 \n\t" | |||||
| :"+m"(*block) | |||||
| :"m"(*pixels) | |||||
| :"memory"); | |||||
| pixels += line_size; | |||||
| block += line_size; | |||||
| __asm __volatile( | |||||
| "movq %0, %%mm0 \n\t" | |||||
| "movq %1, %%mm1 \n\t" | |||||
| PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |||||
| "movq %%mm2, %0 \n\t" | |||||
| :"+m"(*block) | |||||
| :"m"(*pixels) | |||||
| :"memory"); | |||||
| pixels += line_size; | |||||
| block += line_size; | |||||
| } | } | ||||
| while (--h); | while (--h); | ||||
| } | } | ||||
| @@ -340,20 +340,20 @@ static void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, int line_s | |||||
| MOVQ_BFE(mm6); | MOVQ_BFE(mm6); | ||||
| JUMPALIGN(); | JUMPALIGN(); | ||||
| do { | do { | ||||
| __asm __volatile( | |||||
| "movq %0, %%mm0 \n\t" | |||||
| "movq %1, %%mm1 \n\t" | |||||
| PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |||||
| "movq %%mm2, %0 \n\t" | |||||
| "movq 8%0, %%mm0 \n\t" | |||||
| "movq 8%1, %%mm1 \n\t" | |||||
| PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |||||
| "movq %%mm2, 8%0 \n\t" | |||||
| :"+m"(*block) | |||||
| :"m"(*pixels) | |||||
| :"memory"); | |||||
| pixels += line_size; | |||||
| block += line_size; | |||||
| __asm __volatile( | |||||
| "movq %0, %%mm0 \n\t" | |||||
| "movq %1, %%mm1 \n\t" | |||||
| PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |||||
| "movq %%mm2, %0 \n\t" | |||||
| "movq 8%0, %%mm0 \n\t" | |||||
| "movq 8%1, %%mm1 \n\t" | |||||
| PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |||||
| "movq %%mm2, 8%0 \n\t" | |||||
| :"+m"(*block) | |||||
| :"m"(*pixels) | |||||
| :"memory"); | |||||
| pixels += line_size; | |||||
| block += line_size; | |||||
| } | } | ||||
| while (--h); | while (--h); | ||||
| } | } | ||||
| @@ -363,18 +363,18 @@ static void DEF(avg, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line | |||||
| MOVQ_BFE(mm6); | MOVQ_BFE(mm6); | ||||
| JUMPALIGN(); | JUMPALIGN(); | ||||
| do { | do { | ||||
| __asm __volatile( | |||||
| "movq %1, %%mm0 \n\t" | |||||
| "movq 1%1, %%mm1 \n\t" | |||||
| "movq %0, %%mm3 \n\t" | |||||
| PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |||||
| PAVGB(%%mm3, %%mm2, %%mm0, %%mm6) | |||||
| "movq %%mm0, %0 \n\t" | |||||
| :"+m"(*block) | |||||
| :"m"(*pixels) | |||||
| :"memory"); | |||||
| pixels += line_size; | |||||
| block += line_size; | |||||
| __asm __volatile( | |||||
| "movq %1, %%mm0 \n\t" | |||||
| "movq 1%1, %%mm1 \n\t" | |||||
| "movq %0, %%mm3 \n\t" | |||||
| PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |||||
| PAVGB(%%mm3, %%mm2, %%mm0, %%mm6) | |||||
| "movq %%mm0, %0 \n\t" | |||||
| :"+m"(*block) | |||||
| :"m"(*pixels) | |||||
| :"memory"); | |||||
| pixels += line_size; | |||||
| block += line_size; | |||||
| } while (--h); | } while (--h); | ||||
| } | } | ||||
| @@ -383,17 +383,17 @@ static __attribute__((unused)) void DEF(avg, pixels8_l2)(uint8_t *dst, uint8_t * | |||||
| MOVQ_BFE(mm6); | MOVQ_BFE(mm6); | ||||
| JUMPALIGN(); | JUMPALIGN(); | ||||
| do { | do { | ||||
| __asm __volatile( | |||||
| "movq %1, %%mm0 \n\t" | |||||
| "movq %2, %%mm1 \n\t" | |||||
| "movq %0, %%mm3 \n\t" | |||||
| PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |||||
| PAVGB(%%mm3, %%mm2, %%mm0, %%mm6) | |||||
| "movq %%mm0, %0 \n\t" | |||||
| :"+m"(*dst) | |||||
| :"m"(*src1), "m"(*src2) | |||||
| :"memory"); | |||||
| dst += dstStride; | |||||
| __asm __volatile( | |||||
| "movq %1, %%mm0 \n\t" | |||||
| "movq %2, %%mm1 \n\t" | |||||
| "movq %0, %%mm3 \n\t" | |||||
| PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |||||
| PAVGB(%%mm3, %%mm2, %%mm0, %%mm6) | |||||
| "movq %%mm0, %0 \n\t" | |||||
| :"+m"(*dst) | |||||
| :"m"(*src1), "m"(*src2) | |||||
| :"memory"); | |||||
| dst += dstStride; | |||||
| src1 += src1Stride; | src1 += src1Stride; | ||||
| src2 += 8; | src2 += 8; | ||||
| } while (--h); | } while (--h); | ||||
| @@ -404,24 +404,24 @@ static void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int lin | |||||
| MOVQ_BFE(mm6); | MOVQ_BFE(mm6); | ||||
| JUMPALIGN(); | JUMPALIGN(); | ||||
| do { | do { | ||||
| __asm __volatile( | |||||
| "movq %1, %%mm0 \n\t" | |||||
| "movq 1%1, %%mm1 \n\t" | |||||
| "movq %0, %%mm3 \n\t" | |||||
| PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |||||
| PAVGB(%%mm3, %%mm2, %%mm0, %%mm6) | |||||
| "movq %%mm0, %0 \n\t" | |||||
| "movq 8%1, %%mm0 \n\t" | |||||
| "movq 9%1, %%mm1 \n\t" | |||||
| "movq 8%0, %%mm3 \n\t" | |||||
| PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |||||
| PAVGB(%%mm3, %%mm2, %%mm0, %%mm6) | |||||
| "movq %%mm0, 8%0 \n\t" | |||||
| :"+m"(*block) | |||||
| :"m"(*pixels) | |||||
| :"memory"); | |||||
| pixels += line_size; | |||||
| block += line_size; | |||||
| __asm __volatile( | |||||
| "movq %1, %%mm0 \n\t" | |||||
| "movq 1%1, %%mm1 \n\t" | |||||
| "movq %0, %%mm3 \n\t" | |||||
| PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |||||
| PAVGB(%%mm3, %%mm2, %%mm0, %%mm6) | |||||
| "movq %%mm0, %0 \n\t" | |||||
| "movq 8%1, %%mm0 \n\t" | |||||
| "movq 9%1, %%mm1 \n\t" | |||||
| "movq 8%0, %%mm3 \n\t" | |||||
| PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |||||
| PAVGB(%%mm3, %%mm2, %%mm0, %%mm6) | |||||
| "movq %%mm0, 8%0 \n\t" | |||||
| :"+m"(*block) | |||||
| :"m"(*pixels) | |||||
| :"memory"); | |||||
| pixels += line_size; | |||||
| block += line_size; | |||||
| } while (--h); | } while (--h); | ||||
| } | } | ||||
| @@ -430,23 +430,23 @@ static __attribute__((unused)) void DEF(avg, pixels16_l2)(uint8_t *dst, uint8_t | |||||
| MOVQ_BFE(mm6); | MOVQ_BFE(mm6); | ||||
| JUMPALIGN(); | JUMPALIGN(); | ||||
| do { | do { | ||||
| __asm __volatile( | |||||
| "movq %1, %%mm0 \n\t" | |||||
| "movq %2, %%mm1 \n\t" | |||||
| "movq %0, %%mm3 \n\t" | |||||
| PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |||||
| PAVGB(%%mm3, %%mm2, %%mm0, %%mm6) | |||||
| "movq %%mm0, %0 \n\t" | |||||
| "movq 8%1, %%mm0 \n\t" | |||||
| "movq 8%2, %%mm1 \n\t" | |||||
| "movq 8%0, %%mm3 \n\t" | |||||
| PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |||||
| PAVGB(%%mm3, %%mm2, %%mm0, %%mm6) | |||||
| "movq %%mm0, 8%0 \n\t" | |||||
| :"+m"(*dst) | |||||
| :"m"(*src1), "m"(*src2) | |||||
| :"memory"); | |||||
| dst += dstStride; | |||||
| __asm __volatile( | |||||
| "movq %1, %%mm0 \n\t" | |||||
| "movq %2, %%mm1 \n\t" | |||||
| "movq %0, %%mm3 \n\t" | |||||
| PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |||||
| PAVGB(%%mm3, %%mm2, %%mm0, %%mm6) | |||||
| "movq %%mm0, %0 \n\t" | |||||
| "movq 8%1, %%mm0 \n\t" | |||||
| "movq 8%2, %%mm1 \n\t" | |||||
| "movq 8%0, %%mm3 \n\t" | |||||
| PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |||||
| PAVGB(%%mm3, %%mm2, %%mm0, %%mm6) | |||||
| "movq %%mm0, 8%0 \n\t" | |||||
| :"+m"(*dst) | |||||
| :"m"(*src1), "m"(*src2) | |||||
| :"memory"); | |||||
| dst += dstStride; | |||||
| src1 += src1Stride; | src1 += src1Stride; | ||||
| src2 += 16; | src2 += 16; | ||||
| } while (--h); | } while (--h); | ||||
| @@ -456,39 +456,39 @@ static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line | |||||
| { | { | ||||
| MOVQ_BFE(mm6); | MOVQ_BFE(mm6); | ||||
| __asm __volatile( | __asm __volatile( | ||||
| "lea (%3, %3), %%"REG_a" \n\t" | |||||
| "movq (%1), %%mm0 \n\t" | |||||
| ".balign 8 \n\t" | |||||
| "1: \n\t" | |||||
| "movq (%1, %3), %%mm1 \n\t" | |||||
| "movq (%1, %%"REG_a"), %%mm2 \n\t" | |||||
| PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5) | |||||
| "movq (%2), %%mm3 \n\t" | |||||
| PAVGB(%%mm3, %%mm4, %%mm0, %%mm6) | |||||
| "movq (%2, %3), %%mm3 \n\t" | |||||
| PAVGB(%%mm3, %%mm5, %%mm1, %%mm6) | |||||
| "movq %%mm0, (%2) \n\t" | |||||
| "movq %%mm1, (%2, %3) \n\t" | |||||
| "add %%"REG_a", %1 \n\t" | |||||
| "add %%"REG_a", %2 \n\t" | |||||
| "lea (%3, %3), %%"REG_a" \n\t" | |||||
| "movq (%1), %%mm0 \n\t" | |||||
| ".balign 8 \n\t" | |||||
| "1: \n\t" | |||||
| "movq (%1, %3), %%mm1 \n\t" | |||||
| "movq (%1, %%"REG_a"), %%mm2 \n\t" | |||||
| PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5) | |||||
| "movq (%2), %%mm3 \n\t" | |||||
| PAVGB(%%mm3, %%mm4, %%mm0, %%mm6) | |||||
| "movq (%2, %3), %%mm3 \n\t" | |||||
| PAVGB(%%mm3, %%mm5, %%mm1, %%mm6) | |||||
| "movq %%mm0, (%2) \n\t" | |||||
| "movq %%mm1, (%2, %3) \n\t" | |||||
| "add %%"REG_a", %1 \n\t" | |||||
| "add %%"REG_a", %2 \n\t" | |||||
| "movq (%1, %3), %%mm1 \n\t" | |||||
| "movq (%1, %%"REG_a"), %%mm0 \n\t" | |||||
| PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5) | |||||
| "movq (%2), %%mm3 \n\t" | |||||
| PAVGB(%%mm3, %%mm4, %%mm2, %%mm6) | |||||
| "movq (%2, %3), %%mm3 \n\t" | |||||
| PAVGB(%%mm3, %%mm5, %%mm1, %%mm6) | |||||
| "movq %%mm2, (%2) \n\t" | |||||
| "movq %%mm1, (%2, %3) \n\t" | |||||
| "add %%"REG_a", %1 \n\t" | |||||
| "add %%"REG_a", %2 \n\t" | |||||
| "movq (%1, %3), %%mm1 \n\t" | |||||
| "movq (%1, %%"REG_a"), %%mm0 \n\t" | |||||
| PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5) | |||||
| "movq (%2), %%mm3 \n\t" | |||||
| PAVGB(%%mm3, %%mm4, %%mm2, %%mm6) | |||||
| "movq (%2, %3), %%mm3 \n\t" | |||||
| PAVGB(%%mm3, %%mm5, %%mm1, %%mm6) | |||||
| "movq %%mm2, (%2) \n\t" | |||||
| "movq %%mm1, (%2, %3) \n\t" | |||||
| "add %%"REG_a", %1 \n\t" | |||||
| "add %%"REG_a", %2 \n\t" | |||||
| "subl $4, %0 \n\t" | |||||
| "jnz 1b \n\t" | |||||
| :"+g"(h), "+S"(pixels), "+D"(block) | |||||
| :"r"((long)line_size) | |||||
| :REG_a, "memory"); | |||||
| "subl $4, %0 \n\t" | |||||
| "jnz 1b \n\t" | |||||
| :"+g"(h), "+S"(pixels), "+D"(block) | |||||
| :"r"((long)line_size) | |||||
| :REG_a, "memory"); | |||||
| } | } | ||||
| // this routine is 'slightly' suboptimal but mostly unused | // this routine is 'slightly' suboptimal but mostly unused | ||||
| @@ -497,73 +497,73 @@ static void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin | |||||
| MOVQ_ZERO(mm7); | MOVQ_ZERO(mm7); | ||||
| SET_RND(mm6); // =2 for rnd and =1 for no_rnd version | SET_RND(mm6); // =2 for rnd and =1 for no_rnd version | ||||
| __asm __volatile( | __asm __volatile( | ||||
| "movq (%1), %%mm0 \n\t" | |||||
| "movq 1(%1), %%mm4 \n\t" | |||||
| "movq %%mm0, %%mm1 \n\t" | |||||
| "movq %%mm4, %%mm5 \n\t" | |||||
| "punpcklbw %%mm7, %%mm0 \n\t" | |||||
| "punpcklbw %%mm7, %%mm4 \n\t" | |||||
| "punpckhbw %%mm7, %%mm1 \n\t" | |||||
| "punpckhbw %%mm7, %%mm5 \n\t" | |||||
| "paddusw %%mm0, %%mm4 \n\t" | |||||
| "paddusw %%mm1, %%mm5 \n\t" | |||||
| "xor %%"REG_a", %%"REG_a" \n\t" | |||||
| "add %3, %1 \n\t" | |||||
| ".balign 8 \n\t" | |||||
| "1: \n\t" | |||||
| "movq (%1, %%"REG_a"), %%mm0 \n\t" | |||||
| "movq 1(%1, %%"REG_a"), %%mm2 \n\t" | |||||
| "movq %%mm0, %%mm1 \n\t" | |||||
| "movq %%mm2, %%mm3 \n\t" | |||||
| "punpcklbw %%mm7, %%mm0 \n\t" | |||||
| "punpcklbw %%mm7, %%mm2 \n\t" | |||||
| "punpckhbw %%mm7, %%mm1 \n\t" | |||||
| "punpckhbw %%mm7, %%mm3 \n\t" | |||||
| "paddusw %%mm2, %%mm0 \n\t" | |||||
| "paddusw %%mm3, %%mm1 \n\t" | |||||
| "paddusw %%mm6, %%mm4 \n\t" | |||||
| "paddusw %%mm6, %%mm5 \n\t" | |||||
| "paddusw %%mm0, %%mm4 \n\t" | |||||
| "paddusw %%mm1, %%mm5 \n\t" | |||||
| "psrlw $2, %%mm4 \n\t" | |||||
| "psrlw $2, %%mm5 \n\t" | |||||
| "movq (%2, %%"REG_a"), %%mm3 \n\t" | |||||
| "packuswb %%mm5, %%mm4 \n\t" | |||||
| "pcmpeqd %%mm2, %%mm2 \n\t" | |||||
| "paddb %%mm2, %%mm2 \n\t" | |||||
| PAVGB(%%mm3, %%mm4, %%mm5, %%mm2) | |||||
| "movq %%mm5, (%2, %%"REG_a") \n\t" | |||||
| "add %3, %%"REG_a" \n\t" | |||||
| "movq (%1), %%mm0 \n\t" | |||||
| "movq 1(%1), %%mm4 \n\t" | |||||
| "movq %%mm0, %%mm1 \n\t" | |||||
| "movq %%mm4, %%mm5 \n\t" | |||||
| "punpcklbw %%mm7, %%mm0 \n\t" | |||||
| "punpcklbw %%mm7, %%mm4 \n\t" | |||||
| "punpckhbw %%mm7, %%mm1 \n\t" | |||||
| "punpckhbw %%mm7, %%mm5 \n\t" | |||||
| "paddusw %%mm0, %%mm4 \n\t" | |||||
| "paddusw %%mm1, %%mm5 \n\t" | |||||
| "xor %%"REG_a", %%"REG_a" \n\t" | |||||
| "add %3, %1 \n\t" | |||||
| ".balign 8 \n\t" | |||||
| "1: \n\t" | |||||
| "movq (%1, %%"REG_a"), %%mm0 \n\t" | |||||
| "movq 1(%1, %%"REG_a"), %%mm2 \n\t" | |||||
| "movq %%mm0, %%mm1 \n\t" | |||||
| "movq %%mm2, %%mm3 \n\t" | |||||
| "punpcklbw %%mm7, %%mm0 \n\t" | |||||
| "punpcklbw %%mm7, %%mm2 \n\t" | |||||
| "punpckhbw %%mm7, %%mm1 \n\t" | |||||
| "punpckhbw %%mm7, %%mm3 \n\t" | |||||
| "paddusw %%mm2, %%mm0 \n\t" | |||||
| "paddusw %%mm3, %%mm1 \n\t" | |||||
| "paddusw %%mm6, %%mm4 \n\t" | |||||
| "paddusw %%mm6, %%mm5 \n\t" | |||||
| "paddusw %%mm0, %%mm4 \n\t" | |||||
| "paddusw %%mm1, %%mm5 \n\t" | |||||
| "psrlw $2, %%mm4 \n\t" | |||||
| "psrlw $2, %%mm5 \n\t" | |||||
| "movq (%2, %%"REG_a"), %%mm3 \n\t" | |||||
| "packuswb %%mm5, %%mm4 \n\t" | |||||
| "pcmpeqd %%mm2, %%mm2 \n\t" | |||||
| "paddb %%mm2, %%mm2 \n\t" | |||||
| PAVGB(%%mm3, %%mm4, %%mm5, %%mm2) | |||||
| "movq %%mm5, (%2, %%"REG_a") \n\t" | |||||
| "add %3, %%"REG_a" \n\t" | |||||
| "movq (%1, %%"REG_a"), %%mm2 \n\t" // 0 <-> 2 1 <-> 3 | |||||
| "movq 1(%1, %%"REG_a"), %%mm4 \n\t" | |||||
| "movq %%mm2, %%mm3 \n\t" | |||||
| "movq %%mm4, %%mm5 \n\t" | |||||
| "punpcklbw %%mm7, %%mm2 \n\t" | |||||
| "punpcklbw %%mm7, %%mm4 \n\t" | |||||
| "punpckhbw %%mm7, %%mm3 \n\t" | |||||
| "punpckhbw %%mm7, %%mm5 \n\t" | |||||
| "paddusw %%mm2, %%mm4 \n\t" | |||||
| "paddusw %%mm3, %%mm5 \n\t" | |||||
| "paddusw %%mm6, %%mm0 \n\t" | |||||
| "paddusw %%mm6, %%mm1 \n\t" | |||||
| "paddusw %%mm4, %%mm0 \n\t" | |||||
| "paddusw %%mm5, %%mm1 \n\t" | |||||
| "psrlw $2, %%mm0 \n\t" | |||||
| "psrlw $2, %%mm1 \n\t" | |||||
| "movq (%2, %%"REG_a"), %%mm3 \n\t" | |||||
| "packuswb %%mm1, %%mm0 \n\t" | |||||
| "pcmpeqd %%mm2, %%mm2 \n\t" | |||||
| "paddb %%mm2, %%mm2 \n\t" | |||||
| PAVGB(%%mm3, %%mm0, %%mm1, %%mm2) | |||||
| "movq %%mm1, (%2, %%"REG_a") \n\t" | |||||
| "add %3, %%"REG_a" \n\t" | |||||
| "movq (%1, %%"REG_a"), %%mm2 \n\t" // 0 <-> 2 1 <-> 3 | |||||
| "movq 1(%1, %%"REG_a"), %%mm4 \n\t" | |||||
| "movq %%mm2, %%mm3 \n\t" | |||||
| "movq %%mm4, %%mm5 \n\t" | |||||
| "punpcklbw %%mm7, %%mm2 \n\t" | |||||
| "punpcklbw %%mm7, %%mm4 \n\t" | |||||
| "punpckhbw %%mm7, %%mm3 \n\t" | |||||
| "punpckhbw %%mm7, %%mm5 \n\t" | |||||
| "paddusw %%mm2, %%mm4 \n\t" | |||||
| "paddusw %%mm3, %%mm5 \n\t" | |||||
| "paddusw %%mm6, %%mm0 \n\t" | |||||
| "paddusw %%mm6, %%mm1 \n\t" | |||||
| "paddusw %%mm4, %%mm0 \n\t" | |||||
| "paddusw %%mm5, %%mm1 \n\t" | |||||
| "psrlw $2, %%mm0 \n\t" | |||||
| "psrlw $2, %%mm1 \n\t" | |||||
| "movq (%2, %%"REG_a"), %%mm3 \n\t" | |||||
| "packuswb %%mm1, %%mm0 \n\t" | |||||
| "pcmpeqd %%mm2, %%mm2 \n\t" | |||||
| "paddb %%mm2, %%mm2 \n\t" | |||||
| PAVGB(%%mm3, %%mm0, %%mm1, %%mm2) | |||||
| "movq %%mm1, (%2, %%"REG_a") \n\t" | |||||
| "add %3, %%"REG_a" \n\t" | |||||
| "subl $2, %0 \n\t" | |||||
| "jnz 1b \n\t" | |||||
| :"+g"(h), "+S"(pixels) | |||||
| :"D"(block), "r"((long)line_size) | |||||
| :REG_a, "memory"); | |||||
| "subl $2, %0 \n\t" | |||||
| "jnz 1b \n\t" | |||||
| :"+g"(h), "+S"(pixels) | |||||
| :"D"(block), "r"((long)line_size) | |||||
| :REG_a, "memory"); | |||||
| } | } | ||||
| //FIXME optimize | //FIXME optimize | ||||
| @@ -30,21 +30,21 @@ | |||||
| // | // | ||||
| ////////////////////////////////////////////////////////////////////// | ////////////////////////////////////////////////////////////////////// | ||||
| #define BITS_FRW_ACC 3 //; 2 or 3 for accuracy | |||||
| #define SHIFT_FRW_COL BITS_FRW_ACC | |||||
| #define SHIFT_FRW_ROW (BITS_FRW_ACC + 17 - 3) | |||||
| #define RND_FRW_ROW (1 << (SHIFT_FRW_ROW-1)) | |||||
| //#define RND_FRW_COL (1 << (SHIFT_FRW_COL-1)) | |||||
| #define BITS_FRW_ACC 3 //; 2 or 3 for accuracy | |||||
| #define SHIFT_FRW_COL BITS_FRW_ACC | |||||
| #define SHIFT_FRW_ROW (BITS_FRW_ACC + 17 - 3) | |||||
| #define RND_FRW_ROW (1 << (SHIFT_FRW_ROW-1)) | |||||
| //#define RND_FRW_COL (1 << (SHIFT_FRW_COL-1)) | |||||
| //concatenated table, for forward DCT transformation | //concatenated table, for forward DCT transformation | ||||
| static const int16_t fdct_tg_all_16[] ATTR_ALIGN(8) = { | static const int16_t fdct_tg_all_16[] ATTR_ALIGN(8) = { | ||||
| 13036, 13036, 13036, 13036, // tg * (2<<16) + 0.5 | |||||
| 27146, 27146, 27146, 27146, // tg * (2<<16) + 0.5 | |||||
| -21746, -21746, -21746, -21746, // tg * (2<<16) + 0.5 | |||||
| 13036, 13036, 13036, 13036, // tg * (2<<16) + 0.5 | |||||
| 27146, 27146, 27146, 27146, // tg * (2<<16) + 0.5 | |||||
| -21746, -21746, -21746, -21746, // tg * (2<<16) + 0.5 | |||||
| }; | }; | ||||
| static const int16_t ocos_4_16[4] ATTR_ALIGN(8) = { | static const int16_t ocos_4_16[4] ATTR_ALIGN(8) = { | ||||
| 23170, 23170, 23170, 23170, //cos * (2<<15) + 0.5 | |||||
| 23170, 23170, 23170, 23170, //cos * (2<<15) + 0.5 | |||||
| }; | }; | ||||
| static const int64_t fdct_one_corr ATTR_ALIGN(8) = 0x0001000100010001LL; | static const int64_t fdct_one_corr ATTR_ALIGN(8) = 0x0001000100010001LL; | ||||
| @@ -351,62 +351,62 @@ static always_inline void fdct_col(const int16_t *in, int16_t *out, int offset) | |||||
| static always_inline void fdct_row_sse2(const int16_t *in, int16_t *out) | static always_inline void fdct_row_sse2(const int16_t *in, int16_t *out) | ||||
| { | { | ||||
| asm volatile( | asm volatile( | ||||
| ".macro FDCT_ROW_SSE2_H1 i t \n\t" | |||||
| "movq \\i(%0), %%xmm2 \n\t" | |||||
| "movq \\i+8(%0), %%xmm0 \n\t" | |||||
| "movdqa \\t+32(%1), %%xmm3 \n\t" | |||||
| "movdqa \\t+48(%1), %%xmm7 \n\t" | |||||
| "movdqa \\t(%1), %%xmm4 \n\t" | |||||
| "movdqa \\t+16(%1), %%xmm5 \n\t" | |||||
| ".endm \n\t" | |||||
| ".macro FDCT_ROW_SSE2_H2 i t \n\t" | |||||
| "movq \\i(%0), %%xmm2 \n\t" | |||||
| "movq \\i+8(%0), %%xmm0 \n\t" | |||||
| "movdqa \\t+32(%1), %%xmm3 \n\t" | |||||
| "movdqa \\t+48(%1), %%xmm7 \n\t" | |||||
| ".endm \n\t" | |||||
| ".macro FDCT_ROW_SSE2 i \n\t" | |||||
| "movq %%xmm2, %%xmm1 \n\t" | |||||
| "pshuflw $27, %%xmm0, %%xmm0 \n\t" | |||||
| "paddsw %%xmm0, %%xmm1 \n\t" | |||||
| "psubsw %%xmm0, %%xmm2 \n\t" | |||||
| "punpckldq %%xmm2, %%xmm1 \n\t" | |||||
| "pshufd $78, %%xmm1, %%xmm2 \n\t" | |||||
| "pmaddwd %%xmm2, %%xmm3 \n\t" | |||||
| "pmaddwd %%xmm1, %%xmm7 \n\t" | |||||
| "pmaddwd %%xmm5, %%xmm2 \n\t" | |||||
| "pmaddwd %%xmm4, %%xmm1 \n\t" | |||||
| "paddd %%xmm7, %%xmm3 \n\t" | |||||
| "paddd %%xmm2, %%xmm1 \n\t" | |||||
| "paddd %%xmm6, %%xmm3 \n\t" | |||||
| "paddd %%xmm6, %%xmm1 \n\t" | |||||
| "psrad %3, %%xmm3 \n\t" | |||||
| "psrad %3, %%xmm1 \n\t" | |||||
| "packssdw %%xmm3, %%xmm1 \n\t" | |||||
| "movdqa %%xmm1, \\i(%4) \n\t" | |||||
| ".endm \n\t" | |||||
| "movdqa (%2), %%xmm6 \n\t" | |||||
| "FDCT_ROW_SSE2_H1 0 0 \n\t" | |||||
| "FDCT_ROW_SSE2 0 \n\t" | |||||
| "FDCT_ROW_SSE2_H2 64 0 \n\t" | |||||
| "FDCT_ROW_SSE2 64 \n\t" | |||||
| "FDCT_ROW_SSE2_H1 16 64 \n\t" | |||||
| "FDCT_ROW_SSE2 16 \n\t" | |||||
| "FDCT_ROW_SSE2_H2 112 64 \n\t" | |||||
| "FDCT_ROW_SSE2 112 \n\t" | |||||
| "FDCT_ROW_SSE2_H1 32 128 \n\t" | |||||
| "FDCT_ROW_SSE2 32 \n\t" | |||||
| "FDCT_ROW_SSE2_H2 96 128 \n\t" | |||||
| "FDCT_ROW_SSE2 96 \n\t" | |||||
| "FDCT_ROW_SSE2_H1 48 192 \n\t" | |||||
| "FDCT_ROW_SSE2 48 \n\t" | |||||
| "FDCT_ROW_SSE2_H2 80 192 \n\t" | |||||
| "FDCT_ROW_SSE2 80 \n\t" | |||||
| : | |||||
| : "r" (in), "r" (tab_frw_01234567_sse2.tab_frw_01234567_sse2), "r" (fdct_r_row_sse2.fdct_r_row_sse2), "i" (SHIFT_FRW_ROW), "r" (out) | |||||
| ".macro FDCT_ROW_SSE2_H1 i t \n\t" | |||||
| "movq \\i(%0), %%xmm2 \n\t" | |||||
| "movq \\i+8(%0), %%xmm0 \n\t" | |||||
| "movdqa \\t+32(%1), %%xmm3 \n\t" | |||||
| "movdqa \\t+48(%1), %%xmm7 \n\t" | |||||
| "movdqa \\t(%1), %%xmm4 \n\t" | |||||
| "movdqa \\t+16(%1), %%xmm5 \n\t" | |||||
| ".endm \n\t" | |||||
| ".macro FDCT_ROW_SSE2_H2 i t \n\t" | |||||
| "movq \\i(%0), %%xmm2 \n\t" | |||||
| "movq \\i+8(%0), %%xmm0 \n\t" | |||||
| "movdqa \\t+32(%1), %%xmm3 \n\t" | |||||
| "movdqa \\t+48(%1), %%xmm7 \n\t" | |||||
| ".endm \n\t" | |||||
| ".macro FDCT_ROW_SSE2 i \n\t" | |||||
| "movq %%xmm2, %%xmm1 \n\t" | |||||
| "pshuflw $27, %%xmm0, %%xmm0 \n\t" | |||||
| "paddsw %%xmm0, %%xmm1 \n\t" | |||||
| "psubsw %%xmm0, %%xmm2 \n\t" | |||||
| "punpckldq %%xmm2, %%xmm1 \n\t" | |||||
| "pshufd $78, %%xmm1, %%xmm2 \n\t" | |||||
| "pmaddwd %%xmm2, %%xmm3 \n\t" | |||||
| "pmaddwd %%xmm1, %%xmm7 \n\t" | |||||
| "pmaddwd %%xmm5, %%xmm2 \n\t" | |||||
| "pmaddwd %%xmm4, %%xmm1 \n\t" | |||||
| "paddd %%xmm7, %%xmm3 \n\t" | |||||
| "paddd %%xmm2, %%xmm1 \n\t" | |||||
| "paddd %%xmm6, %%xmm3 \n\t" | |||||
| "paddd %%xmm6, %%xmm1 \n\t" | |||||
| "psrad %3, %%xmm3 \n\t" | |||||
| "psrad %3, %%xmm1 \n\t" | |||||
| "packssdw %%xmm3, %%xmm1 \n\t" | |||||
| "movdqa %%xmm1, \\i(%4) \n\t" | |||||
| ".endm \n\t" | |||||
| "movdqa (%2), %%xmm6 \n\t" | |||||
| "FDCT_ROW_SSE2_H1 0 0 \n\t" | |||||
| "FDCT_ROW_SSE2 0 \n\t" | |||||
| "FDCT_ROW_SSE2_H2 64 0 \n\t" | |||||
| "FDCT_ROW_SSE2 64 \n\t" | |||||
| "FDCT_ROW_SSE2_H1 16 64 \n\t" | |||||
| "FDCT_ROW_SSE2 16 \n\t" | |||||
| "FDCT_ROW_SSE2_H2 112 64 \n\t" | |||||
| "FDCT_ROW_SSE2 112 \n\t" | |||||
| "FDCT_ROW_SSE2_H1 32 128 \n\t" | |||||
| "FDCT_ROW_SSE2 32 \n\t" | |||||
| "FDCT_ROW_SSE2_H2 96 128 \n\t" | |||||
| "FDCT_ROW_SSE2 96 \n\t" | |||||
| "FDCT_ROW_SSE2_H1 48 192 \n\t" | |||||
| "FDCT_ROW_SSE2 48 \n\t" | |||||
| "FDCT_ROW_SSE2_H2 80 192 \n\t" | |||||
| "FDCT_ROW_SSE2 80 \n\t" | |||||
| : | |||||
| : "r" (in), "r" (tab_frw_01234567_sse2.tab_frw_01234567_sse2), "r" (fdct_r_row_sse2.fdct_r_row_sse2), "i" (SHIFT_FRW_ROW), "r" (out) | |||||
| ); | ); | ||||
| } | } | ||||
| @@ -45,8 +45,8 @@ static void print_v4sf(const char *str, __m128 a) | |||||
| void ff_fft_calc_sse(FFTContext *s, FFTComplex *z) | void ff_fft_calc_sse(FFTContext *s, FFTComplex *z) | ||||
| { | { | ||||
| int ln = s->nbits; | int ln = s->nbits; | ||||
| int j, np, np2; | |||||
| int nblocks, nloops; | |||||
| int j, np, np2; | |||||
| int nblocks, nloops; | |||||
| register FFTComplex *p, *q; | register FFTComplex *p, *q; | ||||
| FFTComplex *cptr, *cptr1; | FFTComplex *cptr, *cptr1; | ||||
| int k; | int k; | ||||
| @@ -47,9 +47,9 @@ | |||||
| SUMSUB_BADC( d13, s02, s13, d02 ) | SUMSUB_BADC( d13, s02, s13, d02 ) | ||||
| #define SBUTTERFLY(a,b,t,n)\ | #define SBUTTERFLY(a,b,t,n)\ | ||||
| "movq " #a ", " #t " \n\t" /* abcd */\ | |||||
| "punpckl" #n " " #b ", " #a " \n\t" /* aebf */\ | |||||
| "punpckh" #n " " #b ", " #t " \n\t" /* cgdh */\ | |||||
| "movq " #a ", " #t " \n\t" /* abcd */\ | |||||
| "punpckl" #n " " #b ", " #a " \n\t" /* aebf */\ | |||||
| "punpckh" #n " " #b ", " #t " \n\t" /* cgdh */\ | |||||
| #define TRANSPOSE4(a,b,c,d,t)\ | #define TRANSPOSE4(a,b,c,d,t)\ | ||||
| SBUTTERFLY(a,b,t,wd) /* a=aebf t=cgdh */\ | SBUTTERFLY(a,b,t,wd) /* a=aebf t=cgdh */\ | ||||
| @@ -369,73 +369,73 @@ static void h264_h_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int a | |||||
| /* motion compensation */ | /* motion compensation */ | ||||
| #define QPEL_H264V(A,B,C,D,E,F,OP)\ | #define QPEL_H264V(A,B,C,D,E,F,OP)\ | ||||
| "movd (%0), "#F" \n\t"\ | |||||
| "movq "#C", %%mm6 \n\t"\ | |||||
| "paddw "#D", %%mm6 \n\t"\ | |||||
| "psllw $2, %%mm6 \n\t"\ | |||||
| "psubw "#B", %%mm6 \n\t"\ | |||||
| "psubw "#E", %%mm6 \n\t"\ | |||||
| "pmullw %4, %%mm6 \n\t"\ | |||||
| "add %2, %0 \n\t"\ | |||||
| "punpcklbw %%mm7, "#F" \n\t"\ | |||||
| "paddw %5, "#A" \n\t"\ | |||||
| "paddw "#F", "#A" \n\t"\ | |||||
| "paddw "#A", %%mm6 \n\t"\ | |||||
| "psraw $5, %%mm6 \n\t"\ | |||||
| "packuswb %%mm6, %%mm6 \n\t"\ | |||||
| "movd (%0), "#F" \n\t"\ | |||||
| "movq "#C", %%mm6 \n\t"\ | |||||
| "paddw "#D", %%mm6 \n\t"\ | |||||
| "psllw $2, %%mm6 \n\t"\ | |||||
| "psubw "#B", %%mm6 \n\t"\ | |||||
| "psubw "#E", %%mm6 \n\t"\ | |||||
| "pmullw %4, %%mm6 \n\t"\ | |||||
| "add %2, %0 \n\t"\ | |||||
| "punpcklbw %%mm7, "#F" \n\t"\ | |||||
| "paddw %5, "#A" \n\t"\ | |||||
| "paddw "#F", "#A" \n\t"\ | |||||
| "paddw "#A", %%mm6 \n\t"\ | |||||
| "psraw $5, %%mm6 \n\t"\ | |||||
| "packuswb %%mm6, %%mm6 \n\t"\ | |||||
| OP(%%mm6, (%1), A, d)\ | OP(%%mm6, (%1), A, d)\ | ||||
| "add %3, %1 \n\t" | |||||
| "add %3, %1 \n\t" | |||||
| #define QPEL_H264HV(A,B,C,D,E,F,OF)\ | #define QPEL_H264HV(A,B,C,D,E,F,OF)\ | ||||
| "movd (%0), "#F" \n\t"\ | |||||
| "movq "#C", %%mm6 \n\t"\ | |||||
| "paddw "#D", %%mm6 \n\t"\ | |||||
| "psllw $2, %%mm6 \n\t"\ | |||||
| "psubw "#B", %%mm6 \n\t"\ | |||||
| "psubw "#E", %%mm6 \n\t"\ | |||||
| "pmullw %3, %%mm6 \n\t"\ | |||||
| "add %2, %0 \n\t"\ | |||||
| "punpcklbw %%mm7, "#F" \n\t"\ | |||||
| "paddw "#F", "#A" \n\t"\ | |||||
| "paddw "#A", %%mm6 \n\t"\ | |||||
| "movq %%mm6, "#OF"(%1) \n\t" | |||||
| "movd (%0), "#F" \n\t"\ | |||||
| "movq "#C", %%mm6 \n\t"\ | |||||
| "paddw "#D", %%mm6 \n\t"\ | |||||
| "psllw $2, %%mm6 \n\t"\ | |||||
| "psubw "#B", %%mm6 \n\t"\ | |||||
| "psubw "#E", %%mm6 \n\t"\ | |||||
| "pmullw %3, %%mm6 \n\t"\ | |||||
| "add %2, %0 \n\t"\ | |||||
| "punpcklbw %%mm7, "#F" \n\t"\ | |||||
| "paddw "#F", "#A" \n\t"\ | |||||
| "paddw "#A", %%mm6 \n\t"\ | |||||
| "movq %%mm6, "#OF"(%1) \n\t" | |||||
| #define QPEL_H264(OPNAME, OP, MMX)\ | #define QPEL_H264(OPNAME, OP, MMX)\ | ||||
| static void OPNAME ## h264_qpel4_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | static void OPNAME ## h264_qpel4_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | ||||
| int h=4;\ | int h=4;\ | ||||
| \ | \ | ||||
| asm volatile(\ | asm volatile(\ | ||||
| "pxor %%mm7, %%mm7 \n\t"\ | |||||
| "movq %5, %%mm4 \n\t"\ | |||||
| "movq %6, %%mm5 \n\t"\ | |||||
| "1: \n\t"\ | |||||
| "movd -1(%0), %%mm1 \n\t"\ | |||||
| "movd (%0), %%mm2 \n\t"\ | |||||
| "movd 1(%0), %%mm3 \n\t"\ | |||||
| "movd 2(%0), %%mm0 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm1 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm2 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm3 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm0 \n\t"\ | |||||
| "paddw %%mm0, %%mm1 \n\t"\ | |||||
| "paddw %%mm3, %%mm2 \n\t"\ | |||||
| "movd -2(%0), %%mm0 \n\t"\ | |||||
| "movd 3(%0), %%mm3 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm0 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm3 \n\t"\ | |||||
| "paddw %%mm3, %%mm0 \n\t"\ | |||||
| "psllw $2, %%mm2 \n\t"\ | |||||
| "psubw %%mm1, %%mm2 \n\t"\ | |||||
| "pmullw %%mm4, %%mm2 \n\t"\ | |||||
| "paddw %%mm5, %%mm0 \n\t"\ | |||||
| "paddw %%mm2, %%mm0 \n\t"\ | |||||
| "psraw $5, %%mm0 \n\t"\ | |||||
| "packuswb %%mm0, %%mm0 \n\t"\ | |||||
| "pxor %%mm7, %%mm7 \n\t"\ | |||||
| "movq %5, %%mm4 \n\t"\ | |||||
| "movq %6, %%mm5 \n\t"\ | |||||
| "1: \n\t"\ | |||||
| "movd -1(%0), %%mm1 \n\t"\ | |||||
| "movd (%0), %%mm2 \n\t"\ | |||||
| "movd 1(%0), %%mm3 \n\t"\ | |||||
| "movd 2(%0), %%mm0 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm1 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm2 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm3 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm0 \n\t"\ | |||||
| "paddw %%mm0, %%mm1 \n\t"\ | |||||
| "paddw %%mm3, %%mm2 \n\t"\ | |||||
| "movd -2(%0), %%mm0 \n\t"\ | |||||
| "movd 3(%0), %%mm3 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm0 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm3 \n\t"\ | |||||
| "paddw %%mm3, %%mm0 \n\t"\ | |||||
| "psllw $2, %%mm2 \n\t"\ | |||||
| "psubw %%mm1, %%mm2 \n\t"\ | |||||
| "pmullw %%mm4, %%mm2 \n\t"\ | |||||
| "paddw %%mm5, %%mm0 \n\t"\ | |||||
| "paddw %%mm2, %%mm0 \n\t"\ | |||||
| "psraw $5, %%mm0 \n\t"\ | |||||
| "packuswb %%mm0, %%mm0 \n\t"\ | |||||
| OP(%%mm0, (%1),%%mm6, d)\ | OP(%%mm0, (%1),%%mm6, d)\ | ||||
| "add %3, %0 \n\t"\ | |||||
| "add %4, %1 \n\t"\ | |||||
| "decl %2 \n\t"\ | |||||
| " jnz 1b \n\t"\ | |||||
| "add %3, %0 \n\t"\ | |||||
| "add %4, %1 \n\t"\ | |||||
| "decl %2 \n\t"\ | |||||
| " jnz 1b \n\t"\ | |||||
| : "+a"(src), "+c"(dst), "+m"(h)\ | : "+a"(src), "+c"(dst), "+m"(h)\ | ||||
| : "d"((long)srcStride), "S"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\ | : "d"((long)srcStride), "S"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\ | ||||
| : "memory"\ | : "memory"\ | ||||
| @@ -444,22 +444,22 @@ static void OPNAME ## h264_qpel4_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, i | |||||
| static void OPNAME ## h264_qpel4_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | static void OPNAME ## h264_qpel4_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | ||||
| src -= 2*srcStride;\ | src -= 2*srcStride;\ | ||||
| asm volatile(\ | asm volatile(\ | ||||
| "pxor %%mm7, %%mm7 \n\t"\ | |||||
| "movd (%0), %%mm0 \n\t"\ | |||||
| "add %2, %0 \n\t"\ | |||||
| "movd (%0), %%mm1 \n\t"\ | |||||
| "add %2, %0 \n\t"\ | |||||
| "movd (%0), %%mm2 \n\t"\ | |||||
| "add %2, %0 \n\t"\ | |||||
| "movd (%0), %%mm3 \n\t"\ | |||||
| "add %2, %0 \n\t"\ | |||||
| "movd (%0), %%mm4 \n\t"\ | |||||
| "add %2, %0 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm0 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm1 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm2 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm3 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm4 \n\t"\ | |||||
| "pxor %%mm7, %%mm7 \n\t"\ | |||||
| "movd (%0), %%mm0 \n\t"\ | |||||
| "add %2, %0 \n\t"\ | |||||
| "movd (%0), %%mm1 \n\t"\ | |||||
| "add %2, %0 \n\t"\ | |||||
| "movd (%0), %%mm2 \n\t"\ | |||||
| "add %2, %0 \n\t"\ | |||||
| "movd (%0), %%mm3 \n\t"\ | |||||
| "add %2, %0 \n\t"\ | |||||
| "movd (%0), %%mm4 \n\t"\ | |||||
| "add %2, %0 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm0 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm1 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm2 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm3 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm4 \n\t"\ | |||||
| QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\ | QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\ | ||||
| QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\ | QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\ | ||||
| QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\ | QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\ | ||||
| @@ -476,22 +476,22 @@ static void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, | |||||
| src -= 2*srcStride+2;\ | src -= 2*srcStride+2;\ | ||||
| while(w--){\ | while(w--){\ | ||||
| asm volatile(\ | asm volatile(\ | ||||
| "pxor %%mm7, %%mm7 \n\t"\ | |||||
| "movd (%0), %%mm0 \n\t"\ | |||||
| "add %2, %0 \n\t"\ | |||||
| "movd (%0), %%mm1 \n\t"\ | |||||
| "add %2, %0 \n\t"\ | |||||
| "movd (%0), %%mm2 \n\t"\ | |||||
| "add %2, %0 \n\t"\ | |||||
| "movd (%0), %%mm3 \n\t"\ | |||||
| "add %2, %0 \n\t"\ | |||||
| "movd (%0), %%mm4 \n\t"\ | |||||
| "add %2, %0 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm0 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm1 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm2 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm3 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm4 \n\t"\ | |||||
| "pxor %%mm7, %%mm7 \n\t"\ | |||||
| "movd (%0), %%mm0 \n\t"\ | |||||
| "add %2, %0 \n\t"\ | |||||
| "movd (%0), %%mm1 \n\t"\ | |||||
| "add %2, %0 \n\t"\ | |||||
| "movd (%0), %%mm2 \n\t"\ | |||||
| "add %2, %0 \n\t"\ | |||||
| "movd (%0), %%mm3 \n\t"\ | |||||
| "add %2, %0 \n\t"\ | |||||
| "movd (%0), %%mm4 \n\t"\ | |||||
| "add %2, %0 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm0 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm1 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm2 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm3 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm4 \n\t"\ | |||||
| QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 0*8*3)\ | QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 0*8*3)\ | ||||
| QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 1*8*3)\ | QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 1*8*3)\ | ||||
| QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 2*8*3)\ | QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 2*8*3)\ | ||||
| @@ -506,28 +506,28 @@ static void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, | |||||
| }\ | }\ | ||||
| tmp -= 3*4;\ | tmp -= 3*4;\ | ||||
| asm volatile(\ | asm volatile(\ | ||||
| "movq %4, %%mm6 \n\t"\ | |||||
| "1: \n\t"\ | |||||
| "movq (%0), %%mm0 \n\t"\ | |||||
| "paddw 10(%0), %%mm0 \n\t"\ | |||||
| "movq 2(%0), %%mm1 \n\t"\ | |||||
| "paddw 8(%0), %%mm1 \n\t"\ | |||||
| "movq 4(%0), %%mm2 \n\t"\ | |||||
| "paddw 6(%0), %%mm2 \n\t"\ | |||||
| "psubw %%mm1, %%mm0 \n\t"/*a-b (abccba)*/\ | |||||
| "psraw $2, %%mm0 \n\t"/*(a-b)/4 */\ | |||||
| "psubw %%mm1, %%mm0 \n\t"/*(a-b)/4-b */\ | |||||
| "paddsw %%mm2, %%mm0 \n\t"\ | |||||
| "psraw $2, %%mm0 \n\t"/*((a-b)/4-b)/4 */\ | |||||
| "paddw %%mm6, %%mm2 \n\t"\ | |||||
| "paddw %%mm2, %%mm0 \n\t"\ | |||||
| "psraw $6, %%mm0 \n\t"\ | |||||
| "packuswb %%mm0, %%mm0 \n\t"\ | |||||
| "movq %4, %%mm6 \n\t"\ | |||||
| "1: \n\t"\ | |||||
| "movq (%0), %%mm0 \n\t"\ | |||||
| "paddw 10(%0), %%mm0 \n\t"\ | |||||
| "movq 2(%0), %%mm1 \n\t"\ | |||||
| "paddw 8(%0), %%mm1 \n\t"\ | |||||
| "movq 4(%0), %%mm2 \n\t"\ | |||||
| "paddw 6(%0), %%mm2 \n\t"\ | |||||
| "psubw %%mm1, %%mm0 \n\t"/*a-b (abccba)*/\ | |||||
| "psraw $2, %%mm0 \n\t"/*(a-b)/4 */\ | |||||
| "psubw %%mm1, %%mm0 \n\t"/*(a-b)/4-b */\ | |||||
| "paddsw %%mm2, %%mm0 \n\t"\ | |||||
| "psraw $2, %%mm0 \n\t"/*((a-b)/4-b)/4 */\ | |||||
| "paddw %%mm6, %%mm2 \n\t"\ | |||||
| "paddw %%mm2, %%mm0 \n\t"\ | |||||
| "psraw $6, %%mm0 \n\t"\ | |||||
| "packuswb %%mm0, %%mm0 \n\t"\ | |||||
| OP(%%mm0, (%1),%%mm7, d)\ | OP(%%mm0, (%1),%%mm7, d)\ | ||||
| "add $24, %0 \n\t"\ | |||||
| "add %3, %1 \n\t"\ | |||||
| "decl %2 \n\t"\ | |||||
| " jnz 1b \n\t"\ | |||||
| "add $24, %0 \n\t"\ | |||||
| "add %3, %1 \n\t"\ | |||||
| "decl %2 \n\t"\ | |||||
| " jnz 1b \n\t"\ | |||||
| : "+a"(tmp), "+c"(dst), "+m"(h)\ | : "+a"(tmp), "+c"(dst), "+m"(h)\ | ||||
| : "S"((long)dstStride), "m"(ff_pw_32)\ | : "S"((long)dstStride), "m"(ff_pw_32)\ | ||||
| : "memory"\ | : "memory"\ | ||||
| @@ -537,54 +537,54 @@ static void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, | |||||
| static void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | static void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | ||||
| int h=8;\ | int h=8;\ | ||||
| asm volatile(\ | asm volatile(\ | ||||
| "pxor %%mm7, %%mm7 \n\t"\ | |||||
| "movq %5, %%mm6 \n\t"\ | |||||
| "1: \n\t"\ | |||||
| "movq (%0), %%mm0 \n\t"\ | |||||
| "movq 1(%0), %%mm2 \n\t"\ | |||||
| "movq %%mm0, %%mm1 \n\t"\ | |||||
| "movq %%mm2, %%mm3 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm0 \n\t"\ | |||||
| "punpckhbw %%mm7, %%mm1 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm2 \n\t"\ | |||||
| "punpckhbw %%mm7, %%mm3 \n\t"\ | |||||
| "paddw %%mm2, %%mm0 \n\t"\ | |||||
| "paddw %%mm3, %%mm1 \n\t"\ | |||||
| "psllw $2, %%mm0 \n\t"\ | |||||
| "psllw $2, %%mm1 \n\t"\ | |||||
| "movq -1(%0), %%mm2 \n\t"\ | |||||
| "movq 2(%0), %%mm4 \n\t"\ | |||||
| "movq %%mm2, %%mm3 \n\t"\ | |||||
| "movq %%mm4, %%mm5 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm2 \n\t"\ | |||||
| "punpckhbw %%mm7, %%mm3 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm4 \n\t"\ | |||||
| "punpckhbw %%mm7, %%mm5 \n\t"\ | |||||
| "paddw %%mm4, %%mm2 \n\t"\ | |||||
| "paddw %%mm3, %%mm5 \n\t"\ | |||||
| "psubw %%mm2, %%mm0 \n\t"\ | |||||
| "psubw %%mm5, %%mm1 \n\t"\ | |||||
| "pmullw %%mm6, %%mm0 \n\t"\ | |||||
| "pmullw %%mm6, %%mm1 \n\t"\ | |||||
| "movd -2(%0), %%mm2 \n\t"\ | |||||
| "movd 7(%0), %%mm5 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm2 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm5 \n\t"\ | |||||
| "paddw %%mm3, %%mm2 \n\t"\ | |||||
| "paddw %%mm5, %%mm4 \n\t"\ | |||||
| "movq %6, %%mm5 \n\t"\ | |||||
| "paddw %%mm5, %%mm2 \n\t"\ | |||||
| "paddw %%mm5, %%mm4 \n\t"\ | |||||
| "paddw %%mm2, %%mm0 \n\t"\ | |||||
| "paddw %%mm4, %%mm1 \n\t"\ | |||||
| "psraw $5, %%mm0 \n\t"\ | |||||
| "psraw $5, %%mm1 \n\t"\ | |||||
| "packuswb %%mm1, %%mm0 \n\t"\ | |||||
| "pxor %%mm7, %%mm7 \n\t"\ | |||||
| "movq %5, %%mm6 \n\t"\ | |||||
| "1: \n\t"\ | |||||
| "movq (%0), %%mm0 \n\t"\ | |||||
| "movq 1(%0), %%mm2 \n\t"\ | |||||
| "movq %%mm0, %%mm1 \n\t"\ | |||||
| "movq %%mm2, %%mm3 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm0 \n\t"\ | |||||
| "punpckhbw %%mm7, %%mm1 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm2 \n\t"\ | |||||
| "punpckhbw %%mm7, %%mm3 \n\t"\ | |||||
| "paddw %%mm2, %%mm0 \n\t"\ | |||||
| "paddw %%mm3, %%mm1 \n\t"\ | |||||
| "psllw $2, %%mm0 \n\t"\ | |||||
| "psllw $2, %%mm1 \n\t"\ | |||||
| "movq -1(%0), %%mm2 \n\t"\ | |||||
| "movq 2(%0), %%mm4 \n\t"\ | |||||
| "movq %%mm2, %%mm3 \n\t"\ | |||||
| "movq %%mm4, %%mm5 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm2 \n\t"\ | |||||
| "punpckhbw %%mm7, %%mm3 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm4 \n\t"\ | |||||
| "punpckhbw %%mm7, %%mm5 \n\t"\ | |||||
| "paddw %%mm4, %%mm2 \n\t"\ | |||||
| "paddw %%mm3, %%mm5 \n\t"\ | |||||
| "psubw %%mm2, %%mm0 \n\t"\ | |||||
| "psubw %%mm5, %%mm1 \n\t"\ | |||||
| "pmullw %%mm6, %%mm0 \n\t"\ | |||||
| "pmullw %%mm6, %%mm1 \n\t"\ | |||||
| "movd -2(%0), %%mm2 \n\t"\ | |||||
| "movd 7(%0), %%mm5 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm2 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm5 \n\t"\ | |||||
| "paddw %%mm3, %%mm2 \n\t"\ | |||||
| "paddw %%mm5, %%mm4 \n\t"\ | |||||
| "movq %6, %%mm5 \n\t"\ | |||||
| "paddw %%mm5, %%mm2 \n\t"\ | |||||
| "paddw %%mm5, %%mm4 \n\t"\ | |||||
| "paddw %%mm2, %%mm0 \n\t"\ | |||||
| "paddw %%mm4, %%mm1 \n\t"\ | |||||
| "psraw $5, %%mm0 \n\t"\ | |||||
| "psraw $5, %%mm1 \n\t"\ | |||||
| "packuswb %%mm1, %%mm0 \n\t"\ | |||||
| OP(%%mm0, (%1),%%mm5, q)\ | OP(%%mm0, (%1),%%mm5, q)\ | ||||
| "add %3, %0 \n\t"\ | |||||
| "add %4, %1 \n\t"\ | |||||
| "decl %2 \n\t"\ | |||||
| " jnz 1b \n\t"\ | |||||
| "add %3, %0 \n\t"\ | |||||
| "add %4, %1 \n\t"\ | |||||
| "decl %2 \n\t"\ | |||||
| " jnz 1b \n\t"\ | |||||
| : "+a"(src), "+c"(dst), "+m"(h)\ | : "+a"(src), "+c"(dst), "+m"(h)\ | ||||
| : "d"((long)srcStride), "S"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\ | : "d"((long)srcStride), "S"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\ | ||||
| : "memory"\ | : "memory"\ | ||||
| @@ -597,22 +597,22 @@ static void OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, i | |||||
| \ | \ | ||||
| while(h--){\ | while(h--){\ | ||||
| asm volatile(\ | asm volatile(\ | ||||
| "pxor %%mm7, %%mm7 \n\t"\ | |||||
| "movd (%0), %%mm0 \n\t"\ | |||||
| "add %2, %0 \n\t"\ | |||||
| "movd (%0), %%mm1 \n\t"\ | |||||
| "add %2, %0 \n\t"\ | |||||
| "movd (%0), %%mm2 \n\t"\ | |||||
| "add %2, %0 \n\t"\ | |||||
| "movd (%0), %%mm3 \n\t"\ | |||||
| "add %2, %0 \n\t"\ | |||||
| "movd (%0), %%mm4 \n\t"\ | |||||
| "add %2, %0 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm0 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm1 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm2 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm3 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm4 \n\t"\ | |||||
| "pxor %%mm7, %%mm7 \n\t"\ | |||||
| "movd (%0), %%mm0 \n\t"\ | |||||
| "add %2, %0 \n\t"\ | |||||
| "movd (%0), %%mm1 \n\t"\ | |||||
| "add %2, %0 \n\t"\ | |||||
| "movd (%0), %%mm2 \n\t"\ | |||||
| "add %2, %0 \n\t"\ | |||||
| "movd (%0), %%mm3 \n\t"\ | |||||
| "add %2, %0 \n\t"\ | |||||
| "movd (%0), %%mm4 \n\t"\ | |||||
| "add %2, %0 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm0 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm1 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm2 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm3 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm4 \n\t"\ | |||||
| QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\ | QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\ | ||||
| QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\ | QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\ | ||||
| QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\ | QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\ | ||||
| @@ -636,22 +636,22 @@ static void OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, | |||||
| src -= 2*srcStride+2;\ | src -= 2*srcStride+2;\ | ||||
| while(w--){\ | while(w--){\ | ||||
| asm volatile(\ | asm volatile(\ | ||||
| "pxor %%mm7, %%mm7 \n\t"\ | |||||
| "movd (%0), %%mm0 \n\t"\ | |||||
| "add %2, %0 \n\t"\ | |||||
| "movd (%0), %%mm1 \n\t"\ | |||||
| "add %2, %0 \n\t"\ | |||||
| "movd (%0), %%mm2 \n\t"\ | |||||
| "add %2, %0 \n\t"\ | |||||
| "movd (%0), %%mm3 \n\t"\ | |||||
| "add %2, %0 \n\t"\ | |||||
| "movd (%0), %%mm4 \n\t"\ | |||||
| "add %2, %0 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm0 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm1 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm2 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm3 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm4 \n\t"\ | |||||
| "pxor %%mm7, %%mm7 \n\t"\ | |||||
| "movd (%0), %%mm0 \n\t"\ | |||||
| "add %2, %0 \n\t"\ | |||||
| "movd (%0), %%mm1 \n\t"\ | |||||
| "add %2, %0 \n\t"\ | |||||
| "movd (%0), %%mm2 \n\t"\ | |||||
| "add %2, %0 \n\t"\ | |||||
| "movd (%0), %%mm3 \n\t"\ | |||||
| "add %2, %0 \n\t"\ | |||||
| "movd (%0), %%mm4 \n\t"\ | |||||
| "add %2, %0 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm0 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm1 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm2 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm3 \n\t"\ | |||||
| "punpcklbw %%mm7, %%mm4 \n\t"\ | |||||
| QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 0*8*4)\ | QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 0*8*4)\ | ||||
| QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 1*8*4)\ | QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 1*8*4)\ | ||||
| QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 2*8*4)\ | QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 2*8*4)\ | ||||
| @@ -670,42 +670,42 @@ static void OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, | |||||
| }\ | }\ | ||||
| tmp -= 4*4;\ | tmp -= 4*4;\ | ||||
| asm volatile(\ | asm volatile(\ | ||||
| "movq %4, %%mm6 \n\t"\ | |||||
| "1: \n\t"\ | |||||
| "movq (%0), %%mm0 \n\t"\ | |||||
| "movq 8(%0), %%mm3 \n\t"\ | |||||
| "movq 2(%0), %%mm1 \n\t"\ | |||||
| "movq 10(%0), %%mm4 \n\t"\ | |||||
| "paddw %%mm4, %%mm0 \n\t"\ | |||||
| "paddw %%mm3, %%mm1 \n\t"\ | |||||
| "paddw 18(%0), %%mm3 \n\t"\ | |||||
| "paddw 16(%0), %%mm4 \n\t"\ | |||||
| "movq 4(%0), %%mm2 \n\t"\ | |||||
| "movq 12(%0), %%mm5 \n\t"\ | |||||
| "paddw 6(%0), %%mm2 \n\t"\ | |||||
| "paddw 14(%0), %%mm5 \n\t"\ | |||||
| "psubw %%mm1, %%mm0 \n\t"\ | |||||
| "psubw %%mm4, %%mm3 \n\t"\ | |||||
| "psraw $2, %%mm0 \n\t"\ | |||||
| "psraw $2, %%mm3 \n\t"\ | |||||
| "psubw %%mm1, %%mm0 \n\t"\ | |||||
| "psubw %%mm4, %%mm3 \n\t"\ | |||||
| "paddsw %%mm2, %%mm0 \n\t"\ | |||||
| "paddsw %%mm5, %%mm3 \n\t"\ | |||||
| "psraw $2, %%mm0 \n\t"\ | |||||
| "psraw $2, %%mm3 \n\t"\ | |||||
| "paddw %%mm6, %%mm2 \n\t"\ | |||||
| "paddw %%mm6, %%mm5 \n\t"\ | |||||
| "paddw %%mm2, %%mm0 \n\t"\ | |||||
| "paddw %%mm5, %%mm3 \n\t"\ | |||||
| "psraw $6, %%mm0 \n\t"\ | |||||
| "psraw $6, %%mm3 \n\t"\ | |||||
| "packuswb %%mm3, %%mm0 \n\t"\ | |||||
| "movq %4, %%mm6 \n\t"\ | |||||
| "1: \n\t"\ | |||||
| "movq (%0), %%mm0 \n\t"\ | |||||
| "movq 8(%0), %%mm3 \n\t"\ | |||||
| "movq 2(%0), %%mm1 \n\t"\ | |||||
| "movq 10(%0), %%mm4 \n\t"\ | |||||
| "paddw %%mm4, %%mm0 \n\t"\ | |||||
| "paddw %%mm3, %%mm1 \n\t"\ | |||||
| "paddw 18(%0), %%mm3 \n\t"\ | |||||
| "paddw 16(%0), %%mm4 \n\t"\ | |||||
| "movq 4(%0), %%mm2 \n\t"\ | |||||
| "movq 12(%0), %%mm5 \n\t"\ | |||||
| "paddw 6(%0), %%mm2 \n\t"\ | |||||
| "paddw 14(%0), %%mm5 \n\t"\ | |||||
| "psubw %%mm1, %%mm0 \n\t"\ | |||||
| "psubw %%mm4, %%mm3 \n\t"\ | |||||
| "psraw $2, %%mm0 \n\t"\ | |||||
| "psraw $2, %%mm3 \n\t"\ | |||||
| "psubw %%mm1, %%mm0 \n\t"\ | |||||
| "psubw %%mm4, %%mm3 \n\t"\ | |||||
| "paddsw %%mm2, %%mm0 \n\t"\ | |||||
| "paddsw %%mm5, %%mm3 \n\t"\ | |||||
| "psraw $2, %%mm0 \n\t"\ | |||||
| "psraw $2, %%mm3 \n\t"\ | |||||
| "paddw %%mm6, %%mm2 \n\t"\ | |||||
| "paddw %%mm6, %%mm5 \n\t"\ | |||||
| "paddw %%mm2, %%mm0 \n\t"\ | |||||
| "paddw %%mm5, %%mm3 \n\t"\ | |||||
| "psraw $6, %%mm0 \n\t"\ | |||||
| "psraw $6, %%mm3 \n\t"\ | |||||
| "packuswb %%mm3, %%mm0 \n\t"\ | |||||
| OP(%%mm0, (%1),%%mm7, q)\ | OP(%%mm0, (%1),%%mm7, q)\ | ||||
| "add $32, %0 \n\t"\ | |||||
| "add %3, %1 \n\t"\ | |||||
| "decl %2 \n\t"\ | |||||
| " jnz 1b \n\t"\ | |||||
| "add $32, %0 \n\t"\ | |||||
| "add %3, %1 \n\t"\ | |||||
| "decl %2 \n\t"\ | |||||
| " jnz 1b \n\t"\ | |||||
| : "+a"(tmp), "+c"(dst), "+m"(h)\ | : "+a"(tmp), "+c"(dst), "+m"(h)\ | ||||
| : "S"((long)dstStride), "m"(ff_pw_32)\ | : "S"((long)dstStride), "m"(ff_pw_32)\ | ||||
| : "memory"\ | : "memory"\ | ||||
| @@ -862,15 +862,15 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, uint8_t * | |||||
| }\ | }\ | ||||
| #define PUT_OP(a,b,temp, size) "mov" #size " " #a ", " #b " \n\t" | |||||
| #define PUT_OP(a,b,temp, size) "mov" #size " " #a ", " #b " \n\t" | |||||
| #define AVG_3DNOW_OP(a,b,temp, size) \ | #define AVG_3DNOW_OP(a,b,temp, size) \ | ||||
| "mov" #size " " #b ", " #temp " \n\t"\ | |||||
| "pavgusb " #temp ", " #a " \n\t"\ | |||||
| "mov" #size " " #a ", " #b " \n\t" | |||||
| "mov" #size " " #b ", " #temp " \n\t"\ | |||||
| "pavgusb " #temp ", " #a " \n\t"\ | |||||
| "mov" #size " " #a ", " #b " \n\t" | |||||
| #define AVG_MMX2_OP(a,b,temp, size) \ | #define AVG_MMX2_OP(a,b,temp, size) \ | ||||
| "mov" #size " " #b ", " #temp " \n\t"\ | |||||
| "pavgb " #temp ", " #a " \n\t"\ | |||||
| "mov" #size " " #a ", " #b " \n\t" | |||||
| "mov" #size " " #b ", " #temp " \n\t"\ | |||||
| "pavgb " #temp ", " #a " \n\t"\ | |||||
| "mov" #size " " #a ", " #b " \n\t" | |||||
| QPEL_H264(put_, PUT_OP, 3dnow) | QPEL_H264(put_, PUT_OP, 3dnow) | ||||
| QPEL_H264(avg_, AVG_3DNOW_OP, 3dnow) | QPEL_H264(avg_, AVG_3DNOW_OP, 3dnow) | ||||
| @@ -38,7 +38,7 @@ | |||||
| #if 0 | #if 0 | ||||
| /* C row IDCT - its just here to document the MMXEXT and MMX versions */ | /* C row IDCT - its just here to document the MMXEXT and MMX versions */ | ||||
| static inline void idct_row (int16_t * row, int offset, | static inline void idct_row (int16_t * row, int offset, | ||||
| int16_t * table, int32_t * rounder) | |||||
| int16_t * table, int32_t * rounder) | |||||
| { | { | ||||
| int C1, C2, C3, C4, C5, C6, C7; | int C1, C2, C3, C4, C5, C6, C7; | ||||
| int a0, a1, a2, a3, b0, b1, b2, b3; | int a0, a1, a2, a3, b0, b1, b2, b3; | ||||
| @@ -77,241 +77,241 @@ static inline void idct_row (int16_t * row, int offset, | |||||
| /* MMXEXT row IDCT */ | /* MMXEXT row IDCT */ | ||||
| #define mmxext_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, -c4, -c2, \ | |||||
| c4, c6, c4, c6, \ | |||||
| c1, c3, -c1, -c5, \ | |||||
| c5, c7, c3, -c7, \ | |||||
| c4, -c6, c4, -c6, \ | |||||
| -c4, c2, c4, -c2, \ | |||||
| c5, -c1, c3, -c1, \ | |||||
| c7, c3, c7, -c5 } | |||||
| #define mmxext_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, -c4, -c2, \ | |||||
| c4, c6, c4, c6, \ | |||||
| c1, c3, -c1, -c5, \ | |||||
| c5, c7, c3, -c7, \ | |||||
| c4, -c6, c4, -c6, \ | |||||
| -c4, c2, c4, -c2, \ | |||||
| c5, -c1, c3, -c1, \ | |||||
| c7, c3, c7, -c5 } | |||||
| static inline void mmxext_row_head (int16_t * row, int offset, const int16_t * table) | static inline void mmxext_row_head (int16_t * row, int offset, const int16_t * table) | ||||
| { | { | ||||
| movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0 | |||||
| movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0 | |||||
| movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1 | |||||
| movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0 | |||||
| movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1 | |||||
| movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0 | |||||
| movq_m2r (*table, mm3); // mm3 = -C2 -C4 C2 C4 | |||||
| movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1 | |||||
| movq_m2r (*table, mm3); // mm3 = -C2 -C4 C2 C4 | |||||
| movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1 | |||||
| movq_m2r (*(table+4), mm4); // mm4 = C6 C4 C6 C4 | |||||
| pmaddwd_r2r (mm0, mm3); // mm3 = -C4*x4-C2*x6 C4*x0+C2*x2 | |||||
| movq_m2r (*(table+4), mm4); // mm4 = C6 C4 C6 C4 | |||||
| pmaddwd_r2r (mm0, mm3); // mm3 = -C4*x4-C2*x6 C4*x0+C2*x2 | |||||
| pshufw_r2r (mm2, mm2, 0x4e); // mm2 = x2 x0 x6 x4 | |||||
| pshufw_r2r (mm2, mm2, 0x4e); // mm2 = x2 x0 x6 x4 | |||||
| } | } | ||||
| static inline void mmxext_row (const int16_t * table, const int32_t * rounder) | static inline void mmxext_row (const int16_t * table, const int32_t * rounder) | ||||
| { | { | ||||
| movq_m2r (*(table+8), mm1); // mm1 = -C5 -C1 C3 C1 | |||||
| pmaddwd_r2r (mm2, mm4); // mm4 = C4*x0+C6*x2 C4*x4+C6*x6 | |||||
| movq_m2r (*(table+8), mm1); // mm1 = -C5 -C1 C3 C1 | |||||
| pmaddwd_r2r (mm2, mm4); // mm4 = C4*x0+C6*x2 C4*x4+C6*x6 | |||||
| pmaddwd_m2r (*(table+16), mm0); // mm0 = C4*x4-C6*x6 C4*x0-C6*x2 | |||||
| pshufw_r2r (mm6, mm6, 0x4e); // mm6 = x3 x1 x7 x5 | |||||
| pmaddwd_m2r (*(table+16), mm0); // mm0 = C4*x4-C6*x6 C4*x0-C6*x2 | |||||
| pshufw_r2r (mm6, mm6, 0x4e); // mm6 = x3 x1 x7 x5 | |||||
| movq_m2r (*(table+12), mm7); // mm7 = -C7 C3 C7 C5 | |||||
| pmaddwd_r2r (mm5, mm1); // mm1 = -C1*x5-C5*x7 C1*x1+C3*x3 | |||||
| movq_m2r (*(table+12), mm7); // mm7 = -C7 C3 C7 C5 | |||||
| pmaddwd_r2r (mm5, mm1); // mm1 = -C1*x5-C5*x7 C1*x1+C3*x3 | |||||
| paddd_m2r (*rounder, mm3); // mm3 += rounder | |||||
| pmaddwd_r2r (mm6, mm7); // mm7 = C3*x1-C7*x3 C5*x5+C7*x7 | |||||
| paddd_m2r (*rounder, mm3); // mm3 += rounder | |||||
| pmaddwd_r2r (mm6, mm7); // mm7 = C3*x1-C7*x3 C5*x5+C7*x7 | |||||
| pmaddwd_m2r (*(table+20), mm2); // mm2 = C4*x0-C2*x2 -C4*x4+C2*x6 | |||||
| paddd_r2r (mm4, mm3); // mm3 = a1 a0 + rounder | |||||
| pmaddwd_m2r (*(table+20), mm2); // mm2 = C4*x0-C2*x2 -C4*x4+C2*x6 | |||||
| paddd_r2r (mm4, mm3); // mm3 = a1 a0 + rounder | |||||
| pmaddwd_m2r (*(table+24), mm5); // mm5 = C3*x5-C1*x7 C5*x1-C1*x3 | |||||
| movq_r2r (mm3, mm4); // mm4 = a1 a0 + rounder | |||||
| pmaddwd_m2r (*(table+24), mm5); // mm5 = C3*x5-C1*x7 C5*x1-C1*x3 | |||||
| movq_r2r (mm3, mm4); // mm4 = a1 a0 + rounder | |||||
| pmaddwd_m2r (*(table+28), mm6); // mm6 = C7*x1-C5*x3 C7*x5+C3*x7 | |||||
| paddd_r2r (mm7, mm1); // mm1 = b1 b0 | |||||
| pmaddwd_m2r (*(table+28), mm6); // mm6 = C7*x1-C5*x3 C7*x5+C3*x7 | |||||
| paddd_r2r (mm7, mm1); // mm1 = b1 b0 | |||||
| paddd_m2r (*rounder, mm0); // mm0 += rounder | |||||
| psubd_r2r (mm1, mm3); // mm3 = a1-b1 a0-b0 + rounder | |||||
| paddd_m2r (*rounder, mm0); // mm0 += rounder | |||||
| psubd_r2r (mm1, mm3); // mm3 = a1-b1 a0-b0 + rounder | |||||
| psrad_i2r (ROW_SHIFT, mm3); // mm3 = y6 y7 | |||||
| paddd_r2r (mm4, mm1); // mm1 = a1+b1 a0+b0 + rounder | |||||
| psrad_i2r (ROW_SHIFT, mm3); // mm3 = y6 y7 | |||||
| paddd_r2r (mm4, mm1); // mm1 = a1+b1 a0+b0 + rounder | |||||
| paddd_r2r (mm2, mm0); // mm0 = a3 a2 + rounder | |||||
| psrad_i2r (ROW_SHIFT, mm1); // mm1 = y1 y0 | |||||
| paddd_r2r (mm2, mm0); // mm0 = a3 a2 + rounder | |||||
| psrad_i2r (ROW_SHIFT, mm1); // mm1 = y1 y0 | |||||
| paddd_r2r (mm6, mm5); // mm5 = b3 b2 | |||||
| movq_r2r (mm0, mm4); // mm4 = a3 a2 + rounder | |||||
| paddd_r2r (mm6, mm5); // mm5 = b3 b2 | |||||
| movq_r2r (mm0, mm4); // mm4 = a3 a2 + rounder | |||||
| paddd_r2r (mm5, mm0); // mm0 = a3+b3 a2+b2 + rounder | |||||
| psubd_r2r (mm5, mm4); // mm4 = a3-b3 a2-b2 + rounder | |||||
| paddd_r2r (mm5, mm0); // mm0 = a3+b3 a2+b2 + rounder | |||||
| psubd_r2r (mm5, mm4); // mm4 = a3-b3 a2-b2 + rounder | |||||
| } | } | ||||
| static inline void mmxext_row_tail (int16_t * row, int store) | static inline void mmxext_row_tail (int16_t * row, int store) | ||||
| { | { | ||||
| psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2 | |||||
| psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2 | |||||
| psrad_i2r (ROW_SHIFT, mm4); // mm4 = y4 y5 | |||||
| psrad_i2r (ROW_SHIFT, mm4); // mm4 = y4 y5 | |||||
| packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0 | |||||
| packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0 | |||||
| packssdw_r2r (mm3, mm4); // mm4 = y6 y7 y4 y5 | |||||
| packssdw_r2r (mm3, mm4); // mm4 = y6 y7 y4 y5 | |||||
| movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0 | |||||
| pshufw_r2r (mm4, mm4, 0xb1); // mm4 = y7 y6 y5 y4 | |||||
| movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0 | |||||
| pshufw_r2r (mm4, mm4, 0xb1); // mm4 = y7 y6 y5 y4 | |||||
| /* slot */ | /* slot */ | ||||
| movq_r2m (mm4, *(row+store+4)); // save y7 y6 y5 y4 | |||||
| movq_r2m (mm4, *(row+store+4)); // save y7 y6 y5 y4 | |||||
| } | } | ||||
| static inline void mmxext_row_mid (int16_t * row, int store, | static inline void mmxext_row_mid (int16_t * row, int store, | ||||
| int offset, const int16_t * table) | |||||
| int offset, const int16_t * table) | |||||
| { | { | ||||
| movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0 | |||||
| psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2 | |||||
| movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0 | |||||
| psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2 | |||||
| movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1 | |||||
| psrad_i2r (ROW_SHIFT, mm4); // mm4 = y4 y5 | |||||
| movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1 | |||||
| psrad_i2r (ROW_SHIFT, mm4); // mm4 = y4 y5 | |||||
| packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0 | |||||
| movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1 | |||||
| packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0 | |||||
| movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1 | |||||
| packssdw_r2r (mm3, mm4); // mm4 = y6 y7 y4 y5 | |||||
| movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0 | |||||
| packssdw_r2r (mm3, mm4); // mm4 = y6 y7 y4 y5 | |||||
| movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0 | |||||
| movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0 | |||||
| pshufw_r2r (mm4, mm4, 0xb1); // mm4 = y7 y6 y5 y4 | |||||
| movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0 | |||||
| pshufw_r2r (mm4, mm4, 0xb1); // mm4 = y7 y6 y5 y4 | |||||
| movq_m2r (*table, mm3); // mm3 = -C2 -C4 C2 C4 | |||||
| movq_r2m (mm4, *(row+store+4)); // save y7 y6 y5 y4 | |||||
| movq_m2r (*table, mm3); // mm3 = -C2 -C4 C2 C4 | |||||
| movq_r2m (mm4, *(row+store+4)); // save y7 y6 y5 y4 | |||||
| pmaddwd_r2r (mm0, mm3); // mm3 = -C4*x4-C2*x6 C4*x0+C2*x2 | |||||
| pmaddwd_r2r (mm0, mm3); // mm3 = -C4*x4-C2*x6 C4*x0+C2*x2 | |||||
| movq_m2r (*(table+4), mm4); // mm4 = C6 C4 C6 C4 | |||||
| pshufw_r2r (mm2, mm2, 0x4e); // mm2 = x2 x0 x6 x4 | |||||
| movq_m2r (*(table+4), mm4); // mm4 = C6 C4 C6 C4 | |||||
| pshufw_r2r (mm2, mm2, 0x4e); // mm2 = x2 x0 x6 x4 | |||||
| } | } | ||||
| /* MMX row IDCT */ | /* MMX row IDCT */ | ||||
| #define mmx_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, c4, c6, \ | |||||
| c4, c6, -c4, -c2, \ | |||||
| c1, c3, c3, -c7, \ | |||||
| c5, c7, -c1, -c5, \ | |||||
| c4, -c6, c4, -c2, \ | |||||
| -c4, c2, c4, -c6, \ | |||||
| c5, -c1, c7, -c5, \ | |||||
| c7, c3, c3, -c1 } | |||||
| #define mmx_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, c4, c6, \ | |||||
| c4, c6, -c4, -c2, \ | |||||
| c1, c3, c3, -c7, \ | |||||
| c5, c7, -c1, -c5, \ | |||||
| c4, -c6, c4, -c2, \ | |||||
| -c4, c2, c4, -c6, \ | |||||
| c5, -c1, c7, -c5, \ | |||||
| c7, c3, c3, -c1 } | |||||
| static inline void mmx_row_head (int16_t * row, int offset, const int16_t * table) | static inline void mmx_row_head (int16_t * row, int offset, const int16_t * table) | ||||
| { | { | ||||
| movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0 | |||||
| movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0 | |||||
| movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1 | |||||
| movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0 | |||||
| movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1 | |||||
| movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0 | |||||
| movq_m2r (*table, mm3); // mm3 = C6 C4 C2 C4 | |||||
| movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1 | |||||
| movq_m2r (*table, mm3); // mm3 = C6 C4 C2 C4 | |||||
| movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1 | |||||
| punpckldq_r2r (mm0, mm0); // mm0 = x2 x0 x2 x0 | |||||
| punpckldq_r2r (mm0, mm0); // mm0 = x2 x0 x2 x0 | |||||
| movq_m2r (*(table+4), mm4); // mm4 = -C2 -C4 C6 C4 | |||||
| pmaddwd_r2r (mm0, mm3); // mm3 = C4*x0+C6*x2 C4*x0+C2*x2 | |||||
| movq_m2r (*(table+4), mm4); // mm4 = -C2 -C4 C6 C4 | |||||
| pmaddwd_r2r (mm0, mm3); // mm3 = C4*x0+C6*x2 C4*x0+C2*x2 | |||||
| movq_m2r (*(table+8), mm1); // mm1 = -C7 C3 C3 C1 | |||||
| punpckhdq_r2r (mm2, mm2); // mm2 = x6 x4 x6 x4 | |||||
| movq_m2r (*(table+8), mm1); // mm1 = -C7 C3 C3 C1 | |||||
| punpckhdq_r2r (mm2, mm2); // mm2 = x6 x4 x6 x4 | |||||
| } | } | ||||
| static inline void mmx_row (const int16_t * table, const int32_t * rounder) | static inline void mmx_row (const int16_t * table, const int32_t * rounder) | ||||
| { | { | ||||
| pmaddwd_r2r (mm2, mm4); // mm4 = -C4*x4-C2*x6 C4*x4+C6*x6 | |||||
| punpckldq_r2r (mm5, mm5); // mm5 = x3 x1 x3 x1 | |||||
| pmaddwd_r2r (mm2, mm4); // mm4 = -C4*x4-C2*x6 C4*x4+C6*x6 | |||||
| punpckldq_r2r (mm5, mm5); // mm5 = x3 x1 x3 x1 | |||||
| pmaddwd_m2r (*(table+16), mm0); // mm0 = C4*x0-C2*x2 C4*x0-C6*x2 | |||||
| punpckhdq_r2r (mm6, mm6); // mm6 = x7 x5 x7 x5 | |||||
| pmaddwd_m2r (*(table+16), mm0); // mm0 = C4*x0-C2*x2 C4*x0-C6*x2 | |||||
| punpckhdq_r2r (mm6, mm6); // mm6 = x7 x5 x7 x5 | |||||
| movq_m2r (*(table+12), mm7); // mm7 = -C5 -C1 C7 C5 | |||||
| pmaddwd_r2r (mm5, mm1); // mm1 = C3*x1-C7*x3 C1*x1+C3*x3 | |||||
| movq_m2r (*(table+12), mm7); // mm7 = -C5 -C1 C7 C5 | |||||
| pmaddwd_r2r (mm5, mm1); // mm1 = C3*x1-C7*x3 C1*x1+C3*x3 | |||||
| paddd_m2r (*rounder, mm3); // mm3 += rounder | |||||
| pmaddwd_r2r (mm6, mm7); // mm7 = -C1*x5-C5*x7 C5*x5+C7*x7 | |||||
| paddd_m2r (*rounder, mm3); // mm3 += rounder | |||||
| pmaddwd_r2r (mm6, mm7); // mm7 = -C1*x5-C5*x7 C5*x5+C7*x7 | |||||
| pmaddwd_m2r (*(table+20), mm2); // mm2 = C4*x4-C6*x6 -C4*x4+C2*x6 | |||||
| paddd_r2r (mm4, mm3); // mm3 = a1 a0 + rounder | |||||
| pmaddwd_m2r (*(table+20), mm2); // mm2 = C4*x4-C6*x6 -C4*x4+C2*x6 | |||||
| paddd_r2r (mm4, mm3); // mm3 = a1 a0 + rounder | |||||
| pmaddwd_m2r (*(table+24), mm5); // mm5 = C7*x1-C5*x3 C5*x1-C1*x3 | |||||
| movq_r2r (mm3, mm4); // mm4 = a1 a0 + rounder | |||||
| pmaddwd_m2r (*(table+24), mm5); // mm5 = C7*x1-C5*x3 C5*x1-C1*x3 | |||||
| movq_r2r (mm3, mm4); // mm4 = a1 a0 + rounder | |||||
| pmaddwd_m2r (*(table+28), mm6); // mm6 = C3*x5-C1*x7 C7*x5+C3*x7 | |||||
| paddd_r2r (mm7, mm1); // mm1 = b1 b0 | |||||
| pmaddwd_m2r (*(table+28), mm6); // mm6 = C3*x5-C1*x7 C7*x5+C3*x7 | |||||
| paddd_r2r (mm7, mm1); // mm1 = b1 b0 | |||||
| paddd_m2r (*rounder, mm0); // mm0 += rounder | |||||
| psubd_r2r (mm1, mm3); // mm3 = a1-b1 a0-b0 + rounder | |||||
| paddd_m2r (*rounder, mm0); // mm0 += rounder | |||||
| psubd_r2r (mm1, mm3); // mm3 = a1-b1 a0-b0 + rounder | |||||
| psrad_i2r (ROW_SHIFT, mm3); // mm3 = y6 y7 | |||||
| paddd_r2r (mm4, mm1); // mm1 = a1+b1 a0+b0 + rounder | |||||
| psrad_i2r (ROW_SHIFT, mm3); // mm3 = y6 y7 | |||||
| paddd_r2r (mm4, mm1); // mm1 = a1+b1 a0+b0 + rounder | |||||
| paddd_r2r (mm2, mm0); // mm0 = a3 a2 + rounder | |||||
| psrad_i2r (ROW_SHIFT, mm1); // mm1 = y1 y0 | |||||
| paddd_r2r (mm2, mm0); // mm0 = a3 a2 + rounder | |||||
| psrad_i2r (ROW_SHIFT, mm1); // mm1 = y1 y0 | |||||
| paddd_r2r (mm6, mm5); // mm5 = b3 b2 | |||||
| movq_r2r (mm0, mm7); // mm7 = a3 a2 + rounder | |||||
| paddd_r2r (mm6, mm5); // mm5 = b3 b2 | |||||
| movq_r2r (mm0, mm7); // mm7 = a3 a2 + rounder | |||||
| paddd_r2r (mm5, mm0); // mm0 = a3+b3 a2+b2 + rounder | |||||
| psubd_r2r (mm5, mm7); // mm7 = a3-b3 a2-b2 + rounder | |||||
| paddd_r2r (mm5, mm0); // mm0 = a3+b3 a2+b2 + rounder | |||||
| psubd_r2r (mm5, mm7); // mm7 = a3-b3 a2-b2 + rounder | |||||
| } | } | ||||
| static inline void mmx_row_tail (int16_t * row, int store) | static inline void mmx_row_tail (int16_t * row, int store) | ||||
| { | { | ||||
| psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2 | |||||
| psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2 | |||||
| psrad_i2r (ROW_SHIFT, mm7); // mm7 = y4 y5 | |||||
| psrad_i2r (ROW_SHIFT, mm7); // mm7 = y4 y5 | |||||
| packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0 | |||||
| packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0 | |||||
| packssdw_r2r (mm3, mm7); // mm7 = y6 y7 y4 y5 | |||||
| packssdw_r2r (mm3, mm7); // mm7 = y6 y7 y4 y5 | |||||
| movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0 | |||||
| movq_r2r (mm7, mm4); // mm4 = y6 y7 y4 y5 | |||||
| movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0 | |||||
| movq_r2r (mm7, mm4); // mm4 = y6 y7 y4 y5 | |||||
| pslld_i2r (16, mm7); // mm7 = y7 0 y5 0 | |||||
| pslld_i2r (16, mm7); // mm7 = y7 0 y5 0 | |||||
| psrld_i2r (16, mm4); // mm4 = 0 y6 0 y4 | |||||
| psrld_i2r (16, mm4); // mm4 = 0 y6 0 y4 | |||||
| por_r2r (mm4, mm7); // mm7 = y7 y6 y5 y4 | |||||
| por_r2r (mm4, mm7); // mm7 = y7 y6 y5 y4 | |||||
| /* slot */ | /* slot */ | ||||
| movq_r2m (mm7, *(row+store+4)); // save y7 y6 y5 y4 | |||||
| movq_r2m (mm7, *(row+store+4)); // save y7 y6 y5 y4 | |||||
| } | } | ||||
| static inline void mmx_row_mid (int16_t * row, int store, | static inline void mmx_row_mid (int16_t * row, int store, | ||||
| int offset, const int16_t * table) | |||||
| int offset, const int16_t * table) | |||||
| { | { | ||||
| movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0 | |||||
| psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2 | |||||
| movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0 | |||||
| psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2 | |||||
| movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1 | |||||
| psrad_i2r (ROW_SHIFT, mm7); // mm7 = y4 y5 | |||||
| movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1 | |||||
| psrad_i2r (ROW_SHIFT, mm7); // mm7 = y4 y5 | |||||
| packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0 | |||||
| movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1 | |||||
| packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0 | |||||
| movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1 | |||||
| packssdw_r2r (mm3, mm7); // mm7 = y6 y7 y4 y5 | |||||
| movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0 | |||||
| packssdw_r2r (mm3, mm7); // mm7 = y6 y7 y4 y5 | |||||
| movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0 | |||||
| movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0 | |||||
| movq_r2r (mm7, mm1); // mm1 = y6 y7 y4 y5 | |||||
| movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0 | |||||
| movq_r2r (mm7, mm1); // mm1 = y6 y7 y4 y5 | |||||
| punpckldq_r2r (mm0, mm0); // mm0 = x2 x0 x2 x0 | |||||
| psrld_i2r (16, mm7); // mm7 = 0 y6 0 y4 | |||||
| punpckldq_r2r (mm0, mm0); // mm0 = x2 x0 x2 x0 | |||||
| psrld_i2r (16, mm7); // mm7 = 0 y6 0 y4 | |||||
| movq_m2r (*table, mm3); // mm3 = C6 C4 C2 C4 | |||||
| pslld_i2r (16, mm1); // mm1 = y7 0 y5 0 | |||||
| movq_m2r (*table, mm3); // mm3 = C6 C4 C2 C4 | |||||
| pslld_i2r (16, mm1); // mm1 = y7 0 y5 0 | |||||
| movq_m2r (*(table+4), mm4); // mm4 = -C2 -C4 C6 C4 | |||||
| por_r2r (mm1, mm7); // mm7 = y7 y6 y5 y4 | |||||
| movq_m2r (*(table+4), mm4); // mm4 = -C2 -C4 C6 C4 | |||||
| por_r2r (mm1, mm7); // mm7 = y7 y6 y5 y4 | |||||
| movq_m2r (*(table+8), mm1); // mm1 = -C7 C3 C3 C1 | |||||
| punpckhdq_r2r (mm2, mm2); // mm2 = x6 x4 x6 x4 | |||||
| movq_m2r (*(table+8), mm1); // mm1 = -C7 C3 C3 C1 | |||||
| punpckhdq_r2r (mm2, mm2); // mm2 = x6 x4 x6 x4 | |||||
| movq_r2m (mm7, *(row+store+4)); // save y7 y6 y5 y4 | |||||
| pmaddwd_r2r (mm0, mm3); // mm3 = C4*x0+C6*x2 C4*x0+C2*x2 | |||||
| movq_r2m (mm7, *(row+store+4)); // save y7 y6 y5 y4 | |||||
| pmaddwd_r2r (mm0, mm3); // mm3 = C4*x0+C6*x2 C4*x0+C2*x2 | |||||
| } | } | ||||
| @@ -403,132 +403,132 @@ static inline void idct_col (int16_t * col, int offset) | |||||
| /* column code adapted from peter gubanov */ | /* column code adapted from peter gubanov */ | ||||
| /* http://www.elecard.com/peter/idct.shtml */ | /* http://www.elecard.com/peter/idct.shtml */ | ||||
| movq_m2r (*_T1, mm0); // mm0 = T1 | |||||
| movq_m2r (*_T1, mm0); // mm0 = T1 | |||||
| movq_m2r (*(col+offset+1*8), mm1); // mm1 = x1 | |||||
| movq_r2r (mm0, mm2); // mm2 = T1 | |||||
| movq_m2r (*(col+offset+1*8), mm1); // mm1 = x1 | |||||
| movq_r2r (mm0, mm2); // mm2 = T1 | |||||
| movq_m2r (*(col+offset+7*8), mm4); // mm4 = x7 | |||||
| pmulhw_r2r (mm1, mm0); // mm0 = T1*x1 | |||||
| movq_m2r (*(col+offset+7*8), mm4); // mm4 = x7 | |||||
| pmulhw_r2r (mm1, mm0); // mm0 = T1*x1 | |||||
| movq_m2r (*_T3, mm5); // mm5 = T3 | |||||
| pmulhw_r2r (mm4, mm2); // mm2 = T1*x7 | |||||
| movq_m2r (*_T3, mm5); // mm5 = T3 | |||||
| pmulhw_r2r (mm4, mm2); // mm2 = T1*x7 | |||||
| movq_m2r (*(col+offset+5*8), mm6); // mm6 = x5 | |||||
| movq_r2r (mm5, mm7); // mm7 = T3-1 | |||||
| movq_m2r (*(col+offset+5*8), mm6); // mm6 = x5 | |||||
| movq_r2r (mm5, mm7); // mm7 = T3-1 | |||||
| movq_m2r (*(col+offset+3*8), mm3); // mm3 = x3 | |||||
| psubsw_r2r (mm4, mm0); // mm0 = v17 | |||||
| movq_m2r (*(col+offset+3*8), mm3); // mm3 = x3 | |||||
| psubsw_r2r (mm4, mm0); // mm0 = v17 | |||||
| movq_m2r (*_T2, mm4); // mm4 = T2 | |||||
| pmulhw_r2r (mm3, mm5); // mm5 = (T3-1)*x3 | |||||
| movq_m2r (*_T2, mm4); // mm4 = T2 | |||||
| pmulhw_r2r (mm3, mm5); // mm5 = (T3-1)*x3 | |||||
| paddsw_r2r (mm2, mm1); // mm1 = u17 | |||||
| pmulhw_r2r (mm6, mm7); // mm7 = (T3-1)*x5 | |||||
| paddsw_r2r (mm2, mm1); // mm1 = u17 | |||||
| pmulhw_r2r (mm6, mm7); // mm7 = (T3-1)*x5 | |||||
| /* slot */ | /* slot */ | ||||
| movq_r2r (mm4, mm2); // mm2 = T2 | |||||
| paddsw_r2r (mm3, mm5); // mm5 = T3*x3 | |||||
| movq_r2r (mm4, mm2); // mm2 = T2 | |||||
| paddsw_r2r (mm3, mm5); // mm5 = T3*x3 | |||||
| pmulhw_m2r (*(col+offset+2*8), mm4);// mm4 = T2*x2 | pmulhw_m2r (*(col+offset+2*8), mm4);// mm4 = T2*x2 | ||||
| paddsw_r2r (mm6, mm7); // mm7 = T3*x5 | |||||
| paddsw_r2r (mm6, mm7); // mm7 = T3*x5 | |||||
| psubsw_r2r (mm6, mm5); // mm5 = v35 | |||||
| paddsw_r2r (mm3, mm7); // mm7 = u35 | |||||
| psubsw_r2r (mm6, mm5); // mm5 = v35 | |||||
| paddsw_r2r (mm3, mm7); // mm7 = u35 | |||||
| movq_m2r (*(col+offset+6*8), mm3); // mm3 = x6 | |||||
| movq_r2r (mm0, mm6); // mm6 = v17 | |||||
| movq_m2r (*(col+offset+6*8), mm3); // mm3 = x6 | |||||
| movq_r2r (mm0, mm6); // mm6 = v17 | |||||
| pmulhw_r2r (mm3, mm2); // mm2 = T2*x6 | |||||
| psubsw_r2r (mm5, mm0); // mm0 = b3 | |||||
| pmulhw_r2r (mm3, mm2); // mm2 = T2*x6 | |||||
| psubsw_r2r (mm5, mm0); // mm0 = b3 | |||||
| psubsw_r2r (mm3, mm4); // mm4 = v26 | |||||
| paddsw_r2r (mm6, mm5); // mm5 = v12 | |||||
| psubsw_r2r (mm3, mm4); // mm4 = v26 | |||||
| paddsw_r2r (mm6, mm5); // mm5 = v12 | |||||
| movq_r2m (mm0, *(col+offset+3*8)); // save b3 in scratch0 | |||||
| movq_r2r (mm1, mm6); // mm6 = u17 | |||||
| movq_r2m (mm0, *(col+offset+3*8)); // save b3 in scratch0 | |||||
| movq_r2r (mm1, mm6); // mm6 = u17 | |||||
| paddsw_m2r (*(col+offset+2*8), mm2);// mm2 = u26 | paddsw_m2r (*(col+offset+2*8), mm2);// mm2 = u26 | ||||
| paddsw_r2r (mm7, mm6); // mm6 = b0 | |||||
| paddsw_r2r (mm7, mm6); // mm6 = b0 | |||||
| psubsw_r2r (mm7, mm1); // mm1 = u12 | |||||
| movq_r2r (mm1, mm7); // mm7 = u12 | |||||
| psubsw_r2r (mm7, mm1); // mm1 = u12 | |||||
| movq_r2r (mm1, mm7); // mm7 = u12 | |||||
| movq_m2r (*(col+offset+0*8), mm3); // mm3 = x0 | |||||
| paddsw_r2r (mm5, mm1); // mm1 = u12+v12 | |||||
| movq_m2r (*(col+offset+0*8), mm3); // mm3 = x0 | |||||
| paddsw_r2r (mm5, mm1); // mm1 = u12+v12 | |||||
| movq_m2r (*_C4, mm0); // mm0 = C4/2 | |||||
| psubsw_r2r (mm5, mm7); // mm7 = u12-v12 | |||||
| movq_m2r (*_C4, mm0); // mm0 = C4/2 | |||||
| psubsw_r2r (mm5, mm7); // mm7 = u12-v12 | |||||
| movq_r2m (mm6, *(col+offset+5*8)); // save b0 in scratch1 | |||||
| pmulhw_r2r (mm0, mm1); // mm1 = b1/2 | |||||
| movq_r2m (mm6, *(col+offset+5*8)); // save b0 in scratch1 | |||||
| pmulhw_r2r (mm0, mm1); // mm1 = b1/2 | |||||
| movq_r2r (mm4, mm6); // mm6 = v26 | |||||
| pmulhw_r2r (mm0, mm7); // mm7 = b2/2 | |||||
| movq_r2r (mm4, mm6); // mm6 = v26 | |||||
| pmulhw_r2r (mm0, mm7); // mm7 = b2/2 | |||||
| movq_m2r (*(col+offset+4*8), mm5); // mm5 = x4 | |||||
| movq_r2r (mm3, mm0); // mm0 = x0 | |||||
| movq_m2r (*(col+offset+4*8), mm5); // mm5 = x4 | |||||
| movq_r2r (mm3, mm0); // mm0 = x0 | |||||
| psubsw_r2r (mm5, mm3); // mm3 = v04 | |||||
| paddsw_r2r (mm5, mm0); // mm0 = u04 | |||||
| psubsw_r2r (mm5, mm3); // mm3 = v04 | |||||
| paddsw_r2r (mm5, mm0); // mm0 = u04 | |||||
| paddsw_r2r (mm3, mm4); // mm4 = a1 | |||||
| movq_r2r (mm0, mm5); // mm5 = u04 | |||||
| paddsw_r2r (mm3, mm4); // mm4 = a1 | |||||
| movq_r2r (mm0, mm5); // mm5 = u04 | |||||
| psubsw_r2r (mm6, mm3); // mm3 = a2 | |||||
| paddsw_r2r (mm2, mm5); // mm5 = a0 | |||||
| psubsw_r2r (mm6, mm3); // mm3 = a2 | |||||
| paddsw_r2r (mm2, mm5); // mm5 = a0 | |||||
| paddsw_r2r (mm1, mm1); // mm1 = b1 | |||||
| psubsw_r2r (mm2, mm0); // mm0 = a3 | |||||
| paddsw_r2r (mm1, mm1); // mm1 = b1 | |||||
| psubsw_r2r (mm2, mm0); // mm0 = a3 | |||||
| paddsw_r2r (mm7, mm7); // mm7 = b2 | |||||
| movq_r2r (mm3, mm2); // mm2 = a2 | |||||
| paddsw_r2r (mm7, mm7); // mm7 = b2 | |||||
| movq_r2r (mm3, mm2); // mm2 = a2 | |||||
| movq_r2r (mm4, mm6); // mm6 = a1 | |||||
| paddsw_r2r (mm7, mm3); // mm3 = a2+b2 | |||||
| movq_r2r (mm4, mm6); // mm6 = a1 | |||||
| paddsw_r2r (mm7, mm3); // mm3 = a2+b2 | |||||
| psraw_i2r (COL_SHIFT, mm3); // mm3 = y2 | |||||
| paddsw_r2r (mm1, mm4); // mm4 = a1+b1 | |||||
| psraw_i2r (COL_SHIFT, mm3); // mm3 = y2 | |||||
| paddsw_r2r (mm1, mm4); // mm4 = a1+b1 | |||||
| psraw_i2r (COL_SHIFT, mm4); // mm4 = y1 | |||||
| psubsw_r2r (mm1, mm6); // mm6 = a1-b1 | |||||
| psraw_i2r (COL_SHIFT, mm4); // mm4 = y1 | |||||
| psubsw_r2r (mm1, mm6); // mm6 = a1-b1 | |||||
| movq_m2r (*(col+offset+5*8), mm1); // mm1 = b0 | |||||
| psubsw_r2r (mm7, mm2); // mm2 = a2-b2 | |||||
| movq_m2r (*(col+offset+5*8), mm1); // mm1 = b0 | |||||
| psubsw_r2r (mm7, mm2); // mm2 = a2-b2 | |||||
| psraw_i2r (COL_SHIFT, mm6); // mm6 = y6 | |||||
| movq_r2r (mm5, mm7); // mm7 = a0 | |||||
| psraw_i2r (COL_SHIFT, mm6); // mm6 = y6 | |||||
| movq_r2r (mm5, mm7); // mm7 = a0 | |||||
| movq_r2m (mm4, *(col+offset+1*8)); // save y1 | |||||
| psraw_i2r (COL_SHIFT, mm2); // mm2 = y5 | |||||
| movq_r2m (mm4, *(col+offset+1*8)); // save y1 | |||||
| psraw_i2r (COL_SHIFT, mm2); // mm2 = y5 | |||||
| movq_r2m (mm3, *(col+offset+2*8)); // save y2 | |||||
| paddsw_r2r (mm1, mm5); // mm5 = a0+b0 | |||||
| movq_r2m (mm3, *(col+offset+2*8)); // save y2 | |||||
| paddsw_r2r (mm1, mm5); // mm5 = a0+b0 | |||||
| movq_m2r (*(col+offset+3*8), mm4); // mm4 = b3 | |||||
| psubsw_r2r (mm1, mm7); // mm7 = a0-b0 | |||||
| movq_m2r (*(col+offset+3*8), mm4); // mm4 = b3 | |||||
| psubsw_r2r (mm1, mm7); // mm7 = a0-b0 | |||||
| psraw_i2r (COL_SHIFT, mm5); // mm5 = y0 | |||||
| movq_r2r (mm0, mm3); // mm3 = a3 | |||||
| psraw_i2r (COL_SHIFT, mm5); // mm5 = y0 | |||||
| movq_r2r (mm0, mm3); // mm3 = a3 | |||||
| movq_r2m (mm2, *(col+offset+5*8)); // save y5 | |||||
| psubsw_r2r (mm4, mm3); // mm3 = a3-b3 | |||||
| movq_r2m (mm2, *(col+offset+5*8)); // save y5 | |||||
| psubsw_r2r (mm4, mm3); // mm3 = a3-b3 | |||||
| psraw_i2r (COL_SHIFT, mm7); // mm7 = y7 | |||||
| paddsw_r2r (mm0, mm4); // mm4 = a3+b3 | |||||
| psraw_i2r (COL_SHIFT, mm7); // mm7 = y7 | |||||
| paddsw_r2r (mm0, mm4); // mm4 = a3+b3 | |||||
| movq_r2m (mm5, *(col+offset+0*8)); // save y0 | |||||
| psraw_i2r (COL_SHIFT, mm3); // mm3 = y4 | |||||
| movq_r2m (mm5, *(col+offset+0*8)); // save y0 | |||||
| psraw_i2r (COL_SHIFT, mm3); // mm3 = y4 | |||||
| movq_r2m (mm6, *(col+offset+6*8)); // save y6 | |||||
| psraw_i2r (COL_SHIFT, mm4); // mm4 = y3 | |||||
| movq_r2m (mm6, *(col+offset+6*8)); // save y6 | |||||
| psraw_i2r (COL_SHIFT, mm4); // mm4 = y3 | |||||
| movq_r2m (mm7, *(col+offset+7*8)); // save y7 | |||||
| movq_r2m (mm7, *(col+offset+7*8)); // save y7 | |||||
| movq_r2m (mm3, *(col+offset+4*8)); // save y4 | |||||
| movq_r2m (mm3, *(col+offset+4*8)); // save y4 | |||||
| movq_r2m (mm4, *(col+offset+3*8)); // save y3 | |||||
| movq_r2m (mm4, *(col+offset+3*8)); // save y3 | |||||
| #undef T1 | #undef T1 | ||||
| #undef T2 | #undef T2 | ||||
| @@ -540,61 +540,61 @@ static const int32_t rounder0[] ATTR_ALIGN(8) = | |||||
| rounder ((1 << (COL_SHIFT - 1)) - 0.5); | rounder ((1 << (COL_SHIFT - 1)) - 0.5); | ||||
| static const int32_t rounder4[] ATTR_ALIGN(8) = rounder (0); | static const int32_t rounder4[] ATTR_ALIGN(8) = rounder (0); | ||||
| static const int32_t rounder1[] ATTR_ALIGN(8) = | static const int32_t rounder1[] ATTR_ALIGN(8) = | ||||
| rounder (1.25683487303); /* C1*(C1/C4+C1+C7)/2 */ | |||||
| rounder (1.25683487303); /* C1*(C1/C4+C1+C7)/2 */ | |||||
| static const int32_t rounder7[] ATTR_ALIGN(8) = | static const int32_t rounder7[] ATTR_ALIGN(8) = | ||||
| rounder (-0.25); /* C1*(C7/C4+C7-C1)/2 */ | |||||
| rounder (-0.25); /* C1*(C7/C4+C7-C1)/2 */ | |||||
| static const int32_t rounder2[] ATTR_ALIGN(8) = | static const int32_t rounder2[] ATTR_ALIGN(8) = | ||||
| rounder (0.60355339059); /* C2 * (C6+C2)/2 */ | |||||
| rounder (0.60355339059); /* C2 * (C6+C2)/2 */ | |||||
| static const int32_t rounder6[] ATTR_ALIGN(8) = | static const int32_t rounder6[] ATTR_ALIGN(8) = | ||||
| rounder (-0.25); /* C2 * (C6-C2)/2 */ | |||||
| rounder (-0.25); /* C2 * (C6-C2)/2 */ | |||||
| static const int32_t rounder3[] ATTR_ALIGN(8) = | static const int32_t rounder3[] ATTR_ALIGN(8) = | ||||
| rounder (0.087788325588); /* C3*(-C3/C4+C3+C5)/2 */ | |||||
| rounder (0.087788325588); /* C3*(-C3/C4+C3+C5)/2 */ | |||||
| static const int32_t rounder5[] ATTR_ALIGN(8) = | static const int32_t rounder5[] ATTR_ALIGN(8) = | ||||
| rounder (-0.441341716183); /* C3*(-C5/C4+C5-C3)/2 */ | |||||
| rounder (-0.441341716183); /* C3*(-C5/C4+C5-C3)/2 */ | |||||
| #undef COL_SHIFT | #undef COL_SHIFT | ||||
| #undef ROW_SHIFT | #undef ROW_SHIFT | ||||
| #define declare_idct(idct,table,idct_row_head,idct_row,idct_row_tail,idct_row_mid) \ | |||||
| void idct (int16_t * block) \ | |||||
| { \ | |||||
| static const int16_t table04[] ATTR_ALIGN(16) = \ | |||||
| table (22725, 21407, 19266, 16384, 12873, 8867, 4520); \ | |||||
| static const int16_t table17[] ATTR_ALIGN(16) = \ | |||||
| table (31521, 29692, 26722, 22725, 17855, 12299, 6270); \ | |||||
| static const int16_t table26[] ATTR_ALIGN(16) = \ | |||||
| table (29692, 27969, 25172, 21407, 16819, 11585, 5906); \ | |||||
| static const int16_t table35[] ATTR_ALIGN(16) = \ | |||||
| table (26722, 25172, 22654, 19266, 15137, 10426, 5315); \ | |||||
| \ | |||||
| idct_row_head (block, 0*8, table04); \ | |||||
| idct_row (table04, rounder0); \ | |||||
| idct_row_mid (block, 0*8, 4*8, table04); \ | |||||
| idct_row (table04, rounder4); \ | |||||
| idct_row_mid (block, 4*8, 1*8, table17); \ | |||||
| idct_row (table17, rounder1); \ | |||||
| idct_row_mid (block, 1*8, 7*8, table17); \ | |||||
| idct_row (table17, rounder7); \ | |||||
| idct_row_mid (block, 7*8, 2*8, table26); \ | |||||
| idct_row (table26, rounder2); \ | |||||
| idct_row_mid (block, 2*8, 6*8, table26); \ | |||||
| idct_row (table26, rounder6); \ | |||||
| idct_row_mid (block, 6*8, 3*8, table35); \ | |||||
| idct_row (table35, rounder3); \ | |||||
| idct_row_mid (block, 3*8, 5*8, table35); \ | |||||
| idct_row (table35, rounder5); \ | |||||
| idct_row_tail (block, 5*8); \ | |||||
| \ | |||||
| idct_col (block, 0); \ | |||||
| idct_col (block, 4); \ | |||||
| #define declare_idct(idct,table,idct_row_head,idct_row,idct_row_tail,idct_row_mid) \ | |||||
| void idct (int16_t * block) \ | |||||
| { \ | |||||
| static const int16_t table04[] ATTR_ALIGN(16) = \ | |||||
| table (22725, 21407, 19266, 16384, 12873, 8867, 4520); \ | |||||
| static const int16_t table17[] ATTR_ALIGN(16) = \ | |||||
| table (31521, 29692, 26722, 22725, 17855, 12299, 6270); \ | |||||
| static const int16_t table26[] ATTR_ALIGN(16) = \ | |||||
| table (29692, 27969, 25172, 21407, 16819, 11585, 5906); \ | |||||
| static const int16_t table35[] ATTR_ALIGN(16) = \ | |||||
| table (26722, 25172, 22654, 19266, 15137, 10426, 5315); \ | |||||
| \ | |||||
| idct_row_head (block, 0*8, table04); \ | |||||
| idct_row (table04, rounder0); \ | |||||
| idct_row_mid (block, 0*8, 4*8, table04); \ | |||||
| idct_row (table04, rounder4); \ | |||||
| idct_row_mid (block, 4*8, 1*8, table17); \ | |||||
| idct_row (table17, rounder1); \ | |||||
| idct_row_mid (block, 1*8, 7*8, table17); \ | |||||
| idct_row (table17, rounder7); \ | |||||
| idct_row_mid (block, 7*8, 2*8, table26); \ | |||||
| idct_row (table26, rounder2); \ | |||||
| idct_row_mid (block, 2*8, 6*8, table26); \ | |||||
| idct_row (table26, rounder6); \ | |||||
| idct_row_mid (block, 6*8, 3*8, table35); \ | |||||
| idct_row (table35, rounder3); \ | |||||
| idct_row_mid (block, 3*8, 5*8, table35); \ | |||||
| idct_row (table35, rounder5); \ | |||||
| idct_row_tail (block, 5*8); \ | |||||
| \ | |||||
| idct_col (block, 0); \ | |||||
| idct_col (block, 4); \ | |||||
| } | } | ||||
| void ff_mmx_idct(DCTELEM *block); | void ff_mmx_idct(DCTELEM *block); | ||||
| void ff_mmxext_idct(DCTELEM *block); | void ff_mmxext_idct(DCTELEM *block); | ||||
| declare_idct (ff_mmxext_idct, mmxext_table, | declare_idct (ff_mmxext_idct, mmxext_table, | ||||
| mmxext_row_head, mmxext_row, mmxext_row_tail, mmxext_row_mid) | |||||
| mmxext_row_head, mmxext_row, mmxext_row_tail, mmxext_row_mid) | |||||
| declare_idct (ff_mmx_idct, mmx_table, | declare_idct (ff_mmx_idct, mmx_table, | ||||
| mmx_row_head, mmx_row, mmx_row_tail, mmx_row_mid) | |||||
| mmx_row_head, mmx_row, mmx_row_tail, mmx_row_mid) | |||||
| @@ -27,257 +27,257 @@ | |||||
| * values by ULL, lest they be truncated by the compiler) | * values by ULL, lest they be truncated by the compiler) | ||||
| */ | */ | ||||
| typedef union { | |||||
| long long q; /* Quadword (64-bit) value */ | |||||
| unsigned long long uq; /* Unsigned Quadword */ | |||||
| int d[2]; /* 2 Doubleword (32-bit) values */ | |||||
| unsigned int ud[2]; /* 2 Unsigned Doubleword */ | |||||
| short w[4]; /* 4 Word (16-bit) values */ | |||||
| unsigned short uw[4]; /* 4 Unsigned Word */ | |||||
| char b[8]; /* 8 Byte (8-bit) values */ | |||||
| unsigned char ub[8]; /* 8 Unsigned Byte */ | |||||
| float s[2]; /* Single-precision (32-bit) value */ | |||||
| } mmx_t; /* On an 8-byte (64-bit) boundary */ | |||||
| #define mmx_i2r(op,imm,reg) \ | |||||
| __asm__ __volatile__ (#op " %0, %%" #reg \ | |||||
| : /* nothing */ \ | |||||
| : "i" (imm) ) | |||||
| #define mmx_m2r(op,mem,reg) \ | |||||
| __asm__ __volatile__ (#op " %0, %%" #reg \ | |||||
| : /* nothing */ \ | |||||
| : "m" (mem)) | |||||
| #define mmx_r2m(op,reg,mem) \ | |||||
| __asm__ __volatile__ (#op " %%" #reg ", %0" \ | |||||
| : "=m" (mem) \ | |||||
| : /* nothing */ ) | |||||
| #define mmx_r2r(op,regs,regd) \ | |||||
| __asm__ __volatile__ (#op " %" #regs ", %" #regd) | |||||
| #define emms() __asm__ __volatile__ ("emms") | |||||
| #define movd_m2r(var,reg) mmx_m2r (movd, var, reg) | |||||
| #define movd_r2m(reg,var) mmx_r2m (movd, reg, var) | |||||
| #define movd_r2r(regs,regd) mmx_r2r (movd, regs, regd) | |||||
| #define movq_m2r(var,reg) mmx_m2r (movq, var, reg) | |||||
| #define movq_r2m(reg,var) mmx_r2m (movq, reg, var) | |||||
| #define movq_r2r(regs,regd) mmx_r2r (movq, regs, regd) | |||||
| #define packssdw_m2r(var,reg) mmx_m2r (packssdw, var, reg) | |||||
| #define packssdw_r2r(regs,regd) mmx_r2r (packssdw, regs, regd) | |||||
| #define packsswb_m2r(var,reg) mmx_m2r (packsswb, var, reg) | |||||
| #define packsswb_r2r(regs,regd) mmx_r2r (packsswb, regs, regd) | |||||
| #define packuswb_m2r(var,reg) mmx_m2r (packuswb, var, reg) | |||||
| #define packuswb_r2r(regs,regd) mmx_r2r (packuswb, regs, regd) | |||||
| #define paddb_m2r(var,reg) mmx_m2r (paddb, var, reg) | |||||
| #define paddb_r2r(regs,regd) mmx_r2r (paddb, regs, regd) | |||||
| #define paddd_m2r(var,reg) mmx_m2r (paddd, var, reg) | |||||
| #define paddd_r2r(regs,regd) mmx_r2r (paddd, regs, regd) | |||||
| #define paddw_m2r(var,reg) mmx_m2r (paddw, var, reg) | |||||
| #define paddw_r2r(regs,regd) mmx_r2r (paddw, regs, regd) | |||||
| #define paddsb_m2r(var,reg) mmx_m2r (paddsb, var, reg) | |||||
| #define paddsb_r2r(regs,regd) mmx_r2r (paddsb, regs, regd) | |||||
| #define paddsw_m2r(var,reg) mmx_m2r (paddsw, var, reg) | |||||
| #define paddsw_r2r(regs,regd) mmx_r2r (paddsw, regs, regd) | |||||
| #define paddusb_m2r(var,reg) mmx_m2r (paddusb, var, reg) | |||||
| #define paddusb_r2r(regs,regd) mmx_r2r (paddusb, regs, regd) | |||||
| #define paddusw_m2r(var,reg) mmx_m2r (paddusw, var, reg) | |||||
| #define paddusw_r2r(regs,regd) mmx_r2r (paddusw, regs, regd) | |||||
| #define pand_m2r(var,reg) mmx_m2r (pand, var, reg) | |||||
| #define pand_r2r(regs,regd) mmx_r2r (pand, regs, regd) | |||||
| #define pandn_m2r(var,reg) mmx_m2r (pandn, var, reg) | |||||
| #define pandn_r2r(regs,regd) mmx_r2r (pandn, regs, regd) | |||||
| #define pcmpeqb_m2r(var,reg) mmx_m2r (pcmpeqb, var, reg) | |||||
| #define pcmpeqb_r2r(regs,regd) mmx_r2r (pcmpeqb, regs, regd) | |||||
| #define pcmpeqd_m2r(var,reg) mmx_m2r (pcmpeqd, var, reg) | |||||
| #define pcmpeqd_r2r(regs,regd) mmx_r2r (pcmpeqd, regs, regd) | |||||
| #define pcmpeqw_m2r(var,reg) mmx_m2r (pcmpeqw, var, reg) | |||||
| #define pcmpeqw_r2r(regs,regd) mmx_r2r (pcmpeqw, regs, regd) | |||||
| #define pcmpgtb_m2r(var,reg) mmx_m2r (pcmpgtb, var, reg) | |||||
| #define pcmpgtb_r2r(regs,regd) mmx_r2r (pcmpgtb, regs, regd) | |||||
| #define pcmpgtd_m2r(var,reg) mmx_m2r (pcmpgtd, var, reg) | |||||
| #define pcmpgtd_r2r(regs,regd) mmx_r2r (pcmpgtd, regs, regd) | |||||
| #define pcmpgtw_m2r(var,reg) mmx_m2r (pcmpgtw, var, reg) | |||||
| #define pcmpgtw_r2r(regs,regd) mmx_r2r (pcmpgtw, regs, regd) | |||||
| #define pmaddwd_m2r(var,reg) mmx_m2r (pmaddwd, var, reg) | |||||
| #define pmaddwd_r2r(regs,regd) mmx_r2r (pmaddwd, regs, regd) | |||||
| #define pmulhw_m2r(var,reg) mmx_m2r (pmulhw, var, reg) | |||||
| #define pmulhw_r2r(regs,regd) mmx_r2r (pmulhw, regs, regd) | |||||
| #define pmullw_m2r(var,reg) mmx_m2r (pmullw, var, reg) | |||||
| #define pmullw_r2r(regs,regd) mmx_r2r (pmullw, regs, regd) | |||||
| #define por_m2r(var,reg) mmx_m2r (por, var, reg) | |||||
| #define por_r2r(regs,regd) mmx_r2r (por, regs, regd) | |||||
| #define pslld_i2r(imm,reg) mmx_i2r (pslld, imm, reg) | |||||
| #define pslld_m2r(var,reg) mmx_m2r (pslld, var, reg) | |||||
| #define pslld_r2r(regs,regd) mmx_r2r (pslld, regs, regd) | |||||
| #define psllq_i2r(imm,reg) mmx_i2r (psllq, imm, reg) | |||||
| #define psllq_m2r(var,reg) mmx_m2r (psllq, var, reg) | |||||
| #define psllq_r2r(regs,regd) mmx_r2r (psllq, regs, regd) | |||||
| #define psllw_i2r(imm,reg) mmx_i2r (psllw, imm, reg) | |||||
| #define psllw_m2r(var,reg) mmx_m2r (psllw, var, reg) | |||||
| #define psllw_r2r(regs,regd) mmx_r2r (psllw, regs, regd) | |||||
| #define psrad_i2r(imm,reg) mmx_i2r (psrad, imm, reg) | |||||
| #define psrad_m2r(var,reg) mmx_m2r (psrad, var, reg) | |||||
| #define psrad_r2r(regs,regd) mmx_r2r (psrad, regs, regd) | |||||
| #define psraw_i2r(imm,reg) mmx_i2r (psraw, imm, reg) | |||||
| #define psraw_m2r(var,reg) mmx_m2r (psraw, var, reg) | |||||
| #define psraw_r2r(regs,regd) mmx_r2r (psraw, regs, regd) | |||||
| #define psrld_i2r(imm,reg) mmx_i2r (psrld, imm, reg) | |||||
| #define psrld_m2r(var,reg) mmx_m2r (psrld, var, reg) | |||||
| #define psrld_r2r(regs,regd) mmx_r2r (psrld, regs, regd) | |||||
| #define psrlq_i2r(imm,reg) mmx_i2r (psrlq, imm, reg) | |||||
| #define psrlq_m2r(var,reg) mmx_m2r (psrlq, var, reg) | |||||
| #define psrlq_r2r(regs,regd) mmx_r2r (psrlq, regs, regd) | |||||
| #define psrlw_i2r(imm,reg) mmx_i2r (psrlw, imm, reg) | |||||
| #define psrlw_m2r(var,reg) mmx_m2r (psrlw, var, reg) | |||||
| #define psrlw_r2r(regs,regd) mmx_r2r (psrlw, regs, regd) | |||||
| #define psubb_m2r(var,reg) mmx_m2r (psubb, var, reg) | |||||
| #define psubb_r2r(regs,regd) mmx_r2r (psubb, regs, regd) | |||||
| #define psubd_m2r(var,reg) mmx_m2r (psubd, var, reg) | |||||
| #define psubd_r2r(regs,regd) mmx_r2r (psubd, regs, regd) | |||||
| #define psubw_m2r(var,reg) mmx_m2r (psubw, var, reg) | |||||
| #define psubw_r2r(regs,regd) mmx_r2r (psubw, regs, regd) | |||||
| #define psubsb_m2r(var,reg) mmx_m2r (psubsb, var, reg) | |||||
| #define psubsb_r2r(regs,regd) mmx_r2r (psubsb, regs, regd) | |||||
| #define psubsw_m2r(var,reg) mmx_m2r (psubsw, var, reg) | |||||
| #define psubsw_r2r(regs,regd) mmx_r2r (psubsw, regs, regd) | |||||
| #define psubusb_m2r(var,reg) mmx_m2r (psubusb, var, reg) | |||||
| #define psubusb_r2r(regs,regd) mmx_r2r (psubusb, regs, regd) | |||||
| #define psubusw_m2r(var,reg) mmx_m2r (psubusw, var, reg) | |||||
| #define psubusw_r2r(regs,regd) mmx_r2r (psubusw, regs, regd) | |||||
| #define punpckhbw_m2r(var,reg) mmx_m2r (punpckhbw, var, reg) | |||||
| #define punpckhbw_r2r(regs,regd) mmx_r2r (punpckhbw, regs, regd) | |||||
| #define punpckhdq_m2r(var,reg) mmx_m2r (punpckhdq, var, reg) | |||||
| #define punpckhdq_r2r(regs,regd) mmx_r2r (punpckhdq, regs, regd) | |||||
| #define punpckhwd_m2r(var,reg) mmx_m2r (punpckhwd, var, reg) | |||||
| #define punpckhwd_r2r(regs,regd) mmx_r2r (punpckhwd, regs, regd) | |||||
| #define punpcklbw_m2r(var,reg) mmx_m2r (punpcklbw, var, reg) | |||||
| #define punpcklbw_r2r(regs,regd) mmx_r2r (punpcklbw, regs, regd) | |||||
| #define punpckldq_m2r(var,reg) mmx_m2r (punpckldq, var, reg) | |||||
| #define punpckldq_r2r(regs,regd) mmx_r2r (punpckldq, regs, regd) | |||||
| #define punpcklwd_m2r(var,reg) mmx_m2r (punpcklwd, var, reg) | |||||
| #define punpcklwd_r2r(regs,regd) mmx_r2r (punpcklwd, regs, regd) | |||||
| #define pxor_m2r(var,reg) mmx_m2r (pxor, var, reg) | |||||
| #define pxor_r2r(regs,regd) mmx_r2r (pxor, regs, regd) | |||||
| typedef union { | |||||
| long long q; /* Quadword (64-bit) value */ | |||||
| unsigned long long uq; /* Unsigned Quadword */ | |||||
| int d[2]; /* 2 Doubleword (32-bit) values */ | |||||
| unsigned int ud[2]; /* 2 Unsigned Doubleword */ | |||||
| short w[4]; /* 4 Word (16-bit) values */ | |||||
| unsigned short uw[4]; /* 4 Unsigned Word */ | |||||
| char b[8]; /* 8 Byte (8-bit) values */ | |||||
| unsigned char ub[8]; /* 8 Unsigned Byte */ | |||||
| float s[2]; /* Single-precision (32-bit) value */ | |||||
| } mmx_t; /* On an 8-byte (64-bit) boundary */ | |||||
| #define mmx_i2r(op,imm,reg) \ | |||||
| __asm__ __volatile__ (#op " %0, %%" #reg \ | |||||
| : /* nothing */ \ | |||||
| : "i" (imm) ) | |||||
| #define mmx_m2r(op,mem,reg) \ | |||||
| __asm__ __volatile__ (#op " %0, %%" #reg \ | |||||
| : /* nothing */ \ | |||||
| : "m" (mem)) | |||||
| #define mmx_r2m(op,reg,mem) \ | |||||
| __asm__ __volatile__ (#op " %%" #reg ", %0" \ | |||||
| : "=m" (mem) \ | |||||
| : /* nothing */ ) | |||||
| #define mmx_r2r(op,regs,regd) \ | |||||
| __asm__ __volatile__ (#op " %" #regs ", %" #regd) | |||||
| #define emms() __asm__ __volatile__ ("emms") | |||||
| #define movd_m2r(var,reg) mmx_m2r (movd, var, reg) | |||||
| #define movd_r2m(reg,var) mmx_r2m (movd, reg, var) | |||||
| #define movd_r2r(regs,regd) mmx_r2r (movd, regs, regd) | |||||
| #define movq_m2r(var,reg) mmx_m2r (movq, var, reg) | |||||
| #define movq_r2m(reg,var) mmx_r2m (movq, reg, var) | |||||
| #define movq_r2r(regs,regd) mmx_r2r (movq, regs, regd) | |||||
| #define packssdw_m2r(var,reg) mmx_m2r (packssdw, var, reg) | |||||
| #define packssdw_r2r(regs,regd) mmx_r2r (packssdw, regs, regd) | |||||
| #define packsswb_m2r(var,reg) mmx_m2r (packsswb, var, reg) | |||||
| #define packsswb_r2r(regs,regd) mmx_r2r (packsswb, regs, regd) | |||||
| #define packuswb_m2r(var,reg) mmx_m2r (packuswb, var, reg) | |||||
| #define packuswb_r2r(regs,regd) mmx_r2r (packuswb, regs, regd) | |||||
| #define paddb_m2r(var,reg) mmx_m2r (paddb, var, reg) | |||||
| #define paddb_r2r(regs,regd) mmx_r2r (paddb, regs, regd) | |||||
| #define paddd_m2r(var,reg) mmx_m2r (paddd, var, reg) | |||||
| #define paddd_r2r(regs,regd) mmx_r2r (paddd, regs, regd) | |||||
| #define paddw_m2r(var,reg) mmx_m2r (paddw, var, reg) | |||||
| #define paddw_r2r(regs,regd) mmx_r2r (paddw, regs, regd) | |||||
| #define paddsb_m2r(var,reg) mmx_m2r (paddsb, var, reg) | |||||
| #define paddsb_r2r(regs,regd) mmx_r2r (paddsb, regs, regd) | |||||
| #define paddsw_m2r(var,reg) mmx_m2r (paddsw, var, reg) | |||||
| #define paddsw_r2r(regs,regd) mmx_r2r (paddsw, regs, regd) | |||||
| #define paddusb_m2r(var,reg) mmx_m2r (paddusb, var, reg) | |||||
| #define paddusb_r2r(regs,regd) mmx_r2r (paddusb, regs, regd) | |||||
| #define paddusw_m2r(var,reg) mmx_m2r (paddusw, var, reg) | |||||
| #define paddusw_r2r(regs,regd) mmx_r2r (paddusw, regs, regd) | |||||
| #define pand_m2r(var,reg) mmx_m2r (pand, var, reg) | |||||
| #define pand_r2r(regs,regd) mmx_r2r (pand, regs, regd) | |||||
| #define pandn_m2r(var,reg) mmx_m2r (pandn, var, reg) | |||||
| #define pandn_r2r(regs,regd) mmx_r2r (pandn, regs, regd) | |||||
| #define pcmpeqb_m2r(var,reg) mmx_m2r (pcmpeqb, var, reg) | |||||
| #define pcmpeqb_r2r(regs,regd) mmx_r2r (pcmpeqb, regs, regd) | |||||
| #define pcmpeqd_m2r(var,reg) mmx_m2r (pcmpeqd, var, reg) | |||||
| #define pcmpeqd_r2r(regs,regd) mmx_r2r (pcmpeqd, regs, regd) | |||||
| #define pcmpeqw_m2r(var,reg) mmx_m2r (pcmpeqw, var, reg) | |||||
| #define pcmpeqw_r2r(regs,regd) mmx_r2r (pcmpeqw, regs, regd) | |||||
| #define pcmpgtb_m2r(var,reg) mmx_m2r (pcmpgtb, var, reg) | |||||
| #define pcmpgtb_r2r(regs,regd) mmx_r2r (pcmpgtb, regs, regd) | |||||
| #define pcmpgtd_m2r(var,reg) mmx_m2r (pcmpgtd, var, reg) | |||||
| #define pcmpgtd_r2r(regs,regd) mmx_r2r (pcmpgtd, regs, regd) | |||||
| #define pcmpgtw_m2r(var,reg) mmx_m2r (pcmpgtw, var, reg) | |||||
| #define pcmpgtw_r2r(regs,regd) mmx_r2r (pcmpgtw, regs, regd) | |||||
| #define pmaddwd_m2r(var,reg) mmx_m2r (pmaddwd, var, reg) | |||||
| #define pmaddwd_r2r(regs,regd) mmx_r2r (pmaddwd, regs, regd) | |||||
| #define pmulhw_m2r(var,reg) mmx_m2r (pmulhw, var, reg) | |||||
| #define pmulhw_r2r(regs,regd) mmx_r2r (pmulhw, regs, regd) | |||||
| #define pmullw_m2r(var,reg) mmx_m2r (pmullw, var, reg) | |||||
| #define pmullw_r2r(regs,regd) mmx_r2r (pmullw, regs, regd) | |||||
| #define por_m2r(var,reg) mmx_m2r (por, var, reg) | |||||
| #define por_r2r(regs,regd) mmx_r2r (por, regs, regd) | |||||
| #define pslld_i2r(imm,reg) mmx_i2r (pslld, imm, reg) | |||||
| #define pslld_m2r(var,reg) mmx_m2r (pslld, var, reg) | |||||
| #define pslld_r2r(regs,regd) mmx_r2r (pslld, regs, regd) | |||||
| #define psllq_i2r(imm,reg) mmx_i2r (psllq, imm, reg) | |||||
| #define psllq_m2r(var,reg) mmx_m2r (psllq, var, reg) | |||||
| #define psllq_r2r(regs,regd) mmx_r2r (psllq, regs, regd) | |||||
| #define psllw_i2r(imm,reg) mmx_i2r (psllw, imm, reg) | |||||
| #define psllw_m2r(var,reg) mmx_m2r (psllw, var, reg) | |||||
| #define psllw_r2r(regs,regd) mmx_r2r (psllw, regs, regd) | |||||
| #define psrad_i2r(imm,reg) mmx_i2r (psrad, imm, reg) | |||||
| #define psrad_m2r(var,reg) mmx_m2r (psrad, var, reg) | |||||
| #define psrad_r2r(regs,regd) mmx_r2r (psrad, regs, regd) | |||||
| #define psraw_i2r(imm,reg) mmx_i2r (psraw, imm, reg) | |||||
| #define psraw_m2r(var,reg) mmx_m2r (psraw, var, reg) | |||||
| #define psraw_r2r(regs,regd) mmx_r2r (psraw, regs, regd) | |||||
| #define psrld_i2r(imm,reg) mmx_i2r (psrld, imm, reg) | |||||
| #define psrld_m2r(var,reg) mmx_m2r (psrld, var, reg) | |||||
| #define psrld_r2r(regs,regd) mmx_r2r (psrld, regs, regd) | |||||
| #define psrlq_i2r(imm,reg) mmx_i2r (psrlq, imm, reg) | |||||
| #define psrlq_m2r(var,reg) mmx_m2r (psrlq, var, reg) | |||||
| #define psrlq_r2r(regs,regd) mmx_r2r (psrlq, regs, regd) | |||||
| #define psrlw_i2r(imm,reg) mmx_i2r (psrlw, imm, reg) | |||||
| #define psrlw_m2r(var,reg) mmx_m2r (psrlw, var, reg) | |||||
| #define psrlw_r2r(regs,regd) mmx_r2r (psrlw, regs, regd) | |||||
| #define psubb_m2r(var,reg) mmx_m2r (psubb, var, reg) | |||||
| #define psubb_r2r(regs,regd) mmx_r2r (psubb, regs, regd) | |||||
| #define psubd_m2r(var,reg) mmx_m2r (psubd, var, reg) | |||||
| #define psubd_r2r(regs,regd) mmx_r2r (psubd, regs, regd) | |||||
| #define psubw_m2r(var,reg) mmx_m2r (psubw, var, reg) | |||||
| #define psubw_r2r(regs,regd) mmx_r2r (psubw, regs, regd) | |||||
| #define psubsb_m2r(var,reg) mmx_m2r (psubsb, var, reg) | |||||
| #define psubsb_r2r(regs,regd) mmx_r2r (psubsb, regs, regd) | |||||
| #define psubsw_m2r(var,reg) mmx_m2r (psubsw, var, reg) | |||||
| #define psubsw_r2r(regs,regd) mmx_r2r (psubsw, regs, regd) | |||||
| #define psubusb_m2r(var,reg) mmx_m2r (psubusb, var, reg) | |||||
| #define psubusb_r2r(regs,regd) mmx_r2r (psubusb, regs, regd) | |||||
| #define psubusw_m2r(var,reg) mmx_m2r (psubusw, var, reg) | |||||
| #define psubusw_r2r(regs,regd) mmx_r2r (psubusw, regs, regd) | |||||
| #define punpckhbw_m2r(var,reg) mmx_m2r (punpckhbw, var, reg) | |||||
| #define punpckhbw_r2r(regs,regd) mmx_r2r (punpckhbw, regs, regd) | |||||
| #define punpckhdq_m2r(var,reg) mmx_m2r (punpckhdq, var, reg) | |||||
| #define punpckhdq_r2r(regs,regd) mmx_r2r (punpckhdq, regs, regd) | |||||
| #define punpckhwd_m2r(var,reg) mmx_m2r (punpckhwd, var, reg) | |||||
| #define punpckhwd_r2r(regs,regd) mmx_r2r (punpckhwd, regs, regd) | |||||
| #define punpcklbw_m2r(var,reg) mmx_m2r (punpcklbw, var, reg) | |||||
| #define punpcklbw_r2r(regs,regd) mmx_r2r (punpcklbw, regs, regd) | |||||
| #define punpckldq_m2r(var,reg) mmx_m2r (punpckldq, var, reg) | |||||
| #define punpckldq_r2r(regs,regd) mmx_r2r (punpckldq, regs, regd) | |||||
| #define punpcklwd_m2r(var,reg) mmx_m2r (punpcklwd, var, reg) | |||||
| #define punpcklwd_r2r(regs,regd) mmx_r2r (punpcklwd, regs, regd) | |||||
| #define pxor_m2r(var,reg) mmx_m2r (pxor, var, reg) | |||||
| #define pxor_r2r(regs,regd) mmx_r2r (pxor, regs, regd) | |||||
| /* 3DNOW extensions */ | /* 3DNOW extensions */ | ||||
| #define pavgusb_m2r(var,reg) mmx_m2r (pavgusb, var, reg) | |||||
| #define pavgusb_r2r(regs,regd) mmx_r2r (pavgusb, regs, regd) | |||||
| #define pavgusb_m2r(var,reg) mmx_m2r (pavgusb, var, reg) | |||||
| #define pavgusb_r2r(regs,regd) mmx_r2r (pavgusb, regs, regd) | |||||
| /* AMD MMX extensions - also available in intel SSE */ | /* AMD MMX extensions - also available in intel SSE */ | ||||
| #define mmx_m2ri(op,mem,reg,imm) \ | |||||
| #define mmx_m2ri(op,mem,reg,imm) \ | |||||
| __asm__ __volatile__ (#op " %1, %0, %%" #reg \ | __asm__ __volatile__ (#op " %1, %0, %%" #reg \ | ||||
| : /* nothing */ \ | : /* nothing */ \ | ||||
| : "X" (mem), "X" (imm)) | : "X" (mem), "X" (imm)) | ||||
| #define mmx_r2ri(op,regs,regd,imm) \ | |||||
| #define mmx_r2ri(op,regs,regd,imm) \ | |||||
| __asm__ __volatile__ (#op " %0, %%" #regs ", %%" #regd \ | __asm__ __volatile__ (#op " %0, %%" #regs ", %%" #regd \ | ||||
| : /* nothing */ \ | : /* nothing */ \ | ||||
| : "X" (imm) ) | : "X" (imm) ) | ||||
| #define mmx_fetch(mem,hint) \ | |||||
| __asm__ __volatile__ ("prefetch" #hint " %0" \ | |||||
| : /* nothing */ \ | |||||
| : "X" (mem)) | |||||
| #define mmx_fetch(mem,hint) \ | |||||
| __asm__ __volatile__ ("prefetch" #hint " %0" \ | |||||
| : /* nothing */ \ | |||||
| : "X" (mem)) | |||||
| #define maskmovq(regs,maskreg) mmx_r2ri (maskmovq, regs, maskreg) | |||||
| #define maskmovq(regs,maskreg) mmx_r2ri (maskmovq, regs, maskreg) | |||||
| #define movntq_r2m(mmreg,var) mmx_r2m (movntq, mmreg, var) | |||||
| #define movntq_r2m(mmreg,var) mmx_r2m (movntq, mmreg, var) | |||||
| #define pavgb_m2r(var,reg) mmx_m2r (pavgb, var, reg) | |||||
| #define pavgb_r2r(regs,regd) mmx_r2r (pavgb, regs, regd) | |||||
| #define pavgw_m2r(var,reg) mmx_m2r (pavgw, var, reg) | |||||
| #define pavgw_r2r(regs,regd) mmx_r2r (pavgw, regs, regd) | |||||
| #define pavgb_m2r(var,reg) mmx_m2r (pavgb, var, reg) | |||||
| #define pavgb_r2r(regs,regd) mmx_r2r (pavgb, regs, regd) | |||||
| #define pavgw_m2r(var,reg) mmx_m2r (pavgw, var, reg) | |||||
| #define pavgw_r2r(regs,regd) mmx_r2r (pavgw, regs, regd) | |||||
| #define pextrw_r2r(mmreg,reg,imm) mmx_r2ri (pextrw, mmreg, reg, imm) | |||||
| #define pextrw_r2r(mmreg,reg,imm) mmx_r2ri (pextrw, mmreg, reg, imm) | |||||
| #define pinsrw_r2r(reg,mmreg,imm) mmx_r2ri (pinsrw, reg, mmreg, imm) | |||||
| #define pinsrw_r2r(reg,mmreg,imm) mmx_r2ri (pinsrw, reg, mmreg, imm) | |||||
| #define pmaxsw_m2r(var,reg) mmx_m2r (pmaxsw, var, reg) | |||||
| #define pmaxsw_r2r(regs,regd) mmx_r2r (pmaxsw, regs, regd) | |||||
| #define pmaxsw_m2r(var,reg) mmx_m2r (pmaxsw, var, reg) | |||||
| #define pmaxsw_r2r(regs,regd) mmx_r2r (pmaxsw, regs, regd) | |||||
| #define pmaxub_m2r(var,reg) mmx_m2r (pmaxub, var, reg) | |||||
| #define pmaxub_r2r(regs,regd) mmx_r2r (pmaxub, regs, regd) | |||||
| #define pmaxub_m2r(var,reg) mmx_m2r (pmaxub, var, reg) | |||||
| #define pmaxub_r2r(regs,regd) mmx_r2r (pmaxub, regs, regd) | |||||
| #define pminsw_m2r(var,reg) mmx_m2r (pminsw, var, reg) | |||||
| #define pminsw_r2r(regs,regd) mmx_r2r (pminsw, regs, regd) | |||||
| #define pminsw_m2r(var,reg) mmx_m2r (pminsw, var, reg) | |||||
| #define pminsw_r2r(regs,regd) mmx_r2r (pminsw, regs, regd) | |||||
| #define pminub_m2r(var,reg) mmx_m2r (pminub, var, reg) | |||||
| #define pminub_r2r(regs,regd) mmx_r2r (pminub, regs, regd) | |||||
| #define pminub_m2r(var,reg) mmx_m2r (pminub, var, reg) | |||||
| #define pminub_r2r(regs,regd) mmx_r2r (pminub, regs, regd) | |||||
| #define pmovmskb(mmreg,reg) \ | |||||
| __asm__ __volatile__ ("movmskps %" #mmreg ", %" #reg) | |||||
| #define pmovmskb(mmreg,reg) \ | |||||
| __asm__ __volatile__ ("movmskps %" #mmreg ", %" #reg) | |||||
| #define pmulhuw_m2r(var,reg) mmx_m2r (pmulhuw, var, reg) | |||||
| #define pmulhuw_r2r(regs,regd) mmx_r2r (pmulhuw, regs, regd) | |||||
| #define pmulhuw_m2r(var,reg) mmx_m2r (pmulhuw, var, reg) | |||||
| #define pmulhuw_r2r(regs,regd) mmx_r2r (pmulhuw, regs, regd) | |||||
| #define prefetcht0(mem) mmx_fetch (mem, t0) | |||||
| #define prefetcht1(mem) mmx_fetch (mem, t1) | |||||
| #define prefetcht2(mem) mmx_fetch (mem, t2) | |||||
| #define prefetchnta(mem) mmx_fetch (mem, nta) | |||||
| #define prefetcht0(mem) mmx_fetch (mem, t0) | |||||
| #define prefetcht1(mem) mmx_fetch (mem, t1) | |||||
| #define prefetcht2(mem) mmx_fetch (mem, t2) | |||||
| #define prefetchnta(mem) mmx_fetch (mem, nta) | |||||
| #define psadbw_m2r(var,reg) mmx_m2r (psadbw, var, reg) | |||||
| #define psadbw_r2r(regs,regd) mmx_r2r (psadbw, regs, regd) | |||||
| #define psadbw_m2r(var,reg) mmx_m2r (psadbw, var, reg) | |||||
| #define psadbw_r2r(regs,regd) mmx_r2r (psadbw, regs, regd) | |||||
| #define pshufw_m2r(var,reg,imm) mmx_m2ri(pshufw, var, reg, imm) | |||||
| #define pshufw_r2r(regs,regd,imm) mmx_r2ri(pshufw, regs, regd, imm) | |||||
| #define pshufw_m2r(var,reg,imm) mmx_m2ri(pshufw, var, reg, imm) | |||||
| #define pshufw_r2r(regs,regd,imm) mmx_r2ri(pshufw, regs, regd, imm) | |||||
| #define sfence() __asm__ __volatile__ ("sfence\n\t") | |||||
| #define sfence() __asm__ __volatile__ ("sfence\n\t") | |||||
| /* SSE2 */ | /* SSE2 */ | ||||
| #define pshufhw_m2r(var,reg,imm) mmx_m2ri(pshufhw, var, reg, imm) | |||||
| #define pshufhw_r2r(regs,regd,imm) mmx_r2ri(pshufhw, regs, regd, imm) | |||||
| #define pshuflw_m2r(var,reg,imm) mmx_m2ri(pshuflw, var, reg, imm) | |||||
| #define pshuflw_r2r(regs,regd,imm) mmx_r2ri(pshuflw, regs, regd, imm) | |||||
| #define pshufhw_m2r(var,reg,imm) mmx_m2ri(pshufhw, var, reg, imm) | |||||
| #define pshufhw_r2r(regs,regd,imm) mmx_r2ri(pshufhw, regs, regd, imm) | |||||
| #define pshuflw_m2r(var,reg,imm) mmx_m2ri(pshuflw, var, reg, imm) | |||||
| #define pshuflw_r2r(regs,regd,imm) mmx_r2ri(pshuflw, regs, regd, imm) | |||||
| #define pshufd_r2r(regs,regd,imm) mmx_r2ri(pshufd, regs, regd, imm) | |||||
| #define pshufd_r2r(regs,regd,imm) mmx_r2ri(pshufd, regs, regd, imm) | |||||
| #define movdqa_m2r(var,reg) mmx_m2r (movdqa, var, reg) | |||||
| #define movdqa_r2m(reg,var) mmx_r2m (movdqa, reg, var) | |||||
| #define movdqa_r2r(regs,regd) mmx_r2r (movdqa, regs, regd) | |||||
| #define movdqu_m2r(var,reg) mmx_m2r (movdqu, var, reg) | |||||
| #define movdqu_r2m(reg,var) mmx_r2m (movdqu, reg, var) | |||||
| #define movdqu_r2r(regs,regd) mmx_r2r (movdqu, regs, regd) | |||||
| #define movdqa_m2r(var,reg) mmx_m2r (movdqa, var, reg) | |||||
| #define movdqa_r2m(reg,var) mmx_r2m (movdqa, reg, var) | |||||
| #define movdqa_r2r(regs,regd) mmx_r2r (movdqa, regs, regd) | |||||
| #define movdqu_m2r(var,reg) mmx_m2r (movdqu, var, reg) | |||||
| #define movdqu_r2m(reg,var) mmx_r2m (movdqu, reg, var) | |||||
| #define movdqu_r2r(regs,regd) mmx_r2r (movdqu, regs, regd) | |||||
| #define pmullw_r2m(reg,var) mmx_r2m (pmullw, reg, var) | |||||
| #define pmullw_r2m(reg,var) mmx_r2m (pmullw, reg, var) | |||||
| #define pslldq_i2r(imm,reg) mmx_i2r (pslldq, imm, reg) | |||||
| #define psrldq_i2r(imm,reg) mmx_i2r (psrldq, imm, reg) | |||||
| #define pslldq_i2r(imm,reg) mmx_i2r (pslldq, imm, reg) | |||||
| #define psrldq_i2r(imm,reg) mmx_i2r (psrldq, imm, reg) | |||||
| #define punpcklqdq_r2r(regs,regd) mmx_r2r (punpcklqdq, regs, regd) | |||||
| #define punpckhqdq_r2r(regs,regd) mmx_r2r (punpckhqdq, regs, regd) | |||||
| #define punpcklqdq_r2r(regs,regd) mmx_r2r (punpcklqdq, regs, regd) | |||||
| #define punpckhqdq_r2r(regs,regd) mmx_r2r (punpckhqdq, regs, regd) | |||||
| #endif /* AVCODEC_I386MMX_H */ | #endif /* AVCODEC_I386MMX_H */ | ||||
| @@ -34,33 +34,33 @@ static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) | |||||
| { | { | ||||
| long len= -(stride*h); | long len= -(stride*h); | ||||
| asm volatile( | asm volatile( | ||||
| ".balign 16 \n\t" | |||||
| "1: \n\t" | |||||
| "movq (%1, %%"REG_a"), %%mm0 \n\t" | |||||
| "movq (%2, %%"REG_a"), %%mm2 \n\t" | |||||
| "movq (%2, %%"REG_a"), %%mm4 \n\t" | |||||
| "add %3, %%"REG_a" \n\t" | |||||
| "psubusb %%mm0, %%mm2 \n\t" | |||||
| "psubusb %%mm4, %%mm0 \n\t" | |||||
| "movq (%1, %%"REG_a"), %%mm1 \n\t" | |||||
| "movq (%2, %%"REG_a"), %%mm3 \n\t" | |||||
| "movq (%2, %%"REG_a"), %%mm5 \n\t" | |||||
| "psubusb %%mm1, %%mm3 \n\t" | |||||
| "psubusb %%mm5, %%mm1 \n\t" | |||||
| "por %%mm2, %%mm0 \n\t" | |||||
| "por %%mm1, %%mm3 \n\t" | |||||
| "movq %%mm0, %%mm1 \n\t" | |||||
| "movq %%mm3, %%mm2 \n\t" | |||||
| "punpcklbw %%mm7, %%mm0 \n\t" | |||||
| "punpckhbw %%mm7, %%mm1 \n\t" | |||||
| "punpcklbw %%mm7, %%mm3 \n\t" | |||||
| "punpckhbw %%mm7, %%mm2 \n\t" | |||||
| "paddw %%mm1, %%mm0 \n\t" | |||||
| "paddw %%mm3, %%mm2 \n\t" | |||||
| "paddw %%mm2, %%mm0 \n\t" | |||||
| "paddw %%mm0, %%mm6 \n\t" | |||||
| "add %3, %%"REG_a" \n\t" | |||||
| " js 1b \n\t" | |||||
| ".balign 16 \n\t" | |||||
| "1: \n\t" | |||||
| "movq (%1, %%"REG_a"), %%mm0 \n\t" | |||||
| "movq (%2, %%"REG_a"), %%mm2 \n\t" | |||||
| "movq (%2, %%"REG_a"), %%mm4 \n\t" | |||||
| "add %3, %%"REG_a" \n\t" | |||||
| "psubusb %%mm0, %%mm2 \n\t" | |||||
| "psubusb %%mm4, %%mm0 \n\t" | |||||
| "movq (%1, %%"REG_a"), %%mm1 \n\t" | |||||
| "movq (%2, %%"REG_a"), %%mm3 \n\t" | |||||
| "movq (%2, %%"REG_a"), %%mm5 \n\t" | |||||
| "psubusb %%mm1, %%mm3 \n\t" | |||||
| "psubusb %%mm5, %%mm1 \n\t" | |||||
| "por %%mm2, %%mm0 \n\t" | |||||
| "por %%mm1, %%mm3 \n\t" | |||||
| "movq %%mm0, %%mm1 \n\t" | |||||
| "movq %%mm3, %%mm2 \n\t" | |||||
| "punpcklbw %%mm7, %%mm0 \n\t" | |||||
| "punpckhbw %%mm7, %%mm1 \n\t" | |||||
| "punpcklbw %%mm7, %%mm3 \n\t" | |||||
| "punpckhbw %%mm7, %%mm2 \n\t" | |||||
| "paddw %%mm1, %%mm0 \n\t" | |||||
| "paddw %%mm3, %%mm2 \n\t" | |||||
| "paddw %%mm2, %%mm0 \n\t" | |||||
| "paddw %%mm0, %%mm6 \n\t" | |||||
| "add %3, %%"REG_a" \n\t" | |||||
| " js 1b \n\t" | |||||
| : "+a" (len) | : "+a" (len) | ||||
| : "r" (blk1 - len), "r" (blk2 - len), "r" ((long)stride) | : "r" (blk1 - len), "r" (blk2 - len), "r" ((long)stride) | ||||
| ); | ); | ||||
| @@ -70,19 +70,19 @@ static inline void sad8_1_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) | |||||
| { | { | ||||
| long len= -(stride*h); | long len= -(stride*h); | ||||
| asm volatile( | asm volatile( | ||||
| ".balign 16 \n\t" | |||||
| "1: \n\t" | |||||
| "movq (%1, %%"REG_a"), %%mm0 \n\t" | |||||
| "movq (%2, %%"REG_a"), %%mm2 \n\t" | |||||
| "psadbw %%mm2, %%mm0 \n\t" | |||||
| "add %3, %%"REG_a" \n\t" | |||||
| "movq (%1, %%"REG_a"), %%mm1 \n\t" | |||||
| "movq (%2, %%"REG_a"), %%mm3 \n\t" | |||||
| "psadbw %%mm1, %%mm3 \n\t" | |||||
| "paddw %%mm3, %%mm0 \n\t" | |||||
| "paddw %%mm0, %%mm6 \n\t" | |||||
| "add %3, %%"REG_a" \n\t" | |||||
| " js 1b \n\t" | |||||
| ".balign 16 \n\t" | |||||
| "1: \n\t" | |||||
| "movq (%1, %%"REG_a"), %%mm0 \n\t" | |||||
| "movq (%2, %%"REG_a"), %%mm2 \n\t" | |||||
| "psadbw %%mm2, %%mm0 \n\t" | |||||
| "add %3, %%"REG_a" \n\t" | |||||
| "movq (%1, %%"REG_a"), %%mm1 \n\t" | |||||
| "movq (%2, %%"REG_a"), %%mm3 \n\t" | |||||
| "psadbw %%mm1, %%mm3 \n\t" | |||||
| "paddw %%mm3, %%mm0 \n\t" | |||||
| "paddw %%mm0, %%mm6 \n\t" | |||||
| "add %3, %%"REG_a" \n\t" | |||||
| " js 1b \n\t" | |||||
| : "+a" (len) | : "+a" (len) | ||||
| : "r" (blk1 - len), "r" (blk2 - len), "r" ((long)stride) | : "r" (blk1 - len), "r" (blk2 - len), "r" ((long)stride) | ||||
| ); | ); | ||||
| @@ -92,23 +92,23 @@ static inline void sad8_2_mmx2(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, in | |||||
| { | { | ||||
| long len= -(stride*h); | long len= -(stride*h); | ||||
| asm volatile( | asm volatile( | ||||
| ".balign 16 \n\t" | |||||
| "1: \n\t" | |||||
| "movq (%1, %%"REG_a"), %%mm0 \n\t" | |||||
| "movq (%2, %%"REG_a"), %%mm2 \n\t" | |||||
| "pavgb %%mm2, %%mm0 \n\t" | |||||
| "movq (%3, %%"REG_a"), %%mm2 \n\t" | |||||
| "psadbw %%mm2, %%mm0 \n\t" | |||||
| "add %4, %%"REG_a" \n\t" | |||||
| "movq (%1, %%"REG_a"), %%mm1 \n\t" | |||||
| "movq (%2, %%"REG_a"), %%mm3 \n\t" | |||||
| "pavgb %%mm1, %%mm3 \n\t" | |||||
| "movq (%3, %%"REG_a"), %%mm1 \n\t" | |||||
| "psadbw %%mm1, %%mm3 \n\t" | |||||
| "paddw %%mm3, %%mm0 \n\t" | |||||
| "paddw %%mm0, %%mm6 \n\t" | |||||
| "add %4, %%"REG_a" \n\t" | |||||
| " js 1b \n\t" | |||||
| ".balign 16 \n\t" | |||||
| "1: \n\t" | |||||
| "movq (%1, %%"REG_a"), %%mm0 \n\t" | |||||
| "movq (%2, %%"REG_a"), %%mm2 \n\t" | |||||
| "pavgb %%mm2, %%mm0 \n\t" | |||||
| "movq (%3, %%"REG_a"), %%mm2 \n\t" | |||||
| "psadbw %%mm2, %%mm0 \n\t" | |||||
| "add %4, %%"REG_a" \n\t" | |||||
| "movq (%1, %%"REG_a"), %%mm1 \n\t" | |||||
| "movq (%2, %%"REG_a"), %%mm3 \n\t" | |||||
| "pavgb %%mm1, %%mm3 \n\t" | |||||
| "movq (%3, %%"REG_a"), %%mm1 \n\t" | |||||
| "psadbw %%mm1, %%mm3 \n\t" | |||||
| "paddw %%mm3, %%mm0 \n\t" | |||||
| "paddw %%mm0, %%mm6 \n\t" | |||||
| "add %4, %%"REG_a" \n\t" | |||||
| " js 1b \n\t" | |||||
| : "+a" (len) | : "+a" (len) | ||||
| : "r" (blk1a - len), "r" (blk1b -len), "r" (blk2 - len), "r" ((long)stride) | : "r" (blk1a - len), "r" (blk1b -len), "r" (blk2 - len), "r" ((long)stride) | ||||
| ); | ); | ||||
| @@ -118,34 +118,34 @@ static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) | |||||
| { //FIXME reuse src | { //FIXME reuse src | ||||
| long len= -(stride*h); | long len= -(stride*h); | ||||
| asm volatile( | asm volatile( | ||||
| ".balign 16 \n\t" | |||||
| "movq "MANGLE(bone)", %%mm5 \n\t" | |||||
| "1: \n\t" | |||||
| "movq (%1, %%"REG_a"), %%mm0 \n\t" | |||||
| "movq (%2, %%"REG_a"), %%mm2 \n\t" | |||||
| "movq 1(%1, %%"REG_a"), %%mm1 \n\t" | |||||
| "movq 1(%2, %%"REG_a"), %%mm3 \n\t" | |||||
| "pavgb %%mm2, %%mm0 \n\t" | |||||
| "pavgb %%mm1, %%mm3 \n\t" | |||||
| "psubusb %%mm5, %%mm3 \n\t" | |||||
| "pavgb %%mm3, %%mm0 \n\t" | |||||
| "movq (%3, %%"REG_a"), %%mm2 \n\t" | |||||
| "psadbw %%mm2, %%mm0 \n\t" | |||||
| "add %4, %%"REG_a" \n\t" | |||||
| "movq (%1, %%"REG_a"), %%mm1 \n\t" | |||||
| "movq (%2, %%"REG_a"), %%mm3 \n\t" | |||||
| "movq 1(%1, %%"REG_a"), %%mm2 \n\t" | |||||
| "movq 1(%2, %%"REG_a"), %%mm4 \n\t" | |||||
| "pavgb %%mm3, %%mm1 \n\t" | |||||
| "pavgb %%mm4, %%mm2 \n\t" | |||||
| "psubusb %%mm5, %%mm2 \n\t" | |||||
| "pavgb %%mm1, %%mm2 \n\t" | |||||
| "movq (%3, %%"REG_a"), %%mm1 \n\t" | |||||
| "psadbw %%mm1, %%mm2 \n\t" | |||||
| "paddw %%mm2, %%mm0 \n\t" | |||||
| "paddw %%mm0, %%mm6 \n\t" | |||||
| "add %4, %%"REG_a" \n\t" | |||||
| " js 1b \n\t" | |||||
| ".balign 16 \n\t" | |||||
| "movq "MANGLE(bone)", %%mm5 \n\t" | |||||
| "1: \n\t" | |||||
| "movq (%1, %%"REG_a"), %%mm0 \n\t" | |||||
| "movq (%2, %%"REG_a"), %%mm2 \n\t" | |||||
| "movq 1(%1, %%"REG_a"), %%mm1 \n\t" | |||||
| "movq 1(%2, %%"REG_a"), %%mm3 \n\t" | |||||
| "pavgb %%mm2, %%mm0 \n\t" | |||||
| "pavgb %%mm1, %%mm3 \n\t" | |||||
| "psubusb %%mm5, %%mm3 \n\t" | |||||
| "pavgb %%mm3, %%mm0 \n\t" | |||||
| "movq (%3, %%"REG_a"), %%mm2 \n\t" | |||||
| "psadbw %%mm2, %%mm0 \n\t" | |||||
| "add %4, %%"REG_a" \n\t" | |||||
| "movq (%1, %%"REG_a"), %%mm1 \n\t" | |||||
| "movq (%2, %%"REG_a"), %%mm3 \n\t" | |||||
| "movq 1(%1, %%"REG_a"), %%mm2 \n\t" | |||||
| "movq 1(%2, %%"REG_a"), %%mm4 \n\t" | |||||
| "pavgb %%mm3, %%mm1 \n\t" | |||||
| "pavgb %%mm4, %%mm2 \n\t" | |||||
| "psubusb %%mm5, %%mm2 \n\t" | |||||
| "pavgb %%mm1, %%mm2 \n\t" | |||||
| "movq (%3, %%"REG_a"), %%mm1 \n\t" | |||||
| "psadbw %%mm1, %%mm2 \n\t" | |||||
| "paddw %%mm2, %%mm0 \n\t" | |||||
| "paddw %%mm0, %%mm6 \n\t" | |||||
| "add %4, %%"REG_a" \n\t" | |||||
| " js 1b \n\t" | |||||
| : "+a" (len) | : "+a" (len) | ||||
| : "r" (blk1 - len), "r" (blk1 - len + stride), "r" (blk2 - len), "r" ((long)stride) | : "r" (blk1 - len), "r" (blk1 - len + stride), "r" (blk2 - len), "r" ((long)stride) | ||||
| ); | ); | ||||
| @@ -155,35 +155,35 @@ static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int | |||||
| { | { | ||||
| long len= -(stride*h); | long len= -(stride*h); | ||||
| asm volatile( | asm volatile( | ||||
| ".balign 16 \n\t" | |||||
| "1: \n\t" | |||||
| "movq (%1, %%"REG_a"), %%mm0 \n\t" | |||||
| "movq (%2, %%"REG_a"), %%mm1 \n\t" | |||||
| "movq (%1, %%"REG_a"), %%mm2 \n\t" | |||||
| "movq (%2, %%"REG_a"), %%mm3 \n\t" | |||||
| "punpcklbw %%mm7, %%mm0 \n\t" | |||||
| "punpcklbw %%mm7, %%mm1 \n\t" | |||||
| "punpckhbw %%mm7, %%mm2 \n\t" | |||||
| "punpckhbw %%mm7, %%mm3 \n\t" | |||||
| "paddw %%mm0, %%mm1 \n\t" | |||||
| "paddw %%mm2, %%mm3 \n\t" | |||||
| "movq (%3, %%"REG_a"), %%mm4 \n\t" | |||||
| "movq (%3, %%"REG_a"), %%mm2 \n\t" | |||||
| "paddw %%mm5, %%mm1 \n\t" | |||||
| "paddw %%mm5, %%mm3 \n\t" | |||||
| "psrlw $1, %%mm1 \n\t" | |||||
| "psrlw $1, %%mm3 \n\t" | |||||
| "packuswb %%mm3, %%mm1 \n\t" | |||||
| "psubusb %%mm1, %%mm4 \n\t" | |||||
| "psubusb %%mm2, %%mm1 \n\t" | |||||
| "por %%mm4, %%mm1 \n\t" | |||||
| "movq %%mm1, %%mm0 \n\t" | |||||
| "punpcklbw %%mm7, %%mm0 \n\t" | |||||
| "punpckhbw %%mm7, %%mm1 \n\t" | |||||
| "paddw %%mm1, %%mm0 \n\t" | |||||
| "paddw %%mm0, %%mm6 \n\t" | |||||
| "add %4, %%"REG_a" \n\t" | |||||
| " js 1b \n\t" | |||||
| ".balign 16 \n\t" | |||||
| "1: \n\t" | |||||
| "movq (%1, %%"REG_a"), %%mm0 \n\t" | |||||
| "movq (%2, %%"REG_a"), %%mm1 \n\t" | |||||
| "movq (%1, %%"REG_a"), %%mm2 \n\t" | |||||
| "movq (%2, %%"REG_a"), %%mm3 \n\t" | |||||
| "punpcklbw %%mm7, %%mm0 \n\t" | |||||
| "punpcklbw %%mm7, %%mm1 \n\t" | |||||
| "punpckhbw %%mm7, %%mm2 \n\t" | |||||
| "punpckhbw %%mm7, %%mm3 \n\t" | |||||
| "paddw %%mm0, %%mm1 \n\t" | |||||
| "paddw %%mm2, %%mm3 \n\t" | |||||
| "movq (%3, %%"REG_a"), %%mm4 \n\t" | |||||
| "movq (%3, %%"REG_a"), %%mm2 \n\t" | |||||
| "paddw %%mm5, %%mm1 \n\t" | |||||
| "paddw %%mm5, %%mm3 \n\t" | |||||
| "psrlw $1, %%mm1 \n\t" | |||||
| "psrlw $1, %%mm3 \n\t" | |||||
| "packuswb %%mm3, %%mm1 \n\t" | |||||
| "psubusb %%mm1, %%mm4 \n\t" | |||||
| "psubusb %%mm2, %%mm1 \n\t" | |||||
| "por %%mm4, %%mm1 \n\t" | |||||
| "movq %%mm1, %%mm0 \n\t" | |||||
| "punpcklbw %%mm7, %%mm0 \n\t" | |||||
| "punpckhbw %%mm7, %%mm1 \n\t" | |||||
| "paddw %%mm1, %%mm0 \n\t" | |||||
| "paddw %%mm0, %%mm6 \n\t" | |||||
| "add %4, %%"REG_a" \n\t" | |||||
| " js 1b \n\t" | |||||
| : "+a" (len) | : "+a" (len) | ||||
| : "r" (blk1a - len), "r" (blk1b -len), "r" (blk2 - len), "r" ((long)stride) | : "r" (blk1a - len), "r" (blk1b -len), "r" (blk2 - len), "r" ((long)stride) | ||||
| ); | ); | ||||
| @@ -193,47 +193,47 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) | |||||
| { | { | ||||
| long len= -(stride*h); | long len= -(stride*h); | ||||
| asm volatile( | asm volatile( | ||||
| ".balign 16 \n\t" | |||||
| "1: \n\t" | |||||
| "movq (%1, %%"REG_a"), %%mm0 \n\t" | |||||
| "movq (%2, %%"REG_a"), %%mm1 \n\t" | |||||
| "movq %%mm0, %%mm4 \n\t" | |||||
| "movq %%mm1, %%mm2 \n\t" | |||||
| "punpcklbw %%mm7, %%mm0 \n\t" | |||||
| "punpcklbw %%mm7, %%mm1 \n\t" | |||||
| "punpckhbw %%mm7, %%mm4 \n\t" | |||||
| "punpckhbw %%mm7, %%mm2 \n\t" | |||||
| "paddw %%mm1, %%mm0 \n\t" | |||||
| "paddw %%mm2, %%mm4 \n\t" | |||||
| "movq 1(%1, %%"REG_a"), %%mm2 \n\t" | |||||
| "movq 1(%2, %%"REG_a"), %%mm3 \n\t" | |||||
| "movq %%mm2, %%mm1 \n\t" | |||||
| "punpcklbw %%mm7, %%mm2 \n\t" | |||||
| "punpckhbw %%mm7, %%mm1 \n\t" | |||||
| "paddw %%mm0, %%mm2 \n\t" | |||||
| "paddw %%mm4, %%mm1 \n\t" | |||||
| "movq %%mm3, %%mm4 \n\t" | |||||
| "punpcklbw %%mm7, %%mm3 \n\t" | |||||
| "punpckhbw %%mm7, %%mm4 \n\t" | |||||
| "paddw %%mm3, %%mm2 \n\t" | |||||
| "paddw %%mm4, %%mm1 \n\t" | |||||
| "movq (%3, %%"REG_a"), %%mm3 \n\t" | |||||
| "movq (%3, %%"REG_a"), %%mm4 \n\t" | |||||
| "paddw %%mm5, %%mm2 \n\t" | |||||
| "paddw %%mm5, %%mm1 \n\t" | |||||
| "psrlw $2, %%mm2 \n\t" | |||||
| "psrlw $2, %%mm1 \n\t" | |||||
| "packuswb %%mm1, %%mm2 \n\t" | |||||
| "psubusb %%mm2, %%mm3 \n\t" | |||||
| "psubusb %%mm4, %%mm2 \n\t" | |||||
| "por %%mm3, %%mm2 \n\t" | |||||
| "movq %%mm2, %%mm0 \n\t" | |||||
| "punpcklbw %%mm7, %%mm0 \n\t" | |||||
| "punpckhbw %%mm7, %%mm2 \n\t" | |||||
| "paddw %%mm2, %%mm0 \n\t" | |||||
| "paddw %%mm0, %%mm6 \n\t" | |||||
| "add %4, %%"REG_a" \n\t" | |||||
| " js 1b \n\t" | |||||
| ".balign 16 \n\t" | |||||
| "1: \n\t" | |||||
| "movq (%1, %%"REG_a"), %%mm0 \n\t" | |||||
| "movq (%2, %%"REG_a"), %%mm1 \n\t" | |||||
| "movq %%mm0, %%mm4 \n\t" | |||||
| "movq %%mm1, %%mm2 \n\t" | |||||
| "punpcklbw %%mm7, %%mm0 \n\t" | |||||
| "punpcklbw %%mm7, %%mm1 \n\t" | |||||
| "punpckhbw %%mm7, %%mm4 \n\t" | |||||
| "punpckhbw %%mm7, %%mm2 \n\t" | |||||
| "paddw %%mm1, %%mm0 \n\t" | |||||
| "paddw %%mm2, %%mm4 \n\t" | |||||
| "movq 1(%1, %%"REG_a"), %%mm2 \n\t" | |||||
| "movq 1(%2, %%"REG_a"), %%mm3 \n\t" | |||||
| "movq %%mm2, %%mm1 \n\t" | |||||
| "punpcklbw %%mm7, %%mm2 \n\t" | |||||
| "punpckhbw %%mm7, %%mm1 \n\t" | |||||
| "paddw %%mm0, %%mm2 \n\t" | |||||
| "paddw %%mm4, %%mm1 \n\t" | |||||
| "movq %%mm3, %%mm4 \n\t" | |||||
| "punpcklbw %%mm7, %%mm3 \n\t" | |||||
| "punpckhbw %%mm7, %%mm4 \n\t" | |||||
| "paddw %%mm3, %%mm2 \n\t" | |||||
| "paddw %%mm4, %%mm1 \n\t" | |||||
| "movq (%3, %%"REG_a"), %%mm3 \n\t" | |||||
| "movq (%3, %%"REG_a"), %%mm4 \n\t" | |||||
| "paddw %%mm5, %%mm2 \n\t" | |||||
| "paddw %%mm5, %%mm1 \n\t" | |||||
| "psrlw $2, %%mm2 \n\t" | |||||
| "psrlw $2, %%mm1 \n\t" | |||||
| "packuswb %%mm1, %%mm2 \n\t" | |||||
| "psubusb %%mm2, %%mm3 \n\t" | |||||
| "psubusb %%mm4, %%mm2 \n\t" | |||||
| "por %%mm3, %%mm2 \n\t" | |||||
| "movq %%mm2, %%mm0 \n\t" | |||||
| "punpcklbw %%mm7, %%mm0 \n\t" | |||||
| "punpckhbw %%mm7, %%mm2 \n\t" | |||||
| "paddw %%mm2, %%mm0 \n\t" | |||||
| "paddw %%mm0, %%mm6 \n\t" | |||||
| "add %4, %%"REG_a" \n\t" | |||||
| " js 1b \n\t" | |||||
| : "+a" (len) | : "+a" (len) | ||||
| : "r" (blk1 - len), "r" (blk1 -len + stride), "r" (blk2 - len), "r" ((long)stride) | : "r" (blk1 - len), "r" (blk1 -len + stride), "r" (blk2 - len), "r" ((long)stride) | ||||
| ); | ); | ||||
| @@ -243,13 +243,13 @@ static inline int sum_mmx(void) | |||||
| { | { | ||||
| int ret; | int ret; | ||||
| asm volatile( | asm volatile( | ||||
| "movq %%mm6, %%mm0 \n\t" | |||||
| "psrlq $32, %%mm6 \n\t" | |||||
| "paddw %%mm0, %%mm6 \n\t" | |||||
| "movq %%mm6, %%mm0 \n\t" | |||||
| "psrlq $16, %%mm6 \n\t" | |||||
| "paddw %%mm0, %%mm6 \n\t" | |||||
| "movd %%mm6, %0 \n\t" | |||||
| "movq %%mm6, %%mm0 \n\t" | |||||
| "psrlq $32, %%mm6 \n\t" | |||||
| "paddw %%mm0, %%mm6 \n\t" | |||||
| "movq %%mm6, %%mm0 \n\t" | |||||
| "psrlq $16, %%mm6 \n\t" | |||||
| "paddw %%mm0, %%mm6 \n\t" | |||||
| "movd %%mm6, %0 \n\t" | |||||
| : "=r" (ret) | : "=r" (ret) | ||||
| ); | ); | ||||
| return ret&0xFFFF; | return ret&0xFFFF; | ||||
| @@ -259,7 +259,7 @@ static inline int sum_mmx2(void) | |||||
| { | { | ||||
| int ret; | int ret; | ||||
| asm volatile( | asm volatile( | ||||
| "movd %%mm6, %0 \n\t" | |||||
| "movd %%mm6, %0 \n\t" | |||||
| : "=r" (ret) | : "=r" (ret) | ||||
| ); | ); | ||||
| return ret; | return ret; | ||||
| @@ -270,8 +270,8 @@ static inline int sum_mmx2(void) | |||||
| static int sad8_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ | static int sad8_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ | ||||
| {\ | {\ | ||||
| assert(h==8);\ | assert(h==8);\ | ||||
| asm volatile("pxor %%mm7, %%mm7 \n\t"\ | |||||
| "pxor %%mm6, %%mm6 \n\t":);\ | |||||
| asm volatile("pxor %%mm7, %%mm7 \n\t"\ | |||||
| "pxor %%mm6, %%mm6 \n\t":);\ | |||||
| \ | \ | ||||
| sad8_1_ ## suf(blk1, blk2, stride, 8);\ | sad8_1_ ## suf(blk1, blk2, stride, 8);\ | ||||
| \ | \ | ||||
| @@ -280,9 +280,9 @@ static int sad8_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h | |||||
| static int sad8_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ | static int sad8_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ | ||||
| {\ | {\ | ||||
| assert(h==8);\ | assert(h==8);\ | ||||
| asm volatile("pxor %%mm7, %%mm7 \n\t"\ | |||||
| "pxor %%mm6, %%mm6 \n\t"\ | |||||
| "movq %0, %%mm5 \n\t"\ | |||||
| asm volatile("pxor %%mm7, %%mm7 \n\t"\ | |||||
| "pxor %%mm6, %%mm6 \n\t"\ | |||||
| "movq %0, %%mm5 \n\t"\ | |||||
| :: "m"(round_tab[1]) \ | :: "m"(round_tab[1]) \ | ||||
| );\ | );\ | ||||
| \ | \ | ||||
| @@ -294,9 +294,9 @@ static int sad8_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, in | |||||
| static int sad8_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ | static int sad8_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ | ||||
| {\ | {\ | ||||
| assert(h==8);\ | assert(h==8);\ | ||||
| asm volatile("pxor %%mm7, %%mm7 \n\t"\ | |||||
| "pxor %%mm6, %%mm6 \n\t"\ | |||||
| "movq %0, %%mm5 \n\t"\ | |||||
| asm volatile("pxor %%mm7, %%mm7 \n\t"\ | |||||
| "pxor %%mm6, %%mm6 \n\t"\ | |||||
| "movq %0, %%mm5 \n\t"\ | |||||
| :: "m"(round_tab[1]) \ | :: "m"(round_tab[1]) \ | ||||
| );\ | );\ | ||||
| \ | \ | ||||
| @@ -308,9 +308,9 @@ static int sad8_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, in | |||||
| static int sad8_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ | static int sad8_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ | ||||
| {\ | {\ | ||||
| assert(h==8);\ | assert(h==8);\ | ||||
| asm volatile("pxor %%mm7, %%mm7 \n\t"\ | |||||
| "pxor %%mm6, %%mm6 \n\t"\ | |||||
| "movq %0, %%mm5 \n\t"\ | |||||
| asm volatile("pxor %%mm7, %%mm7 \n\t"\ | |||||
| "pxor %%mm6, %%mm6 \n\t"\ | |||||
| "movq %0, %%mm5 \n\t"\ | |||||
| :: "m"(round_tab[2]) \ | :: "m"(round_tab[2]) \ | ||||
| );\ | );\ | ||||
| \ | \ | ||||
| @@ -321,8 +321,8 @@ static int sad8_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, i | |||||
| \ | \ | ||||
| static int sad16_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ | static int sad16_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ | ||||
| {\ | {\ | ||||
| asm volatile("pxor %%mm7, %%mm7 \n\t"\ | |||||
| "pxor %%mm6, %%mm6 \n\t":);\ | |||||
| asm volatile("pxor %%mm7, %%mm7 \n\t"\ | |||||
| "pxor %%mm6, %%mm6 \n\t":);\ | |||||
| \ | \ | ||||
| sad8_1_ ## suf(blk1 , blk2 , stride, h);\ | sad8_1_ ## suf(blk1 , blk2 , stride, h);\ | ||||
| sad8_1_ ## suf(blk1+8, blk2+8, stride, h);\ | sad8_1_ ## suf(blk1+8, blk2+8, stride, h);\ | ||||
| @@ -331,9 +331,9 @@ static int sad16_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int | |||||
| }\ | }\ | ||||
| static int sad16_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ | static int sad16_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ | ||||
| {\ | {\ | ||||
| asm volatile("pxor %%mm7, %%mm7 \n\t"\ | |||||
| "pxor %%mm6, %%mm6 \n\t"\ | |||||
| "movq %0, %%mm5 \n\t"\ | |||||
| asm volatile("pxor %%mm7, %%mm7 \n\t"\ | |||||
| "pxor %%mm6, %%mm6 \n\t"\ | |||||
| "movq %0, %%mm5 \n\t"\ | |||||
| :: "m"(round_tab[1]) \ | :: "m"(round_tab[1]) \ | ||||
| );\ | );\ | ||||
| \ | \ | ||||
| @@ -344,9 +344,9 @@ static int sad16_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, i | |||||
| }\ | }\ | ||||
| static int sad16_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ | static int sad16_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ | ||||
| {\ | {\ | ||||
| asm volatile("pxor %%mm7, %%mm7 \n\t"\ | |||||
| "pxor %%mm6, %%mm6 \n\t"\ | |||||
| "movq %0, %%mm5 \n\t"\ | |||||
| asm volatile("pxor %%mm7, %%mm7 \n\t"\ | |||||
| "pxor %%mm6, %%mm6 \n\t"\ | |||||
| "movq %0, %%mm5 \n\t"\ | |||||
| :: "m"(round_tab[1]) \ | :: "m"(round_tab[1]) \ | ||||
| );\ | );\ | ||||
| \ | \ | ||||
| @@ -357,9 +357,9 @@ static int sad16_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, i | |||||
| }\ | }\ | ||||
| static int sad16_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ | static int sad16_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ | ||||
| {\ | {\ | ||||
| asm volatile("pxor %%mm7, %%mm7 \n\t"\ | |||||
| "pxor %%mm6, %%mm6 \n\t"\ | |||||
| "movq %0, %%mm5 \n\t"\ | |||||
| asm volatile("pxor %%mm7, %%mm7 \n\t"\ | |||||
| "pxor %%mm6, %%mm6 \n\t"\ | |||||
| "movq %0, %%mm5 \n\t"\ | |||||
| :: "m"(round_tab[2]) \ | :: "m"(round_tab[2]) \ | ||||
| );\ | );\ | ||||
| \ | \ | ||||
| @@ -384,15 +384,15 @@ void dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx) | |||||
| c->pix_abs[1][2] = sad8_y2_mmx; | c->pix_abs[1][2] = sad8_y2_mmx; | ||||
| c->pix_abs[1][3] = sad8_xy2_mmx; | c->pix_abs[1][3] = sad8_xy2_mmx; | ||||
| c->sad[0]= sad16_mmx; | |||||
| c->sad[0]= sad16_mmx; | |||||
| c->sad[1]= sad8_mmx; | c->sad[1]= sad8_mmx; | ||||
| } | } | ||||
| if (mm_flags & MM_MMXEXT) { | if (mm_flags & MM_MMXEXT) { | ||||
| c->pix_abs[0][0] = sad16_mmx2; | |||||
| c->pix_abs[1][0] = sad8_mmx2; | |||||
| c->pix_abs[0][0] = sad16_mmx2; | |||||
| c->pix_abs[1][0] = sad8_mmx2; | |||||
| c->sad[0]= sad16_mmx2; | |||||
| c->sad[1]= sad8_mmx2; | |||||
| c->sad[0]= sad16_mmx2; | |||||
| c->sad[1]= sad8_mmx2; | |||||
| if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ | if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ | ||||
| c->pix_abs[0][1] = sad16_x2_mmx2; | c->pix_abs[0][1] = sad16_x2_mmx2; | ||||
| @@ -57,52 +57,52 @@ static void dct_unquantize_h263_intra_mmx(MpegEncContext *s, | |||||
| nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ]; | nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ]; | ||||
| //printf("%d %d ", qmul, qadd); | //printf("%d %d ", qmul, qadd); | ||||
| asm volatile( | asm volatile( | ||||
| "movd %1, %%mm6 \n\t" //qmul | |||||
| "packssdw %%mm6, %%mm6 \n\t" | |||||
| "packssdw %%mm6, %%mm6 \n\t" | |||||
| "movd %2, %%mm5 \n\t" //qadd | |||||
| "pxor %%mm7, %%mm7 \n\t" | |||||
| "packssdw %%mm5, %%mm5 \n\t" | |||||
| "packssdw %%mm5, %%mm5 \n\t" | |||||
| "psubw %%mm5, %%mm7 \n\t" | |||||
| "pxor %%mm4, %%mm4 \n\t" | |||||
| ".balign 16\n\t" | |||||
| "1: \n\t" | |||||
| "movq (%0, %3), %%mm0 \n\t" | |||||
| "movq 8(%0, %3), %%mm1 \n\t" | |||||
| "pmullw %%mm6, %%mm0 \n\t" | |||||
| "pmullw %%mm6, %%mm1 \n\t" | |||||
| "movq (%0, %3), %%mm2 \n\t" | |||||
| "movq 8(%0, %3), %%mm3 \n\t" | |||||
| "pcmpgtw %%mm4, %%mm2 \n\t" // block[i] < 0 ? -1 : 0 | |||||
| "pcmpgtw %%mm4, %%mm3 \n\t" // block[i] < 0 ? -1 : 0 | |||||
| "pxor %%mm2, %%mm0 \n\t" | |||||
| "pxor %%mm3, %%mm1 \n\t" | |||||
| "paddw %%mm7, %%mm0 \n\t" | |||||
| "paddw %%mm7, %%mm1 \n\t" | |||||
| "pxor %%mm0, %%mm2 \n\t" | |||||
| "pxor %%mm1, %%mm3 \n\t" | |||||
| "pcmpeqw %%mm7, %%mm0 \n\t" // block[i] == 0 ? -1 : 0 | |||||
| "pcmpeqw %%mm7, %%mm1 \n\t" // block[i] == 0 ? -1 : 0 | |||||
| "pandn %%mm2, %%mm0 \n\t" | |||||
| "pandn %%mm3, %%mm1 \n\t" | |||||
| "movq %%mm0, (%0, %3) \n\t" | |||||
| "movq %%mm1, 8(%0, %3) \n\t" | |||||
| "add $16, %3 \n\t" | |||||
| "jng 1b \n\t" | |||||
| ::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs)) | |||||
| : "memory" | |||||
| ); | |||||
| "movd %1, %%mm6 \n\t" //qmul | |||||
| "packssdw %%mm6, %%mm6 \n\t" | |||||
| "packssdw %%mm6, %%mm6 \n\t" | |||||
| "movd %2, %%mm5 \n\t" //qadd | |||||
| "pxor %%mm7, %%mm7 \n\t" | |||||
| "packssdw %%mm5, %%mm5 \n\t" | |||||
| "packssdw %%mm5, %%mm5 \n\t" | |||||
| "psubw %%mm5, %%mm7 \n\t" | |||||
| "pxor %%mm4, %%mm4 \n\t" | |||||
| ".balign 16 \n\t" | |||||
| "1: \n\t" | |||||
| "movq (%0, %3), %%mm0 \n\t" | |||||
| "movq 8(%0, %3), %%mm1 \n\t" | |||||
| "pmullw %%mm6, %%mm0 \n\t" | |||||
| "pmullw %%mm6, %%mm1 \n\t" | |||||
| "movq (%0, %3), %%mm2 \n\t" | |||||
| "movq 8(%0, %3), %%mm3 \n\t" | |||||
| "pcmpgtw %%mm4, %%mm2 \n\t" // block[i] < 0 ? -1 : 0 | |||||
| "pcmpgtw %%mm4, %%mm3 \n\t" // block[i] < 0 ? -1 : 0 | |||||
| "pxor %%mm2, %%mm0 \n\t" | |||||
| "pxor %%mm3, %%mm1 \n\t" | |||||
| "paddw %%mm7, %%mm0 \n\t" | |||||
| "paddw %%mm7, %%mm1 \n\t" | |||||
| "pxor %%mm0, %%mm2 \n\t" | |||||
| "pxor %%mm1, %%mm3 \n\t" | |||||
| "pcmpeqw %%mm7, %%mm0 \n\t" // block[i] == 0 ? -1 : 0 | |||||
| "pcmpeqw %%mm7, %%mm1 \n\t" // block[i] == 0 ? -1 : 0 | |||||
| "pandn %%mm2, %%mm0 \n\t" | |||||
| "pandn %%mm3, %%mm1 \n\t" | |||||
| "movq %%mm0, (%0, %3) \n\t" | |||||
| "movq %%mm1, 8(%0, %3) \n\t" | |||||
| "add $16, %3 \n\t" | |||||
| "jng 1b \n\t" | |||||
| ::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs)) | |||||
| : "memory" | |||||
| ); | |||||
| block[0]= level; | block[0]= level; | ||||
| } | } | ||||
| @@ -120,52 +120,52 @@ static void dct_unquantize_h263_inter_mmx(MpegEncContext *s, | |||||
| nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ]; | nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ]; | ||||
| //printf("%d %d ", qmul, qadd); | //printf("%d %d ", qmul, qadd); | ||||
| asm volatile( | asm volatile( | ||||
| "movd %1, %%mm6 \n\t" //qmul | |||||
| "packssdw %%mm6, %%mm6 \n\t" | |||||
| "packssdw %%mm6, %%mm6 \n\t" | |||||
| "movd %2, %%mm5 \n\t" //qadd | |||||
| "pxor %%mm7, %%mm7 \n\t" | |||||
| "packssdw %%mm5, %%mm5 \n\t" | |||||
| "packssdw %%mm5, %%mm5 \n\t" | |||||
| "psubw %%mm5, %%mm7 \n\t" | |||||
| "pxor %%mm4, %%mm4 \n\t" | |||||
| ".balign 16\n\t" | |||||
| "1: \n\t" | |||||
| "movq (%0, %3), %%mm0 \n\t" | |||||
| "movq 8(%0, %3), %%mm1 \n\t" | |||||
| "pmullw %%mm6, %%mm0 \n\t" | |||||
| "pmullw %%mm6, %%mm1 \n\t" | |||||
| "movq (%0, %3), %%mm2 \n\t" | |||||
| "movq 8(%0, %3), %%mm3 \n\t" | |||||
| "pcmpgtw %%mm4, %%mm2 \n\t" // block[i] < 0 ? -1 : 0 | |||||
| "pcmpgtw %%mm4, %%mm3 \n\t" // block[i] < 0 ? -1 : 0 | |||||
| "pxor %%mm2, %%mm0 \n\t" | |||||
| "pxor %%mm3, %%mm1 \n\t" | |||||
| "paddw %%mm7, %%mm0 \n\t" | |||||
| "paddw %%mm7, %%mm1 \n\t" | |||||
| "pxor %%mm0, %%mm2 \n\t" | |||||
| "pxor %%mm1, %%mm3 \n\t" | |||||
| "pcmpeqw %%mm7, %%mm0 \n\t" // block[i] == 0 ? -1 : 0 | |||||
| "pcmpeqw %%mm7, %%mm1 \n\t" // block[i] == 0 ? -1 : 0 | |||||
| "pandn %%mm2, %%mm0 \n\t" | |||||
| "pandn %%mm3, %%mm1 \n\t" | |||||
| "movq %%mm0, (%0, %3) \n\t" | |||||
| "movq %%mm1, 8(%0, %3) \n\t" | |||||
| "add $16, %3 \n\t" | |||||
| "jng 1b \n\t" | |||||
| ::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs)) | |||||
| : "memory" | |||||
| ); | |||||
| "movd %1, %%mm6 \n\t" //qmul | |||||
| "packssdw %%mm6, %%mm6 \n\t" | |||||
| "packssdw %%mm6, %%mm6 \n\t" | |||||
| "movd %2, %%mm5 \n\t" //qadd | |||||
| "pxor %%mm7, %%mm7 \n\t" | |||||
| "packssdw %%mm5, %%mm5 \n\t" | |||||
| "packssdw %%mm5, %%mm5 \n\t" | |||||
| "psubw %%mm5, %%mm7 \n\t" | |||||
| "pxor %%mm4, %%mm4 \n\t" | |||||
| ".balign 16 \n\t" | |||||
| "1: \n\t" | |||||
| "movq (%0, %3), %%mm0 \n\t" | |||||
| "movq 8(%0, %3), %%mm1 \n\t" | |||||
| "pmullw %%mm6, %%mm0 \n\t" | |||||
| "pmullw %%mm6, %%mm1 \n\t" | |||||
| "movq (%0, %3), %%mm2 \n\t" | |||||
| "movq 8(%0, %3), %%mm3 \n\t" | |||||
| "pcmpgtw %%mm4, %%mm2 \n\t" // block[i] < 0 ? -1 : 0 | |||||
| "pcmpgtw %%mm4, %%mm3 \n\t" // block[i] < 0 ? -1 : 0 | |||||
| "pxor %%mm2, %%mm0 \n\t" | |||||
| "pxor %%mm3, %%mm1 \n\t" | |||||
| "paddw %%mm7, %%mm0 \n\t" | |||||
| "paddw %%mm7, %%mm1 \n\t" | |||||
| "pxor %%mm0, %%mm2 \n\t" | |||||
| "pxor %%mm1, %%mm3 \n\t" | |||||
| "pcmpeqw %%mm7, %%mm0 \n\t" // block[i] == 0 ? -1 : 0 | |||||
| "pcmpeqw %%mm7, %%mm1 \n\t" // block[i] == 0 ? -1 : 0 | |||||
| "pandn %%mm2, %%mm0 \n\t" | |||||
| "pandn %%mm3, %%mm1 \n\t" | |||||
| "movq %%mm0, (%0, %3) \n\t" | |||||
| "movq %%mm1, 8(%0, %3) \n\t" | |||||
| "add $16, %3 \n\t" | |||||
| "jng 1b \n\t" | |||||
| ::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs)) | |||||
| : "memory" | |||||
| ); | |||||
| } | } | ||||
| @@ -216,54 +216,54 @@ static void dct_unquantize_mpeg1_intra_mmx(MpegEncContext *s, | |||||
| /* XXX: only mpeg1 */ | /* XXX: only mpeg1 */ | ||||
| quant_matrix = s->intra_matrix; | quant_matrix = s->intra_matrix; | ||||
| asm volatile( | asm volatile( | ||||
| "pcmpeqw %%mm7, %%mm7 \n\t" | |||||
| "psrlw $15, %%mm7 \n\t" | |||||
| "movd %2, %%mm6 \n\t" | |||||
| "packssdw %%mm6, %%mm6 \n\t" | |||||
| "packssdw %%mm6, %%mm6 \n\t" | |||||
| "mov %3, %%"REG_a" \n\t" | |||||
| ".balign 16\n\t" | |||||
| "1: \n\t" | |||||
| "movq (%0, %%"REG_a"), %%mm0 \n\t" | |||||
| "movq 8(%0, %%"REG_a"), %%mm1 \n\t" | |||||
| "movq (%1, %%"REG_a"), %%mm4 \n\t" | |||||
| "movq 8(%1, %%"REG_a"), %%mm5 \n\t" | |||||
| "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i] | |||||
| "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i] | |||||
| "pxor %%mm2, %%mm2 \n\t" | |||||
| "pxor %%mm3, %%mm3 \n\t" | |||||
| "pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0 | |||||
| "pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0 | |||||
| "pxor %%mm2, %%mm0 \n\t" | |||||
| "pxor %%mm3, %%mm1 \n\t" | |||||
| "psubw %%mm2, %%mm0 \n\t" // abs(block[i]) | |||||
| "psubw %%mm3, %%mm1 \n\t" // abs(block[i]) | |||||
| "pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*q | |||||
| "pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*q | |||||
| "pxor %%mm4, %%mm4 \n\t" | |||||
| "pxor %%mm5, %%mm5 \n\t" // FIXME slow | |||||
| "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0 | |||||
| "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0 | |||||
| "psraw $3, %%mm0 \n\t" | |||||
| "psraw $3, %%mm1 \n\t" | |||||
| "psubw %%mm7, %%mm0 \n\t" | |||||
| "psubw %%mm7, %%mm1 \n\t" | |||||
| "por %%mm7, %%mm0 \n\t" | |||||
| "por %%mm7, %%mm1 \n\t" | |||||
| "pxor %%mm2, %%mm0 \n\t" | |||||
| "pxor %%mm3, %%mm1 \n\t" | |||||
| "psubw %%mm2, %%mm0 \n\t" | |||||
| "psubw %%mm3, %%mm1 \n\t" | |||||
| "pandn %%mm0, %%mm4 \n\t" | |||||
| "pandn %%mm1, %%mm5 \n\t" | |||||
| "movq %%mm4, (%0, %%"REG_a") \n\t" | |||||
| "movq %%mm5, 8(%0, %%"REG_a") \n\t" | |||||
| "add $16, %%"REG_a" \n\t" | |||||
| "js 1b \n\t" | |||||
| ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs) | |||||
| : "%"REG_a, "memory" | |||||
| ); | |||||
| "pcmpeqw %%mm7, %%mm7 \n\t" | |||||
| "psrlw $15, %%mm7 \n\t" | |||||
| "movd %2, %%mm6 \n\t" | |||||
| "packssdw %%mm6, %%mm6 \n\t" | |||||
| "packssdw %%mm6, %%mm6 \n\t" | |||||
| "mov %3, %%"REG_a" \n\t" | |||||
| ".balign 16 \n\t" | |||||
| "1: \n\t" | |||||
| "movq (%0, %%"REG_a"), %%mm0 \n\t" | |||||
| "movq 8(%0, %%"REG_a"), %%mm1 \n\t" | |||||
| "movq (%1, %%"REG_a"), %%mm4 \n\t" | |||||
| "movq 8(%1, %%"REG_a"), %%mm5 \n\t" | |||||
| "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i] | |||||
| "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i] | |||||
| "pxor %%mm2, %%mm2 \n\t" | |||||
| "pxor %%mm3, %%mm3 \n\t" | |||||
| "pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0 | |||||
| "pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0 | |||||
| "pxor %%mm2, %%mm0 \n\t" | |||||
| "pxor %%mm3, %%mm1 \n\t" | |||||
| "psubw %%mm2, %%mm0 \n\t" // abs(block[i]) | |||||
| "psubw %%mm3, %%mm1 \n\t" // abs(block[i]) | |||||
| "pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*q | |||||
| "pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*q | |||||
| "pxor %%mm4, %%mm4 \n\t" | |||||
| "pxor %%mm5, %%mm5 \n\t" // FIXME slow | |||||
| "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0 | |||||
| "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0 | |||||
| "psraw $3, %%mm0 \n\t" | |||||
| "psraw $3, %%mm1 \n\t" | |||||
| "psubw %%mm7, %%mm0 \n\t" | |||||
| "psubw %%mm7, %%mm1 \n\t" | |||||
| "por %%mm7, %%mm0 \n\t" | |||||
| "por %%mm7, %%mm1 \n\t" | |||||
| "pxor %%mm2, %%mm0 \n\t" | |||||
| "pxor %%mm3, %%mm1 \n\t" | |||||
| "psubw %%mm2, %%mm0 \n\t" | |||||
| "psubw %%mm3, %%mm1 \n\t" | |||||
| "pandn %%mm0, %%mm4 \n\t" | |||||
| "pandn %%mm1, %%mm5 \n\t" | |||||
| "movq %%mm4, (%0, %%"REG_a") \n\t" | |||||
| "movq %%mm5, 8(%0, %%"REG_a") \n\t" | |||||
| "add $16, %%"REG_a" \n\t" | |||||
| "js 1b \n\t" | |||||
| ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs) | |||||
| : "%"REG_a, "memory" | |||||
| ); | |||||
| block[0]= block0; | block[0]= block0; | ||||
| } | } | ||||
| @@ -279,58 +279,58 @@ static void dct_unquantize_mpeg1_inter_mmx(MpegEncContext *s, | |||||
| quant_matrix = s->inter_matrix; | quant_matrix = s->inter_matrix; | ||||
| asm volatile( | asm volatile( | ||||
| "pcmpeqw %%mm7, %%mm7 \n\t" | |||||
| "psrlw $15, %%mm7 \n\t" | |||||
| "movd %2, %%mm6 \n\t" | |||||
| "packssdw %%mm6, %%mm6 \n\t" | |||||
| "packssdw %%mm6, %%mm6 \n\t" | |||||
| "mov %3, %%"REG_a" \n\t" | |||||
| ".balign 16\n\t" | |||||
| "1: \n\t" | |||||
| "movq (%0, %%"REG_a"), %%mm0 \n\t" | |||||
| "movq 8(%0, %%"REG_a"), %%mm1 \n\t" | |||||
| "movq (%1, %%"REG_a"), %%mm4 \n\t" | |||||
| "movq 8(%1, %%"REG_a"), %%mm5 \n\t" | |||||
| "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i] | |||||
| "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i] | |||||
| "pxor %%mm2, %%mm2 \n\t" | |||||
| "pxor %%mm3, %%mm3 \n\t" | |||||
| "pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0 | |||||
| "pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0 | |||||
| "pxor %%mm2, %%mm0 \n\t" | |||||
| "pxor %%mm3, %%mm1 \n\t" | |||||
| "psubw %%mm2, %%mm0 \n\t" // abs(block[i]) | |||||
| "psubw %%mm3, %%mm1 \n\t" // abs(block[i]) | |||||
| "paddw %%mm0, %%mm0 \n\t" // abs(block[i])*2 | |||||
| "paddw %%mm1, %%mm1 \n\t" // abs(block[i])*2 | |||||
| "paddw %%mm7, %%mm0 \n\t" // abs(block[i])*2 + 1 | |||||
| "paddw %%mm7, %%mm1 \n\t" // abs(block[i])*2 + 1 | |||||
| "pmullw %%mm4, %%mm0 \n\t" // (abs(block[i])*2 + 1)*q | |||||
| "pmullw %%mm5, %%mm1 \n\t" // (abs(block[i])*2 + 1)*q | |||||
| "pxor %%mm4, %%mm4 \n\t" | |||||
| "pxor %%mm5, %%mm5 \n\t" // FIXME slow | |||||
| "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0 | |||||
| "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0 | |||||
| "psraw $4, %%mm0 \n\t" | |||||
| "psraw $4, %%mm1 \n\t" | |||||
| "psubw %%mm7, %%mm0 \n\t" | |||||
| "psubw %%mm7, %%mm1 \n\t" | |||||
| "por %%mm7, %%mm0 \n\t" | |||||
| "por %%mm7, %%mm1 \n\t" | |||||
| "pxor %%mm2, %%mm0 \n\t" | |||||
| "pxor %%mm3, %%mm1 \n\t" | |||||
| "psubw %%mm2, %%mm0 \n\t" | |||||
| "psubw %%mm3, %%mm1 \n\t" | |||||
| "pandn %%mm0, %%mm4 \n\t" | |||||
| "pandn %%mm1, %%mm5 \n\t" | |||||
| "movq %%mm4, (%0, %%"REG_a") \n\t" | |||||
| "movq %%mm5, 8(%0, %%"REG_a") \n\t" | |||||
| "add $16, %%"REG_a" \n\t" | |||||
| "js 1b \n\t" | |||||
| ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs) | |||||
| : "%"REG_a, "memory" | |||||
| ); | |||||
| "pcmpeqw %%mm7, %%mm7 \n\t" | |||||
| "psrlw $15, %%mm7 \n\t" | |||||
| "movd %2, %%mm6 \n\t" | |||||
| "packssdw %%mm6, %%mm6 \n\t" | |||||
| "packssdw %%mm6, %%mm6 \n\t" | |||||
| "mov %3, %%"REG_a" \n\t" | |||||
| ".balign 16 \n\t" | |||||
| "1: \n\t" | |||||
| "movq (%0, %%"REG_a"), %%mm0 \n\t" | |||||
| "movq 8(%0, %%"REG_a"), %%mm1 \n\t" | |||||
| "movq (%1, %%"REG_a"), %%mm4 \n\t" | |||||
| "movq 8(%1, %%"REG_a"), %%mm5 \n\t" | |||||
| "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i] | |||||
| "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i] | |||||
| "pxor %%mm2, %%mm2 \n\t" | |||||
| "pxor %%mm3, %%mm3 \n\t" | |||||
| "pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0 | |||||
| "pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0 | |||||
| "pxor %%mm2, %%mm0 \n\t" | |||||
| "pxor %%mm3, %%mm1 \n\t" | |||||
| "psubw %%mm2, %%mm0 \n\t" // abs(block[i]) | |||||
| "psubw %%mm3, %%mm1 \n\t" // abs(block[i]) | |||||
| "paddw %%mm0, %%mm0 \n\t" // abs(block[i])*2 | |||||
| "paddw %%mm1, %%mm1 \n\t" // abs(block[i])*2 | |||||
| "paddw %%mm7, %%mm0 \n\t" // abs(block[i])*2 + 1 | |||||
| "paddw %%mm7, %%mm1 \n\t" // abs(block[i])*2 + 1 | |||||
| "pmullw %%mm4, %%mm0 \n\t" // (abs(block[i])*2 + 1)*q | |||||
| "pmullw %%mm5, %%mm1 \n\t" // (abs(block[i])*2 + 1)*q | |||||
| "pxor %%mm4, %%mm4 \n\t" | |||||
| "pxor %%mm5, %%mm5 \n\t" // FIXME slow | |||||
| "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0 | |||||
| "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0 | |||||
| "psraw $4, %%mm0 \n\t" | |||||
| "psraw $4, %%mm1 \n\t" | |||||
| "psubw %%mm7, %%mm0 \n\t" | |||||
| "psubw %%mm7, %%mm1 \n\t" | |||||
| "por %%mm7, %%mm0 \n\t" | |||||
| "por %%mm7, %%mm1 \n\t" | |||||
| "pxor %%mm2, %%mm0 \n\t" | |||||
| "pxor %%mm3, %%mm1 \n\t" | |||||
| "psubw %%mm2, %%mm0 \n\t" | |||||
| "psubw %%mm3, %%mm1 \n\t" | |||||
| "pandn %%mm0, %%mm4 \n\t" | |||||
| "pandn %%mm1, %%mm5 \n\t" | |||||
| "movq %%mm4, (%0, %%"REG_a") \n\t" | |||||
| "movq %%mm5, 8(%0, %%"REG_a") \n\t" | |||||
| "add $16, %%"REG_a" \n\t" | |||||
| "js 1b \n\t" | |||||
| ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs) | |||||
| : "%"REG_a, "memory" | |||||
| ); | |||||
| } | } | ||||
| static void dct_unquantize_mpeg2_intra_mmx(MpegEncContext *s, | static void dct_unquantize_mpeg2_intra_mmx(MpegEncContext *s, | ||||
| @@ -351,50 +351,50 @@ static void dct_unquantize_mpeg2_intra_mmx(MpegEncContext *s, | |||||
| block0 = block[0] * s->c_dc_scale; | block0 = block[0] * s->c_dc_scale; | ||||
| quant_matrix = s->intra_matrix; | quant_matrix = s->intra_matrix; | ||||
| asm volatile( | asm volatile( | ||||
| "pcmpeqw %%mm7, %%mm7 \n\t" | |||||
| "psrlw $15, %%mm7 \n\t" | |||||
| "movd %2, %%mm6 \n\t" | |||||
| "packssdw %%mm6, %%mm6 \n\t" | |||||
| "packssdw %%mm6, %%mm6 \n\t" | |||||
| "mov %3, %%"REG_a" \n\t" | |||||
| ".balign 16\n\t" | |||||
| "1: \n\t" | |||||
| "movq (%0, %%"REG_a"), %%mm0 \n\t" | |||||
| "movq 8(%0, %%"REG_a"), %%mm1 \n\t" | |||||
| "movq (%1, %%"REG_a"), %%mm4 \n\t" | |||||
| "movq 8(%1, %%"REG_a"), %%mm5 \n\t" | |||||
| "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i] | |||||
| "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i] | |||||
| "pxor %%mm2, %%mm2 \n\t" | |||||
| "pxor %%mm3, %%mm3 \n\t" | |||||
| "pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0 | |||||
| "pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0 | |||||
| "pxor %%mm2, %%mm0 \n\t" | |||||
| "pxor %%mm3, %%mm1 \n\t" | |||||
| "psubw %%mm2, %%mm0 \n\t" // abs(block[i]) | |||||
| "psubw %%mm3, %%mm1 \n\t" // abs(block[i]) | |||||
| "pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*q | |||||
| "pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*q | |||||
| "pxor %%mm4, %%mm4 \n\t" | |||||
| "pxor %%mm5, %%mm5 \n\t" // FIXME slow | |||||
| "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0 | |||||
| "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0 | |||||
| "psraw $3, %%mm0 \n\t" | |||||
| "psraw $3, %%mm1 \n\t" | |||||
| "pxor %%mm2, %%mm0 \n\t" | |||||
| "pxor %%mm3, %%mm1 \n\t" | |||||
| "psubw %%mm2, %%mm0 \n\t" | |||||
| "psubw %%mm3, %%mm1 \n\t" | |||||
| "pandn %%mm0, %%mm4 \n\t" | |||||
| "pandn %%mm1, %%mm5 \n\t" | |||||
| "movq %%mm4, (%0, %%"REG_a") \n\t" | |||||
| "movq %%mm5, 8(%0, %%"REG_a") \n\t" | |||||
| "add $16, %%"REG_a" \n\t" | |||||
| "jng 1b \n\t" | |||||
| ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs) | |||||
| : "%"REG_a, "memory" | |||||
| ); | |||||
| "pcmpeqw %%mm7, %%mm7 \n\t" | |||||
| "psrlw $15, %%mm7 \n\t" | |||||
| "movd %2, %%mm6 \n\t" | |||||
| "packssdw %%mm6, %%mm6 \n\t" | |||||
| "packssdw %%mm6, %%mm6 \n\t" | |||||
| "mov %3, %%"REG_a" \n\t" | |||||
| ".balign 16 \n\t" | |||||
| "1: \n\t" | |||||
| "movq (%0, %%"REG_a"), %%mm0 \n\t" | |||||
| "movq 8(%0, %%"REG_a"), %%mm1 \n\t" | |||||
| "movq (%1, %%"REG_a"), %%mm4 \n\t" | |||||
| "movq 8(%1, %%"REG_a"), %%mm5 \n\t" | |||||
| "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i] | |||||
| "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i] | |||||
| "pxor %%mm2, %%mm2 \n\t" | |||||
| "pxor %%mm3, %%mm3 \n\t" | |||||
| "pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0 | |||||
| "pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0 | |||||
| "pxor %%mm2, %%mm0 \n\t" | |||||
| "pxor %%mm3, %%mm1 \n\t" | |||||
| "psubw %%mm2, %%mm0 \n\t" // abs(block[i]) | |||||
| "psubw %%mm3, %%mm1 \n\t" // abs(block[i]) | |||||
| "pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*q | |||||
| "pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*q | |||||
| "pxor %%mm4, %%mm4 \n\t" | |||||
| "pxor %%mm5, %%mm5 \n\t" // FIXME slow | |||||
| "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0 | |||||
| "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0 | |||||
| "psraw $3, %%mm0 \n\t" | |||||
| "psraw $3, %%mm1 \n\t" | |||||
| "pxor %%mm2, %%mm0 \n\t" | |||||
| "pxor %%mm3, %%mm1 \n\t" | |||||
| "psubw %%mm2, %%mm0 \n\t" | |||||
| "psubw %%mm3, %%mm1 \n\t" | |||||
| "pandn %%mm0, %%mm4 \n\t" | |||||
| "pandn %%mm1, %%mm5 \n\t" | |||||
| "movq %%mm4, (%0, %%"REG_a") \n\t" | |||||
| "movq %%mm5, 8(%0, %%"REG_a") \n\t" | |||||
| "add $16, %%"REG_a" \n\t" | |||||
| "jng 1b \n\t" | |||||
| ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs) | |||||
| : "%"REG_a, "memory" | |||||
| ); | |||||
| block[0]= block0; | block[0]= block0; | ||||
| //Note, we dont do mismatch control for intra as errors cannot accumulate | //Note, we dont do mismatch control for intra as errors cannot accumulate | ||||
| } | } | ||||
| @@ -412,68 +412,68 @@ static void dct_unquantize_mpeg2_inter_mmx(MpegEncContext *s, | |||||
| quant_matrix = s->inter_matrix; | quant_matrix = s->inter_matrix; | ||||
| asm volatile( | asm volatile( | ||||
| "pcmpeqw %%mm7, %%mm7 \n\t" | |||||
| "psrlq $48, %%mm7 \n\t" | |||||
| "movd %2, %%mm6 \n\t" | |||||
| "packssdw %%mm6, %%mm6 \n\t" | |||||
| "packssdw %%mm6, %%mm6 \n\t" | |||||
| "mov %3, %%"REG_a" \n\t" | |||||
| ".balign 16\n\t" | |||||
| "1: \n\t" | |||||
| "movq (%0, %%"REG_a"), %%mm0 \n\t" | |||||
| "movq 8(%0, %%"REG_a"), %%mm1 \n\t" | |||||
| "movq (%1, %%"REG_a"), %%mm4 \n\t" | |||||
| "movq 8(%1, %%"REG_a"), %%mm5 \n\t" | |||||
| "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i] | |||||
| "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i] | |||||
| "pxor %%mm2, %%mm2 \n\t" | |||||
| "pxor %%mm3, %%mm3 \n\t" | |||||
| "pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0 | |||||
| "pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0 | |||||
| "pxor %%mm2, %%mm0 \n\t" | |||||
| "pxor %%mm3, %%mm1 \n\t" | |||||
| "psubw %%mm2, %%mm0 \n\t" // abs(block[i]) | |||||
| "psubw %%mm3, %%mm1 \n\t" // abs(block[i]) | |||||
| "paddw %%mm0, %%mm0 \n\t" // abs(block[i])*2 | |||||
| "paddw %%mm1, %%mm1 \n\t" // abs(block[i])*2 | |||||
| "pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*2*q | |||||
| "pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*2*q | |||||
| "paddw %%mm4, %%mm0 \n\t" // (abs(block[i])*2 + 1)*q | |||||
| "paddw %%mm5, %%mm1 \n\t" // (abs(block[i])*2 + 1)*q | |||||
| "pxor %%mm4, %%mm4 \n\t" | |||||
| "pxor %%mm5, %%mm5 \n\t" // FIXME slow | |||||
| "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0 | |||||
| "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0 | |||||
| "psrlw $4, %%mm0 \n\t" | |||||
| "psrlw $4, %%mm1 \n\t" | |||||
| "pxor %%mm2, %%mm0 \n\t" | |||||
| "pxor %%mm3, %%mm1 \n\t" | |||||
| "psubw %%mm2, %%mm0 \n\t" | |||||
| "psubw %%mm3, %%mm1 \n\t" | |||||
| "pandn %%mm0, %%mm4 \n\t" | |||||
| "pandn %%mm1, %%mm5 \n\t" | |||||
| "pxor %%mm4, %%mm7 \n\t" | |||||
| "pxor %%mm5, %%mm7 \n\t" | |||||
| "movq %%mm4, (%0, %%"REG_a") \n\t" | |||||
| "movq %%mm5, 8(%0, %%"REG_a") \n\t" | |||||
| "add $16, %%"REG_a" \n\t" | |||||
| "jng 1b \n\t" | |||||
| "movd 124(%0, %3), %%mm0 \n\t" | |||||
| "movq %%mm7, %%mm6 \n\t" | |||||
| "psrlq $32, %%mm7 \n\t" | |||||
| "pxor %%mm6, %%mm7 \n\t" | |||||
| "movq %%mm7, %%mm6 \n\t" | |||||
| "psrlq $16, %%mm7 \n\t" | |||||
| "pxor %%mm6, %%mm7 \n\t" | |||||
| "pslld $31, %%mm7 \n\t" | |||||
| "psrlq $15, %%mm7 \n\t" | |||||
| "pxor %%mm7, %%mm0 \n\t" | |||||
| "movd %%mm0, 124(%0, %3) \n\t" | |||||
| ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "r" (-2*nCoeffs) | |||||
| : "%"REG_a, "memory" | |||||
| ); | |||||
| "pcmpeqw %%mm7, %%mm7 \n\t" | |||||
| "psrlq $48, %%mm7 \n\t" | |||||
| "movd %2, %%mm6 \n\t" | |||||
| "packssdw %%mm6, %%mm6 \n\t" | |||||
| "packssdw %%mm6, %%mm6 \n\t" | |||||
| "mov %3, %%"REG_a" \n\t" | |||||
| ".balign 16 \n\t" | |||||
| "1: \n\t" | |||||
| "movq (%0, %%"REG_a"), %%mm0 \n\t" | |||||
| "movq 8(%0, %%"REG_a"), %%mm1 \n\t" | |||||
| "movq (%1, %%"REG_a"), %%mm4 \n\t" | |||||
| "movq 8(%1, %%"REG_a"), %%mm5 \n\t" | |||||
| "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i] | |||||
| "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i] | |||||
| "pxor %%mm2, %%mm2 \n\t" | |||||
| "pxor %%mm3, %%mm3 \n\t" | |||||
| "pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0 | |||||
| "pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0 | |||||
| "pxor %%mm2, %%mm0 \n\t" | |||||
| "pxor %%mm3, %%mm1 \n\t" | |||||
| "psubw %%mm2, %%mm0 \n\t" // abs(block[i]) | |||||
| "psubw %%mm3, %%mm1 \n\t" // abs(block[i]) | |||||
| "paddw %%mm0, %%mm0 \n\t" // abs(block[i])*2 | |||||
| "paddw %%mm1, %%mm1 \n\t" // abs(block[i])*2 | |||||
| "pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*2*q | |||||
| "pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*2*q | |||||
| "paddw %%mm4, %%mm0 \n\t" // (abs(block[i])*2 + 1)*q | |||||
| "paddw %%mm5, %%mm1 \n\t" // (abs(block[i])*2 + 1)*q | |||||
| "pxor %%mm4, %%mm4 \n\t" | |||||
| "pxor %%mm5, %%mm5 \n\t" // FIXME slow | |||||
| "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0 | |||||
| "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0 | |||||
| "psrlw $4, %%mm0 \n\t" | |||||
| "psrlw $4, %%mm1 \n\t" | |||||
| "pxor %%mm2, %%mm0 \n\t" | |||||
| "pxor %%mm3, %%mm1 \n\t" | |||||
| "psubw %%mm2, %%mm0 \n\t" | |||||
| "psubw %%mm3, %%mm1 \n\t" | |||||
| "pandn %%mm0, %%mm4 \n\t" | |||||
| "pandn %%mm1, %%mm5 \n\t" | |||||
| "pxor %%mm4, %%mm7 \n\t" | |||||
| "pxor %%mm5, %%mm7 \n\t" | |||||
| "movq %%mm4, (%0, %%"REG_a") \n\t" | |||||
| "movq %%mm5, 8(%0, %%"REG_a") \n\t" | |||||
| "add $16, %%"REG_a" \n\t" | |||||
| "jng 1b \n\t" | |||||
| "movd 124(%0, %3), %%mm0 \n\t" | |||||
| "movq %%mm7, %%mm6 \n\t" | |||||
| "psrlq $32, %%mm7 \n\t" | |||||
| "pxor %%mm6, %%mm7 \n\t" | |||||
| "movq %%mm7, %%mm6 \n\t" | |||||
| "psrlq $16, %%mm7 \n\t" | |||||
| "pxor %%mm6, %%mm7 \n\t" | |||||
| "pslld $31, %%mm7 \n\t" | |||||
| "psrlq $15, %%mm7 \n\t" | |||||
| "pxor %%mm7, %%mm0 \n\t" | |||||
| "movd %%mm0, 124(%0, %3) \n\t" | |||||
| ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "r" (-2*nCoeffs) | |||||
| : "%"REG_a, "memory" | |||||
| ); | |||||
| } | } | ||||
| /* draw the edges of width 'w' of an image of size width, height | /* draw the edges of width 'w' of an image of size width, height | ||||
| @@ -488,79 +488,79 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w) | |||||
| ptr = buf; | ptr = buf; | ||||
| if(w==8) | if(w==8) | ||||
| { | { | ||||
| asm volatile( | |||||
| "1: \n\t" | |||||
| "movd (%0), %%mm0 \n\t" | |||||
| "punpcklbw %%mm0, %%mm0 \n\t" | |||||
| "punpcklwd %%mm0, %%mm0 \n\t" | |||||
| "punpckldq %%mm0, %%mm0 \n\t" | |||||
| "movq %%mm0, -8(%0) \n\t" | |||||
| "movq -8(%0, %2), %%mm1 \n\t" | |||||
| "punpckhbw %%mm1, %%mm1 \n\t" | |||||
| "punpckhwd %%mm1, %%mm1 \n\t" | |||||
| "punpckhdq %%mm1, %%mm1 \n\t" | |||||
| "movq %%mm1, (%0, %2) \n\t" | |||||
| "add %1, %0 \n\t" | |||||
| "cmp %3, %0 \n\t" | |||||
| " jb 1b \n\t" | |||||
| : "+r" (ptr) | |||||
| : "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height) | |||||
| ); | |||||
| asm volatile( | |||||
| "1: \n\t" | |||||
| "movd (%0), %%mm0 \n\t" | |||||
| "punpcklbw %%mm0, %%mm0 \n\t" | |||||
| "punpcklwd %%mm0, %%mm0 \n\t" | |||||
| "punpckldq %%mm0, %%mm0 \n\t" | |||||
| "movq %%mm0, -8(%0) \n\t" | |||||
| "movq -8(%0, %2), %%mm1 \n\t" | |||||
| "punpckhbw %%mm1, %%mm1 \n\t" | |||||
| "punpckhwd %%mm1, %%mm1 \n\t" | |||||
| "punpckhdq %%mm1, %%mm1 \n\t" | |||||
| "movq %%mm1, (%0, %2) \n\t" | |||||
| "add %1, %0 \n\t" | |||||
| "cmp %3, %0 \n\t" | |||||
| " jb 1b \n\t" | |||||
| : "+r" (ptr) | |||||
| : "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height) | |||||
| ); | |||||
| } | } | ||||
| else | else | ||||
| { | { | ||||
| asm volatile( | |||||
| "1: \n\t" | |||||
| "movd (%0), %%mm0 \n\t" | |||||
| "punpcklbw %%mm0, %%mm0 \n\t" | |||||
| "punpcklwd %%mm0, %%mm0 \n\t" | |||||
| "punpckldq %%mm0, %%mm0 \n\t" | |||||
| "movq %%mm0, -8(%0) \n\t" | |||||
| "movq %%mm0, -16(%0) \n\t" | |||||
| "movq -8(%0, %2), %%mm1 \n\t" | |||||
| "punpckhbw %%mm1, %%mm1 \n\t" | |||||
| "punpckhwd %%mm1, %%mm1 \n\t" | |||||
| "punpckhdq %%mm1, %%mm1 \n\t" | |||||
| "movq %%mm1, (%0, %2) \n\t" | |||||
| "movq %%mm1, 8(%0, %2) \n\t" | |||||
| "add %1, %0 \n\t" | |||||
| "cmp %3, %0 \n\t" | |||||
| " jb 1b \n\t" | |||||
| : "+r" (ptr) | |||||
| : "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height) | |||||
| ); | |||||
| asm volatile( | |||||
| "1: \n\t" | |||||
| "movd (%0), %%mm0 \n\t" | |||||
| "punpcklbw %%mm0, %%mm0 \n\t" | |||||
| "punpcklwd %%mm0, %%mm0 \n\t" | |||||
| "punpckldq %%mm0, %%mm0 \n\t" | |||||
| "movq %%mm0, -8(%0) \n\t" | |||||
| "movq %%mm0, -16(%0) \n\t" | |||||
| "movq -8(%0, %2), %%mm1 \n\t" | |||||
| "punpckhbw %%mm1, %%mm1 \n\t" | |||||
| "punpckhwd %%mm1, %%mm1 \n\t" | |||||
| "punpckhdq %%mm1, %%mm1 \n\t" | |||||
| "movq %%mm1, (%0, %2) \n\t" | |||||
| "movq %%mm1, 8(%0, %2) \n\t" | |||||
| "add %1, %0 \n\t" | |||||
| "cmp %3, %0 \n\t" | |||||
| " jb 1b \n\t" | |||||
| : "+r" (ptr) | |||||
| : "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height) | |||||
| ); | |||||
| } | } | ||||
| for(i=0;i<w;i+=4) { | for(i=0;i<w;i+=4) { | ||||
| /* top and bottom (and hopefully also the corners) */ | /* top and bottom (and hopefully also the corners) */ | ||||
| ptr= buf - (i + 1) * wrap - w; | |||||
| asm volatile( | |||||
| "1: \n\t" | |||||
| "movq (%1, %0), %%mm0 \n\t" | |||||
| "movq %%mm0, (%0) \n\t" | |||||
| "movq %%mm0, (%0, %2) \n\t" | |||||
| "movq %%mm0, (%0, %2, 2) \n\t" | |||||
| "movq %%mm0, (%0, %3) \n\t" | |||||
| "add $8, %0 \n\t" | |||||
| "cmp %4, %0 \n\t" | |||||
| " jb 1b \n\t" | |||||
| : "+r" (ptr) | |||||
| : "r" ((long)buf - (long)ptr - w), "r" ((long)-wrap), "r" ((long)-wrap*3), "r" (ptr+width+2*w) | |||||
| ); | |||||
| ptr= last_line + (i + 1) * wrap - w; | |||||
| asm volatile( | |||||
| "1: \n\t" | |||||
| "movq (%1, %0), %%mm0 \n\t" | |||||
| "movq %%mm0, (%0) \n\t" | |||||
| "movq %%mm0, (%0, %2) \n\t" | |||||
| "movq %%mm0, (%0, %2, 2) \n\t" | |||||
| "movq %%mm0, (%0, %3) \n\t" | |||||
| "add $8, %0 \n\t" | |||||
| "cmp %4, %0 \n\t" | |||||
| " jb 1b \n\t" | |||||
| : "+r" (ptr) | |||||
| : "r" ((long)last_line - (long)ptr - w), "r" ((long)wrap), "r" ((long)wrap*3), "r" (ptr+width+2*w) | |||||
| ); | |||||
| ptr= buf - (i + 1) * wrap - w; | |||||
| asm volatile( | |||||
| "1: \n\t" | |||||
| "movq (%1, %0), %%mm0 \n\t" | |||||
| "movq %%mm0, (%0) \n\t" | |||||
| "movq %%mm0, (%0, %2) \n\t" | |||||
| "movq %%mm0, (%0, %2, 2) \n\t" | |||||
| "movq %%mm0, (%0, %3) \n\t" | |||||
| "add $8, %0 \n\t" | |||||
| "cmp %4, %0 \n\t" | |||||
| " jb 1b \n\t" | |||||
| : "+r" (ptr) | |||||
| : "r" ((long)buf - (long)ptr - w), "r" ((long)-wrap), "r" ((long)-wrap*3), "r" (ptr+width+2*w) | |||||
| ); | |||||
| ptr= last_line + (i + 1) * wrap - w; | |||||
| asm volatile( | |||||
| "1: \n\t" | |||||
| "movq (%1, %0), %%mm0 \n\t" | |||||
| "movq %%mm0, (%0) \n\t" | |||||
| "movq %%mm0, (%0, %2) \n\t" | |||||
| "movq %%mm0, (%0, %2, 2) \n\t" | |||||
| "movq %%mm0, (%0, %3) \n\t" | |||||
| "add $8, %0 \n\t" | |||||
| "cmp %4, %0 \n\t" | |||||
| " jb 1b \n\t" | |||||
| : "+r" (ptr) | |||||
| : "r" ((long)last_line - (long)ptr - w), "r" ((long)wrap), "r" ((long)wrap*3), "r" (ptr+width+2*w) | |||||
| ); | |||||
| } | } | ||||
| } | } | ||||
| @@ -572,47 +572,47 @@ static void denoise_dct_mmx(MpegEncContext *s, DCTELEM *block){ | |||||
| s->dct_count[intra]++; | s->dct_count[intra]++; | ||||
| asm volatile( | asm volatile( | ||||
| "pxor %%mm7, %%mm7 \n\t" | |||||
| "1: \n\t" | |||||
| "pxor %%mm0, %%mm0 \n\t" | |||||
| "pxor %%mm1, %%mm1 \n\t" | |||||
| "movq (%0), %%mm2 \n\t" | |||||
| "movq 8(%0), %%mm3 \n\t" | |||||
| "pcmpgtw %%mm2, %%mm0 \n\t" | |||||
| "pcmpgtw %%mm3, %%mm1 \n\t" | |||||
| "pxor %%mm0, %%mm2 \n\t" | |||||
| "pxor %%mm1, %%mm3 \n\t" | |||||
| "psubw %%mm0, %%mm2 \n\t" | |||||
| "psubw %%mm1, %%mm3 \n\t" | |||||
| "movq %%mm2, %%mm4 \n\t" | |||||
| "movq %%mm3, %%mm5 \n\t" | |||||
| "psubusw (%2), %%mm2 \n\t" | |||||
| "psubusw 8(%2), %%mm3 \n\t" | |||||
| "pxor %%mm0, %%mm2 \n\t" | |||||
| "pxor %%mm1, %%mm3 \n\t" | |||||
| "psubw %%mm0, %%mm2 \n\t" | |||||
| "psubw %%mm1, %%mm3 \n\t" | |||||
| "movq %%mm2, (%0) \n\t" | |||||
| "movq %%mm3, 8(%0) \n\t" | |||||
| "movq %%mm4, %%mm2 \n\t" | |||||
| "movq %%mm5, %%mm3 \n\t" | |||||
| "punpcklwd %%mm7, %%mm4 \n\t" | |||||
| "punpckhwd %%mm7, %%mm2 \n\t" | |||||
| "punpcklwd %%mm7, %%mm5 \n\t" | |||||
| "punpckhwd %%mm7, %%mm3 \n\t" | |||||
| "paddd (%1), %%mm4 \n\t" | |||||
| "paddd 8(%1), %%mm2 \n\t" | |||||
| "paddd 16(%1), %%mm5 \n\t" | |||||
| "paddd 24(%1), %%mm3 \n\t" | |||||
| "movq %%mm4, (%1) \n\t" | |||||
| "movq %%mm2, 8(%1) \n\t" | |||||
| "movq %%mm5, 16(%1) \n\t" | |||||
| "movq %%mm3, 24(%1) \n\t" | |||||
| "add $16, %0 \n\t" | |||||
| "add $32, %1 \n\t" | |||||
| "add $16, %2 \n\t" | |||||
| "cmp %3, %0 \n\t" | |||||
| " jb 1b \n\t" | |||||
| "pxor %%mm7, %%mm7 \n\t" | |||||
| "1: \n\t" | |||||
| "pxor %%mm0, %%mm0 \n\t" | |||||
| "pxor %%mm1, %%mm1 \n\t" | |||||
| "movq (%0), %%mm2 \n\t" | |||||
| "movq 8(%0), %%mm3 \n\t" | |||||
| "pcmpgtw %%mm2, %%mm0 \n\t" | |||||
| "pcmpgtw %%mm3, %%mm1 \n\t" | |||||
| "pxor %%mm0, %%mm2 \n\t" | |||||
| "pxor %%mm1, %%mm3 \n\t" | |||||
| "psubw %%mm0, %%mm2 \n\t" | |||||
| "psubw %%mm1, %%mm3 \n\t" | |||||
| "movq %%mm2, %%mm4 \n\t" | |||||
| "movq %%mm3, %%mm5 \n\t" | |||||
| "psubusw (%2), %%mm2 \n\t" | |||||
| "psubusw 8(%2), %%mm3 \n\t" | |||||
| "pxor %%mm0, %%mm2 \n\t" | |||||
| "pxor %%mm1, %%mm3 \n\t" | |||||
| "psubw %%mm0, %%mm2 \n\t" | |||||
| "psubw %%mm1, %%mm3 \n\t" | |||||
| "movq %%mm2, (%0) \n\t" | |||||
| "movq %%mm3, 8(%0) \n\t" | |||||
| "movq %%mm4, %%mm2 \n\t" | |||||
| "movq %%mm5, %%mm3 \n\t" | |||||
| "punpcklwd %%mm7, %%mm4 \n\t" | |||||
| "punpckhwd %%mm7, %%mm2 \n\t" | |||||
| "punpcklwd %%mm7, %%mm5 \n\t" | |||||
| "punpckhwd %%mm7, %%mm3 \n\t" | |||||
| "paddd (%1), %%mm4 \n\t" | |||||
| "paddd 8(%1), %%mm2 \n\t" | |||||
| "paddd 16(%1), %%mm5 \n\t" | |||||
| "paddd 24(%1), %%mm3 \n\t" | |||||
| "movq %%mm4, (%1) \n\t" | |||||
| "movq %%mm2, 8(%1) \n\t" | |||||
| "movq %%mm5, 16(%1) \n\t" | |||||
| "movq %%mm3, 24(%1) \n\t" | |||||
| "add $16, %0 \n\t" | |||||
| "add $32, %1 \n\t" | |||||
| "add $16, %2 \n\t" | |||||
| "cmp %3, %0 \n\t" | |||||
| " jb 1b \n\t" | |||||
| : "+r" (block), "+r" (sum), "+r" (offset) | : "+r" (block), "+r" (sum), "+r" (offset) | ||||
| : "r"(block+64) | : "r"(block+64) | ||||
| ); | ); | ||||
| @@ -626,47 +626,47 @@ static void denoise_dct_sse2(MpegEncContext *s, DCTELEM *block){ | |||||
| s->dct_count[intra]++; | s->dct_count[intra]++; | ||||
| asm volatile( | asm volatile( | ||||
| "pxor %%xmm7, %%xmm7 \n\t" | |||||
| "1: \n\t" | |||||
| "pxor %%xmm0, %%xmm0 \n\t" | |||||
| "pxor %%xmm1, %%xmm1 \n\t" | |||||
| "movdqa (%0), %%xmm2 \n\t" | |||||
| "movdqa 16(%0), %%xmm3 \n\t" | |||||
| "pcmpgtw %%xmm2, %%xmm0 \n\t" | |||||
| "pcmpgtw %%xmm3, %%xmm1 \n\t" | |||||
| "pxor %%xmm0, %%xmm2 \n\t" | |||||
| "pxor %%xmm1, %%xmm3 \n\t" | |||||
| "psubw %%xmm0, %%xmm2 \n\t" | |||||
| "psubw %%xmm1, %%xmm3 \n\t" | |||||
| "movdqa %%xmm2, %%xmm4 \n\t" | |||||
| "movdqa %%xmm3, %%xmm5 \n\t" | |||||
| "psubusw (%2), %%xmm2 \n\t" | |||||
| "psubusw 16(%2), %%xmm3 \n\t" | |||||
| "pxor %%xmm0, %%xmm2 \n\t" | |||||
| "pxor %%xmm1, %%xmm3 \n\t" | |||||
| "psubw %%xmm0, %%xmm2 \n\t" | |||||
| "psubw %%xmm1, %%xmm3 \n\t" | |||||
| "movdqa %%xmm2, (%0) \n\t" | |||||
| "movdqa %%xmm3, 16(%0) \n\t" | |||||
| "movdqa %%xmm4, %%xmm6 \n\t" | |||||
| "movdqa %%xmm5, %%xmm0 \n\t" | |||||
| "punpcklwd %%xmm7, %%xmm4 \n\t" | |||||
| "punpckhwd %%xmm7, %%xmm6 \n\t" | |||||
| "punpcklwd %%xmm7, %%xmm5 \n\t" | |||||
| "punpckhwd %%xmm7, %%xmm0 \n\t" | |||||
| "paddd (%1), %%xmm4 \n\t" | |||||
| "paddd 16(%1), %%xmm6 \n\t" | |||||
| "paddd 32(%1), %%xmm5 \n\t" | |||||
| "paddd 48(%1), %%xmm0 \n\t" | |||||
| "movdqa %%xmm4, (%1) \n\t" | |||||
| "movdqa %%xmm6, 16(%1) \n\t" | |||||
| "movdqa %%xmm5, 32(%1) \n\t" | |||||
| "movdqa %%xmm0, 48(%1) \n\t" | |||||
| "add $32, %0 \n\t" | |||||
| "add $64, %1 \n\t" | |||||
| "add $32, %2 \n\t" | |||||
| "cmp %3, %0 \n\t" | |||||
| " jb 1b \n\t" | |||||
| "pxor %%xmm7, %%xmm7 \n\t" | |||||
| "1: \n\t" | |||||
| "pxor %%xmm0, %%xmm0 \n\t" | |||||
| "pxor %%xmm1, %%xmm1 \n\t" | |||||
| "movdqa (%0), %%xmm2 \n\t" | |||||
| "movdqa 16(%0), %%xmm3 \n\t" | |||||
| "pcmpgtw %%xmm2, %%xmm0 \n\t" | |||||
| "pcmpgtw %%xmm3, %%xmm1 \n\t" | |||||
| "pxor %%xmm0, %%xmm2 \n\t" | |||||
| "pxor %%xmm1, %%xmm3 \n\t" | |||||
| "psubw %%xmm0, %%xmm2 \n\t" | |||||
| "psubw %%xmm1, %%xmm3 \n\t" | |||||
| "movdqa %%xmm2, %%xmm4 \n\t" | |||||
| "movdqa %%xmm3, %%xmm5 \n\t" | |||||
| "psubusw (%2), %%xmm2 \n\t" | |||||
| "psubusw 16(%2), %%xmm3 \n\t" | |||||
| "pxor %%xmm0, %%xmm2 \n\t" | |||||
| "pxor %%xmm1, %%xmm3 \n\t" | |||||
| "psubw %%xmm0, %%xmm2 \n\t" | |||||
| "psubw %%xmm1, %%xmm3 \n\t" | |||||
| "movdqa %%xmm2, (%0) \n\t" | |||||
| "movdqa %%xmm3, 16(%0) \n\t" | |||||
| "movdqa %%xmm4, %%xmm6 \n\t" | |||||
| "movdqa %%xmm5, %%xmm0 \n\t" | |||||
| "punpcklwd %%xmm7, %%xmm4 \n\t" | |||||
| "punpckhwd %%xmm7, %%xmm6 \n\t" | |||||
| "punpcklwd %%xmm7, %%xmm5 \n\t" | |||||
| "punpckhwd %%xmm7, %%xmm0 \n\t" | |||||
| "paddd (%1), %%xmm4 \n\t" | |||||
| "paddd 16(%1), %%xmm6 \n\t" | |||||
| "paddd 32(%1), %%xmm5 \n\t" | |||||
| "paddd 48(%1), %%xmm0 \n\t" | |||||
| "movdqa %%xmm4, (%1) \n\t" | |||||
| "movdqa %%xmm6, 16(%1) \n\t" | |||||
| "movdqa %%xmm5, 32(%1) \n\t" | |||||
| "movdqa %%xmm0, 48(%1) \n\t" | |||||
| "add $32, %0 \n\t" | |||||
| "add $64, %1 \n\t" | |||||
| "add $32, %2 \n\t" | |||||
| "cmp %3, %0 \n\t" | |||||
| " jb 1b \n\t" | |||||
| : "+r" (block), "+r" (sum), "+r" (offset) | : "+r" (block), "+r" (sum), "+r" (offset) | ||||
| : "r"(block+64) | : "r"(block+64) | ||||
| ); | ); | ||||
| @@ -705,10 +705,10 @@ void MPV_common_init_mmx(MpegEncContext *s) | |||||
| draw_edges = draw_edges_mmx; | draw_edges = draw_edges_mmx; | ||||
| if (mm_flags & MM_SSE2) { | if (mm_flags & MM_SSE2) { | ||||
| s->denoise_dct= denoise_dct_sse2; | |||||
| } else { | |||||
| s->denoise_dct= denoise_dct_mmx; | |||||
| } | |||||
| s->denoise_dct= denoise_dct_sse2; | |||||
| } else { | |||||
| s->denoise_dct= denoise_dct_mmx; | |||||
| } | |||||
| if(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX){ | if(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX){ | ||||
| if(mm_flags & MM_SSE2){ | if(mm_flags & MM_SSE2){ | ||||
| @@ -21,26 +21,26 @@ | |||||
| #undef PMAXW | #undef PMAXW | ||||
| #ifdef HAVE_MMX2 | #ifdef HAVE_MMX2 | ||||
| #define SPREADW(a) "pshufw $0, " #a ", " #a " \n\t" | #define SPREADW(a) "pshufw $0, " #a ", " #a " \n\t" | ||||
| #define PMAXW(a,b) "pmaxsw " #a ", " #b " \n\t" | |||||
| #define PMAXW(a,b) "pmaxsw " #a ", " #b " \n\t" | |||||
| #define PMAX(a,b) \ | #define PMAX(a,b) \ | ||||
| "pshufw $0x0E," #a ", " #b " \n\t"\ | |||||
| PMAXW(b, a)\ | |||||
| "pshufw $0x01," #a ", " #b " \n\t"\ | |||||
| PMAXW(b, a) | |||||
| "pshufw $0x0E," #a ", " #b " \n\t"\ | |||||
| PMAXW(b, a)\ | |||||
| "pshufw $0x01," #a ", " #b " \n\t"\ | |||||
| PMAXW(b, a) | |||||
| #else | #else | ||||
| #define SPREADW(a) \ | #define SPREADW(a) \ | ||||
| "punpcklwd " #a ", " #a " \n\t"\ | |||||
| "punpcklwd " #a ", " #a " \n\t" | |||||
| "punpcklwd " #a ", " #a " \n\t"\ | |||||
| "punpcklwd " #a ", " #a " \n\t" | |||||
| #define PMAXW(a,b) \ | #define PMAXW(a,b) \ | ||||
| "psubusw " #a ", " #b " \n\t"\ | |||||
| "paddw " #a ", " #b " \n\t" | |||||
| "psubusw " #a ", " #b " \n\t"\ | |||||
| "paddw " #a ", " #b " \n\t" | |||||
| #define PMAX(a,b) \ | #define PMAX(a,b) \ | ||||
| "movq " #a ", " #b " \n\t"\ | |||||
| "psrlq $32, " #a " \n\t"\ | |||||
| PMAXW(b, a)\ | |||||
| "movq " #a ", " #b " \n\t"\ | |||||
| "psrlq $16, " #a " \n\t"\ | |||||
| PMAXW(b, a) | |||||
| "movq " #a ", " #b " \n\t"\ | |||||
| "psrlq $32, " #a " \n\t"\ | |||||
| PMAXW(b, a)\ | |||||
| "movq " #a ", " #b " \n\t"\ | |||||
| "psrlq $16, " #a " \n\t"\ | |||||
| PMAXW(b, a) | |||||
| #endif | #endif | ||||
| @@ -71,18 +71,18 @@ static int RENAME(dct_quantize)(MpegEncContext *s, | |||||
| if (!s->h263_aic) { | if (!s->h263_aic) { | ||||
| #if 1 | #if 1 | ||||
| asm volatile ( | asm volatile ( | ||||
| "mul %%ecx \n\t" | |||||
| : "=d" (level), "=a"(dummy) | |||||
| : "a" ((block[0]>>2) + q), "c" (inverse[q<<1]) | |||||
| "mul %%ecx \n\t" | |||||
| : "=d" (level), "=a"(dummy) | |||||
| : "a" ((block[0]>>2) + q), "c" (inverse[q<<1]) | |||||
| ); | ); | ||||
| #else | #else | ||||
| asm volatile ( | asm volatile ( | ||||
| "xorl %%edx, %%edx \n\t" | |||||
| "divw %%cx \n\t" | |||||
| "movzwl %%ax, %%eax \n\t" | |||||
| : "=a" (level) | |||||
| : "a" ((block[0]>>2) + q), "c" (q<<1) | |||||
| : "%edx" | |||||
| "xorl %%edx, %%edx \n\t" | |||||
| "divw %%cx \n\t" | |||||
| "movzwl %%ax, %%eax \n\t" | |||||
| : "=a" (level) | |||||
| : "a" ((block[0]>>2) + q), "c" (q<<1) | |||||
| : "%edx" | |||||
| ); | ); | ||||
| #endif | #endif | ||||
| } else | } else | ||||
| @@ -103,94 +103,94 @@ static int RENAME(dct_quantize)(MpegEncContext *s, | |||||
| if((s->out_format == FMT_H263 || s->out_format == FMT_H261) && s->mpeg_quant==0){ | if((s->out_format == FMT_H263 || s->out_format == FMT_H261) && s->mpeg_quant==0){ | ||||
| asm volatile( | asm volatile( | ||||
| "movd %%"REG_a", %%mm3 \n\t" // last_non_zero_p1 | |||||
| "movd %%"REG_a", %%mm3 \n\t" // last_non_zero_p1 | |||||
| SPREADW(%%mm3) | SPREADW(%%mm3) | ||||
| "pxor %%mm7, %%mm7 \n\t" // 0 | |||||
| "pxor %%mm4, %%mm4 \n\t" // 0 | |||||
| "movq (%2), %%mm5 \n\t" // qmat[0] | |||||
| "pxor %%mm6, %%mm6 \n\t" | |||||
| "psubw (%3), %%mm6 \n\t" // -bias[0] | |||||
| "mov $-128, %%"REG_a" \n\t" | |||||
| ".balign 16 \n\t" | |||||
| "1: \n\t" | |||||
| "pxor %%mm1, %%mm1 \n\t" // 0 | |||||
| "movq (%1, %%"REG_a"), %%mm0 \n\t" // block[i] | |||||
| "pcmpgtw %%mm0, %%mm1 \n\t" // block[i] <= 0 ? 0xFF : 0x00 | |||||
| "pxor %%mm1, %%mm0 \n\t" | |||||
| "psubw %%mm1, %%mm0 \n\t" // ABS(block[i]) | |||||
| "psubusw %%mm6, %%mm0 \n\t" // ABS(block[i]) + bias[0] | |||||
| "pmulhw %%mm5, %%mm0 \n\t" // (ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16 | |||||
| "por %%mm0, %%mm4 \n\t" | |||||
| "pxor %%mm1, %%mm0 \n\t" | |||||
| "psubw %%mm1, %%mm0 \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i]) | |||||
| "movq %%mm0, (%5, %%"REG_a") \n\t" | |||||
| "pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 0xFF : 0x00 | |||||
| "movq (%4, %%"REG_a"), %%mm1 \n\t" | |||||
| "movq %%mm7, (%1, %%"REG_a") \n\t" // 0 | |||||
| "pandn %%mm1, %%mm0 \n\t" | |||||
| PMAXW(%%mm0, %%mm3) | |||||
| "add $8, %%"REG_a" \n\t" | |||||
| " js 1b \n\t" | |||||
| PMAX(%%mm3, %%mm0) | |||||
| "movd %%mm3, %%"REG_a" \n\t" | |||||
| "movzb %%al, %%"REG_a" \n\t" // last_non_zero_p1 | |||||
| : "+a" (last_non_zero_p1) | |||||
| "pxor %%mm7, %%mm7 \n\t" // 0 | |||||
| "pxor %%mm4, %%mm4 \n\t" // 0 | |||||
| "movq (%2), %%mm5 \n\t" // qmat[0] | |||||
| "pxor %%mm6, %%mm6 \n\t" | |||||
| "psubw (%3), %%mm6 \n\t" // -bias[0] | |||||
| "mov $-128, %%"REG_a" \n\t" | |||||
| ".balign 16 \n\t" | |||||
| "1: \n\t" | |||||
| "pxor %%mm1, %%mm1 \n\t" // 0 | |||||
| "movq (%1, %%"REG_a"), %%mm0 \n\t" // block[i] | |||||
| "pcmpgtw %%mm0, %%mm1 \n\t" // block[i] <= 0 ? 0xFF : 0x00 | |||||
| "pxor %%mm1, %%mm0 \n\t" | |||||
| "psubw %%mm1, %%mm0 \n\t" // ABS(block[i]) | |||||
| "psubusw %%mm6, %%mm0 \n\t" // ABS(block[i]) + bias[0] | |||||
| "pmulhw %%mm5, %%mm0 \n\t" // (ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16 | |||||
| "por %%mm0, %%mm4 \n\t" | |||||
| "pxor %%mm1, %%mm0 \n\t" | |||||
| "psubw %%mm1, %%mm0 \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i]) | |||||
| "movq %%mm0, (%5, %%"REG_a") \n\t" | |||||
| "pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 0xFF : 0x00 | |||||
| "movq (%4, %%"REG_a"), %%mm1 \n\t" | |||||
| "movq %%mm7, (%1, %%"REG_a") \n\t" // 0 | |||||
| "pandn %%mm1, %%mm0 \n\t" | |||||
| PMAXW(%%mm0, %%mm3) | |||||
| "add $8, %%"REG_a" \n\t" | |||||
| " js 1b \n\t" | |||||
| PMAX(%%mm3, %%mm0) | |||||
| "movd %%mm3, %%"REG_a" \n\t" | |||||
| "movzb %%al, %%"REG_a" \n\t" // last_non_zero_p1 | |||||
| : "+a" (last_non_zero_p1) | |||||
| : "r" (block+64), "r" (qmat), "r" (bias), | : "r" (block+64), "r" (qmat), "r" (bias), | ||||
| "r" (inv_zigzag_direct16+64), "r" (temp_block+64) | "r" (inv_zigzag_direct16+64), "r" (temp_block+64) | ||||
| ); | ); | ||||
| // note the asm is split cuz gcc doesnt like that many operands ... | // note the asm is split cuz gcc doesnt like that many operands ... | ||||
| asm volatile( | asm volatile( | ||||
| "movd %1, %%mm1 \n\t" // max_qcoeff | |||||
| SPREADW(%%mm1) | |||||
| "psubusw %%mm1, %%mm4 \n\t" | |||||
| "packuswb %%mm4, %%mm4 \n\t" | |||||
| "movd %%mm4, %0 \n\t" // *overflow | |||||
| "movd %1, %%mm1 \n\t" // max_qcoeff | |||||
| SPREADW(%%mm1) | |||||
| "psubusw %%mm1, %%mm4 \n\t" | |||||
| "packuswb %%mm4, %%mm4 \n\t" | |||||
| "movd %%mm4, %0 \n\t" // *overflow | |||||
| : "=g" (*overflow) | : "=g" (*overflow) | ||||
| : "g" (s->max_qcoeff) | : "g" (s->max_qcoeff) | ||||
| ); | ); | ||||
| }else{ // FMT_H263 | }else{ // FMT_H263 | ||||
| asm volatile( | asm volatile( | ||||
| "movd %%"REG_a", %%mm3 \n\t" // last_non_zero_p1 | |||||
| "movd %%"REG_a", %%mm3 \n\t" // last_non_zero_p1 | |||||
| SPREADW(%%mm3) | SPREADW(%%mm3) | ||||
| "pxor %%mm7, %%mm7 \n\t" // 0 | |||||
| "pxor %%mm4, %%mm4 \n\t" // 0 | |||||
| "mov $-128, %%"REG_a" \n\t" | |||||
| ".balign 16 \n\t" | |||||
| "1: \n\t" | |||||
| "pxor %%mm1, %%mm1 \n\t" // 0 | |||||
| "movq (%1, %%"REG_a"), %%mm0 \n\t" // block[i] | |||||
| "pcmpgtw %%mm0, %%mm1 \n\t" // block[i] <= 0 ? 0xFF : 0x00 | |||||
| "pxor %%mm1, %%mm0 \n\t" | |||||
| "psubw %%mm1, %%mm0 \n\t" // ABS(block[i]) | |||||
| "movq (%3, %%"REG_a"), %%mm6 \n\t" // bias[0] | |||||
| "paddusw %%mm6, %%mm0 \n\t" // ABS(block[i]) + bias[0] | |||||
| "movq (%2, %%"REG_a"), %%mm5 \n\t" // qmat[i] | |||||
| "pmulhw %%mm5, %%mm0 \n\t" // (ABS(block[i])*qmat[0] + bias[0]*qmat[0])>>16 | |||||
| "por %%mm0, %%mm4 \n\t" | |||||
| "pxor %%mm1, %%mm0 \n\t" | |||||
| "psubw %%mm1, %%mm0 \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i]) | |||||
| "movq %%mm0, (%5, %%"REG_a") \n\t" | |||||
| "pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 0xFF : 0x00 | |||||
| "movq (%4, %%"REG_a"), %%mm1 \n\t" | |||||
| "movq %%mm7, (%1, %%"REG_a") \n\t" // 0 | |||||
| "pandn %%mm1, %%mm0 \n\t" | |||||
| PMAXW(%%mm0, %%mm3) | |||||
| "add $8, %%"REG_a" \n\t" | |||||
| " js 1b \n\t" | |||||
| PMAX(%%mm3, %%mm0) | |||||
| "movd %%mm3, %%"REG_a" \n\t" | |||||
| "movzb %%al, %%"REG_a" \n\t" // last_non_zero_p1 | |||||
| : "+a" (last_non_zero_p1) | |||||
| "pxor %%mm7, %%mm7 \n\t" // 0 | |||||
| "pxor %%mm4, %%mm4 \n\t" // 0 | |||||
| "mov $-128, %%"REG_a" \n\t" | |||||
| ".balign 16 \n\t" | |||||
| "1: \n\t" | |||||
| "pxor %%mm1, %%mm1 \n\t" // 0 | |||||
| "movq (%1, %%"REG_a"), %%mm0 \n\t" // block[i] | |||||
| "pcmpgtw %%mm0, %%mm1 \n\t" // block[i] <= 0 ? 0xFF : 0x00 | |||||
| "pxor %%mm1, %%mm0 \n\t" | |||||
| "psubw %%mm1, %%mm0 \n\t" // ABS(block[i]) | |||||
| "movq (%3, %%"REG_a"), %%mm6 \n\t" // bias[0] | |||||
| "paddusw %%mm6, %%mm0 \n\t" // ABS(block[i]) + bias[0] | |||||
| "movq (%2, %%"REG_a"), %%mm5 \n\t" // qmat[i] | |||||
| "pmulhw %%mm5, %%mm0 \n\t" // (ABS(block[i])*qmat[0] + bias[0]*qmat[0])>>16 | |||||
| "por %%mm0, %%mm4 \n\t" | |||||
| "pxor %%mm1, %%mm0 \n\t" | |||||
| "psubw %%mm1, %%mm0 \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i]) | |||||
| "movq %%mm0, (%5, %%"REG_a") \n\t" | |||||
| "pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 0xFF : 0x00 | |||||
| "movq (%4, %%"REG_a"), %%mm1 \n\t" | |||||
| "movq %%mm7, (%1, %%"REG_a") \n\t" // 0 | |||||
| "pandn %%mm1, %%mm0 \n\t" | |||||
| PMAXW(%%mm0, %%mm3) | |||||
| "add $8, %%"REG_a" \n\t" | |||||
| " js 1b \n\t" | |||||
| PMAX(%%mm3, %%mm0) | |||||
| "movd %%mm3, %%"REG_a" \n\t" | |||||
| "movzb %%al, %%"REG_a" \n\t" // last_non_zero_p1 | |||||
| : "+a" (last_non_zero_p1) | |||||
| : "r" (block+64), "r" (qmat+64), "r" (bias+64), | : "r" (block+64), "r" (qmat+64), "r" (bias+64), | ||||
| "r" (inv_zigzag_direct16+64), "r" (temp_block+64) | "r" (inv_zigzag_direct16+64), "r" (temp_block+64) | ||||
| ); | ); | ||||
| // note the asm is split cuz gcc doesnt like that many operands ... | // note the asm is split cuz gcc doesnt like that many operands ... | ||||
| asm volatile( | asm volatile( | ||||
| "movd %1, %%mm1 \n\t" // max_qcoeff | |||||
| SPREADW(%%mm1) | |||||
| "psubusw %%mm1, %%mm4 \n\t" | |||||
| "packuswb %%mm4, %%mm4 \n\t" | |||||
| "movd %%mm4, %0 \n\t" // *overflow | |||||
| "movd %1, %%mm1 \n\t" // max_qcoeff | |||||
| SPREADW(%%mm1) | |||||
| "psubusw %%mm1, %%mm4 \n\t" | |||||
| "packuswb %%mm4, %%mm4 \n\t" | |||||
| "movd %%mm4, %0 \n\t" // *overflow | |||||
| : "=g" (*overflow) | : "=g" (*overflow) | ||||
| : "g" (s->max_qcoeff) | : "g" (s->max_qcoeff) | ||||
| ); | ); | ||||
| @@ -257,13 +257,13 @@ enum PixelFormat avcodec_get_pix_fmt(const char* name) | |||||
| for (i=0; i < PIX_FMT_NB; i++) | for (i=0; i < PIX_FMT_NB; i++) | ||||
| if (!strcmp(pix_fmt_info[i].name, name)) | if (!strcmp(pix_fmt_info[i].name, name)) | ||||
| break; | |||||
| break; | |||||
| return i; | return i; | ||||
| } | } | ||||
| /* Picture field are filled with 'ptr' addresses. Also return size */ | /* Picture field are filled with 'ptr' addresses. Also return size */ | ||||
| int avpicture_fill(AVPicture *picture, uint8_t *ptr, | int avpicture_fill(AVPicture *picture, uint8_t *ptr, | ||||
| int pix_fmt, int width, int height) | |||||
| int pix_fmt, int width, int height) | |||||
| { | { | ||||
| int size, w2, h2, size2; | int size, w2, h2, size2; | ||||
| PixFmtInfo *pinfo; | PixFmtInfo *pinfo; | ||||
| @@ -373,36 +373,36 @@ int avpicture_layout(const AVPicture* src, int pix_fmt, int width, int height, | |||||
| pix_fmt == PIX_FMT_RGB565 || | pix_fmt == PIX_FMT_RGB565 || | ||||
| pix_fmt == PIX_FMT_RGB555) | pix_fmt == PIX_FMT_RGB555) | ||||
| w = width * 2; | w = width * 2; | ||||
| else if (pix_fmt == PIX_FMT_UYVY411) | |||||
| w = width + width/2; | |||||
| else if (pix_fmt == PIX_FMT_PAL8) | |||||
| w = width; | |||||
| else | |||||
| w = width * (pf->depth * pf->nb_channels / 8); | |||||
| data_planes = 1; | |||||
| h = height; | |||||
| else if (pix_fmt == PIX_FMT_UYVY411) | |||||
| w = width + width/2; | |||||
| else if (pix_fmt == PIX_FMT_PAL8) | |||||
| w = width; | |||||
| else | |||||
| w = width * (pf->depth * pf->nb_channels / 8); | |||||
| data_planes = 1; | |||||
| h = height; | |||||
| } else { | } else { | ||||
| data_planes = pf->nb_channels; | data_planes = pf->nb_channels; | ||||
| w = (width*pf->depth + 7)/8; | |||||
| h = height; | |||||
| w = (width*pf->depth + 7)/8; | |||||
| h = height; | |||||
| } | } | ||||
| for (i=0; i<data_planes; i++) { | for (i=0; i<data_planes; i++) { | ||||
| if (i == 1) { | if (i == 1) { | ||||
| w = width >> pf->x_chroma_shift; | |||||
| h = height >> pf->y_chroma_shift; | |||||
| } | |||||
| w = width >> pf->x_chroma_shift; | |||||
| h = height >> pf->y_chroma_shift; | |||||
| } | |||||
| s = src->data[i]; | s = src->data[i]; | ||||
| for(j=0; j<h; j++) { | |||||
| memcpy(dest, s, w); | |||||
| dest += w; | |||||
| s += src->linesize[i]; | |||||
| } | |||||
| for(j=0; j<h; j++) { | |||||
| memcpy(dest, s, w); | |||||
| dest += w; | |||||
| s += src->linesize[i]; | |||||
| } | |||||
| } | } | ||||
| if (pf->pixel_type == FF_PIXEL_PALETTE) | if (pf->pixel_type == FF_PIXEL_PALETTE) | ||||
| memcpy((unsigned char *)(((size_t)dest + 3) & ~3), src->data[1], 256 * 4); | |||||
| memcpy((unsigned char *)(((size_t)dest + 3) & ~3), src->data[1], 256 * 4); | |||||
| return size; | return size; | ||||
| } | } | ||||
| @@ -486,9 +486,9 @@ static int avg_bits_per_pixel(int pix_fmt) | |||||
| case PIX_FMT_RGB555: | case PIX_FMT_RGB555: | ||||
| bits = 16; | bits = 16; | ||||
| break; | break; | ||||
| case PIX_FMT_UYVY411: | |||||
| bits = 12; | |||||
| break; | |||||
| case PIX_FMT_UYVY411: | |||||
| bits = 12; | |||||
| break; | |||||
| default: | default: | ||||
| bits = pf->depth * pf->nb_channels; | bits = pf->depth * pf->nb_channels; | ||||
| break; | break; | ||||
| @@ -604,9 +604,9 @@ void img_copy(AVPicture *dst, const AVPicture *src, | |||||
| case PIX_FMT_RGB555: | case PIX_FMT_RGB555: | ||||
| bits = 16; | bits = 16; | ||||
| break; | break; | ||||
| case PIX_FMT_UYVY411: | |||||
| bits = 12; | |||||
| break; | |||||
| case PIX_FMT_UYVY411: | |||||
| bits = 12; | |||||
| break; | |||||
| default: | default: | ||||
| bits = pf->depth * pf->nb_channels; | bits = pf->depth * pf->nb_channels; | ||||
| break; | break; | ||||
| @@ -910,11 +910,11 @@ static void uyvy411_to_yuv411p(AVPicture *dst, const AVPicture *src, | |||||
| cr = cr1; | cr = cr1; | ||||
| for(w = width; w >= 4; w -= 4) { | for(w = width; w >= 4; w -= 4) { | ||||
| cb[0] = p[0]; | cb[0] = p[0]; | ||||
| lum[0] = p[1]; | |||||
| lum[0] = p[1]; | |||||
| lum[1] = p[2]; | lum[1] = p[2]; | ||||
| cr[0] = p[3]; | cr[0] = p[3]; | ||||
| lum[2] = p[4]; | |||||
| lum[3] = p[5]; | |||||
| lum[2] = p[4]; | |||||
| lum[3] = p[5]; | |||||
| p += 6; | p += 6; | ||||
| lum += 4; | lum += 4; | ||||
| cb++; | cb++; | ||||
| @@ -996,7 +996,7 @@ static void yuv420p_to_uyvy422(AVPicture *dst, const AVPicture *src, | |||||
| #define SCALEBITS 10 | #define SCALEBITS 10 | ||||
| #define ONE_HALF (1 << (SCALEBITS - 1)) | #define ONE_HALF (1 << (SCALEBITS - 1)) | ||||
| #define FIX(x) ((int) ((x) * (1<<SCALEBITS) + 0.5)) | |||||
| #define FIX(x) ((int) ((x) * (1<<SCALEBITS) + 0.5)) | |||||
| #define YUV_TO_RGB1_CCIR(cb1, cr1)\ | #define YUV_TO_RGB1_CCIR(cb1, cr1)\ | ||||
| {\ | {\ | ||||
| @@ -1046,7 +1046,7 @@ static void yuv420p_to_uyvy422(AVPicture *dst, const AVPicture *src, | |||||
| static inline int C_JPEG_TO_CCIR(int y) { | static inline int C_JPEG_TO_CCIR(int y) { | ||||
| y = (((y - 128) * FIX(112.0/127.0) + (ONE_HALF + (128 << SCALEBITS))) >> SCALEBITS); | y = (((y - 128) * FIX(112.0/127.0) + (ONE_HALF + (128 << SCALEBITS))) >> SCALEBITS); | ||||
| if (y < 16) | if (y < 16) | ||||
| y = 16; | |||||
| y = 16; | |||||
| return y; | return y; | ||||
| } | } | ||||
| @@ -1681,7 +1681,7 @@ static void gray_to_monoblack(AVPicture *dst, const AVPicture *src, | |||||
| typedef struct ConvertEntry { | typedef struct ConvertEntry { | ||||
| void (*convert)(AVPicture *dst, | void (*convert)(AVPicture *dst, | ||||
| const AVPicture *src, int width, int height); | |||||
| const AVPicture *src, int width, int height); | |||||
| } ConvertEntry; | } ConvertEntry; | ||||
| /* Add each new convertion function in this table. In order to be able | /* Add each new convertion function in this table. In order to be able | ||||
| @@ -1721,7 +1721,7 @@ static ConvertEntry convert_table[PIX_FMT_NB][PIX_FMT_NB] = { | |||||
| [PIX_FMT_RGBA32] = { | [PIX_FMT_RGBA32] = { | ||||
| .convert = yuv420p_to_rgba32 | .convert = yuv420p_to_rgba32 | ||||
| }, | }, | ||||
| [PIX_FMT_UYVY422] = { | |||||
| [PIX_FMT_UYVY422] = { | |||||
| .convert = yuv420p_to_uyvy422, | .convert = yuv420p_to_uyvy422, | ||||
| }, | }, | ||||
| }, | }, | ||||
| @@ -2224,7 +2224,7 @@ static int get_alpha_info_pal8(const AVPicture *src, int width, int height) | |||||
| * @return ored mask of FF_ALPHA_xxx constants | * @return ored mask of FF_ALPHA_xxx constants | ||||
| */ | */ | ||||
| int img_get_alpha_info(const AVPicture *src, | int img_get_alpha_info(const AVPicture *src, | ||||
| int pix_fmt, int width, int height) | |||||
| int pix_fmt, int width, int height) | |||||
| { | { | ||||
| PixFmtInfo *pf = &pix_fmt_info[pix_fmt]; | PixFmtInfo *pf = &pix_fmt_info[pix_fmt]; | ||||
| int ret; | int ret; | ||||
| @@ -2300,10 +2300,10 @@ int img_get_alpha_info(const AVPicture *src, | |||||
| /* filter parameters: [-1 4 2 4 -1] // 8 */ | /* filter parameters: [-1 4 2 4 -1] // 8 */ | ||||
| static void deinterlace_line(uint8_t *dst, | static void deinterlace_line(uint8_t *dst, | ||||
| const uint8_t *lum_m4, const uint8_t *lum_m3, | |||||
| const uint8_t *lum_m2, const uint8_t *lum_m1, | |||||
| const uint8_t *lum, | |||||
| int size) | |||||
| const uint8_t *lum_m4, const uint8_t *lum_m3, | |||||
| const uint8_t *lum_m2, const uint8_t *lum_m1, | |||||
| const uint8_t *lum, | |||||
| int size) | |||||
| { | { | ||||
| #ifndef HAVE_MMX | #ifndef HAVE_MMX | ||||
| uint8_t *cm = cropTbl + MAX_NEG_CROP; | uint8_t *cm = cropTbl + MAX_NEG_CROP; | ||||
| @@ -2421,7 +2421,7 @@ static void deinterlace_bottom_field(uint8_t *dst, int dst_wrap, | |||||
| } | } | ||||
| static void deinterlace_bottom_field_inplace(uint8_t *src1, int src_wrap, | static void deinterlace_bottom_field_inplace(uint8_t *src1, int src_wrap, | ||||
| int width, int height) | |||||
| int width, int height) | |||||
| { | { | ||||
| uint8_t *src_m1, *src_0, *src_p1, *src_p2; | uint8_t *src_m1, *src_0, *src_p1, *src_p2; | ||||
| int y; | int y; | ||||
| @@ -2455,7 +2455,7 @@ int avpicture_deinterlace(AVPicture *dst, const AVPicture *src, | |||||
| if (pix_fmt != PIX_FMT_YUV420P && | if (pix_fmt != PIX_FMT_YUV420P && | ||||
| pix_fmt != PIX_FMT_YUV422P && | pix_fmt != PIX_FMT_YUV422P && | ||||
| pix_fmt != PIX_FMT_YUV444P && | pix_fmt != PIX_FMT_YUV444P && | ||||
| pix_fmt != PIX_FMT_YUV411P) | |||||
| pix_fmt != PIX_FMT_YUV411P) | |||||
| return -1; | return -1; | ||||
| if ((width & 3) != 0 || (height & 3) != 0) | if ((width & 3) != 0 || (height & 3) != 0) | ||||
| return -1; | return -1; | ||||
| @@ -821,7 +821,7 @@ static void glue(RGB_NAME, _to_pal8)(AVPicture *dst, const AVPicture *src, | |||||
| #ifdef RGBA_IN | #ifdef RGBA_IN | ||||
| static int glue(get_alpha_info_, RGB_NAME)(const AVPicture *src, | static int glue(get_alpha_info_, RGB_NAME)(const AVPicture *src, | ||||
| int width, int height) | |||||
| int width, int height) | |||||
| { | { | ||||
| const unsigned char *p; | const unsigned char *p; | ||||
| int src_wrap, ret, x, y; | int src_wrap, ret, x, y; | ||||
| @@ -64,8 +64,8 @@ static inline int get_phase(int pos) | |||||
| /* This function must be optimized */ | /* This function must be optimized */ | ||||
| static void h_resample_fast(uint8_t *dst, int dst_width, const uint8_t *src, | static void h_resample_fast(uint8_t *dst, int dst_width, const uint8_t *src, | ||||
| int src_width, int src_start, int src_incr, | |||||
| int16_t *filters) | |||||
| int src_width, int src_start, int src_incr, | |||||
| int16_t *filters) | |||||
| { | { | ||||
| int src_pos, phase, sum, i; | int src_pos, phase, sum, i; | ||||
| const uint8_t *s; | const uint8_t *s; | ||||
| @@ -108,7 +108,7 @@ static void h_resample_fast(uint8_t *dst, int dst_width, const uint8_t *src, | |||||
| /* This function must be optimized */ | /* This function must be optimized */ | ||||
| static void v_resample(uint8_t *dst, int dst_width, const uint8_t *src, | static void v_resample(uint8_t *dst, int dst_width, const uint8_t *src, | ||||
| int wrap, int16_t *filter) | |||||
| int wrap, int16_t *filter) | |||||
| { | { | ||||
| int sum, i; | int sum, i; | ||||
| const uint8_t *s; | const uint8_t *s; | ||||
| @@ -167,7 +167,7 @@ static void v_resample(uint8_t *dst, int dst_width, const uint8_t *src, | |||||
| /* XXX: do four pixels at a time */ | /* XXX: do four pixels at a time */ | ||||
| static void h_resample_fast4_mmx(uint8_t *dst, int dst_width, | static void h_resample_fast4_mmx(uint8_t *dst, int dst_width, | ||||
| const uint8_t *src, int src_width, | |||||
| const uint8_t *src, int src_width, | |||||
| int src_start, int src_incr, int16_t *filters) | int src_start, int src_incr, int16_t *filters) | ||||
| { | { | ||||
| int src_pos, phase; | int src_pos, phase; | ||||
| @@ -212,7 +212,7 @@ static void h_resample_fast4_mmx(uint8_t *dst, int dst_width, | |||||
| } | } | ||||
| static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src, | static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src, | ||||
| int wrap, int16_t *filter) | |||||
| int wrap, int16_t *filter) | |||||
| { | { | ||||
| int sum, i, v; | int sum, i, v; | ||||
| const uint8_t *s; | const uint8_t *s; | ||||
| @@ -277,18 +277,18 @@ static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src, | |||||
| #endif | #endif | ||||
| #ifdef HAVE_ALTIVEC | #ifdef HAVE_ALTIVEC | ||||
| typedef union { | |||||
| typedef union { | |||||
| vector unsigned char v; | vector unsigned char v; | ||||
| unsigned char c[16]; | unsigned char c[16]; | ||||
| } vec_uc_t; | } vec_uc_t; | ||||
| typedef union { | |||||
| typedef union { | |||||
| vector signed short v; | vector signed short v; | ||||
| signed short s[8]; | signed short s[8]; | ||||
| } vec_ss_t; | } vec_ss_t; | ||||
| void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src, | void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src, | ||||
| int wrap, int16_t *filter) | |||||
| int wrap, int16_t *filter) | |||||
| { | { | ||||
| int sum, i; | int sum, i; | ||||
| const uint8_t *s; | const uint8_t *s; | ||||
| @@ -405,7 +405,7 @@ void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src, | |||||
| /* slow version to handle limit cases. Does not need optimisation */ | /* slow version to handle limit cases. Does not need optimisation */ | ||||
| static void h_resample_slow(uint8_t *dst, int dst_width, | static void h_resample_slow(uint8_t *dst, int dst_width, | ||||
| const uint8_t *src, int src_width, | |||||
| const uint8_t *src, int src_width, | |||||
| int src_start, int src_incr, int16_t *filters) | int src_start, int src_incr, int16_t *filters) | ||||
| { | { | ||||
| int src_pos, phase, sum, j, v, i; | int src_pos, phase, sum, j, v, i; | ||||
| @@ -441,8 +441,8 @@ static void h_resample_slow(uint8_t *dst, int dst_width, | |||||
| } | } | ||||
| static void h_resample(uint8_t *dst, int dst_width, const uint8_t *src, | static void h_resample(uint8_t *dst, int dst_width, const uint8_t *src, | ||||
| int src_width, int src_start, int src_incr, | |||||
| int16_t *filters) | |||||
| int src_width, int src_start, int src_incr, | |||||
| int16_t *filters) | |||||
| { | { | ||||
| int n, src_end; | int n, src_end; | ||||
| @@ -559,7 +559,7 @@ ImgReSampleContext *img_resample_full_init(int owidth, int oheight, | |||||
| ImgReSampleContext *s; | ImgReSampleContext *s; | ||||
| if (!owidth || !oheight || !iwidth || !iheight) | if (!owidth || !oheight || !iwidth || !iheight) | ||||
| return NULL; | |||||
| return NULL; | |||||
| s = av_mallocz(sizeof(ImgReSampleContext)); | s = av_mallocz(sizeof(ImgReSampleContext)); | ||||
| if (!s) | if (!s) | ||||
| @@ -70,13 +70,13 @@ static void build_modpred(Indeo3DecodeContext *s) | |||||
| for (i=0; i < 128; ++i) { | for (i=0; i < 128; ++i) { | ||||
| s->ModPred[i+0*128] = (i > 126) ? 254 : 2*((i + 1) - ((i + 1) % 2)); | s->ModPred[i+0*128] = (i > 126) ? 254 : 2*((i + 1) - ((i + 1) % 2)); | ||||
| s->ModPred[i+1*128] = (i == 7) ? 20 : ((i == 119 || i == 120) | s->ModPred[i+1*128] = (i == 7) ? 20 : ((i == 119 || i == 120) | ||||
| ? 236 : 2*((i + 2) - ((i + 1) % 3))); | |||||
| ? 236 : 2*((i + 2) - ((i + 1) % 3))); | |||||
| s->ModPred[i+2*128] = (i > 125) ? 248 : 2*((i + 2) - ((i + 2) % 4)); | s->ModPred[i+2*128] = (i > 125) ? 248 : 2*((i + 2) - ((i + 2) % 4)); | ||||
| s->ModPred[i+3*128] = 2*((i + 1) - ((i - 3) % 5)); | |||||
| s->ModPred[i+3*128] = 2*((i + 1) - ((i - 3) % 5)); | |||||
| s->ModPred[i+4*128] = (i == 8) ? 20 : 2*((i + 1) - ((i - 3) % 6)); | s->ModPred[i+4*128] = (i == 8) ? 20 : 2*((i + 1) - ((i - 3) % 6)); | ||||
| s->ModPred[i+5*128] = 2*((i + 4) - ((i + 3) % 7)); | |||||
| s->ModPred[i+5*128] = 2*((i + 4) - ((i + 3) % 7)); | |||||
| s->ModPred[i+6*128] = (i > 123) ? 240 : 2*((i + 4) - ((i + 4) % 8)); | s->ModPred[i+6*128] = (i > 123) ? 240 : 2*((i + 4) - ((i + 4) % 8)); | ||||
| s->ModPred[i+7*128] = 2*((i + 5) - ((i + 4) % 9)); | |||||
| s->ModPred[i+7*128] = 2*((i + 5) - ((i + 4) % 9)); | |||||
| } | } | ||||
| s->corrector_type = (unsigned short *) av_malloc (24 * 256 * sizeof(unsigned short)); | s->corrector_type = (unsigned short *) av_malloc (24 * 256 * sizeof(unsigned short)); | ||||
| @@ -84,8 +84,8 @@ static void build_modpred(Indeo3DecodeContext *s) | |||||
| for (i=0; i < 24; ++i) { | for (i=0; i < 24; ++i) { | ||||
| for (j=0; j < 256; ++j) { | for (j=0; j < 256; ++j) { | ||||
| s->corrector_type[i*256+j] = (j < corrector_type_0[i]) | s->corrector_type[i*256+j] = (j < corrector_type_0[i]) | ||||
| ? 1 : ((j < 248 || (i == 16 && j == 248)) | |||||
| ? 0 : corrector_type_2[j - 248]); | |||||
| ? 1 : ((j < 248 || (i == 16 && j == 248)) | |||||
| ? 0 : corrector_type_2[j - 248]); | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| @@ -83,10 +83,10 @@ | |||||
| */ | */ | ||||
| #if CONST_BITS == 8 | #if CONST_BITS == 8 | ||||
| #define FIX_0_382683433 ((int32_t) 98) /* FIX(0.382683433) */ | |||||
| #define FIX_0_541196100 ((int32_t) 139) /* FIX(0.541196100) */ | |||||
| #define FIX_0_707106781 ((int32_t) 181) /* FIX(0.707106781) */ | |||||
| #define FIX_1_306562965 ((int32_t) 334) /* FIX(1.306562965) */ | |||||
| #define FIX_0_382683433 ((int32_t) 98) /* FIX(0.382683433) */ | |||||
| #define FIX_0_541196100 ((int32_t) 139) /* FIX(0.541196100) */ | |||||
| #define FIX_0_707106781 ((int32_t) 181) /* FIX(0.707106781) */ | |||||
| #define FIX_1_306562965 ((int32_t) 334) /* FIX(1.306562965) */ | |||||
| #else | #else | ||||
| #define FIX_0_382683433 FIX(0.382683433) | #define FIX_0_382683433 FIX(0.382683433) | ||||
| #define FIX_0_541196100 FIX(0.541196100) | #define FIX_0_541196100 FIX(0.541196100) | ||||
| @@ -135,7 +135,7 @@ static always_inline void row_fdct(DCTELEM * data){ | |||||
| /* Even part */ | /* Even part */ | ||||
| tmp10 = tmp0 + tmp3; /* phase 2 */ | |||||
| tmp10 = tmp0 + tmp3; /* phase 2 */ | |||||
| tmp13 = tmp0 - tmp3; | tmp13 = tmp0 - tmp3; | ||||
| tmp11 = tmp1 + tmp2; | tmp11 = tmp1 + tmp2; | ||||
| tmp12 = tmp1 - tmp2; | tmp12 = tmp1 - tmp2; | ||||
| @@ -144,30 +144,30 @@ static always_inline void row_fdct(DCTELEM * data){ | |||||
| dataptr[4] = tmp10 - tmp11; | dataptr[4] = tmp10 - tmp11; | ||||
| z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */ | z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */ | ||||
| dataptr[2] = tmp13 + z1; /* phase 5 */ | |||||
| dataptr[2] = tmp13 + z1; /* phase 5 */ | |||||
| dataptr[6] = tmp13 - z1; | dataptr[6] = tmp13 - z1; | ||||
| /* Odd part */ | /* Odd part */ | ||||
| tmp10 = tmp4 + tmp5; /* phase 2 */ | |||||
| tmp10 = tmp4 + tmp5; /* phase 2 */ | |||||
| tmp11 = tmp5 + tmp6; | tmp11 = tmp5 + tmp6; | ||||
| tmp12 = tmp6 + tmp7; | tmp12 = tmp6 + tmp7; | ||||
| /* The rotator is modified from fig 4-8 to avoid extra negations. */ | /* The rotator is modified from fig 4-8 to avoid extra negations. */ | ||||
| z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */ | z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */ | ||||
| z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */ | |||||
| z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */ | |||||
| z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */ | |||||
| z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */ | |||||
| z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */ | |||||
| z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */ | |||||
| z11 = tmp7 + z3; /* phase 5 */ | |||||
| z11 = tmp7 + z3; /* phase 5 */ | |||||
| z13 = tmp7 - z3; | z13 = tmp7 - z3; | ||||
| dataptr[5] = z13 + z2; /* phase 6 */ | |||||
| dataptr[5] = z13 + z2; /* phase 6 */ | |||||
| dataptr[3] = z13 - z2; | dataptr[3] = z13 - z2; | ||||
| dataptr[1] = z11 + z4; | dataptr[1] = z11 + z4; | ||||
| dataptr[7] = z11 - z4; | dataptr[7] = z11 - z4; | ||||
| dataptr += DCTSIZE; /* advance pointer to next row */ | |||||
| dataptr += DCTSIZE; /* advance pointer to next row */ | |||||
| } | } | ||||
| } | } | ||||
| @@ -202,7 +202,7 @@ fdct_ifast (DCTELEM * data) | |||||
| /* Even part */ | /* Even part */ | ||||
| tmp10 = tmp0 + tmp3; /* phase 2 */ | |||||
| tmp10 = tmp0 + tmp3; /* phase 2 */ | |||||
| tmp13 = tmp0 - tmp3; | tmp13 = tmp0 - tmp3; | ||||
| tmp11 = tmp1 + tmp2; | tmp11 = tmp1 + tmp2; | ||||
| tmp12 = tmp1 - tmp2; | tmp12 = tmp1 - tmp2; | ||||
| @@ -216,7 +216,7 @@ fdct_ifast (DCTELEM * data) | |||||
| /* Odd part */ | /* Odd part */ | ||||
| tmp10 = tmp4 + tmp5; /* phase 2 */ | |||||
| tmp10 = tmp4 + tmp5; /* phase 2 */ | |||||
| tmp11 = tmp5 + tmp6; | tmp11 = tmp5 + tmp6; | ||||
| tmp12 = tmp6 + tmp7; | tmp12 = tmp6 + tmp7; | ||||
| @@ -226,7 +226,7 @@ fdct_ifast (DCTELEM * data) | |||||
| z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */ | z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */ | ||||
| z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */ | z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */ | ||||
| z11 = tmp7 + z3; /* phase 5 */ | |||||
| z11 = tmp7 + z3; /* phase 5 */ | |||||
| z13 = tmp7 - z3; | z13 = tmp7 - z3; | ||||
| dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */ | dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */ | ||||
| @@ -234,7 +234,7 @@ fdct_ifast (DCTELEM * data) | |||||
| dataptr[DCTSIZE*1] = z11 + z4; | dataptr[DCTSIZE*1] = z11 + z4; | ||||
| dataptr[DCTSIZE*7] = z11 - z4; | dataptr[DCTSIZE*7] = z11 - z4; | ||||
| dataptr++; /* advance pointer to next column */ | |||||
| dataptr++; /* advance pointer to next column */ | |||||
| } | } | ||||
| } | } | ||||
| @@ -293,7 +293,7 @@ fdct_ifast248 (DCTELEM * data) | |||||
| dataptr[DCTSIZE*3] = tmp13 + z1; | dataptr[DCTSIZE*3] = tmp13 + z1; | ||||
| dataptr[DCTSIZE*7] = tmp13 - z1; | dataptr[DCTSIZE*7] = tmp13 - z1; | ||||
| dataptr++; /* advance pointer to next column */ | |||||
| dataptr++; /* advance pointer to next column */ | |||||
| } | } | ||||
| } | } | ||||
| @@ -92,10 +92,10 @@ | |||||
| #if BITS_IN_JSAMPLE == 8 | #if BITS_IN_JSAMPLE == 8 | ||||
| #define CONST_BITS 13 | #define CONST_BITS 13 | ||||
| #define PASS1_BITS 4 /* set this to 2 if 16x16 multiplies are faster */ | |||||
| #define PASS1_BITS 4 /* set this to 2 if 16x16 multiplies are faster */ | |||||
| #else | #else | ||||
| #define CONST_BITS 13 | #define CONST_BITS 13 | ||||
| #define PASS1_BITS 1 /* lose a little precision to avoid overflow */ | |||||
| #define PASS1_BITS 1 /* lose a little precision to avoid overflow */ | |||||
| #endif | #endif | ||||
| /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus | /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus | ||||
| @@ -106,18 +106,18 @@ | |||||
| */ | */ | ||||
| #if CONST_BITS == 13 | #if CONST_BITS == 13 | ||||
| #define FIX_0_298631336 ((int32_t) 2446) /* FIX(0.298631336) */ | |||||
| #define FIX_0_390180644 ((int32_t) 3196) /* FIX(0.390180644) */ | |||||
| #define FIX_0_541196100 ((int32_t) 4433) /* FIX(0.541196100) */ | |||||
| #define FIX_0_765366865 ((int32_t) 6270) /* FIX(0.765366865) */ | |||||
| #define FIX_0_899976223 ((int32_t) 7373) /* FIX(0.899976223) */ | |||||
| #define FIX_1_175875602 ((int32_t) 9633) /* FIX(1.175875602) */ | |||||
| #define FIX_1_501321110 ((int32_t) 12299) /* FIX(1.501321110) */ | |||||
| #define FIX_1_847759065 ((int32_t) 15137) /* FIX(1.847759065) */ | |||||
| #define FIX_1_961570560 ((int32_t) 16069) /* FIX(1.961570560) */ | |||||
| #define FIX_2_053119869 ((int32_t) 16819) /* FIX(2.053119869) */ | |||||
| #define FIX_2_562915447 ((int32_t) 20995) /* FIX(2.562915447) */ | |||||
| #define FIX_3_072711026 ((int32_t) 25172) /* FIX(3.072711026) */ | |||||
| #define FIX_0_298631336 ((int32_t) 2446) /* FIX(0.298631336) */ | |||||
| #define FIX_0_390180644 ((int32_t) 3196) /* FIX(0.390180644) */ | |||||
| #define FIX_0_541196100 ((int32_t) 4433) /* FIX(0.541196100) */ | |||||
| #define FIX_0_765366865 ((int32_t) 6270) /* FIX(0.765366865) */ | |||||
| #define FIX_0_899976223 ((int32_t) 7373) /* FIX(0.899976223) */ | |||||
| #define FIX_1_175875602 ((int32_t) 9633) /* FIX(1.175875602) */ | |||||
| #define FIX_1_501321110 ((int32_t) 12299) /* FIX(1.501321110) */ | |||||
| #define FIX_1_847759065 ((int32_t) 15137) /* FIX(1.847759065) */ | |||||
| #define FIX_1_961570560 ((int32_t) 16069) /* FIX(1.961570560) */ | |||||
| #define FIX_2_053119869 ((int32_t) 16819) /* FIX(2.053119869) */ | |||||
| #define FIX_2_562915447 ((int32_t) 20995) /* FIX(2.562915447) */ | |||||
| #define FIX_3_072711026 ((int32_t) 25172) /* FIX(3.072711026) */ | |||||
| #else | #else | ||||
| #define FIX_0_298631336 FIX(0.298631336) | #define FIX_0_298631336 FIX(0.298631336) | ||||
| #define FIX_0_390180644 FIX(0.390180644) | #define FIX_0_390180644 FIX(0.390180644) | ||||
| @@ -185,9 +185,9 @@ static always_inline void row_fdct(DCTELEM * data){ | |||||
| z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); | z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); | ||||
| dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), | dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), | ||||
| CONST_BITS-PASS1_BITS); | |||||
| CONST_BITS-PASS1_BITS); | |||||
| dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), | dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), | ||||
| CONST_BITS-PASS1_BITS); | |||||
| CONST_BITS-PASS1_BITS); | |||||
| /* Odd part per figure 8 --- note paper omits factor of sqrt(2). | /* Odd part per figure 8 --- note paper omits factor of sqrt(2). | ||||
| * cK represents cos(K*pi/16). | * cK represents cos(K*pi/16). | ||||
| @@ -217,7 +217,7 @@ static always_inline void row_fdct(DCTELEM * data){ | |||||
| dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS); | dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS); | ||||
| dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS); | dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS); | ||||
| dataptr += DCTSIZE; /* advance pointer to next row */ | |||||
| dataptr += DCTSIZE; /* advance pointer to next row */ | |||||
| } | } | ||||
| } | } | ||||
| @@ -267,9 +267,9 @@ ff_jpeg_fdct_islow (DCTELEM * data) | |||||
| z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); | z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); | ||||
| dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), | dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), | ||||
| CONST_BITS+PASS1_BITS); | |||||
| CONST_BITS+PASS1_BITS); | |||||
| dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), | dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), | ||||
| CONST_BITS+PASS1_BITS); | |||||
| CONST_BITS+PASS1_BITS); | |||||
| /* Odd part per figure 8 --- note paper omits factor of sqrt(2). | /* Odd part per figure 8 --- note paper omits factor of sqrt(2). | ||||
| * cK represents cos(K*pi/16). | * cK represents cos(K*pi/16). | ||||
| @@ -295,15 +295,15 @@ ff_jpeg_fdct_islow (DCTELEM * data) | |||||
| z4 += z5; | z4 += z5; | ||||
| dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, | dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, | ||||
| CONST_BITS+PASS1_BITS); | |||||
| CONST_BITS+PASS1_BITS); | |||||
| dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, | dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, | ||||
| CONST_BITS+PASS1_BITS); | |||||
| CONST_BITS+PASS1_BITS); | |||||
| dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, | dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, | ||||
| CONST_BITS+PASS1_BITS); | |||||
| CONST_BITS+PASS1_BITS); | |||||
| dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, | dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, | ||||
| CONST_BITS+PASS1_BITS); | |||||
| CONST_BITS+PASS1_BITS); | |||||
| dataptr++; /* advance pointer to next column */ | |||||
| dataptr++; /* advance pointer to next column */ | |||||
| } | } | ||||
| } | } | ||||
| @@ -350,9 +350,9 @@ ff_fdct248_islow (DCTELEM * data) | |||||
| z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); | z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); | ||||
| dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), | dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), | ||||
| CONST_BITS+PASS1_BITS); | |||||
| CONST_BITS+PASS1_BITS); | |||||
| dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), | dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), | ||||
| CONST_BITS+PASS1_BITS); | |||||
| CONST_BITS+PASS1_BITS); | |||||
| tmp10 = tmp4 + tmp7; | tmp10 = tmp4 + tmp7; | ||||
| tmp11 = tmp5 + tmp6; | tmp11 = tmp5 + tmp6; | ||||
| @@ -364,10 +364,10 @@ ff_fdct248_islow (DCTELEM * data) | |||||
| z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); | z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); | ||||
| dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), | dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), | ||||
| CONST_BITS+PASS1_BITS); | |||||
| CONST_BITS+PASS1_BITS); | |||||
| dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), | dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), | ||||
| CONST_BITS+PASS1_BITS); | |||||
| CONST_BITS+PASS1_BITS); | |||||
| dataptr++; /* advance pointer to next column */ | |||||
| dataptr++; /* advance pointer to next column */ | |||||
| } | } | ||||
| } | } | ||||
| @@ -81,8 +81,8 @@ | |||||
| */ | */ | ||||
| typedef struct LclContext { | typedef struct LclContext { | ||||
| AVCodecContext *avctx; | |||||
| AVFrame pic; | |||||
| AVCodecContext *avctx; | |||||
| AVFrame pic; | |||||
| PutBitContext pb; | PutBitContext pb; | ||||
| // Image type | // Image type | ||||
| @@ -198,8 +198,8 @@ static unsigned int mszh_decomp(unsigned char * srcptr, int srclen, unsigned cha | |||||
| */ | */ | ||||
| static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size) | static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size) | ||||
| { | { | ||||
| LclContext * const c = (LclContext *)avctx->priv_data; | |||||
| unsigned char *encoded = (unsigned char *)buf; | |||||
| LclContext * const c = (LclContext *)avctx->priv_data; | |||||
| unsigned char *encoded = (unsigned char *)buf; | |||||
| unsigned int pixel_ptr; | unsigned int pixel_ptr; | ||||
| int row, col; | int row, col; | ||||
| unsigned char *outptr; | unsigned char *outptr; | ||||
| @@ -214,15 +214,15 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8 | |||||
| #endif | #endif | ||||
| unsigned int len = buf_size; | unsigned int len = buf_size; | ||||
| if(c->pic.data[0]) | |||||
| avctx->release_buffer(avctx, &c->pic); | |||||
| if(c->pic.data[0]) | |||||
| avctx->release_buffer(avctx, &c->pic); | |||||
| c->pic.reference = 0; | |||||
| c->pic.buffer_hints = FF_BUFFER_HINTS_VALID; | |||||
| if(avctx->get_buffer(avctx, &c->pic) < 0){ | |||||
| av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); | |||||
| return -1; | |||||
| } | |||||
| c->pic.reference = 0; | |||||
| c->pic.buffer_hints = FF_BUFFER_HINTS_VALID; | |||||
| if(avctx->get_buffer(avctx, &c->pic) < 0){ | |||||
| av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); | |||||
| return -1; | |||||
| } | |||||
| outptr = c->pic.data[0]; // Output image pointer | outptr = c->pic.data[0]; // Output image pointer | ||||
| @@ -358,7 +358,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8 | |||||
| pixel_ptr = row * width * 3; | pixel_ptr = row * width * 3; | ||||
| yq = encoded[pixel_ptr++]; | yq = encoded[pixel_ptr++]; | ||||
| uqvq = encoded[pixel_ptr++]; | uqvq = encoded[pixel_ptr++]; | ||||
| uqvq+=(encoded[pixel_ptr++] << 8); | |||||
| uqvq+=(encoded[pixel_ptr++] << 8); | |||||
| for (col = 1; col < width; col++) { | for (col = 1; col < width; col++) { | ||||
| encoded[pixel_ptr] = yq -= encoded[pixel_ptr]; | encoded[pixel_ptr] = yq -= encoded[pixel_ptr]; | ||||
| uqvq -= (encoded[pixel_ptr+1] | (encoded[pixel_ptr+2]<<8)); | uqvq -= (encoded[pixel_ptr+1] | (encoded[pixel_ptr+2]<<8)); | ||||
| @@ -588,8 +588,8 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, | |||||
| c->zstream.avail_in = avctx->width*3; | c->zstream.avail_in = avctx->width*3; | ||||
| zret = deflate(&(c->zstream), Z_NO_FLUSH); | zret = deflate(&(c->zstream), Z_NO_FLUSH); | ||||
| if (zret != Z_OK) { | if (zret != Z_OK) { | ||||
| av_log(avctx, AV_LOG_ERROR, "Deflate error: %d\n", zret); | |||||
| return -1; | |||||
| av_log(avctx, AV_LOG_ERROR, "Deflate error: %d\n", zret); | |||||
| return -1; | |||||
| } | } | ||||
| } | } | ||||
| zret = deflate(&(c->zstream), Z_FINISH); | zret = deflate(&(c->zstream), Z_FINISH); | ||||
| @@ -714,7 +714,7 @@ static int decode_init(AVCodecContext *avctx) | |||||
| break; | break; | ||||
| default: | default: | ||||
| if ((c->compression < Z_NO_COMPRESSION) || (c->compression > Z_BEST_COMPRESSION)) { | if ((c->compression < Z_NO_COMPRESSION) || (c->compression > Z_BEST_COMPRESSION)) { | ||||
| av_log(avctx, AV_LOG_ERROR, "Unsupported compression level for ZLIB: (%d).\n", c->compression); | |||||
| av_log(avctx, AV_LOG_ERROR, "Unsupported compression level for ZLIB: (%d).\n", c->compression); | |||||
| return 1; | return 1; | ||||
| } | } | ||||
| av_log(avctx, AV_LOG_INFO, "Compression level for ZLIB: (%d).\n", c->compression); | av_log(avctx, AV_LOG_INFO, "Compression level for ZLIB: (%d).\n", c->compression); | ||||
| @@ -851,15 +851,15 @@ static int encode_init(AVCodecContext *avctx) | |||||
| */ | */ | ||||
| static int decode_end(AVCodecContext *avctx) | static int decode_end(AVCodecContext *avctx) | ||||
| { | { | ||||
| LclContext * const c = (LclContext *)avctx->priv_data; | |||||
| LclContext * const c = (LclContext *)avctx->priv_data; | |||||
| if (c->pic.data[0]) | |||||
| avctx->release_buffer(avctx, &c->pic); | |||||
| if (c->pic.data[0]) | |||||
| avctx->release_buffer(avctx, &c->pic); | |||||
| #ifdef CONFIG_ZLIB | #ifdef CONFIG_ZLIB | ||||
| inflateEnd(&(c->zstream)); | inflateEnd(&(c->zstream)); | ||||
| #endif | #endif | ||||
| return 0; | |||||
| return 0; | |||||
| } | } | ||||
| @@ -883,28 +883,28 @@ static int encode_end(AVCodecContext *avctx) | |||||
| } | } | ||||
| AVCodec mszh_decoder = { | AVCodec mszh_decoder = { | ||||
| "mszh", | |||||
| CODEC_TYPE_VIDEO, | |||||
| CODEC_ID_MSZH, | |||||
| sizeof(LclContext), | |||||
| decode_init, | |||||
| NULL, | |||||
| decode_end, | |||||
| decode_frame, | |||||
| CODEC_CAP_DR1, | |||||
| "mszh", | |||||
| CODEC_TYPE_VIDEO, | |||||
| CODEC_ID_MSZH, | |||||
| sizeof(LclContext), | |||||
| decode_init, | |||||
| NULL, | |||||
| decode_end, | |||||
| decode_frame, | |||||
| CODEC_CAP_DR1, | |||||
| }; | }; | ||||
| AVCodec zlib_decoder = { | AVCodec zlib_decoder = { | ||||
| "zlib", | |||||
| CODEC_TYPE_VIDEO, | |||||
| CODEC_ID_ZLIB, | |||||
| sizeof(LclContext), | |||||
| decode_init, | |||||
| NULL, | |||||
| decode_end, | |||||
| decode_frame, | |||||
| CODEC_CAP_DR1, | |||||
| "zlib", | |||||
| CODEC_TYPE_VIDEO, | |||||
| CODEC_ID_ZLIB, | |||||
| sizeof(LclContext), | |||||
| decode_init, | |||||
| NULL, | |||||
| decode_end, | |||||
| decode_frame, | |||||
| CODEC_CAP_DR1, | |||||
| }; | }; | ||||
| #ifdef CONFIG_ENCODERS | #ifdef CONFIG_ENCODERS | ||||
| @@ -42,7 +42,7 @@ void pp_postprocess(uint8_t * src[3], int srcStride[3], | |||||
| uint8_t * dst[3], int dstStride[3], | uint8_t * dst[3], int dstStride[3], | ||||
| int horizontalSize, int verticalSize, | int horizontalSize, int verticalSize, | ||||
| QP_STORE_T *QP_store, int QP_stride, | QP_STORE_T *QP_store, int QP_stride, | ||||
| pp_mode_t *mode, pp_context_t *ppContext, int pict_type); | |||||
| pp_mode_t *mode, pp_context_t *ppContext, int pict_type); | |||||
| /** | /** | ||||
| @@ -26,35 +26,35 @@ | |||||
| #endif | #endif | ||||
| #define ALTIVEC_TRANSPOSE_8x8_SHORT(src_a,src_b,src_c,src_d,src_e,src_f,src_g,src_h) \ | #define ALTIVEC_TRANSPOSE_8x8_SHORT(src_a,src_b,src_c,src_d,src_e,src_f,src_g,src_h) \ | ||||
| do { \ | |||||
| __typeof__(src_a) tempA1, tempB1, tempC1, tempD1; \ | |||||
| __typeof__(src_a) tempE1, tempF1, tempG1, tempH1; \ | |||||
| __typeof__(src_a) tempA2, tempB2, tempC2, tempD2; \ | |||||
| __typeof__(src_a) tempE2, tempF2, tempG2, tempH2; \ | |||||
| tempA1 = vec_mergeh (src_a, src_e); \ | |||||
| tempB1 = vec_mergel (src_a, src_e); \ | |||||
| tempC1 = vec_mergeh (src_b, src_f); \ | |||||
| tempD1 = vec_mergel (src_b, src_f); \ | |||||
| tempE1 = vec_mergeh (src_c, src_g); \ | |||||
| tempF1 = vec_mergel (src_c, src_g); \ | |||||
| tempG1 = vec_mergeh (src_d, src_h); \ | |||||
| tempH1 = vec_mergel (src_d, src_h); \ | |||||
| tempA2 = vec_mergeh (tempA1, tempE1); \ | |||||
| tempB2 = vec_mergel (tempA1, tempE1); \ | |||||
| tempC2 = vec_mergeh (tempB1, tempF1); \ | |||||
| tempD2 = vec_mergel (tempB1, tempF1); \ | |||||
| tempE2 = vec_mergeh (tempC1, tempG1); \ | |||||
| tempF2 = vec_mergel (tempC1, tempG1); \ | |||||
| tempG2 = vec_mergeh (tempD1, tempH1); \ | |||||
| tempH2 = vec_mergel (tempD1, tempH1); \ | |||||
| src_a = vec_mergeh (tempA2, tempE2); \ | |||||
| src_b = vec_mergel (tempA2, tempE2); \ | |||||
| src_c = vec_mergeh (tempB2, tempF2); \ | |||||
| src_d = vec_mergel (tempB2, tempF2); \ | |||||
| src_e = vec_mergeh (tempC2, tempG2); \ | |||||
| src_f = vec_mergel (tempC2, tempG2); \ | |||||
| src_g = vec_mergeh (tempD2, tempH2); \ | |||||
| src_h = vec_mergel (tempD2, tempH2); \ | |||||
| do { \ | |||||
| __typeof__(src_a) tempA1, tempB1, tempC1, tempD1; \ | |||||
| __typeof__(src_a) tempE1, tempF1, tempG1, tempH1; \ | |||||
| __typeof__(src_a) tempA2, tempB2, tempC2, tempD2; \ | |||||
| __typeof__(src_a) tempE2, tempF2, tempG2, tempH2; \ | |||||
| tempA1 = vec_mergeh (src_a, src_e); \ | |||||
| tempB1 = vec_mergel (src_a, src_e); \ | |||||
| tempC1 = vec_mergeh (src_b, src_f); \ | |||||
| tempD1 = vec_mergel (src_b, src_f); \ | |||||
| tempE1 = vec_mergeh (src_c, src_g); \ | |||||
| tempF1 = vec_mergel (src_c, src_g); \ | |||||
| tempG1 = vec_mergeh (src_d, src_h); \ | |||||
| tempH1 = vec_mergel (src_d, src_h); \ | |||||
| tempA2 = vec_mergeh (tempA1, tempE1); \ | |||||
| tempB2 = vec_mergel (tempA1, tempE1); \ | |||||
| tempC2 = vec_mergeh (tempB1, tempF1); \ | |||||
| tempD2 = vec_mergel (tempB1, tempF1); \ | |||||
| tempE2 = vec_mergeh (tempC1, tempG1); \ | |||||
| tempF2 = vec_mergel (tempC1, tempG1); \ | |||||
| tempG2 = vec_mergeh (tempD1, tempH1); \ | |||||
| tempH2 = vec_mergel (tempD1, tempH1); \ | |||||
| src_a = vec_mergeh (tempA2, tempE2); \ | |||||
| src_b = vec_mergel (tempA2, tempE2); \ | |||||
| src_c = vec_mergeh (tempB2, tempF2); \ | |||||
| src_d = vec_mergel (tempB2, tempF2); \ | |||||
| src_e = vec_mergeh (tempC2, tempG2); \ | |||||
| src_f = vec_mergel (tempC2, tempG2); \ | |||||
| src_g = vec_mergeh (tempD2, tempH2); \ | |||||
| src_h = vec_mergel (tempD2, tempH2); \ | |||||
| } while (0) | } while (0) | ||||
| @@ -94,25 +94,25 @@ static inline int vertClassify_altivec(uint8_t src[], int stride, PPContext *c) | |||||
| vector signed short v_srcAss0, v_srcAss1, v_srcAss2, v_srcAss3, v_srcAss4, v_srcAss5, v_srcAss6, v_srcAss7; | vector signed short v_srcAss0, v_srcAss1, v_srcAss2, v_srcAss3, v_srcAss4, v_srcAss5, v_srcAss6, v_srcAss7; | ||||
| #define LOAD_LINE(i) \ | |||||
| register int j##i = i * stride; \ | |||||
| vector unsigned char perm##i = vec_lvsl(j##i, src2); \ | |||||
| const vector unsigned char v_srcA1##i = vec_ld(j##i, src2); \ | |||||
| vector unsigned char v_srcA2##i; \ | |||||
| if (two_vectors) \ | |||||
| v_srcA2##i = vec_ld(j##i + 16, src2); \ | |||||
| const vector unsigned char v_srcA##i = \ | |||||
| vec_perm(v_srcA1##i, v_srcA2##i, perm##i); \ | |||||
| #define LOAD_LINE(i) \ | |||||
| register int j##i = i * stride; \ | |||||
| vector unsigned char perm##i = vec_lvsl(j##i, src2); \ | |||||
| const vector unsigned char v_srcA1##i = vec_ld(j##i, src2); \ | |||||
| vector unsigned char v_srcA2##i; \ | |||||
| if (two_vectors) \ | |||||
| v_srcA2##i = vec_ld(j##i + 16, src2); \ | |||||
| const vector unsigned char v_srcA##i = \ | |||||
| vec_perm(v_srcA1##i, v_srcA2##i, perm##i); \ | |||||
| v_srcAss##i = \ | v_srcAss##i = \ | ||||
| (vector signed short)vec_mergeh((vector signed char)zero, \ | |||||
| (vector signed char)v_srcA##i) | |||||
| (vector signed short)vec_mergeh((vector signed char)zero, \ | |||||
| (vector signed char)v_srcA##i) | |||||
| #define LOAD_LINE_ALIGNED(i) \ | #define LOAD_LINE_ALIGNED(i) \ | ||||
| register int j##i = i * stride; \ | register int j##i = i * stride; \ | ||||
| const vector unsigned char v_srcA##i = vec_ld(j##i, src2); \ | const vector unsigned char v_srcA##i = vec_ld(j##i, src2); \ | ||||
| v_srcAss##i = \ | v_srcAss##i = \ | ||||
| (vector signed short)vec_mergeh((vector signed char)zero, \ | |||||
| (vector signed char)v_srcA##i) | |||||
| (vector signed short)vec_mergeh((vector signed char)zero, \ | |||||
| (vector signed char)v_srcA##i) | |||||
| // special casing the aligned case is worthwhile, as all call from | // special casing the aligned case is worthwhile, as all call from | ||||
| // the (transposed) horizontable deblocks will be aligned, i naddition | // the (transposed) horizontable deblocks will be aligned, i naddition | ||||
| @@ -139,15 +139,15 @@ static inline int vertClassify_altivec(uint8_t src[], int stride, PPContext *c) | |||||
| #undef LOAD_LINE | #undef LOAD_LINE | ||||
| #undef LOAD_LINE_ALIGNED | #undef LOAD_LINE_ALIGNED | ||||
| #define ITER(i, j) \ | |||||
| const vector signed short v_diff##i = \ | |||||
| vec_sub(v_srcAss##i, v_srcAss##j); \ | |||||
| const vector signed short v_sum##i = \ | |||||
| vec_add(v_diff##i, v_dcOffset); \ | |||||
| const vector signed short v_comp##i = \ | |||||
| (vector signed short)vec_cmplt((vector unsigned short)v_sum##i, \ | |||||
| v_dcThreshold); \ | |||||
| const vector signed short v_part##i = vec_and(mask, v_comp##i); \ | |||||
| #define ITER(i, j) \ | |||||
| const vector signed short v_diff##i = \ | |||||
| vec_sub(v_srcAss##i, v_srcAss##j); \ | |||||
| const vector signed short v_sum##i = \ | |||||
| vec_add(v_diff##i, v_dcOffset); \ | |||||
| const vector signed short v_comp##i = \ | |||||
| (vector signed short)vec_cmplt((vector unsigned short)v_sum##i, \ | |||||
| v_dcThreshold); \ | |||||
| const vector signed short v_part##i = vec_and(mask, v_comp##i); \ | |||||
| v_numEq = vec_sum4s(v_part##i, v_numEq); | v_numEq = vec_sum4s(v_part##i, v_numEq); | ||||
| ITER(0, 1); | ITER(0, 1); | ||||
| @@ -167,13 +167,13 @@ static inline int vertClassify_altivec(uint8_t src[], int stride, PPContext *c) | |||||
| if (numEq > c->ppMode.flatnessThreshold) | if (numEq > c->ppMode.flatnessThreshold) | ||||
| { | { | ||||
| const vector unsigned char mmoP1 = (const vector unsigned char) | const vector unsigned char mmoP1 = (const vector unsigned char) | ||||
| AVV(0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, | |||||
| 0x00, 0x01, 0x12, 0x13, 0x08, 0x09, 0x1A, 0x1B); | |||||
| AVV(0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, | |||||
| 0x00, 0x01, 0x12, 0x13, 0x08, 0x09, 0x1A, 0x1B); | |||||
| const vector unsigned char mmoP2 = (const vector unsigned char) | const vector unsigned char mmoP2 = (const vector unsigned char) | ||||
| AVV(0x04, 0x05, 0x16, 0x17, 0x0C, 0x0D, 0x1E, 0x1F, | |||||
| 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f); | |||||
| AVV(0x04, 0x05, 0x16, 0x17, 0x0C, 0x0D, 0x1E, 0x1F, | |||||
| 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f); | |||||
| const vector unsigned char mmoP = (const vector unsigned char) | const vector unsigned char mmoP = (const vector unsigned char) | ||||
| vec_lvsl(8, (unsigned char*)0); | |||||
| vec_lvsl(8, (unsigned char*)0); | |||||
| vector signed short mmoL1 = vec_perm(v_srcAss0, v_srcAss2, mmoP1); | vector signed short mmoL1 = vec_perm(v_srcAss0, v_srcAss2, mmoP1); | ||||
| vector signed short mmoL2 = vec_perm(v_srcAss4, v_srcAss6, mmoP2); | vector signed short mmoL2 = vec_perm(v_srcAss4, v_srcAss6, mmoP2); | ||||
| @@ -185,9 +185,9 @@ static inline int vertClassify_altivec(uint8_t src[], int stride, PPContext *c) | |||||
| vector unsigned short mmoSum = (vector unsigned short)vec_add(mmoDiff, v2QP); | vector unsigned short mmoSum = (vector unsigned short)vec_add(mmoDiff, v2QP); | ||||
| if (vec_any_gt(mmoSum, v4QP)) | if (vec_any_gt(mmoSum, v4QP)) | ||||
| return 0; | |||||
| return 0; | |||||
| else | else | ||||
| return 1; | |||||
| return 1; | |||||
| } | } | ||||
| else return 2; | else return 2; | ||||
| } | } | ||||
| @@ -218,21 +218,21 @@ static inline void doVertLowPass_altivec(uint8_t *src, int stride, PPContext *c) | |||||
| vector unsigned char vbT0, vbT1, vbT2, vbT3, vbT4, vbT5, vbT6, vbT7, vbT8, vbT9; | vector unsigned char vbT0, vbT1, vbT2, vbT3, vbT4, vbT5, vbT6, vbT7, vbT8, vbT9; | ||||
| #define LOAD_LINE(i) \ | #define LOAD_LINE(i) \ | ||||
| const vector unsigned char perml##i = \ | |||||
| vec_lvsl(i * stride, src2); \ | |||||
| const vector unsigned char perml##i = \ | |||||
| vec_lvsl(i * stride, src2); \ | |||||
| vbA##i = vec_ld(i * stride, src2); \ | vbA##i = vec_ld(i * stride, src2); \ | ||||
| vbB##i = vec_ld(i * stride + 16, src2); \ | vbB##i = vec_ld(i * stride + 16, src2); \ | ||||
| vbT##i = vec_perm(vbA##i, vbB##i, perml##i); \ | vbT##i = vec_perm(vbA##i, vbB##i, perml##i); \ | ||||
| vb##i = \ | vb##i = \ | ||||
| (vector signed short)vec_mergeh((vector unsigned char)zero, \ | |||||
| (vector unsigned char)vbT##i) | |||||
| (vector signed short)vec_mergeh((vector unsigned char)zero, \ | |||||
| (vector unsigned char)vbT##i) | |||||
| #define LOAD_LINE_ALIGNED(i) \ | #define LOAD_LINE_ALIGNED(i) \ | ||||
| register int j##i = i * stride; \ | register int j##i = i * stride; \ | ||||
| vbT##i = vec_ld(j##i, src2); \ | vbT##i = vec_ld(j##i, src2); \ | ||||
| vb##i = \ | vb##i = \ | ||||
| (vector signed short)vec_mergeh((vector signed char)zero, \ | |||||
| (vector signed char)vbT##i) | |||||
| (vector signed short)vec_mergeh((vector signed char)zero, \ | |||||
| (vector signed char)vbT##i) | |||||
| // special casing the aligned case is worthwhile, as all call from | // special casing the aligned case is worthwhile, as all call from | ||||
| // the (transposed) horizontable deblocks will be aligned, in addition | // the (transposed) horizontable deblocks will be aligned, in addition | ||||
| @@ -308,11 +308,11 @@ static inline void doVertLowPass_altivec(uint8_t *src, int stride, PPContext *c) | |||||
| const vector signed short temp91 = vec_sub(v_sumsB8, vb5); | const vector signed short temp91 = vec_sub(v_sumsB8, vb5); | ||||
| const vector signed short v_sumsB9 = vec_add(temp91, v_last); | const vector signed short v_sumsB9 = vec_add(temp91, v_last); | ||||
| #define COMPUTE_VR(i, j, k) \ | |||||
| const vector signed short temps1##i = \ | |||||
| vec_add(v_sumsB##i, v_sumsB##k); \ | |||||
| const vector signed short temps2##i = \ | |||||
| vec_mladd(vb##j, (vector signed short)v_2, temps1##i); \ | |||||
| #define COMPUTE_VR(i, j, k) \ | |||||
| const vector signed short temps1##i = \ | |||||
| vec_add(v_sumsB##i, v_sumsB##k); \ | |||||
| const vector signed short temps2##i = \ | |||||
| vec_mladd(vb##j, (vector signed short)v_2, temps1##i); \ | |||||
| const vector signed short vr##j = vec_sra(temps2##i, v_4) | const vector signed short vr##j = vec_sra(temps2##i, v_4) | ||||
| COMPUTE_VR(0, 1, 2); | COMPUTE_VR(0, 1, 2); | ||||
| @@ -326,31 +326,31 @@ static inline void doVertLowPass_altivec(uint8_t *src, int stride, PPContext *c) | |||||
| const vector signed char neg1 = vec_splat_s8(-1); | const vector signed char neg1 = vec_splat_s8(-1); | ||||
| const vector unsigned char permHH = (const vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, | const vector unsigned char permHH = (const vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, | ||||
| 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F); | |||||
| #define PACK_AND_STORE(i) \ | |||||
| const vector unsigned char perms##i = \ | |||||
| vec_lvsr(i * stride, src2); \ | |||||
| const vector unsigned char vf##i = \ | |||||
| vec_packsu(vr##i, (vector signed short)zero); \ | |||||
| const vector unsigned char vg##i = \ | |||||
| vec_perm(vf##i, vbT##i, permHH); \ | |||||
| const vector unsigned char mask##i = \ | |||||
| vec_perm((vector unsigned char)zero, (vector unsigned char)neg1, perms##i); \ | |||||
| const vector unsigned char vg2##i = \ | |||||
| vec_perm(vg##i, vg##i, perms##i); \ | |||||
| const vector unsigned char svA##i = \ | |||||
| vec_sel(vbA##i, vg2##i, mask##i); \ | |||||
| const vector unsigned char svB##i = \ | |||||
| vec_sel(vg2##i, vbB##i, mask##i); \ | |||||
| vec_st(svA##i, i * stride, src2); \ | |||||
| 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F); | |||||
| #define PACK_AND_STORE(i) \ | |||||
| const vector unsigned char perms##i = \ | |||||
| vec_lvsr(i * stride, src2); \ | |||||
| const vector unsigned char vf##i = \ | |||||
| vec_packsu(vr##i, (vector signed short)zero); \ | |||||
| const vector unsigned char vg##i = \ | |||||
| vec_perm(vf##i, vbT##i, permHH); \ | |||||
| const vector unsigned char mask##i = \ | |||||
| vec_perm((vector unsigned char)zero, (vector unsigned char)neg1, perms##i); \ | |||||
| const vector unsigned char vg2##i = \ | |||||
| vec_perm(vg##i, vg##i, perms##i); \ | |||||
| const vector unsigned char svA##i = \ | |||||
| vec_sel(vbA##i, vg2##i, mask##i); \ | |||||
| const vector unsigned char svB##i = \ | |||||
| vec_sel(vg2##i, vbB##i, mask##i); \ | |||||
| vec_st(svA##i, i * stride, src2); \ | |||||
| vec_st(svB##i, i * stride + 16, src2) | vec_st(svB##i, i * stride + 16, src2) | ||||
| #define PACK_AND_STORE_ALIGNED(i) \ | |||||
| const vector unsigned char vf##i = \ | |||||
| vec_packsu(vr##i, (vector signed short)zero); \ | |||||
| const vector unsigned char vg##i = \ | |||||
| vec_perm(vf##i, vbT##i, permHH); \ | |||||
| #define PACK_AND_STORE_ALIGNED(i) \ | |||||
| const vector unsigned char vf##i = \ | |||||
| vec_packsu(vr##i, (vector signed short)zero); \ | |||||
| const vector unsigned char vg##i = \ | |||||
| vec_perm(vf##i, vbT##i, permHH); \ | |||||
| vec_st(vg##i, i * stride, src2) | vec_st(vg##i, i * stride, src2) | ||||
| // special casing the aligned case is worthwhile, as all call from | // special casing the aligned case is worthwhile, as all call from | ||||
| @@ -398,17 +398,17 @@ static inline void doVertDefFilter_altivec(uint8_t src[], int stride, PPContext | |||||
| vqp = vec_splat(vqp, 0); | vqp = vec_splat(vqp, 0); | ||||
| #define LOAD_LINE(i) \ | #define LOAD_LINE(i) \ | ||||
| const vector unsigned char perm##i = \ | |||||
| vec_lvsl(i * stride, src2); \ | |||||
| const vector unsigned char vbA##i = \ | |||||
| vec_ld(i * stride, src2); \ | |||||
| const vector unsigned char vbB##i = \ | |||||
| vec_ld(i * stride + 16, src2); \ | |||||
| const vector unsigned char vbT##i = \ | |||||
| vec_perm(vbA##i, vbB##i, perm##i); \ | |||||
| const vector signed short vb##i = \ | |||||
| (vector signed short)vec_mergeh((vector unsigned char)zero, \ | |||||
| (vector unsigned char)vbT##i) | |||||
| const vector unsigned char perm##i = \ | |||||
| vec_lvsl(i * stride, src2); \ | |||||
| const vector unsigned char vbA##i = \ | |||||
| vec_ld(i * stride, src2); \ | |||||
| const vector unsigned char vbB##i = \ | |||||
| vec_ld(i * stride + 16, src2); \ | |||||
| const vector unsigned char vbT##i = \ | |||||
| vec_perm(vbA##i, vbB##i, perm##i); \ | |||||
| const vector signed short vb##i = \ | |||||
| (vector signed short)vec_mergeh((vector unsigned char)zero, \ | |||||
| (vector unsigned char)vbT##i) | |||||
| src2 += stride*3; | src2 += stride*3; | ||||
| @@ -426,7 +426,7 @@ static inline void doVertDefFilter_altivec(uint8_t src[], int stride, PPContext | |||||
| const vector signed short v_2 = vec_splat_s16(2); | const vector signed short v_2 = vec_splat_s16(2); | ||||
| const vector signed short v_5 = vec_splat_s16(5); | const vector signed short v_5 = vec_splat_s16(5); | ||||
| const vector signed short v_32 = vec_sl(v_1, | const vector signed short v_32 = vec_sl(v_1, | ||||
| (vector unsigned short)v_5); | |||||
| (vector unsigned short)v_5); | |||||
| /* middle energy */ | /* middle energy */ | ||||
| const vector signed short l3minusl6 = vec_sub(vb3, vb6); | const vector signed short l3minusl6 = vec_sub(vb3, vb6); | ||||
| const vector signed short l5minusl4 = vec_sub(vb5, vb4); | const vector signed short l5minusl4 = vec_sub(vb5, vb4); | ||||
| @@ -483,22 +483,22 @@ static inline void doVertDefFilter_altivec(uint8_t src[], int stride, PPContext | |||||
| const vector signed char neg1 = vec_splat_s8(-1); | const vector signed char neg1 = vec_splat_s8(-1); | ||||
| const vector unsigned char permHH = (const vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, | const vector unsigned char permHH = (const vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, | ||||
| 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F); | |||||
| #define STORE(i) \ | |||||
| const vector unsigned char perms##i = \ | |||||
| vec_lvsr(i * stride, src2); \ | |||||
| const vector unsigned char vg##i = \ | |||||
| vec_perm(st##i, vbT##i, permHH); \ | |||||
| const vector unsigned char mask##i = \ | |||||
| vec_perm((vector unsigned char)zero, (vector unsigned char)neg1, perms##i); \ | |||||
| const vector unsigned char vg2##i = \ | |||||
| vec_perm(vg##i, vg##i, perms##i); \ | |||||
| const vector unsigned char svA##i = \ | |||||
| vec_sel(vbA##i, vg2##i, mask##i); \ | |||||
| const vector unsigned char svB##i = \ | |||||
| vec_sel(vg2##i, vbB##i, mask##i); \ | |||||
| vec_st(svA##i, i * stride, src2); \ | |||||
| 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F); | |||||
| #define STORE(i) \ | |||||
| const vector unsigned char perms##i = \ | |||||
| vec_lvsr(i * stride, src2); \ | |||||
| const vector unsigned char vg##i = \ | |||||
| vec_perm(st##i, vbT##i, permHH); \ | |||||
| const vector unsigned char mask##i = \ | |||||
| vec_perm((vector unsigned char)zero, (vector unsigned char)neg1, perms##i); \ | |||||
| const vector unsigned char vg2##i = \ | |||||
| vec_perm(vg##i, vg##i, perms##i); \ | |||||
| const vector unsigned char svA##i = \ | |||||
| vec_sel(vbA##i, vg2##i, mask##i); \ | |||||
| const vector unsigned char svB##i = \ | |||||
| vec_sel(vg2##i, vbB##i, mask##i); \ | |||||
| vec_st(svA##i, i * stride, src2); \ | |||||
| vec_st(svB##i, i * stride + 16, src2) | vec_st(svB##i, i * stride + 16, src2) | ||||
| STORE(4); | STORE(4); | ||||
| @@ -522,11 +522,11 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) { | |||||
| dt[0] = deringThreshold; | dt[0] = deringThreshold; | ||||
| v_dt = vec_splat(vec_ld(0, dt), 0); | v_dt = vec_splat(vec_ld(0, dt), 0); | ||||
| #define LOAD_LINE(i) \ | |||||
| const vector unsigned char perm##i = \ | |||||
| vec_lvsl(i * stride, srcCopy); \ | |||||
| vector unsigned char sA##i = vec_ld(i * stride, srcCopy); \ | |||||
| vector unsigned char sB##i = vec_ld(i * stride + 16, srcCopy); \ | |||||
| #define LOAD_LINE(i) \ | |||||
| const vector unsigned char perm##i = \ | |||||
| vec_lvsl(i * stride, srcCopy); \ | |||||
| vector unsigned char sA##i = vec_ld(i * stride, srcCopy); \ | |||||
| vector unsigned char sB##i = vec_ld(i * stride + 16, srcCopy); \ | |||||
| vector unsigned char src##i = vec_perm(sA##i, sB##i, perm##i) | vector unsigned char src##i = vec_perm(sA##i, sB##i, perm##i) | ||||
| LOAD_LINE(0); | LOAD_LINE(0); | ||||
| @@ -545,13 +545,13 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) { | |||||
| { | { | ||||
| const vector unsigned char trunc_perm = (vector unsigned char) | const vector unsigned char trunc_perm = (vector unsigned char) | ||||
| AVV(0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, | AVV(0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, | ||||
| 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18); | |||||
| 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18); | |||||
| const vector unsigned char trunc_src12 = vec_perm(src1, src2, trunc_perm); | const vector unsigned char trunc_src12 = vec_perm(src1, src2, trunc_perm); | ||||
| const vector unsigned char trunc_src34 = vec_perm(src3, src4, trunc_perm); | const vector unsigned char trunc_src34 = vec_perm(src3, src4, trunc_perm); | ||||
| const vector unsigned char trunc_src56 = vec_perm(src5, src6, trunc_perm); | const vector unsigned char trunc_src56 = vec_perm(src5, src6, trunc_perm); | ||||
| const vector unsigned char trunc_src78 = vec_perm(src7, src8, trunc_perm); | const vector unsigned char trunc_src78 = vec_perm(src7, src8, trunc_perm); | ||||
| #define EXTRACT(op) do { \ | |||||
| #define EXTRACT(op) do { \ | |||||
| const vector unsigned char s##op##_1 = vec_##op(trunc_src12, trunc_src34); \ | const vector unsigned char s##op##_1 = vec_##op(trunc_src12, trunc_src34); \ | ||||
| const vector unsigned char s##op##_2 = vec_##op(trunc_src56, trunc_src78); \ | const vector unsigned char s##op##_2 = vec_##op(trunc_src56, trunc_src78); \ | ||||
| const vector unsigned char s##op##_6 = vec_##op(s##op##_1, s##op##_2); \ | const vector unsigned char s##op##_6 = vec_##op(s##op##_1, s##op##_2); \ | ||||
| @@ -584,29 +584,29 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) { | |||||
| { | { | ||||
| const vector unsigned short mask1 = (vector unsigned short) | const vector unsigned short mask1 = (vector unsigned short) | ||||
| AVV(0x0001, 0x0002, 0x0004, 0x0008, | AVV(0x0001, 0x0002, 0x0004, 0x0008, | ||||
| 0x0010, 0x0020, 0x0040, 0x0080); | |||||
| 0x0010, 0x0020, 0x0040, 0x0080); | |||||
| const vector unsigned short mask2 = (vector unsigned short) | const vector unsigned short mask2 = (vector unsigned short) | ||||
| AVV(0x0100, 0x0200, 0x0000, 0x0000, | AVV(0x0100, 0x0200, 0x0000, 0x0000, | ||||
| 0x0000, 0x0000, 0x0000, 0x0000); | |||||
| 0x0000, 0x0000, 0x0000, 0x0000); | |||||
| const vector unsigned int vuint32_16 = vec_sl(vec_splat_u32(1), vec_splat_u32(4)); | const vector unsigned int vuint32_16 = vec_sl(vec_splat_u32(1), vec_splat_u32(4)); | ||||
| const vector unsigned int vuint32_1 = vec_splat_u32(1); | const vector unsigned int vuint32_1 = vec_splat_u32(1); | ||||
| #define COMPARE(i) \ | |||||
| vector signed int sum##i; \ | |||||
| do { \ | |||||
| const vector unsigned char cmp##i = \ | |||||
| (vector unsigned char)vec_cmpgt(src##i, v_avg); \ | |||||
| const vector unsigned short cmpHi##i = \ | |||||
| (vector unsigned short)vec_mergeh(cmp##i, cmp##i); \ | |||||
| const vector unsigned short cmpLi##i = \ | |||||
| (vector unsigned short)vec_mergel(cmp##i, cmp##i); \ | |||||
| const vector signed short cmpHf##i = \ | |||||
| (vector signed short)vec_and(cmpHi##i, mask1); \ | |||||
| const vector signed short cmpLf##i = \ | |||||
| (vector signed short)vec_and(cmpLi##i, mask2); \ | |||||
| const vector signed int sump##i = vec_sum4s(cmpHf##i, zero); \ | |||||
| const vector signed int sumq##i = vec_sum4s(cmpLf##i, sump##i); \ | |||||
| #define COMPARE(i) \ | |||||
| vector signed int sum##i; \ | |||||
| do { \ | |||||
| const vector unsigned char cmp##i = \ | |||||
| (vector unsigned char)vec_cmpgt(src##i, v_avg); \ | |||||
| const vector unsigned short cmpHi##i = \ | |||||
| (vector unsigned short)vec_mergeh(cmp##i, cmp##i); \ | |||||
| const vector unsigned short cmpLi##i = \ | |||||
| (vector unsigned short)vec_mergel(cmp##i, cmp##i); \ | |||||
| const vector signed short cmpHf##i = \ | |||||
| (vector signed short)vec_and(cmpHi##i, mask1); \ | |||||
| const vector signed short cmpLf##i = \ | |||||
| (vector signed short)vec_and(cmpLi##i, mask2); \ | |||||
| const vector signed int sump##i = vec_sum4s(cmpHf##i, zero); \ | |||||
| const vector signed int sumq##i = vec_sum4s(cmpLf##i, sump##i); \ | |||||
| sum##i = vec_sums(sumq##i, zero); } while (0) | sum##i = vec_sums(sumq##i, zero); } while (0) | ||||
| COMPARE(0); | COMPARE(0); | ||||
| @@ -643,11 +643,11 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) { | |||||
| const vector signed int t2B = vec_or(sumB, tB); | const vector signed int t2B = vec_or(sumB, tB); | ||||
| const vector signed int t2C = vec_or(sumC, tC); | const vector signed int t2C = vec_or(sumC, tC); | ||||
| const vector signed int t3A = vec_and(vec_sra(t2A, vuint32_1), | const vector signed int t3A = vec_and(vec_sra(t2A, vuint32_1), | ||||
| vec_sl(t2A, vuint32_1)); | |||||
| vec_sl(t2A, vuint32_1)); | |||||
| const vector signed int t3B = vec_and(vec_sra(t2B, vuint32_1), | const vector signed int t3B = vec_and(vec_sra(t2B, vuint32_1), | ||||
| vec_sl(t2B, vuint32_1)); | |||||
| vec_sl(t2B, vuint32_1)); | |||||
| const vector signed int t3C = vec_and(vec_sra(t2C, vuint32_1), | const vector signed int t3C = vec_and(vec_sra(t2C, vuint32_1), | ||||
| vec_sl(t2C, vuint32_1)); | |||||
| vec_sl(t2C, vuint32_1)); | |||||
| const vector signed int yA = vec_and(t2A, t3A); | const vector signed int yA = vec_and(t2A, t3A); | ||||
| const vector signed int yB = vec_and(t2B, t3B); | const vector signed int yB = vec_and(t2B, t3B); | ||||
| const vector signed int yC = vec_and(t2C, t3C); | const vector signed int yC = vec_and(t2C, t3C); | ||||
| @@ -659,15 +659,15 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) { | |||||
| const vector signed int sumBd4 = vec_perm(yB, yC, strangeperm1); | const vector signed int sumBd4 = vec_perm(yB, yC, strangeperm1); | ||||
| const vector signed int sumBd8 = vec_perm(yB, yC, strangeperm2); | const vector signed int sumBd8 = vec_perm(yB, yC, strangeperm2); | ||||
| const vector signed int sumAp = vec_and(yA, | const vector signed int sumAp = vec_and(yA, | ||||
| vec_and(sumAd4,sumAd8)); | |||||
| vec_and(sumAd4,sumAd8)); | |||||
| const vector signed int sumBp = vec_and(yB, | const vector signed int sumBp = vec_and(yB, | ||||
| vec_and(sumBd4,sumBd8)); | |||||
| vec_and(sumBd4,sumBd8)); | |||||
| sumA2 = vec_or(sumAp, | sumA2 = vec_or(sumAp, | ||||
| vec_sra(sumAp, | |||||
| vuint32_16)); | |||||
| vec_sra(sumAp, | |||||
| vuint32_16)); | |||||
| sumB2 = vec_or(sumBp, | sumB2 = vec_or(sumBp, | ||||
| vec_sra(sumBp, | |||||
| vuint32_16)); | |||||
| vec_sra(sumBp, | |||||
| vuint32_16)); | |||||
| } | } | ||||
| vec_st(sumA2, 0, S); | vec_st(sumA2, 0, S); | ||||
| vec_st(sumB2, 16, S); | vec_st(sumB2, 16, S); | ||||
| @@ -686,84 +686,84 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) { | |||||
| const vector unsigned char permA1 = (vector unsigned char) | const vector unsigned char permA1 = (vector unsigned char) | ||||
| AVV(0x00, 0x01, 0x02, 0x10, 0x11, 0x12, 0x1F, 0x1F, | AVV(0x00, 0x01, 0x02, 0x10, 0x11, 0x12, 0x1F, 0x1F, | ||||
| 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F); | |||||
| 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F); | |||||
| const vector unsigned char permA2 = (vector unsigned char) | const vector unsigned char permA2 = (vector unsigned char) | ||||
| AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x10, 0x11, | AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x10, 0x11, | ||||
| 0x12, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F); | |||||
| 0x12, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F); | |||||
| const vector unsigned char permA1inc = (vector unsigned char) | const vector unsigned char permA1inc = (vector unsigned char) | ||||
| AVV(0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, | AVV(0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, | ||||
| 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); | |||||
| 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); | |||||
| const vector unsigned char permA2inc = (vector unsigned char) | const vector unsigned char permA2inc = (vector unsigned char) | ||||
| AVV(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, | AVV(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, | ||||
| 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); | |||||
| 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); | |||||
| const vector unsigned char magic = (vector unsigned char) | const vector unsigned char magic = (vector unsigned char) | ||||
| AVV(0x01, 0x02, 0x01, 0x02, 0x04, 0x02, 0x01, 0x02, | AVV(0x01, 0x02, 0x01, 0x02, 0x04, 0x02, 0x01, 0x02, | ||||
| 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); | |||||
| 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); | |||||
| const vector unsigned char extractPerm = (vector unsigned char) | const vector unsigned char extractPerm = (vector unsigned char) | ||||
| AVV(0x10, 0x10, 0x10, 0x01, 0x10, 0x10, 0x10, 0x01, | AVV(0x10, 0x10, 0x10, 0x01, 0x10, 0x10, 0x10, 0x01, | ||||
| 0x10, 0x10, 0x10, 0x01, 0x10, 0x10, 0x10, 0x01); | |||||
| 0x10, 0x10, 0x10, 0x01, 0x10, 0x10, 0x10, 0x01); | |||||
| const vector unsigned char extractPermInc = (vector unsigned char) | const vector unsigned char extractPermInc = (vector unsigned char) | ||||
| AVV(0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, | AVV(0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, | ||||
| 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01); | |||||
| 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01); | |||||
| const vector unsigned char identity = vec_lvsl(0,(unsigned char *)0); | const vector unsigned char identity = vec_lvsl(0,(unsigned char *)0); | ||||
| const vector unsigned char tenRight = (vector unsigned char) | const vector unsigned char tenRight = (vector unsigned char) | ||||
| AVV(0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | AVV(0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | ||||
| 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); | |||||
| 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); | |||||
| const vector unsigned char eightLeft = (vector unsigned char) | const vector unsigned char eightLeft = (vector unsigned char) | ||||
| AVV(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | AVV(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | ||||
| 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08); | |||||
| 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08); | |||||
| #define F_INIT(i) \ | |||||
| vector unsigned char tenRightM##i = tenRight; \ | |||||
| vector unsigned char permA1M##i = permA1; \ | |||||
| vector unsigned char permA2M##i = permA2; \ | |||||
| #define F_INIT(i) \ | |||||
| vector unsigned char tenRightM##i = tenRight; \ | |||||
| vector unsigned char permA1M##i = permA1; \ | |||||
| vector unsigned char permA2M##i = permA2; \ | |||||
| vector unsigned char extractPermM##i = extractPerm | vector unsigned char extractPermM##i = extractPerm | ||||
| #define F2(i, j, k, l) \ | |||||
| if (S[i] & (1 << (l+1))) { \ | |||||
| const vector unsigned char a_##j##_A##l = \ | |||||
| vec_perm(src##i, src##j, permA1M##i); \ | |||||
| const vector unsigned char a_##j##_B##l = \ | |||||
| vec_perm(a_##j##_A##l, src##k, permA2M##i); \ | |||||
| const vector signed int a_##j##_sump##l = \ | |||||
| (vector signed int)vec_msum(a_##j##_B##l, magic, \ | |||||
| (vector unsigned int)zero); \ | |||||
| vector signed int F_##j##_##l = \ | |||||
| vec_sr(vec_sums(a_##j##_sump##l, vsint32_8), vuint32_4); \ | |||||
| F_##j##_##l = vec_splat(F_##j##_##l, 3); \ | |||||
| const vector signed int p_##j##_##l = \ | |||||
| (vector signed int)vec_perm(src##j, \ | |||||
| (vector unsigned char)zero, \ | |||||
| extractPermM##i); \ | |||||
| const vector signed int sum_##j##_##l = vec_add( p_##j##_##l, vQP2); \ | |||||
| const vector signed int diff_##j##_##l = vec_sub( p_##j##_##l, vQP2); \ | |||||
| vector signed int newpm_##j##_##l; \ | |||||
| if (vec_all_lt(sum_##j##_##l, F_##j##_##l)) \ | |||||
| newpm_##j##_##l = sum_##j##_##l; \ | |||||
| else if (vec_all_gt(diff_##j##_##l, F_##j##_##l)) \ | |||||
| newpm_##j##_##l = diff_##j##_##l; \ | |||||
| else newpm_##j##_##l = F_##j##_##l; \ | |||||
| const vector unsigned char newpm2_##j##_##l = \ | |||||
| vec_splat((vector unsigned char)newpm_##j##_##l, 15); \ | |||||
| const vector unsigned char mask##j##l = vec_add(identity, \ | |||||
| tenRightM##i); \ | |||||
| src##j = vec_perm(src##j, newpm2_##j##_##l, mask##j##l); \ | |||||
| } \ | |||||
| permA1M##i = vec_add(permA1M##i, permA1inc); \ | |||||
| permA2M##i = vec_add(permA2M##i, permA2inc); \ | |||||
| tenRightM##i = vec_sro(tenRightM##i, eightLeft); \ | |||||
| #define F2(i, j, k, l) \ | |||||
| if (S[i] & (1 << (l+1))) { \ | |||||
| const vector unsigned char a_##j##_A##l = \ | |||||
| vec_perm(src##i, src##j, permA1M##i); \ | |||||
| const vector unsigned char a_##j##_B##l = \ | |||||
| vec_perm(a_##j##_A##l, src##k, permA2M##i); \ | |||||
| const vector signed int a_##j##_sump##l = \ | |||||
| (vector signed int)vec_msum(a_##j##_B##l, magic, \ | |||||
| (vector unsigned int)zero); \ | |||||
| vector signed int F_##j##_##l = \ | |||||
| vec_sr(vec_sums(a_##j##_sump##l, vsint32_8), vuint32_4); \ | |||||
| F_##j##_##l = vec_splat(F_##j##_##l, 3); \ | |||||
| const vector signed int p_##j##_##l = \ | |||||
| (vector signed int)vec_perm(src##j, \ | |||||
| (vector unsigned char)zero, \ | |||||
| extractPermM##i); \ | |||||
| const vector signed int sum_##j##_##l = vec_add( p_##j##_##l, vQP2);\ | |||||
| const vector signed int diff_##j##_##l = vec_sub( p_##j##_##l, vQP2);\ | |||||
| vector signed int newpm_##j##_##l; \ | |||||
| if (vec_all_lt(sum_##j##_##l, F_##j##_##l)) \ | |||||
| newpm_##j##_##l = sum_##j##_##l; \ | |||||
| else if (vec_all_gt(diff_##j##_##l, F_##j##_##l)) \ | |||||
| newpm_##j##_##l = diff_##j##_##l; \ | |||||
| else newpm_##j##_##l = F_##j##_##l; \ | |||||
| const vector unsigned char newpm2_##j##_##l = \ | |||||
| vec_splat((vector unsigned char)newpm_##j##_##l, 15); \ | |||||
| const vector unsigned char mask##j##l = vec_add(identity, \ | |||||
| tenRightM##i); \ | |||||
| src##j = vec_perm(src##j, newpm2_##j##_##l, mask##j##l); \ | |||||
| } \ | |||||
| permA1M##i = vec_add(permA1M##i, permA1inc); \ | |||||
| permA2M##i = vec_add(permA2M##i, permA2inc); \ | |||||
| tenRightM##i = vec_sro(tenRightM##i, eightLeft); \ | |||||
| extractPermM##i = vec_add(extractPermM##i, extractPermInc) | extractPermM##i = vec_add(extractPermM##i, extractPermInc) | ||||
| #define ITER(i, j, k) \ | |||||
| F_INIT(i); \ | |||||
| F2(i, j, k, 0); \ | |||||
| F2(i, j, k, 1); \ | |||||
| F2(i, j, k, 2); \ | |||||
| F2(i, j, k, 3); \ | |||||
| F2(i, j, k, 4); \ | |||||
| F2(i, j, k, 5); \ | |||||
| F2(i, j, k, 6); \ | |||||
| #define ITER(i, j, k) \ | |||||
| F_INIT(i); \ | |||||
| F2(i, j, k, 0); \ | |||||
| F2(i, j, k, 1); \ | |||||
| F2(i, j, k, 2); \ | |||||
| F2(i, j, k, 3); \ | |||||
| F2(i, j, k, 4); \ | |||||
| F2(i, j, k, 5); \ | |||||
| F2(i, j, k, 6); \ | |||||
| F2(i, j, k, 7) | F2(i, j, k, 7) | ||||
| ITER(0, 1, 2); | ITER(0, 1, 2); | ||||
| @@ -777,16 +777,16 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) { | |||||
| const vector signed char neg1 = vec_splat_s8(-1); | const vector signed char neg1 = vec_splat_s8(-1); | ||||
| #define STORE_LINE(i) \ | |||||
| const vector unsigned char permST##i = \ | |||||
| vec_lvsr(i * stride, srcCopy); \ | |||||
| const vector unsigned char maskST##i = \ | |||||
| vec_perm((vector unsigned char)zero, \ | |||||
| (vector unsigned char)neg1, permST##i); \ | |||||
| src##i = vec_perm(src##i ,src##i, permST##i); \ | |||||
| sA##i= vec_sel(sA##i, src##i, maskST##i); \ | |||||
| sB##i= vec_sel(src##i, sB##i, maskST##i); \ | |||||
| vec_st(sA##i, i * stride, srcCopy); \ | |||||
| #define STORE_LINE(i) \ | |||||
| const vector unsigned char permST##i = \ | |||||
| vec_lvsr(i * stride, srcCopy); \ | |||||
| const vector unsigned char maskST##i = \ | |||||
| vec_perm((vector unsigned char)zero, \ | |||||
| (vector unsigned char)neg1, permST##i); \ | |||||
| src##i = vec_perm(src##i ,src##i, permST##i); \ | |||||
| sA##i= vec_sel(sA##i, src##i, maskST##i); \ | |||||
| sB##i= vec_sel(src##i, sB##i, maskST##i); \ | |||||
| vec_st(sA##i, i * stride, srcCopy); \ | |||||
| vec_st(sB##i, i * stride + 16, srcCopy) | vec_st(sB##i, i * stride + 16, srcCopy) | ||||
| STORE_LINE(1); | STORE_LINE(1); | ||||
| @@ -808,7 +808,7 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) { | |||||
| #define do_a_deblock_altivec(a...) do_a_deblock_C(a) | #define do_a_deblock_altivec(a...) do_a_deblock_C(a) | ||||
| static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, | static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, | ||||
| uint8_t *tempBlured, uint32_t *tempBluredPast, int *maxNoise) | |||||
| uint8_t *tempBlured, uint32_t *tempBluredPast, int *maxNoise) | |||||
| { | { | ||||
| const vector signed int zero = vec_splat_s32(0); | const vector signed int zero = vec_splat_s32(0); | ||||
| const vector signed short vsint16_1 = vec_splat_s16(1); | const vector signed short vsint16_1 = vec_splat_s16(1); | ||||
| @@ -820,16 +820,16 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, | |||||
| tempBluredPast[128]= maxNoise[1]; | tempBluredPast[128]= maxNoise[1]; | ||||
| tempBluredPast[129]= maxNoise[2]; | tempBluredPast[129]= maxNoise[2]; | ||||
| #define LOAD_LINE(src, i) \ | |||||
| register int j##src##i = i * stride; \ | |||||
| vector unsigned char perm##src##i = vec_lvsl(j##src##i, src); \ | |||||
| const vector unsigned char v_##src##A1##i = vec_ld(j##src##i, src); \ | |||||
| #define LOAD_LINE(src, i) \ | |||||
| register int j##src##i = i * stride; \ | |||||
| vector unsigned char perm##src##i = vec_lvsl(j##src##i, src); \ | |||||
| const vector unsigned char v_##src##A1##i = vec_ld(j##src##i, src); \ | |||||
| const vector unsigned char v_##src##A2##i = vec_ld(j##src##i + 16, src); \ | const vector unsigned char v_##src##A2##i = vec_ld(j##src##i + 16, src); \ | ||||
| const vector unsigned char v_##src##A##i = \ | |||||
| vec_perm(v_##src##A1##i, v_##src##A2##i, perm##src##i); \ | |||||
| vector signed short v_##src##Ass##i = \ | |||||
| (vector signed short)vec_mergeh((vector signed char)zero, \ | |||||
| (vector signed char)v_##src##A##i) | |||||
| const vector unsigned char v_##src##A##i = \ | |||||
| vec_perm(v_##src##A1##i, v_##src##A2##i, perm##src##i); \ | |||||
| vector signed short v_##src##Ass##i = \ | |||||
| (vector signed short)vec_mergeh((vector signed char)zero, \ | |||||
| (vector signed char)v_##src##A##i) | |||||
| LOAD_LINE(src, 0); | LOAD_LINE(src, 0); | ||||
| LOAD_LINE(src, 1); | LOAD_LINE(src, 1); | ||||
| @@ -850,10 +850,10 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, | |||||
| LOAD_LINE(tempBlured, 7); | LOAD_LINE(tempBlured, 7); | ||||
| #undef LOAD_LINE | #undef LOAD_LINE | ||||
| #define ACCUMULATE_DIFFS(i) \ | |||||
| vector signed short v_d##i = vec_sub(v_tempBluredAss##i, \ | |||||
| v_srcAss##i); \ | |||||
| v_dp = vec_msums(v_d##i, v_d##i, v_dp); \ | |||||
| #define ACCUMULATE_DIFFS(i) \ | |||||
| vector signed short v_d##i = vec_sub(v_tempBluredAss##i, \ | |||||
| v_srcAss##i); \ | |||||
| v_dp = vec_msums(v_d##i, v_d##i, v_dp); \ | |||||
| v_sysdp = vec_msums(v_d##i, vsint16_1, v_sysdp) | v_sysdp = vec_msums(v_d##i, vsint16_1, v_sysdp) | ||||
| ACCUMULATE_DIFFS(0); | ACCUMULATE_DIFFS(0); | ||||
| @@ -916,12 +916,12 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, | |||||
| const vector signed short vsint16_4 = vec_splat_s16(4); | const vector signed short vsint16_4 = vec_splat_s16(4); | ||||
| const vector unsigned short vuint16_3 = vec_splat_u16(3); | const vector unsigned short vuint16_3 = vec_splat_u16(3); | ||||
| #define OP(i) \ | |||||
| const vector signed short v_temp##i = \ | |||||
| vec_mladd(v_tempBluredAss##i, \ | |||||
| vsint16_7, v_srcAss##i); \ | |||||
| const vector signed short v_temp2##i = \ | |||||
| vec_add(v_temp##i, vsint16_4); \ | |||||
| #define OP(i) \ | |||||
| const vector signed short v_temp##i = \ | |||||
| vec_mladd(v_tempBluredAss##i, \ | |||||
| vsint16_7, v_srcAss##i); \ | |||||
| const vector signed short v_temp2##i = \ | |||||
| vec_add(v_temp##i, vsint16_4); \ | |||||
| v_tempBluredAss##i = vec_sr(v_temp2##i, vuint16_3) | v_tempBluredAss##i = vec_sr(v_temp2##i, vuint16_3) | ||||
| OP(0); | OP(0); | ||||
| @@ -937,12 +937,12 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, | |||||
| const vector signed short vsint16_3 = vec_splat_s16(3); | const vector signed short vsint16_3 = vec_splat_s16(3); | ||||
| const vector signed short vsint16_2 = vec_splat_s16(2); | const vector signed short vsint16_2 = vec_splat_s16(2); | ||||
| #define OP(i) \ | |||||
| const vector signed short v_temp##i = \ | |||||
| vec_mladd(v_tempBluredAss##i, \ | |||||
| vsint16_3, v_srcAss##i); \ | |||||
| const vector signed short v_temp2##i = \ | |||||
| vec_add(v_temp##i, vsint16_2); \ | |||||
| #define OP(i) \ | |||||
| const vector signed short v_temp##i = \ | |||||
| vec_mladd(v_tempBluredAss##i, \ | |||||
| vsint16_3, v_srcAss##i); \ | |||||
| const vector signed short v_temp2##i = \ | |||||
| vec_add(v_temp##i, vsint16_2); \ | |||||
| v_tempBluredAss##i = vec_sr(v_temp2##i, (vector unsigned short)vsint16_2) | v_tempBluredAss##i = vec_sr(v_temp2##i, (vector unsigned short)vsint16_2) | ||||
| OP(0); | OP(0); | ||||
| @@ -959,24 +959,24 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, | |||||
| const vector signed char neg1 = vec_splat_s8(-1); | const vector signed char neg1 = vec_splat_s8(-1); | ||||
| const vector unsigned char permHH = (const vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, | const vector unsigned char permHH = (const vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, | ||||
| 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F); | |||||
| #define PACK_AND_STORE(src, i) \ | |||||
| const vector unsigned char perms##src##i = \ | |||||
| vec_lvsr(i * stride, src); \ | |||||
| const vector unsigned char vf##src##i = \ | |||||
| vec_packsu(v_tempBluredAss##i, (vector signed short)zero); \ | |||||
| const vector unsigned char vg##src##i = \ | |||||
| vec_perm(vf##src##i, v_##src##A##i, permHH); \ | |||||
| const vector unsigned char mask##src##i = \ | |||||
| 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F); | |||||
| #define PACK_AND_STORE(src, i) \ | |||||
| const vector unsigned char perms##src##i = \ | |||||
| vec_lvsr(i * stride, src); \ | |||||
| const vector unsigned char vf##src##i = \ | |||||
| vec_packsu(v_tempBluredAss##i, (vector signed short)zero); \ | |||||
| const vector unsigned char vg##src##i = \ | |||||
| vec_perm(vf##src##i, v_##src##A##i, permHH); \ | |||||
| const vector unsigned char mask##src##i = \ | |||||
| vec_perm((vector unsigned char)zero, (vector unsigned char)neg1, perms##src##i); \ | vec_perm((vector unsigned char)zero, (vector unsigned char)neg1, perms##src##i); \ | ||||
| const vector unsigned char vg2##src##i = \ | |||||
| vec_perm(vg##src##i, vg##src##i, perms##src##i); \ | |||||
| const vector unsigned char svA##src##i = \ | |||||
| vec_sel(v_##src##A1##i, vg2##src##i, mask##src##i); \ | |||||
| const vector unsigned char svB##src##i = \ | |||||
| vec_sel(vg2##src##i, v_##src##A2##i, mask##src##i); \ | |||||
| vec_st(svA##src##i, i * stride, src); \ | |||||
| const vector unsigned char vg2##src##i = \ | |||||
| vec_perm(vg##src##i, vg##src##i, perms##src##i); \ | |||||
| const vector unsigned char svA##src##i = \ | |||||
| vec_sel(v_##src##A1##i, vg2##src##i, mask##src##i); \ | |||||
| const vector unsigned char svB##src##i = \ | |||||
| vec_sel(vg2##src##i, v_##src##A2##i, mask##src##i); \ | |||||
| vec_st(svA##src##i, i * stride, src); \ | |||||
| vec_st(svB##src##i, i * stride + 16, src) | vec_st(svB##src##i, i * stride + 16, src) | ||||
| PACK_AND_STORE(src, 0); | PACK_AND_STORE(src, 0); | ||||
| @@ -1001,14 +1001,14 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, | |||||
| static inline void transpose_16x8_char_toPackedAlign_altivec(unsigned char* dst, unsigned char* src, int stride) { | static inline void transpose_16x8_char_toPackedAlign_altivec(unsigned char* dst, unsigned char* src, int stride) { | ||||
| const vector unsigned char zero = vec_splat_u8(0); | const vector unsigned char zero = vec_splat_u8(0); | ||||
| #define LOAD_DOUBLE_LINE(i, j) \ | |||||
| vector unsigned char perm1##i = vec_lvsl(i * stride, src); \ | |||||
| vector unsigned char perm2##i = vec_lvsl(j * stride, src); \ | |||||
| vector unsigned char srcA##i = vec_ld(i * stride, src); \ | |||||
| #define LOAD_DOUBLE_LINE(i, j) \ | |||||
| vector unsigned char perm1##i = vec_lvsl(i * stride, src); \ | |||||
| vector unsigned char perm2##i = vec_lvsl(j * stride, src); \ | |||||
| vector unsigned char srcA##i = vec_ld(i * stride, src); \ | |||||
| vector unsigned char srcB##i = vec_ld(i * stride + 16, src); \ | vector unsigned char srcB##i = vec_ld(i * stride + 16, src); \ | ||||
| vector unsigned char srcC##i = vec_ld(j * stride, src); \ | |||||
| vector unsigned char srcC##i = vec_ld(j * stride, src); \ | |||||
| vector unsigned char srcD##i = vec_ld(j * stride+ 16, src); \ | vector unsigned char srcD##i = vec_ld(j * stride+ 16, src); \ | ||||
| vector unsigned char src##i = vec_perm(srcA##i, srcB##i, perm1##i); \ | |||||
| vector unsigned char src##i = vec_perm(srcA##i, srcB##i, perm1##i); \ | |||||
| vector unsigned char src##j = vec_perm(srcC##i, srcD##i, perm2##i) | vector unsigned char src##j = vec_perm(srcC##i, srcD##i, perm2##i) | ||||
| LOAD_DOUBLE_LINE(0, 1); | LOAD_DOUBLE_LINE(0, 1); | ||||
| @@ -1107,10 +1107,10 @@ static inline void transpose_8x16_char_fromPackedAlign_altivec(unsigned char* ds | |||||
| const vector unsigned char zero = vec_splat_u8(0); | const vector unsigned char zero = vec_splat_u8(0); | ||||
| const vector unsigned char magic_perm = (const vector unsigned char) | const vector unsigned char magic_perm = (const vector unsigned char) | ||||
| AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, | AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, | ||||
| 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F); | |||||
| 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F); | |||||
| #define LOAD_DOUBLE_LINE(i, j) \ | |||||
| vector unsigned char src##i = vec_ld(i * 16, src); \ | |||||
| #define LOAD_DOUBLE_LINE(i, j) \ | |||||
| vector unsigned char src##i = vec_ld(i * 16, src); \ | |||||
| vector unsigned char src##j = vec_ld(j * 16, src) | vector unsigned char src##j = vec_ld(j * 16, src) | ||||
| LOAD_DOUBLE_LINE(0, 1); | LOAD_DOUBLE_LINE(0, 1); | ||||
| @@ -1169,24 +1169,24 @@ static inline void transpose_8x16_char_fromPackedAlign_altivec(unsigned char* ds | |||||
| const vector signed char neg1 = vec_splat_s8(-1); | const vector signed char neg1 = vec_splat_s8(-1); | ||||
| #define STORE_DOUBLE_LINE(i, j) \ | |||||
| vector unsigned char dstA##i = vec_ld(i * stride, dst); \ | |||||
| vector unsigned char dstB##i = vec_ld(i * stride + 16, dst); \ | |||||
| vector unsigned char dstA##j = vec_ld(j * stride, dst); \ | |||||
| vector unsigned char dstB##j = vec_ld(j * stride+ 16, dst); \ | |||||
| vector unsigned char align##i = vec_lvsr(i * stride, dst); \ | |||||
| vector unsigned char align##j = vec_lvsr(j * stride, dst); \ | |||||
| vector unsigned char mask##i = vec_perm(zero, (vector unsigned char)neg1, align##i); \ | |||||
| vector unsigned char mask##j = vec_perm(zero, (vector unsigned char)neg1, align##j); \ | |||||
| vector unsigned char dstR##i = vec_perm(temp##i, temp##i, align##i); \ | |||||
| vector unsigned char dstR##j = vec_perm(temp##j, temp##j, align##j); \ | |||||
| vector unsigned char dstAF##i = vec_sel(dstA##i, dstR##i, mask##i); \ | |||||
| vector unsigned char dstBF##i = vec_sel(dstR##i, dstB##i, mask##i); \ | |||||
| vector unsigned char dstAF##j = vec_sel(dstA##j, dstR##j, mask##j); \ | |||||
| vector unsigned char dstBF##j = vec_sel(dstR##j, dstB##j, mask##j); \ | |||||
| vec_st(dstAF##i, i * stride, dst); \ | |||||
| vec_st(dstBF##i, i * stride + 16, dst); \ | |||||
| vec_st(dstAF##j, j * stride, dst); \ | |||||
| #define STORE_DOUBLE_LINE(i, j) \ | |||||
| vector unsigned char dstA##i = vec_ld(i * stride, dst); \ | |||||
| vector unsigned char dstB##i = vec_ld(i * stride + 16, dst); \ | |||||
| vector unsigned char dstA##j = vec_ld(j * stride, dst); \ | |||||
| vector unsigned char dstB##j = vec_ld(j * stride+ 16, dst); \ | |||||
| vector unsigned char align##i = vec_lvsr(i * stride, dst); \ | |||||
| vector unsigned char align##j = vec_lvsr(j * stride, dst); \ | |||||
| vector unsigned char mask##i = vec_perm(zero, (vector unsigned char)neg1, align##i); \ | |||||
| vector unsigned char mask##j = vec_perm(zero, (vector unsigned char)neg1, align##j); \ | |||||
| vector unsigned char dstR##i = vec_perm(temp##i, temp##i, align##i); \ | |||||
| vector unsigned char dstR##j = vec_perm(temp##j, temp##j, align##j); \ | |||||
| vector unsigned char dstAF##i = vec_sel(dstA##i, dstR##i, mask##i); \ | |||||
| vector unsigned char dstBF##i = vec_sel(dstR##i, dstB##i, mask##i); \ | |||||
| vector unsigned char dstAF##j = vec_sel(dstA##j, dstR##j, mask##j); \ | |||||
| vector unsigned char dstBF##j = vec_sel(dstR##j, dstB##j, mask##j); \ | |||||
| vec_st(dstAF##i, i * stride, dst); \ | |||||
| vec_st(dstBF##i, i * stride + 16, dst); \ | |||||
| vec_st(dstAF##j, j * stride, dst); \ | |||||
| vec_st(dstBF##j, j * stride + 16, dst) | vec_st(dstBF##j, j * stride + 16, dst) | ||||
| STORE_DOUBLE_LINE(0,1); | STORE_DOUBLE_LINE(0,1); | ||||
| @@ -21,42 +21,42 @@ | |||||
| * internal api header. | * internal api header. | ||||
| */ | */ | ||||
| #define V_DEBLOCK 0x01 | |||||
| #define H_DEBLOCK 0x02 | |||||
| #define DERING 0x04 | |||||
| #define LEVEL_FIX 0x08 ///< Brightness & Contrast | |||||
| #define LUM_V_DEBLOCK V_DEBLOCK // 1 | |||||
| #define LUM_H_DEBLOCK H_DEBLOCK // 2 | |||||
| #define CHROM_V_DEBLOCK (V_DEBLOCK<<4) // 16 | |||||
| #define CHROM_H_DEBLOCK (H_DEBLOCK<<4) // 32 | |||||
| #define LUM_DERING DERING // 4 | |||||
| #define CHROM_DERING (DERING<<4) // 64 | |||||
| #define LUM_LEVEL_FIX LEVEL_FIX // 8 | |||||
| #define CHROM_LEVEL_FIX (LEVEL_FIX<<4) // 128 (not implemented yet) | |||||
| #define V_DEBLOCK 0x01 | |||||
| #define H_DEBLOCK 0x02 | |||||
| #define DERING 0x04 | |||||
| #define LEVEL_FIX 0x08 ///< Brightness & Contrast | |||||
| #define LUM_V_DEBLOCK V_DEBLOCK // 1 | |||||
| #define LUM_H_DEBLOCK H_DEBLOCK // 2 | |||||
| #define CHROM_V_DEBLOCK (V_DEBLOCK<<4) // 16 | |||||
| #define CHROM_H_DEBLOCK (H_DEBLOCK<<4) // 32 | |||||
| #define LUM_DERING DERING // 4 | |||||
| #define CHROM_DERING (DERING<<4) // 64 | |||||
| #define LUM_LEVEL_FIX LEVEL_FIX // 8 | |||||
| #define CHROM_LEVEL_FIX (LEVEL_FIX<<4) // 128 (not implemented yet) | |||||
| // Experimental vertical filters | // Experimental vertical filters | ||||
| #define V_X1_FILTER 0x0200 // 512 | |||||
| #define V_A_DEBLOCK 0x0400 | |||||
| #define V_X1_FILTER 0x0200 // 512 | |||||
| #define V_A_DEBLOCK 0x0400 | |||||
| // Experimental horizontal filters | // Experimental horizontal filters | ||||
| #define H_X1_FILTER 0x2000 // 8192 | |||||
| #define H_A_DEBLOCK 0x4000 | |||||
| #define H_X1_FILTER 0x2000 // 8192 | |||||
| #define H_A_DEBLOCK 0x4000 | |||||
| /// select between full y range (255-0) or standart one (234-16) | /// select between full y range (255-0) or standart one (234-16) | ||||
| #define FULL_Y_RANGE 0x8000 // 32768 | |||||
| #define FULL_Y_RANGE 0x8000 // 32768 | |||||
| //Deinterlacing Filters | //Deinterlacing Filters | ||||
| #define LINEAR_IPOL_DEINT_FILTER 0x10000 // 65536 | |||||
| #define LINEAR_BLEND_DEINT_FILTER 0x20000 // 131072 | |||||
| #define CUBIC_BLEND_DEINT_FILTER 0x8000 // (not implemented yet) | |||||
| #define CUBIC_IPOL_DEINT_FILTER 0x40000 // 262144 | |||||
| #define MEDIAN_DEINT_FILTER 0x80000 // 524288 | |||||
| #define FFMPEG_DEINT_FILTER 0x400000 | |||||
| #define LOWPASS5_DEINT_FILTER 0x800000 | |||||
| #define LINEAR_IPOL_DEINT_FILTER 0x10000 // 65536 | |||||
| #define LINEAR_BLEND_DEINT_FILTER 0x20000 // 131072 | |||||
| #define CUBIC_BLEND_DEINT_FILTER 0x8000 // (not implemented yet) | |||||
| #define CUBIC_IPOL_DEINT_FILTER 0x40000 // 262144 | |||||
| #define MEDIAN_DEINT_FILTER 0x80000 // 524288 | |||||
| #define FFMPEG_DEINT_FILTER 0x400000 | |||||
| #define LOWPASS5_DEINT_FILTER 0x800000 | |||||
| #define TEMP_NOISE_FILTER 0x100000 | |||||
| #define FORCE_QUANT 0x200000 | |||||
| #define TEMP_NOISE_FILTER 0x100000 | |||||
| #define FORCE_QUANT 0x200000 | |||||
| //use if u want a faster postprocessing code | //use if u want a faster postprocessing code | ||||
| //cant differentiate between chroma & luma filters (both on or both off) | //cant differentiate between chroma & luma filters (both on or both off) | ||||
| @@ -66,8 +66,8 @@ | |||||
| #if 1 | #if 1 | ||||
| static inline int CLIP(int a){ | static inline int CLIP(int a){ | ||||
| if(a&256) return ((a)>>31)^(-1); | |||||
| else return a; | |||||
| if(a&256) return ((a)>>31)^(-1); | |||||
| else return a; | |||||
| } | } | ||||
| //#define CLIP(a) (((a)&256) ? ((a)>>31)^(-1) : (a)) | //#define CLIP(a) (((a)&256) ? ((a)>>31)^(-1) : (a)) | ||||
| #elif 0 | #elif 0 | ||||
| @@ -79,92 +79,92 @@ static inline int CLIP(int a){ | |||||
| * Postprocessng filter. | * Postprocessng filter. | ||||
| */ | */ | ||||
| struct PPFilter{ | struct PPFilter{ | ||||
| char *shortName; | |||||
| char *longName; | |||||
| int chromDefault; ///< is chrominance filtering on by default if this filter is manually activated | |||||
| int minLumQuality; ///< minimum quality to turn luminance filtering on | |||||
| int minChromQuality; ///< minimum quality to turn chrominance filtering on | |||||
| int mask; ///< Bitmask to turn this filter on | |||||
| char *shortName; | |||||
| char *longName; | |||||
| int chromDefault; ///< is chrominance filtering on by default if this filter is manually activated | |||||
| int minLumQuality; ///< minimum quality to turn luminance filtering on | |||||
| int minChromQuality; ///< minimum quality to turn chrominance filtering on | |||||
| int mask; ///< Bitmask to turn this filter on | |||||
| }; | }; | ||||
| /** | /** | ||||
| * Postprocessng mode. | * Postprocessng mode. | ||||
| */ | */ | ||||
| typedef struct PPMode{ | typedef struct PPMode{ | ||||
| int lumMode; ///< acivates filters for luminance | |||||
| int chromMode; ///< acivates filters for chrominance | |||||
| int error; ///< non zero on error | |||||
| int lumMode; ///< acivates filters for luminance | |||||
| int chromMode; ///< acivates filters for chrominance | |||||
| int error; ///< non zero on error | |||||
| int minAllowedY; ///< for brigtness correction | |||||
| int maxAllowedY; ///< for brihtness correction | |||||
| float maxClippedThreshold; ///< amount of "black" u r willing to loose to get a brightness corrected picture | |||||
| int minAllowedY; ///< for brigtness correction | |||||
| int maxAllowedY; ///< for brihtness correction | |||||
| float maxClippedThreshold; ///< amount of "black" u r willing to loose to get a brightness corrected picture | |||||
| int maxTmpNoise[3]; ///< for Temporal Noise Reducing filter (Maximal sum of abs differences) | |||||
| int maxTmpNoise[3]; ///< for Temporal Noise Reducing filter (Maximal sum of abs differences) | |||||
| int baseDcDiff; | |||||
| int flatnessThreshold; | |||||
| int baseDcDiff; | |||||
| int flatnessThreshold; | |||||
| int forcedQuant; ///< quantizer if FORCE_QUANT is used | |||||
| int forcedQuant; ///< quantizer if FORCE_QUANT is used | |||||
| } PPMode; | } PPMode; | ||||
| /** | /** | ||||
| * postprocess context. | * postprocess context. | ||||
| */ | */ | ||||
| typedef struct PPContext{ | typedef struct PPContext{ | ||||
| uint8_t *tempBlocks; ///<used for the horizontal code | |||||
| uint8_t *tempBlocks; ///<used for the horizontal code | |||||
| /** | |||||
| * luma histogram. | |||||
| * we need 64bit here otherwise we'll going to have a problem | |||||
| * after watching a black picture for 5 hours | |||||
| */ | |||||
| uint64_t *yHistogram; | |||||
| /** | |||||
| * luma histogram. | |||||
| * we need 64bit here otherwise we'll going to have a problem | |||||
| * after watching a black picture for 5 hours | |||||
| */ | |||||
| uint64_t *yHistogram; | |||||
| uint64_t __attribute__((aligned(8))) packedYOffset; | |||||
| uint64_t __attribute__((aligned(8))) packedYScale; | |||||
| uint64_t __attribute__((aligned(8))) packedYOffset; | |||||
| uint64_t __attribute__((aligned(8))) packedYScale; | |||||
| /** Temporal noise reducing buffers */ | |||||
| uint8_t *tempBlured[3]; | |||||
| int32_t *tempBluredPast[3]; | |||||
| /** Temporal noise reducing buffers */ | |||||
| uint8_t *tempBlured[3]; | |||||
| int32_t *tempBluredPast[3]; | |||||
| /** Temporary buffers for handling the last row(s) */ | |||||
| uint8_t *tempDst; | |||||
| uint8_t *tempSrc; | |||||
| /** Temporary buffers for handling the last row(s) */ | |||||
| uint8_t *tempDst; | |||||
| uint8_t *tempSrc; | |||||
| uint8_t *deintTemp; | |||||
| uint8_t *deintTemp; | |||||
| uint64_t __attribute__((aligned(8))) pQPb; | |||||
| uint64_t __attribute__((aligned(8))) pQPb2; | |||||
| uint64_t __attribute__((aligned(8))) pQPb; | |||||
| uint64_t __attribute__((aligned(8))) pQPb2; | |||||
| uint64_t __attribute__((aligned(8))) mmxDcOffset[64]; | |||||
| uint64_t __attribute__((aligned(8))) mmxDcThreshold[64]; | |||||
| uint64_t __attribute__((aligned(8))) mmxDcOffset[64]; | |||||
| uint64_t __attribute__((aligned(8))) mmxDcThreshold[64]; | |||||
| QP_STORE_T *stdQPTable; ///< used to fix MPEG2 style qscale | |||||
| QP_STORE_T *nonBQPTable; | |||||
| QP_STORE_T *forcedQPTable; | |||||
| QP_STORE_T *stdQPTable; ///< used to fix MPEG2 style qscale | |||||
| QP_STORE_T *nonBQPTable; | |||||
| QP_STORE_T *forcedQPTable; | |||||
| int QP; | |||||
| int nonBQP; | |||||
| int QP; | |||||
| int nonBQP; | |||||
| int frameNum; | |||||
| int frameNum; | |||||
| int cpuCaps; | |||||
| int cpuCaps; | |||||
| int qpStride; ///<size of qp buffers (needed to realloc them if needed) | |||||
| int stride; ///<size of some buffers (needed to realloc them if needed) | |||||
| int qpStride; ///<size of qp buffers (needed to realloc them if needed) | |||||
| int stride; ///<size of some buffers (needed to realloc them if needed) | |||||
| int hChromaSubSample; | |||||
| int vChromaSubSample; | |||||
| int hChromaSubSample; | |||||
| int vChromaSubSample; | |||||
| PPMode ppMode; | |||||
| PPMode ppMode; | |||||
| } PPContext; | } PPContext; | ||||
| static inline void linecpy(void *dest, void *src, int lines, int stride) | static inline void linecpy(void *dest, void *src, int lines, int stride) | ||||
| { | { | ||||
| if (stride > 0) { | |||||
| memcpy(dest, src, lines*stride); | |||||
| } else { | |||||
| memcpy(dest+(lines-1)*stride, src+(lines-1)*stride, -lines*stride); | |||||
| } | |||||
| if (stride > 0) { | |||||
| memcpy(dest, src, lines*stride); | |||||
| } else { | |||||
| memcpy(dest+(lines-1)*stride, src+(lines-1)*stride, -lines*stride); | |||||
| } | |||||
| } | } | ||||
| @@ -64,8 +64,8 @@ void *av_malloc(unsigned int size) | |||||
| Indeed, we should align it: | Indeed, we should align it: | ||||
| on 4 for 386 | on 4 for 386 | ||||
| on 16 for 486 | on 16 for 486 | ||||
| on 32 for 586, PPro - k6-III | |||||
| on 64 for K7 (maybe for P3 too). | |||||
| on 32 for 586, PPro - k6-III | |||||
| on 64 for K7 (maybe for P3 too). | |||||
| Because L1 and L2 caches are aligned on those values. | Because L1 and L2 caches are aligned on those values. | ||||
| But I don't want to code such logic here! | But I don't want to code such logic here! | ||||
| */ | */ | ||||
| @@ -76,13 +76,13 @@ void *av_malloc(unsigned int size) | |||||
| Why not larger? because i didnt see a difference in benchmarks ... | Why not larger? because i didnt see a difference in benchmarks ... | ||||
| */ | */ | ||||
| /* benchmarks with p3 | /* benchmarks with p3 | ||||
| memalign(64)+1 3071,3051,3032 | |||||
| memalign(64)+2 3051,3032,3041 | |||||
| memalign(64)+4 2911,2896,2915 | |||||
| memalign(64)+8 2545,2554,2550 | |||||
| memalign(64)+16 2543,2572,2563 | |||||
| memalign(64)+32 2546,2545,2571 | |||||
| memalign(64)+64 2570,2533,2558 | |||||
| memalign(64)+1 3071,3051,3032 | |||||
| memalign(64)+2 3051,3032,3041 | |||||
| memalign(64)+4 2911,2896,2915 | |||||
| memalign(64)+8 2545,2554,2550 | |||||
| memalign(64)+16 2543,2572,2563 | |||||
| memalign(64)+32 2546,2545,2571 | |||||
| memalign(64)+64 2570,2533,2558 | |||||
| btw, malloc seems to do 8 byte alignment by default here | btw, malloc seems to do 8 byte alignment by default here | ||||
| */ | */ | ||||
| @@ -54,26 +54,26 @@ typedef struct MJpegContext { | |||||
| /* JPEG marker codes */ | /* JPEG marker codes */ | ||||
| typedef enum { | typedef enum { | ||||
| /* start of frame */ | /* start of frame */ | ||||
| SOF0 = 0xc0, /* baseline */ | |||||
| SOF1 = 0xc1, /* extended sequential, huffman */ | |||||
| SOF2 = 0xc2, /* progressive, huffman */ | |||||
| SOF3 = 0xc3, /* lossless, huffman */ | |||||
| SOF0 = 0xc0, /* baseline */ | |||||
| SOF1 = 0xc1, /* extended sequential, huffman */ | |||||
| SOF2 = 0xc2, /* progressive, huffman */ | |||||
| SOF3 = 0xc3, /* lossless, huffman */ | |||||
| SOF5 = 0xc5, /* differential sequential, huffman */ | |||||
| SOF6 = 0xc6, /* differential progressive, huffman */ | |||||
| SOF7 = 0xc7, /* differential lossless, huffman */ | |||||
| JPG = 0xc8, /* reserved for JPEG extension */ | |||||
| SOF9 = 0xc9, /* extended sequential, arithmetic */ | |||||
| SOF10 = 0xca, /* progressive, arithmetic */ | |||||
| SOF11 = 0xcb, /* lossless, arithmetic */ | |||||
| SOF5 = 0xc5, /* differential sequential, huffman */ | |||||
| SOF6 = 0xc6, /* differential progressive, huffman */ | |||||
| SOF7 = 0xc7, /* differential lossless, huffman */ | |||||
| JPG = 0xc8, /* reserved for JPEG extension */ | |||||
| SOF9 = 0xc9, /* extended sequential, arithmetic */ | |||||
| SOF10 = 0xca, /* progressive, arithmetic */ | |||||
| SOF11 = 0xcb, /* lossless, arithmetic */ | |||||
| SOF13 = 0xcd, /* differential sequential, arithmetic */ | |||||
| SOF14 = 0xce, /* differential progressive, arithmetic */ | |||||
| SOF15 = 0xcf, /* differential lossless, arithmetic */ | |||||
| SOF13 = 0xcd, /* differential sequential, arithmetic */ | |||||
| SOF14 = 0xce, /* differential progressive, arithmetic */ | |||||
| SOF15 = 0xcf, /* differential lossless, arithmetic */ | |||||
| DHT = 0xc4, /* define huffman tables */ | |||||
| DHT = 0xc4, /* define huffman tables */ | |||||
| DAC = 0xcc, /* define arithmetic-coding conditioning */ | |||||
| DAC = 0xcc, /* define arithmetic-coding conditioning */ | |||||
| /* restart with modulo 8 count "m" */ | /* restart with modulo 8 count "m" */ | ||||
| RST0 = 0xd0, | RST0 = 0xd0, | ||||
| @@ -85,14 +85,14 @@ typedef enum { | |||||
| RST6 = 0xd6, | RST6 = 0xd6, | ||||
| RST7 = 0xd7, | RST7 = 0xd7, | ||||
| SOI = 0xd8, /* start of image */ | |||||
| EOI = 0xd9, /* end of image */ | |||||
| SOS = 0xda, /* start of scan */ | |||||
| DQT = 0xdb, /* define quantization tables */ | |||||
| DNL = 0xdc, /* define number of lines */ | |||||
| DRI = 0xdd, /* define restart interval */ | |||||
| DHP = 0xde, /* define hierarchical progression */ | |||||
| EXP = 0xdf, /* expand reference components */ | |||||
| SOI = 0xd8, /* start of image */ | |||||
| EOI = 0xd9, /* end of image */ | |||||
| SOS = 0xda, /* start of scan */ | |||||
| DQT = 0xdb, /* define quantization tables */ | |||||
| DNL = 0xdc, /* define number of lines */ | |||||
| DRI = 0xdd, /* define restart interval */ | |||||
| DHP = 0xde, /* define hierarchical progression */ | |||||
| EXP = 0xdf, /* expand reference components */ | |||||
| APP0 = 0xe0, | APP0 = 0xe0, | ||||
| APP1 = 0xe1, | APP1 = 0xe1, | ||||
| @@ -118,17 +118,17 @@ typedef enum { | |||||
| JPG4 = 0xf4, | JPG4 = 0xf4, | ||||
| JPG5 = 0xf5, | JPG5 = 0xf5, | ||||
| JPG6 = 0xf6, | JPG6 = 0xf6, | ||||
| SOF48 = 0xf7, ///< JPEG-LS | |||||
| LSE = 0xf8, ///< JPEG-LS extension parameters | |||||
| SOF48 = 0xf7, ///< JPEG-LS | |||||
| LSE = 0xf8, ///< JPEG-LS extension parameters | |||||
| JPG9 = 0xf9, | JPG9 = 0xf9, | ||||
| JPG10 = 0xfa, | JPG10 = 0xfa, | ||||
| JPG11 = 0xfb, | JPG11 = 0xfb, | ||||
| JPG12 = 0xfc, | JPG12 = 0xfc, | ||||
| JPG13 = 0xfd, | JPG13 = 0xfd, | ||||
| COM = 0xfe, /* comment */ | |||||
| COM = 0xfe, /* comment */ | |||||
| TEM = 0x01, /* temporary private use for arithmetic coding */ | |||||
| TEM = 0x01, /* temporary private use for arithmetic coding */ | |||||
| /* 0x02 -> 0xbf reserved */ | /* 0x02 -> 0xbf reserved */ | ||||
| } JPEG_MARKER; | } JPEG_MARKER; | ||||
| @@ -583,7 +583,7 @@ void mjpeg_picture_trailer(MpegEncContext *s) | |||||
| } | } | ||||
| static inline void mjpeg_encode_dc(MpegEncContext *s, int val, | static inline void mjpeg_encode_dc(MpegEncContext *s, int val, | ||||
| uint8_t *huff_size, uint16_t *huff_code) | |||||
| uint8_t *huff_size, uint16_t *huff_code) | |||||
| { | { | ||||
| int mant, nbits; | int mant, nbits; | ||||
| @@ -935,10 +935,10 @@ static int mjpeg_decode_init(AVCodecContext *avctx) | |||||
| if (avctx->flags & CODEC_FLAG_EXTERN_HUFF) | if (avctx->flags & CODEC_FLAG_EXTERN_HUFF) | ||||
| { | { | ||||
| av_log(avctx, AV_LOG_INFO, "mjpeg: using external huffman table\n"); | |||||
| init_get_bits(&s->gb, avctx->extradata, avctx->extradata_size*8); | |||||
| mjpeg_decode_dht(s); | |||||
| /* should check for error - but dunno */ | |||||
| av_log(avctx, AV_LOG_INFO, "mjpeg: using external huffman table\n"); | |||||
| init_get_bits(&s->gb, avctx->extradata, avctx->extradata_size*8); | |||||
| mjpeg_decode_dht(s); | |||||
| /* should check for error - but dunno */ | |||||
| } | } | ||||
| return 0; | return 0; | ||||
| @@ -1017,10 +1017,10 @@ static int mjpeg_decode_dqt(MJpegDecodeContext *s) | |||||
| while (len >= 65) { | while (len >= 65) { | ||||
| /* only 8 bit precision handled */ | /* only 8 bit precision handled */ | ||||
| if (get_bits(&s->gb, 4) != 0) | if (get_bits(&s->gb, 4) != 0) | ||||
| { | |||||
| dprintf("dqt: 16bit precision\n"); | |||||
| { | |||||
| dprintf("dqt: 16bit precision\n"); | |||||
| return -1; | return -1; | ||||
| } | |||||
| } | |||||
| index = get_bits(&s->gb, 4); | index = get_bits(&s->gb, 4); | ||||
| if (index >= 4) | if (index >= 4) | ||||
| return -1; | return -1; | ||||
| @@ -1028,14 +1028,14 @@ static int mjpeg_decode_dqt(MJpegDecodeContext *s) | |||||
| /* read quant table */ | /* read quant table */ | ||||
| for(i=0;i<64;i++) { | for(i=0;i<64;i++) { | ||||
| j = s->scantable.permutated[i]; | j = s->scantable.permutated[i]; | ||||
| s->quant_matrixes[index][j] = get_bits(&s->gb, 8); | |||||
| s->quant_matrixes[index][j] = get_bits(&s->gb, 8); | |||||
| } | } | ||||
| //XXX FIXME finetune, and perhaps add dc too | //XXX FIXME finetune, and perhaps add dc too | ||||
| s->qscale[index]= FFMAX( | s->qscale[index]= FFMAX( | ||||
| s->quant_matrixes[index][s->scantable.permutated[1]], | s->quant_matrixes[index][s->scantable.permutated[1]], | ||||
| s->quant_matrixes[index][s->scantable.permutated[8]]) >> 1; | s->quant_matrixes[index][s->scantable.permutated[8]]) >> 1; | ||||
| dprintf("qscale[%d]: %d\n", index, s->qscale[index]); | |||||
| dprintf("qscale[%d]: %d\n", index, s->qscale[index]); | |||||
| len -= 65; | len -= 65; | ||||
| } | } | ||||
| @@ -1132,7 +1132,7 @@ static int mjpeg_decode_sof(MJpegDecodeContext *s) | |||||
| if (s->quant_index[i] >= 4) | if (s->quant_index[i] >= 4) | ||||
| return -1; | return -1; | ||||
| dprintf("component %d %d:%d id: %d quant:%d\n", i, s->h_count[i], | dprintf("component %d %d:%d id: %d quant:%d\n", i, s->h_count[i], | ||||
| s->v_count[i], s->component_id[i], s->quant_index[i]); | |||||
| s->v_count[i], s->component_id[i], s->quant_index[i]); | |||||
| } | } | ||||
| if(s->v_max==1 && s->h_max==1 && s->lossless==1) s->rgb=1; | if(s->v_max==1 && s->h_max==1 && s->lossless==1) s->rgb=1; | ||||
| @@ -1151,7 +1151,7 @@ static int mjpeg_decode_sof(MJpegDecodeContext *s) | |||||
| s->org_height != 0 && | s->org_height != 0 && | ||||
| s->height < ((s->org_height * 3) / 4)) { | s->height < ((s->org_height * 3) / 4)) { | ||||
| s->interlaced = 1; | s->interlaced = 1; | ||||
| // s->bottom_field = (s->interlace_polarity) ? 1 : 0; | |||||
| // s->bottom_field = (s->interlace_polarity) ? 1 : 0; | |||||
| s->bottom_field = 0; | s->bottom_field = 0; | ||||
| s->avctx->height *= 2; | s->avctx->height *= 2; | ||||
| } | } | ||||
| @@ -1202,7 +1202,7 @@ static int mjpeg_decode_sof(MJpegDecodeContext *s) | |||||
| if (len != (8+(3*nb_components))) | if (len != (8+(3*nb_components))) | ||||
| { | { | ||||
| dprintf("decode_sof0: error, len(%d) mismatch\n", len); | |||||
| dprintf("decode_sof0: error, len(%d) mismatch\n", len); | |||||
| } | } | ||||
| return 0; | return 0; | ||||
| @@ -1214,7 +1214,7 @@ static inline int mjpeg_decode_dc(MJpegDecodeContext *s, int dc_index) | |||||
| code = get_vlc2(&s->gb, s->vlcs[0][dc_index].table, 9, 2); | code = get_vlc2(&s->gb, s->vlcs[0][dc_index].table, 9, 2); | ||||
| if (code < 0) | if (code < 0) | ||||
| { | { | ||||
| dprintf("mjpeg_decode_dc: bad vlc: %d:%d (%p)\n", 0, dc_index, | |||||
| dprintf("mjpeg_decode_dc: bad vlc: %d:%d (%p)\n", 0, dc_index, | |||||
| &s->vlcs[0][dc_index]); | &s->vlcs[0][dc_index]); | ||||
| return 0xffff; | return 0xffff; | ||||
| } | } | ||||
| @@ -1247,7 +1247,7 @@ static int decode_block(MJpegDecodeContext *s, DCTELEM *block, | |||||
| ac_vlc = &s->vlcs[1][ac_index]; | ac_vlc = &s->vlcs[1][ac_index]; | ||||
| i = 1; | i = 1; | ||||
| for(;;) { | for(;;) { | ||||
| code = get_vlc2(&s->gb, s->vlcs[1][ac_index].table, 9, 2); | |||||
| code = get_vlc2(&s->gb, s->vlcs[1][ac_index].table, 9, 2); | |||||
| if (code < 0) { | if (code < 0) { | ||||
| dprintf("error ac\n"); | dprintf("error ac\n"); | ||||
| @@ -1452,7 +1452,7 @@ static int mjpeg_decode_scan(MJpegDecodeContext *s){ | |||||
| dprintf("error y=%d x=%d\n", mb_y, mb_x); | dprintf("error y=%d x=%d\n", mb_y, mb_x); | ||||
| return -1; | return -1; | ||||
| } | } | ||||
| // dprintf("mb: %d %d processed\n", mb_y, mb_x); | |||||
| // dprintf("mb: %d %d processed\n", mb_y, mb_x); | |||||
| ptr = s->picture.data[c] + | ptr = s->picture.data[c] + | ||||
| (((s->linesize[c] * (v * mb_y + y) * 8) + | (((s->linesize[c] * (v * mb_y + y) * 8) + | ||||
| (h * mb_x + x) * 8) >> s->avctx->lowres); | (h * mb_x + x) * 8) >> s->avctx->lowres); | ||||
| @@ -1491,29 +1491,29 @@ static int mjpeg_decode_sos(MJpegDecodeContext *s) | |||||
| nb_components = get_bits(&s->gb, 8); | nb_components = get_bits(&s->gb, 8); | ||||
| if (len != 6+2*nb_components) | if (len != 6+2*nb_components) | ||||
| { | { | ||||
| dprintf("decode_sos: invalid len (%d)\n", len); | |||||
| return -1; | |||||
| dprintf("decode_sos: invalid len (%d)\n", len); | |||||
| return -1; | |||||
| } | } | ||||
| /* XXX: only interleaved scan accepted */ | /* XXX: only interleaved scan accepted */ | ||||
| if (nb_components != s->nb_components) | if (nb_components != s->nb_components) | ||||
| { | { | ||||
| dprintf("decode_sos: components(%d) mismatch\n", nb_components); | |||||
| dprintf("decode_sos: components(%d) mismatch\n", nb_components); | |||||
| return -1; | return -1; | ||||
| } | } | ||||
| vmax = 0; | vmax = 0; | ||||
| hmax = 0; | hmax = 0; | ||||
| for(i=0;i<nb_components;i++) { | for(i=0;i<nb_components;i++) { | ||||
| id = get_bits(&s->gb, 8) - 1; | id = get_bits(&s->gb, 8) - 1; | ||||
| dprintf("component: %d\n", id); | |||||
| dprintf("component: %d\n", id); | |||||
| /* find component index */ | /* find component index */ | ||||
| for(index=0;index<s->nb_components;index++) | for(index=0;index<s->nb_components;index++) | ||||
| if (id == s->component_id[index]) | if (id == s->component_id[index]) | ||||
| break; | break; | ||||
| if (index == s->nb_components) | if (index == s->nb_components) | ||||
| { | |||||
| dprintf("decode_sos: index(%d) out of components\n", index); | |||||
| { | |||||
| dprintf("decode_sos: index(%d) out of components\n", index); | |||||
| return -1; | return -1; | ||||
| } | |||||
| } | |||||
| s->comp_index[i] = index; | s->comp_index[i] = index; | ||||
| @@ -1524,26 +1524,26 @@ static int mjpeg_decode_sos(MJpegDecodeContext *s) | |||||
| s->dc_index[i] = get_bits(&s->gb, 4); | s->dc_index[i] = get_bits(&s->gb, 4); | ||||
| s->ac_index[i] = get_bits(&s->gb, 4); | s->ac_index[i] = get_bits(&s->gb, 4); | ||||
| if (s->dc_index[i] < 0 || s->ac_index[i] < 0 || | |||||
| s->dc_index[i] >= 4 || s->ac_index[i] >= 4) | |||||
| goto out_of_range; | |||||
| if (s->dc_index[i] < 0 || s->ac_index[i] < 0 || | |||||
| s->dc_index[i] >= 4 || s->ac_index[i] >= 4) | |||||
| goto out_of_range; | |||||
| #if 0 //buggy | #if 0 //buggy | ||||
| switch(s->start_code) | |||||
| { | |||||
| case SOF0: | |||||
| if (dc_index[i] > 1 || ac_index[i] > 1) | |||||
| goto out_of_range; | |||||
| break; | |||||
| case SOF1: | |||||
| case SOF2: | |||||
| if (dc_index[i] > 3 || ac_index[i] > 3) | |||||
| goto out_of_range; | |||||
| break; | |||||
| case SOF3: | |||||
| if (dc_index[i] > 3 || ac_index[i] != 0) | |||||
| goto out_of_range; | |||||
| break; | |||||
| } | |||||
| switch(s->start_code) | |||||
| { | |||||
| case SOF0: | |||||
| if (dc_index[i] > 1 || ac_index[i] > 1) | |||||
| goto out_of_range; | |||||
| break; | |||||
| case SOF1: | |||||
| case SOF2: | |||||
| if (dc_index[i] > 3 || ac_index[i] > 3) | |||||
| goto out_of_range; | |||||
| break; | |||||
| case SOF3: | |||||
| if (dc_index[i] > 3 || ac_index[i] != 0) | |||||
| goto out_of_range; | |||||
| break; | |||||
| } | |||||
| #endif | #endif | ||||
| } | } | ||||
| @@ -1605,7 +1605,7 @@ static int mjpeg_decode_sos(MJpegDecodeContext *s) | |||||
| static int mjpeg_decode_dri(MJpegDecodeContext *s) | static int mjpeg_decode_dri(MJpegDecodeContext *s) | ||||
| { | { | ||||
| if (get_bits(&s->gb, 16) != 4) | if (get_bits(&s->gb, 16) != 4) | ||||
| return -1; | |||||
| return -1; | |||||
| s->restart_interval = get_bits(&s->gb, 16); | s->restart_interval = get_bits(&s->gb, 16); | ||||
| s->restart_count = 0; | s->restart_count = 0; | ||||
| dprintf("restart interval: %d\n", s->restart_interval); | dprintf("restart interval: %d\n", s->restart_interval); | ||||
| @@ -1619,7 +1619,7 @@ static int mjpeg_decode_app(MJpegDecodeContext *s) | |||||
| len = get_bits(&s->gb, 16); | len = get_bits(&s->gb, 16); | ||||
| if (len < 5) | if (len < 5) | ||||
| return -1; | |||||
| return -1; | |||||
| if(8*len + get_bits_count(&s->gb) > s->gb.size_in_bits) | if(8*len + get_bits_count(&s->gb) > s->gb.size_in_bits) | ||||
| return -1; | return -1; | ||||
| @@ -1636,35 +1636,35 @@ static int mjpeg_decode_app(MJpegDecodeContext *s) | |||||
| informations, but it's always present in AVID creates files */ | informations, but it's always present in AVID creates files */ | ||||
| if (id == ff_get_fourcc("AVI1")) | if (id == ff_get_fourcc("AVI1")) | ||||
| { | { | ||||
| /* structure: | |||||
| 4bytes AVI1 | |||||
| 1bytes polarity | |||||
| 1bytes always zero | |||||
| 4bytes field_size | |||||
| 4bytes field_size_less_padding | |||||
| */ | |||||
| s->buggy_avid = 1; | |||||
| // if (s->first_picture) | |||||
| // printf("mjpeg: workarounding buggy AVID\n"); | |||||
| s->interlace_polarity = get_bits(&s->gb, 8); | |||||
| /* structure: | |||||
| 4bytes AVI1 | |||||
| 1bytes polarity | |||||
| 1bytes always zero | |||||
| 4bytes field_size | |||||
| 4bytes field_size_less_padding | |||||
| */ | |||||
| s->buggy_avid = 1; | |||||
| // if (s->first_picture) | |||||
| // printf("mjpeg: workarounding buggy AVID\n"); | |||||
| s->interlace_polarity = get_bits(&s->gb, 8); | |||||
| #if 0 | #if 0 | ||||
| skip_bits(&s->gb, 8); | |||||
| skip_bits(&s->gb, 32); | |||||
| skip_bits(&s->gb, 32); | |||||
| len -= 10; | |||||
| skip_bits(&s->gb, 8); | |||||
| skip_bits(&s->gb, 32); | |||||
| skip_bits(&s->gb, 32); | |||||
| len -= 10; | |||||
| #endif | #endif | ||||
| // if (s->interlace_polarity) | |||||
| // printf("mjpeg: interlace polarity: %d\n", s->interlace_polarity); | |||||
| goto out; | |||||
| // if (s->interlace_polarity) | |||||
| // printf("mjpeg: interlace polarity: %d\n", s->interlace_polarity); | |||||
| goto out; | |||||
| } | } | ||||
| // len -= 2; | // len -= 2; | ||||
| if (id == ff_get_fourcc("JFIF")) | if (id == ff_get_fourcc("JFIF")) | ||||
| { | { | ||||
| int t_w, t_h, v1, v2; | |||||
| skip_bits(&s->gb, 8); /* the trailing zero-byte */ | |||||
| v1= get_bits(&s->gb, 8); | |||||
| int t_w, t_h, v1, v2; | |||||
| skip_bits(&s->gb, 8); /* the trailing zero-byte */ | |||||
| v1= get_bits(&s->gb, 8); | |||||
| v2= get_bits(&s->gb, 8); | v2= get_bits(&s->gb, 8); | ||||
| skip_bits(&s->gb, 8); | skip_bits(&s->gb, 8); | ||||
| @@ -1678,37 +1678,37 @@ static int mjpeg_decode_app(MJpegDecodeContext *s) | |||||
| s->avctx->sample_aspect_ratio.den | s->avctx->sample_aspect_ratio.den | ||||
| ); | ); | ||||
| t_w = get_bits(&s->gb, 8); | |||||
| t_h = get_bits(&s->gb, 8); | |||||
| if (t_w && t_h) | |||||
| { | |||||
| /* skip thumbnail */ | |||||
| if (len-10-(t_w*t_h*3) > 0) | |||||
| len -= t_w*t_h*3; | |||||
| } | |||||
| len -= 10; | |||||
| goto out; | |||||
| t_w = get_bits(&s->gb, 8); | |||||
| t_h = get_bits(&s->gb, 8); | |||||
| if (t_w && t_h) | |||||
| { | |||||
| /* skip thumbnail */ | |||||
| if (len-10-(t_w*t_h*3) > 0) | |||||
| len -= t_w*t_h*3; | |||||
| } | |||||
| len -= 10; | |||||
| goto out; | |||||
| } | } | ||||
| if (id == ff_get_fourcc("Adob") && (get_bits(&s->gb, 8) == 'e')) | if (id == ff_get_fourcc("Adob") && (get_bits(&s->gb, 8) == 'e')) | ||||
| { | { | ||||
| if (s->avctx->debug & FF_DEBUG_PICT_INFO) | if (s->avctx->debug & FF_DEBUG_PICT_INFO) | ||||
| av_log(s->avctx, AV_LOG_INFO, "mjpeg: Adobe header found\n"); | av_log(s->avctx, AV_LOG_INFO, "mjpeg: Adobe header found\n"); | ||||
| skip_bits(&s->gb, 16); /* version */ | |||||
| skip_bits(&s->gb, 16); /* flags0 */ | |||||
| skip_bits(&s->gb, 16); /* flags1 */ | |||||
| skip_bits(&s->gb, 8); /* transform */ | |||||
| len -= 7; | |||||
| goto out; | |||||
| skip_bits(&s->gb, 16); /* version */ | |||||
| skip_bits(&s->gb, 16); /* flags0 */ | |||||
| skip_bits(&s->gb, 16); /* flags1 */ | |||||
| skip_bits(&s->gb, 8); /* transform */ | |||||
| len -= 7; | |||||
| goto out; | |||||
| } | } | ||||
| if (id == ff_get_fourcc("LJIF")){ | if (id == ff_get_fourcc("LJIF")){ | ||||
| if (s->avctx->debug & FF_DEBUG_PICT_INFO) | if (s->avctx->debug & FF_DEBUG_PICT_INFO) | ||||
| av_log(s->avctx, AV_LOG_INFO, "Pegasus lossless jpeg header found\n"); | av_log(s->avctx, AV_LOG_INFO, "Pegasus lossless jpeg header found\n"); | ||||
| skip_bits(&s->gb, 16); /* version ? */ | |||||
| skip_bits(&s->gb, 16); /* unknwon always 0? */ | |||||
| skip_bits(&s->gb, 16); /* unknwon always 0? */ | |||||
| skip_bits(&s->gb, 16); /* unknwon always 0? */ | |||||
| skip_bits(&s->gb, 16); /* version ? */ | |||||
| skip_bits(&s->gb, 16); /* unknwon always 0? */ | |||||
| skip_bits(&s->gb, 16); /* unknwon always 0? */ | |||||
| skip_bits(&s->gb, 16); /* unknwon always 0? */ | |||||
| switch( get_bits(&s->gb, 8)){ | switch( get_bits(&s->gb, 8)){ | ||||
| case 1: | case 1: | ||||
| s->rgb= 1; | s->rgb= 1; | ||||
| @@ -1728,32 +1728,32 @@ static int mjpeg_decode_app(MJpegDecodeContext *s) | |||||
| /* Apple MJPEG-A */ | /* Apple MJPEG-A */ | ||||
| if ((s->start_code == APP1) && (len > (0x28 - 8))) | if ((s->start_code == APP1) && (len > (0x28 - 8))) | ||||
| { | { | ||||
| id = (get_bits(&s->gb, 16) << 16) | get_bits(&s->gb, 16); | |||||
| id = be2me_32(id); | |||||
| len -= 4; | |||||
| if (id == ff_get_fourcc("mjpg")) /* Apple MJPEG-A */ | |||||
| { | |||||
| id = (get_bits(&s->gb, 16) << 16) | get_bits(&s->gb, 16); | |||||
| id = be2me_32(id); | |||||
| len -= 4; | |||||
| if (id == ff_get_fourcc("mjpg")) /* Apple MJPEG-A */ | |||||
| { | |||||
| #if 0 | #if 0 | ||||
| skip_bits(&s->gb, 32); /* field size */ | |||||
| skip_bits(&s->gb, 32); /* pad field size */ | |||||
| skip_bits(&s->gb, 32); /* next off */ | |||||
| skip_bits(&s->gb, 32); /* quant off */ | |||||
| skip_bits(&s->gb, 32); /* huff off */ | |||||
| skip_bits(&s->gb, 32); /* image off */ | |||||
| skip_bits(&s->gb, 32); /* scan off */ | |||||
| skip_bits(&s->gb, 32); /* data off */ | |||||
| skip_bits(&s->gb, 32); /* field size */ | |||||
| skip_bits(&s->gb, 32); /* pad field size */ | |||||
| skip_bits(&s->gb, 32); /* next off */ | |||||
| skip_bits(&s->gb, 32); /* quant off */ | |||||
| skip_bits(&s->gb, 32); /* huff off */ | |||||
| skip_bits(&s->gb, 32); /* image off */ | |||||
| skip_bits(&s->gb, 32); /* scan off */ | |||||
| skip_bits(&s->gb, 32); /* data off */ | |||||
| #endif | #endif | ||||
| if (s->avctx->debug & FF_DEBUG_PICT_INFO) | if (s->avctx->debug & FF_DEBUG_PICT_INFO) | ||||
| av_log(s->avctx, AV_LOG_INFO, "mjpeg: Apple MJPEG-A header found\n"); | |||||
| } | |||||
| av_log(s->avctx, AV_LOG_INFO, "mjpeg: Apple MJPEG-A header found\n"); | |||||
| } | |||||
| } | } | ||||
| out: | out: | ||||
| /* slow but needed for extreme adobe jpegs */ | /* slow but needed for extreme adobe jpegs */ | ||||
| if (len < 0) | if (len < 0) | ||||
| av_log(s->avctx, AV_LOG_ERROR, "mjpeg: error, decode_app parser read over the end\n"); | |||||
| av_log(s->avctx, AV_LOG_ERROR, "mjpeg: error, decode_app parser read over the end\n"); | |||||
| while(--len > 0) | while(--len > 0) | ||||
| skip_bits(&s->gb, 8); | |||||
| skip_bits(&s->gb, 8); | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| @@ -1762,32 +1762,32 @@ static int mjpeg_decode_com(MJpegDecodeContext *s) | |||||
| { | { | ||||
| int len = get_bits(&s->gb, 16); | int len = get_bits(&s->gb, 16); | ||||
| if (len >= 2 && 8*len - 16 + get_bits_count(&s->gb) <= s->gb.size_in_bits) { | if (len >= 2 && 8*len - 16 + get_bits_count(&s->gb) <= s->gb.size_in_bits) { | ||||
| uint8_t *cbuf = av_malloc(len - 1); | |||||
| if (cbuf) { | |||||
| int i; | |||||
| for (i = 0; i < len - 2; i++) | |||||
| cbuf[i] = get_bits(&s->gb, 8); | |||||
| if (i > 0 && cbuf[i-1] == '\n') | |||||
| cbuf[i-1] = 0; | |||||
| else | |||||
| cbuf[i] = 0; | |||||
| uint8_t *cbuf = av_malloc(len - 1); | |||||
| if (cbuf) { | |||||
| int i; | |||||
| for (i = 0; i < len - 2; i++) | |||||
| cbuf[i] = get_bits(&s->gb, 8); | |||||
| if (i > 0 && cbuf[i-1] == '\n') | |||||
| cbuf[i-1] = 0; | |||||
| else | |||||
| cbuf[i] = 0; | |||||
| if(s->avctx->debug & FF_DEBUG_PICT_INFO) | if(s->avctx->debug & FF_DEBUG_PICT_INFO) | ||||
| av_log(s->avctx, AV_LOG_INFO, "mjpeg comment: '%s'\n", cbuf); | av_log(s->avctx, AV_LOG_INFO, "mjpeg comment: '%s'\n", cbuf); | ||||
| /* buggy avid, it puts EOI only at every 10th frame */ | |||||
| if (!strcmp(cbuf, "AVID")) | |||||
| { | |||||
| s->buggy_avid = 1; | |||||
| // if (s->first_picture) | |||||
| // printf("mjpeg: workarounding buggy AVID\n"); | |||||
| } | |||||
| /* buggy avid, it puts EOI only at every 10th frame */ | |||||
| if (!strcmp(cbuf, "AVID")) | |||||
| { | |||||
| s->buggy_avid = 1; | |||||
| // if (s->first_picture) | |||||
| // printf("mjpeg: workarounding buggy AVID\n"); | |||||
| } | |||||
| else if(!strcmp(cbuf, "CS=ITU601")){ | else if(!strcmp(cbuf, "CS=ITU601")){ | ||||
| s->cs_itu601= 1; | s->cs_itu601= 1; | ||||
| } | } | ||||
| av_free(cbuf); | |||||
| } | |||||
| av_free(cbuf); | |||||
| } | |||||
| } | } | ||||
| return 0; | return 0; | ||||
| @@ -1830,13 +1830,13 @@ static int find_marker(uint8_t **pbuf_ptr, uint8_t *buf_end) | |||||
| buf_ptr = *pbuf_ptr; | buf_ptr = *pbuf_ptr; | ||||
| while (buf_ptr < buf_end) { | while (buf_ptr < buf_end) { | ||||
| v = *buf_ptr++; | v = *buf_ptr++; | ||||
| v2 = *buf_ptr; | |||||
| v2 = *buf_ptr; | |||||
| if ((v == 0xff) && (v2 >= 0xc0) && (v2 <= 0xfe) && buf_ptr < buf_end) { | if ((v == 0xff) && (v2 >= 0xc0) && (v2 <= 0xfe) && buf_ptr < buf_end) { | ||||
| val = *buf_ptr++; | |||||
| goto found; | |||||
| val = *buf_ptr++; | |||||
| goto found; | |||||
| } | } | ||||
| #ifdef DEBUG | #ifdef DEBUG | ||||
| skipped++; | |||||
| skipped++; | |||||
| #endif | #endif | ||||
| } | } | ||||
| val = -1; | val = -1; | ||||
| @@ -1862,74 +1862,74 @@ static int mjpeg_decode_frame(AVCodecContext *avctx, | |||||
| while (buf_ptr < buf_end) { | while (buf_ptr < buf_end) { | ||||
| /* find start next marker */ | /* find start next marker */ | ||||
| start_code = find_marker(&buf_ptr, buf_end); | start_code = find_marker(&buf_ptr, buf_end); | ||||
| { | |||||
| /* EOF */ | |||||
| { | |||||
| /* EOF */ | |||||
| if (start_code < 0) { | if (start_code < 0) { | ||||
| goto the_end; | |||||
| goto the_end; | |||||
| } else { | } else { | ||||
| dprintf("marker=%x avail_size_in_buf=%d\n", start_code, buf_end - buf_ptr); | dprintf("marker=%x avail_size_in_buf=%d\n", start_code, buf_end - buf_ptr); | ||||
| if ((buf_end - buf_ptr) > s->buffer_size) | |||||
| { | |||||
| av_free(s->buffer); | |||||
| s->buffer_size = buf_end-buf_ptr; | |||||
| if ((buf_end - buf_ptr) > s->buffer_size) | |||||
| { | |||||
| av_free(s->buffer); | |||||
| s->buffer_size = buf_end-buf_ptr; | |||||
| s->buffer = av_malloc(s->buffer_size + FF_INPUT_BUFFER_PADDING_SIZE); | s->buffer = av_malloc(s->buffer_size + FF_INPUT_BUFFER_PADDING_SIZE); | ||||
| dprintf("buffer too small, expanding to %d bytes\n", | |||||
| s->buffer_size); | |||||
| } | |||||
| /* unescape buffer of SOS */ | |||||
| if (start_code == SOS) | |||||
| { | |||||
| uint8_t *src = buf_ptr; | |||||
| uint8_t *dst = s->buffer; | |||||
| while (src<buf_end) | |||||
| { | |||||
| uint8_t x = *(src++); | |||||
| *(dst++) = x; | |||||
| if (x == 0xff) | |||||
| { | |||||
| dprintf("buffer too small, expanding to %d bytes\n", | |||||
| s->buffer_size); | |||||
| } | |||||
| /* unescape buffer of SOS */ | |||||
| if (start_code == SOS) | |||||
| { | |||||
| uint8_t *src = buf_ptr; | |||||
| uint8_t *dst = s->buffer; | |||||
| while (src<buf_end) | |||||
| { | |||||
| uint8_t x = *(src++); | |||||
| *(dst++) = x; | |||||
| if (x == 0xff) | |||||
| { | |||||
| while(src<buf_end && x == 0xff) | while(src<buf_end && x == 0xff) | ||||
| x = *(src++); | x = *(src++); | ||||
| if (x >= 0xd0 && x <= 0xd7) | |||||
| *(dst++) = x; | |||||
| else if (x) | |||||
| break; | |||||
| } | |||||
| } | |||||
| init_get_bits(&s->gb, s->buffer, (dst - s->buffer)*8); | |||||
| dprintf("escaping removed %d bytes\n", | |||||
| (buf_end - buf_ptr) - (dst - s->buffer)); | |||||
| } | |||||
| else | |||||
| init_get_bits(&s->gb, buf_ptr, (buf_end - buf_ptr)*8); | |||||
| s->start_code = start_code; | |||||
| if (x >= 0xd0 && x <= 0xd7) | |||||
| *(dst++) = x; | |||||
| else if (x) | |||||
| break; | |||||
| } | |||||
| } | |||||
| init_get_bits(&s->gb, s->buffer, (dst - s->buffer)*8); | |||||
| dprintf("escaping removed %d bytes\n", | |||||
| (buf_end - buf_ptr) - (dst - s->buffer)); | |||||
| } | |||||
| else | |||||
| init_get_bits(&s->gb, buf_ptr, (buf_end - buf_ptr)*8); | |||||
| s->start_code = start_code; | |||||
| if(s->avctx->debug & FF_DEBUG_STARTCODE){ | if(s->avctx->debug & FF_DEBUG_STARTCODE){ | ||||
| av_log(s->avctx, AV_LOG_DEBUG, "startcode: %X\n", start_code); | av_log(s->avctx, AV_LOG_DEBUG, "startcode: %X\n", start_code); | ||||
| } | } | ||||
| /* process markers */ | |||||
| if (start_code >= 0xd0 && start_code <= 0xd7) { | |||||
| dprintf("restart marker: %d\n", start_code&0x0f); | |||||
| /* APP fields */ | |||||
| } else if (start_code >= APP0 && start_code <= APP15) { | |||||
| mjpeg_decode_app(s); | |||||
| /* Comment */ | |||||
| } else if (start_code == COM){ | |||||
| mjpeg_decode_com(s); | |||||
| } | |||||
| /* process markers */ | |||||
| if (start_code >= 0xd0 && start_code <= 0xd7) { | |||||
| dprintf("restart marker: %d\n", start_code&0x0f); | |||||
| /* APP fields */ | |||||
| } else if (start_code >= APP0 && start_code <= APP15) { | |||||
| mjpeg_decode_app(s); | |||||
| /* Comment */ | |||||
| } else if (start_code == COM){ | |||||
| mjpeg_decode_com(s); | |||||
| } | |||||
| switch(start_code) { | switch(start_code) { | ||||
| case SOI: | case SOI: | ||||
| s->restart_interval = 0; | |||||
| s->restart_interval = 0; | |||||
| reset_ls_coding_parameters(s, 1); | reset_ls_coding_parameters(s, 1); | ||||
| s->restart_count = 0; | |||||
| s->restart_count = 0; | |||||
| /* nothing to do on SOI */ | /* nothing to do on SOI */ | ||||
| break; | break; | ||||
| case DQT: | case DQT: | ||||
| @@ -1944,12 +1944,12 @@ static int mjpeg_decode_frame(AVCodecContext *avctx, | |||||
| case SOF0: | case SOF0: | ||||
| s->lossless=0; | s->lossless=0; | ||||
| if (mjpeg_decode_sof(s) < 0) | if (mjpeg_decode_sof(s) < 0) | ||||
| return -1; | |||||
| return -1; | |||||
| break; | break; | ||||
| case SOF3: | case SOF3: | ||||
| s->lossless=1; | s->lossless=1; | ||||
| if (mjpeg_decode_sof(s) < 0) | if (mjpeg_decode_sof(s) < 0) | ||||
| return -1; | |||||
| return -1; | |||||
| break; | break; | ||||
| case SOF48: | case SOF48: | ||||
| s->lossless=1; | s->lossless=1; | ||||
| @@ -1961,11 +1961,11 @@ static int mjpeg_decode_frame(AVCodecContext *avctx, | |||||
| if (decode_lse(s) < 0) | if (decode_lse(s) < 0) | ||||
| return -1; | return -1; | ||||
| break; | break; | ||||
| case EOI: | |||||
| if ((s->buggy_avid && !s->interlaced) || s->restart_interval) | |||||
| case EOI: | |||||
| if ((s->buggy_avid && !s->interlaced) || s->restart_interval) | |||||
| break; | break; | ||||
| eoi_parser: | eoi_parser: | ||||
| { | |||||
| { | |||||
| if (s->interlaced) { | if (s->interlaced) { | ||||
| s->bottom_field ^= 1; | s->bottom_field ^= 1; | ||||
| /* if not bottom field, do not output image yet */ | /* if not bottom field, do not output image yet */ | ||||
| @@ -1987,41 +1987,41 @@ eoi_parser: | |||||
| goto the_end; | goto the_end; | ||||
| } | } | ||||
| break; | |||||
| break; | |||||
| case SOS: | case SOS: | ||||
| mjpeg_decode_sos(s); | mjpeg_decode_sos(s); | ||||
| /* buggy avid puts EOI every 10-20th frame */ | |||||
| /* if restart period is over process EOI */ | |||||
| if ((s->buggy_avid && !s->interlaced) || s->restart_interval) | |||||
| goto eoi_parser; | |||||
| /* buggy avid puts EOI every 10-20th frame */ | |||||
| /* if restart period is over process EOI */ | |||||
| if ((s->buggy_avid && !s->interlaced) || s->restart_interval) | |||||
| goto eoi_parser; | |||||
| break; | |||||
| case DRI: | |||||
| mjpeg_decode_dri(s); | |||||
| break; | |||||
| case SOF1: | |||||
| case SOF2: | |||||
| case SOF5: | |||||
| case SOF6: | |||||
| case SOF7: | |||||
| case SOF9: | |||||
| case SOF10: | |||||
| case SOF11: | |||||
| case SOF13: | |||||
| case SOF14: | |||||
| case SOF15: | |||||
| case JPG: | |||||
| av_log(s->avctx, AV_LOG_ERROR, "mjpeg: unsupported coding type (%x)\n", start_code); | |||||
| break; | break; | ||||
| case DRI: | |||||
| mjpeg_decode_dri(s); | |||||
| break; | |||||
| case SOF1: | |||||
| case SOF2: | |||||
| case SOF5: | |||||
| case SOF6: | |||||
| case SOF7: | |||||
| case SOF9: | |||||
| case SOF10: | |||||
| case SOF11: | |||||
| case SOF13: | |||||
| case SOF14: | |||||
| case SOF15: | |||||
| case JPG: | |||||
| av_log(s->avctx, AV_LOG_ERROR, "mjpeg: unsupported coding type (%x)\n", start_code); | |||||
| break; | |||||
| // default: | |||||
| // printf("mjpeg: unsupported marker (%x)\n", start_code); | |||||
| // break; | |||||
| // default: | |||||
| // printf("mjpeg: unsupported marker (%x)\n", start_code); | |||||
| // break; | |||||
| } | } | ||||
| not_the_end: | not_the_end: | ||||
| /* eof process start code */ | |||||
| buf_ptr += (get_bits_count(&s->gb)+7)/8; | |||||
| dprintf("marker parser used %d bytes (%d bits)\n", | |||||
| (get_bits_count(&s->gb)+7)/8, get_bits_count(&s->gb)); | |||||
| /* eof process start code */ | |||||
| buf_ptr += (get_bits_count(&s->gb)+7)/8; | |||||
| dprintf("marker parser used %d bytes (%d bits)\n", | |||||
| (get_bits_count(&s->gb)+7)/8, get_bits_count(&s->gb)); | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| @@ -2057,8 +2057,8 @@ read_header: | |||||
| if (get_bits_long(&hgb, 32) != be2me_32(ff_get_fourcc("mjpg"))) | if (get_bits_long(&hgb, 32) != be2me_32(ff_get_fourcc("mjpg"))) | ||||
| { | { | ||||
| dprintf("not mjpeg-b (bad fourcc)\n"); | |||||
| return 0; | |||||
| dprintf("not mjpeg-b (bad fourcc)\n"); | |||||
| return 0; | |||||
| } | } | ||||
| field_size = get_bits_long(&hgb, 32); /* field size */ | field_size = get_bits_long(&hgb, 32); /* field size */ | ||||
| @@ -2067,34 +2067,34 @@ read_header: | |||||
| second_field_offs = get_bits_long(&hgb, 32); | second_field_offs = get_bits_long(&hgb, 32); | ||||
| dprintf("second field offs: 0x%x\n", second_field_offs); | dprintf("second field offs: 0x%x\n", second_field_offs); | ||||
| if (second_field_offs) | if (second_field_offs) | ||||
| s->interlaced = 1; | |||||
| s->interlaced = 1; | |||||
| dqt_offs = get_bits_long(&hgb, 32); | dqt_offs = get_bits_long(&hgb, 32); | ||||
| dprintf("dqt offs: 0x%x\n", dqt_offs); | dprintf("dqt offs: 0x%x\n", dqt_offs); | ||||
| if (dqt_offs) | if (dqt_offs) | ||||
| { | { | ||||
| init_get_bits(&s->gb, buf+dqt_offs, (buf_end - (buf+dqt_offs))*8); | |||||
| s->start_code = DQT; | |||||
| mjpeg_decode_dqt(s); | |||||
| init_get_bits(&s->gb, buf+dqt_offs, (buf_end - (buf+dqt_offs))*8); | |||||
| s->start_code = DQT; | |||||
| mjpeg_decode_dqt(s); | |||||
| } | } | ||||
| dht_offs = get_bits_long(&hgb, 32); | dht_offs = get_bits_long(&hgb, 32); | ||||
| dprintf("dht offs: 0x%x\n", dht_offs); | dprintf("dht offs: 0x%x\n", dht_offs); | ||||
| if (dht_offs) | if (dht_offs) | ||||
| { | { | ||||
| init_get_bits(&s->gb, buf+dht_offs, (buf_end - (buf+dht_offs))*8); | |||||
| s->start_code = DHT; | |||||
| mjpeg_decode_dht(s); | |||||
| init_get_bits(&s->gb, buf+dht_offs, (buf_end - (buf+dht_offs))*8); | |||||
| s->start_code = DHT; | |||||
| mjpeg_decode_dht(s); | |||||
| } | } | ||||
| sof_offs = get_bits_long(&hgb, 32); | sof_offs = get_bits_long(&hgb, 32); | ||||
| dprintf("sof offs: 0x%x\n", sof_offs); | dprintf("sof offs: 0x%x\n", sof_offs); | ||||
| if (sof_offs) | if (sof_offs) | ||||
| { | { | ||||
| init_get_bits(&s->gb, buf+sof_offs, (buf_end - (buf+sof_offs))*8); | |||||
| s->start_code = SOF0; | |||||
| if (mjpeg_decode_sof(s) < 0) | |||||
| return -1; | |||||
| init_get_bits(&s->gb, buf+sof_offs, (buf_end - (buf+sof_offs))*8); | |||||
| s->start_code = SOF0; | |||||
| if (mjpeg_decode_sof(s) < 0) | |||||
| return -1; | |||||
| } | } | ||||
| sos_offs = get_bits_long(&hgb, 32); | sos_offs = get_bits_long(&hgb, 32); | ||||
| @@ -2103,22 +2103,22 @@ read_header: | |||||
| dprintf("sod offs: 0x%x\n", sod_offs); | dprintf("sod offs: 0x%x\n", sod_offs); | ||||
| if (sos_offs) | if (sos_offs) | ||||
| { | { | ||||
| // init_get_bits(&s->gb, buf+sos_offs, (buf_end - (buf+sos_offs))*8); | |||||
| init_get_bits(&s->gb, buf+sos_offs, field_size*8); | |||||
| s->mjpb_skiptosod = (sod_offs - sos_offs - show_bits(&s->gb, 16)); | |||||
| s->start_code = SOS; | |||||
| mjpeg_decode_sos(s); | |||||
| // init_get_bits(&s->gb, buf+sos_offs, (buf_end - (buf+sos_offs))*8); | |||||
| init_get_bits(&s->gb, buf+sos_offs, field_size*8); | |||||
| s->mjpb_skiptosod = (sod_offs - sos_offs - show_bits(&s->gb, 16)); | |||||
| s->start_code = SOS; | |||||
| mjpeg_decode_sos(s); | |||||
| } | } | ||||
| if (s->interlaced) { | if (s->interlaced) { | ||||
| s->bottom_field ^= 1; | s->bottom_field ^= 1; | ||||
| /* if not bottom field, do not output image yet */ | /* if not bottom field, do not output image yet */ | ||||
| if (s->bottom_field && second_field_offs) | if (s->bottom_field && second_field_offs) | ||||
| { | |||||
| buf_ptr = buf + second_field_offs; | |||||
| second_field_offs = 0; | |||||
| goto read_header; | |||||
| } | |||||
| { | |||||
| buf_ptr = buf + second_field_offs; | |||||
| second_field_offs = 0; | |||||
| goto read_header; | |||||
| } | |||||
| } | } | ||||
| //XXX FIXME factorize, this looks very similar to the EOI code | //XXX FIXME factorize, this looks very similar to the EOI code | ||||
| @@ -2153,7 +2153,7 @@ static int sp5x_decode_frame(AVCodecContext *avctx, | |||||
| int i = 0, j = 0; | int i = 0, j = 0; | ||||
| if (!avctx->width || !avctx->height) | if (!avctx->width || !avctx->height) | ||||
| return -1; | |||||
| return -1; | |||||
| buf_ptr = buf; | buf_ptr = buf; | ||||
| buf_end = buf + buf_size; | buf_end = buf + buf_size; | ||||
| @@ -2161,7 +2161,7 @@ static int sp5x_decode_frame(AVCodecContext *avctx, | |||||
| #if 1 | #if 1 | ||||
| recoded = av_mallocz(buf_size + 1024); | recoded = av_mallocz(buf_size + 1024); | ||||
| if (!recoded) | if (!recoded) | ||||
| return -1; | |||||
| return -1; | |||||
| /* SOI */ | /* SOI */ | ||||
| recoded[j++] = 0xFF; | recoded[j++] = 0xFF; | ||||
| @@ -2187,9 +2187,9 @@ static int sp5x_decode_frame(AVCodecContext *avctx, | |||||
| for (i = 14; i < buf_size && j < buf_size+1024-2; i++) | for (i = 14; i < buf_size && j < buf_size+1024-2; i++) | ||||
| { | { | ||||
| recoded[j++] = buf[i]; | |||||
| if (buf[i] == 0xff) | |||||
| recoded[j++] = 0; | |||||
| recoded[j++] = buf[i]; | |||||
| if (buf[i] == 0xff) | |||||
| recoded[j++] = 0; | |||||
| } | } | ||||
| /* EOI */ | /* EOI */ | ||||
| @@ -2229,33 +2229,33 @@ static int sp5x_decode_frame(AVCodecContext *avctx, | |||||
| if (avctx->get_buffer(avctx, &s->picture) < 0) | if (avctx->get_buffer(avctx, &s->picture) < 0) | ||||
| { | { | ||||
| av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); | av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); | ||||
| return -1; | |||||
| return -1; | |||||
| } | } | ||||
| s->picture.pict_type = I_TYPE; | s->picture.pict_type = I_TYPE; | ||||
| s->picture.key_frame = 1; | s->picture.key_frame = 1; | ||||
| for (i = 0; i < 3; i++) | for (i = 0; i < 3; i++) | ||||
| s->linesize[i] = s->picture.linesize[i] << s->interlaced; | |||||
| s->linesize[i] = s->picture.linesize[i] << s->interlaced; | |||||
| /* DQT */ | /* DQT */ | ||||
| for (i = 0; i < 64; i++) | for (i = 0; i < 64; i++) | ||||
| { | { | ||||
| j = s->scantable.permutated[i]; | |||||
| s->quant_matrixes[0][j] = sp5x_quant_table[(qscale * 2) + i]; | |||||
| j = s->scantable.permutated[i]; | |||||
| s->quant_matrixes[0][j] = sp5x_quant_table[(qscale * 2) + i]; | |||||
| } | } | ||||
| s->qscale[0] = FFMAX( | s->qscale[0] = FFMAX( | ||||
| s->quant_matrixes[0][s->scantable.permutated[1]], | |||||
| s->quant_matrixes[0][s->scantable.permutated[8]]) >> 1; | |||||
| s->quant_matrixes[0][s->scantable.permutated[1]], | |||||
| s->quant_matrixes[0][s->scantable.permutated[8]]) >> 1; | |||||
| for (i = 0; i < 64; i++) | for (i = 0; i < 64; i++) | ||||
| { | { | ||||
| j = s->scantable.permutated[i]; | |||||
| s->quant_matrixes[1][j] = sp5x_quant_table[(qscale * 2) + 1 + i]; | |||||
| j = s->scantable.permutated[i]; | |||||
| s->quant_matrixes[1][j] = sp5x_quant_table[(qscale * 2) + 1 + i]; | |||||
| } | } | ||||
| s->qscale[1] = FFMAX( | s->qscale[1] = FFMAX( | ||||
| s->quant_matrixes[1][s->scantable.permutated[1]], | |||||
| s->quant_matrixes[1][s->scantable.permutated[8]]) >> 1; | |||||
| s->quant_matrixes[1][s->scantable.permutated[1]], | |||||
| s->quant_matrixes[1][s->scantable.permutated[8]]) >> 1; | |||||
| /* DHT */ | /* DHT */ | ||||
| @@ -2282,7 +2282,7 @@ static int sp5x_decode_frame(AVCodecContext *avctx, | |||||
| s->ac_index[2] = 1; | s->ac_index[2] = 1; | ||||
| for (i = 0; i < 3; i++) | for (i = 0; i < 3; i++) | ||||
| s->last_dc[i] = 1024; | |||||
| s->last_dc[i] = 1024; | |||||
| s->mb_width = (s->width * s->h_max * 8 -1) / (s->h_max * 8); | s->mb_width = (s->width * s->h_max * 8 -1) / (s->h_max * 8); | ||||
| s->mb_height = (s->height * s->v_max * 8 -1) / (s->v_max * 8); | s->mb_height = (s->height * s->v_max * 8 -1) / (s->v_max * 8); | ||||
| @@ -61,7 +61,7 @@ static void add_pixels_clamped_mlib(const DCTELEM *block, uint8_t *pixels, int l | |||||
| /* put block, width 16 pixel, height 8/16 */ | /* put block, width 16 pixel, height 8/16 */ | ||||
| static void put_pixels16_mlib (uint8_t * dest, const uint8_t * ref, | static void put_pixels16_mlib (uint8_t * dest, const uint8_t * ref, | ||||
| int stride, int height) | |||||
| int stride, int height) | |||||
| { | { | ||||
| switch (height) { | switch (height) { | ||||
| case 8: | case 8: | ||||
| @@ -78,7 +78,7 @@ static void put_pixels16_mlib (uint8_t * dest, const uint8_t * ref, | |||||
| } | } | ||||
| static void put_pixels16_x2_mlib (uint8_t * dest, const uint8_t * ref, | static void put_pixels16_x2_mlib (uint8_t * dest, const uint8_t * ref, | ||||
| int stride, int height) | |||||
| int stride, int height) | |||||
| { | { | ||||
| switch (height) { | switch (height) { | ||||
| case 8: | case 8: | ||||
| @@ -95,7 +95,7 @@ static void put_pixels16_x2_mlib (uint8_t * dest, const uint8_t * ref, | |||||
| } | } | ||||
| static void put_pixels16_y2_mlib (uint8_t * dest, const uint8_t * ref, | static void put_pixels16_y2_mlib (uint8_t * dest, const uint8_t * ref, | ||||
| int stride, int height) | |||||
| int stride, int height) | |||||
| { | { | ||||
| switch (height) { | switch (height) { | ||||
| case 8: | case 8: | ||||
| @@ -112,7 +112,7 @@ static void put_pixels16_y2_mlib (uint8_t * dest, const uint8_t * ref, | |||||
| } | } | ||||
| static void put_pixels16_xy2_mlib(uint8_t * dest, const uint8_t * ref, | static void put_pixels16_xy2_mlib(uint8_t * dest, const uint8_t * ref, | ||||
| int stride, int height) | |||||
| int stride, int height) | |||||
| { | { | ||||
| switch (height) { | switch (height) { | ||||
| case 8: | case 8: | ||||
| @@ -131,7 +131,7 @@ static void put_pixels16_xy2_mlib(uint8_t * dest, const uint8_t * ref, | |||||
| /* put block, width 8 pixel, height 4/8/16 */ | /* put block, width 8 pixel, height 4/8/16 */ | ||||
| static void put_pixels8_mlib (uint8_t * dest, const uint8_t * ref, | static void put_pixels8_mlib (uint8_t * dest, const uint8_t * ref, | ||||
| int stride, int height) | |||||
| int stride, int height) | |||||
| { | { | ||||
| switch (height) { | switch (height) { | ||||
| case 4: | case 4: | ||||
| @@ -152,7 +152,7 @@ static void put_pixels8_mlib (uint8_t * dest, const uint8_t * ref, | |||||
| } | } | ||||
| static void put_pixels8_x2_mlib (uint8_t * dest, const uint8_t * ref, | static void put_pixels8_x2_mlib (uint8_t * dest, const uint8_t * ref, | ||||
| int stride, int height) | |||||
| int stride, int height) | |||||
| { | { | ||||
| switch (height) { | switch (height) { | ||||
| case 4: | case 4: | ||||
| @@ -173,7 +173,7 @@ static void put_pixels8_x2_mlib (uint8_t * dest, const uint8_t * ref, | |||||
| } | } | ||||
| static void put_pixels8_y2_mlib (uint8_t * dest, const uint8_t * ref, | static void put_pixels8_y2_mlib (uint8_t * dest, const uint8_t * ref, | ||||
| int stride, int height) | |||||
| int stride, int height) | |||||
| { | { | ||||
| switch (height) { | switch (height) { | ||||
| case 4: | case 4: | ||||
| @@ -194,7 +194,7 @@ static void put_pixels8_y2_mlib (uint8_t * dest, const uint8_t * ref, | |||||
| } | } | ||||
| static void put_pixels8_xy2_mlib(uint8_t * dest, const uint8_t * ref, | static void put_pixels8_xy2_mlib(uint8_t * dest, const uint8_t * ref, | ||||
| int stride, int height) | |||||
| int stride, int height) | |||||
| { | { | ||||
| switch (height) { | switch (height) { | ||||
| case 4: | case 4: | ||||
| @@ -217,7 +217,7 @@ static void put_pixels8_xy2_mlib(uint8_t * dest, const uint8_t * ref, | |||||
| /* average block, width 16 pixel, height 8/16 */ | /* average block, width 16 pixel, height 8/16 */ | ||||
| static void avg_pixels16_mlib (uint8_t * dest, const uint8_t * ref, | static void avg_pixels16_mlib (uint8_t * dest, const uint8_t * ref, | ||||
| int stride, int height) | |||||
| int stride, int height) | |||||
| { | { | ||||
| switch (height) { | switch (height) { | ||||
| case 8: | case 8: | ||||
| @@ -234,7 +234,7 @@ static void avg_pixels16_mlib (uint8_t * dest, const uint8_t * ref, | |||||
| } | } | ||||
| static void avg_pixels16_x2_mlib (uint8_t * dest, const uint8_t * ref, | static void avg_pixels16_x2_mlib (uint8_t * dest, const uint8_t * ref, | ||||
| int stride, int height) | |||||
| int stride, int height) | |||||
| { | { | ||||
| switch (height) { | switch (height) { | ||||
| case 8: | case 8: | ||||
| @@ -251,7 +251,7 @@ static void avg_pixels16_x2_mlib (uint8_t * dest, const uint8_t * ref, | |||||
| } | } | ||||
| static void avg_pixels16_y2_mlib (uint8_t * dest, const uint8_t * ref, | static void avg_pixels16_y2_mlib (uint8_t * dest, const uint8_t * ref, | ||||
| int stride, int height) | |||||
| int stride, int height) | |||||
| { | { | ||||
| switch (height) { | switch (height) { | ||||
| case 8: | case 8: | ||||
| @@ -268,7 +268,7 @@ static void avg_pixels16_y2_mlib (uint8_t * dest, const uint8_t * ref, | |||||
| } | } | ||||
| static void avg_pixels16_xy2_mlib(uint8_t * dest, const uint8_t * ref, | static void avg_pixels16_xy2_mlib(uint8_t * dest, const uint8_t * ref, | ||||
| int stride, int height) | |||||
| int stride, int height) | |||||
| { | { | ||||
| switch (height) { | switch (height) { | ||||
| case 8: | case 8: | ||||
| @@ -287,7 +287,7 @@ static void avg_pixels16_xy2_mlib(uint8_t * dest, const uint8_t * ref, | |||||
| /* average block, width 8 pixel, height 4/8/16 */ | /* average block, width 8 pixel, height 4/8/16 */ | ||||
| static void avg_pixels8_mlib (uint8_t * dest, const uint8_t * ref, | static void avg_pixels8_mlib (uint8_t * dest, const uint8_t * ref, | ||||
| int stride, int height) | |||||
| int stride, int height) | |||||
| { | { | ||||
| switch (height) { | switch (height) { | ||||
| case 4: | case 4: | ||||
| @@ -308,7 +308,7 @@ static void avg_pixels8_mlib (uint8_t * dest, const uint8_t * ref, | |||||
| } | } | ||||
| static void avg_pixels8_x2_mlib (uint8_t * dest, const uint8_t * ref, | static void avg_pixels8_x2_mlib (uint8_t * dest, const uint8_t * ref, | ||||
| int stride, int height) | |||||
| int stride, int height) | |||||
| { | { | ||||
| switch (height) { | switch (height) { | ||||
| case 4: | case 4: | ||||
| @@ -329,7 +329,7 @@ static void avg_pixels8_x2_mlib (uint8_t * dest, const uint8_t * ref, | |||||
| } | } | ||||
| static void avg_pixels8_y2_mlib (uint8_t * dest, const uint8_t * ref, | static void avg_pixels8_y2_mlib (uint8_t * dest, const uint8_t * ref, | ||||
| int stride, int height) | |||||
| int stride, int height) | |||||
| { | { | ||||
| switch (height) { | switch (height) { | ||||
| case 4: | case 4: | ||||
| @@ -350,7 +350,7 @@ static void avg_pixels8_y2_mlib (uint8_t * dest, const uint8_t * ref, | |||||
| } | } | ||||
| static void avg_pixels8_xy2_mlib(uint8_t * dest, const uint8_t * ref, | static void avg_pixels8_xy2_mlib(uint8_t * dest, const uint8_t * ref, | ||||
| int stride, int height) | |||||
| int stride, int height) | |||||
| { | { | ||||
| switch (height) { | switch (height) { | ||||
| case 4: | case 4: | ||||
| @@ -450,7 +450,7 @@ void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx) | |||||
| void MPV_common_init_mlib(MpegEncContext *s) | void MPV_common_init_mlib(MpegEncContext *s) | ||||
| { | { | ||||
| if(s->avctx->dct_algo==FF_DCT_AUTO || s->avctx->dct_algo==FF_DCT_MLIB){ | if(s->avctx->dct_algo==FF_DCT_AUTO || s->avctx->dct_algo==FF_DCT_MLIB){ | ||||
| s->dsp.fdct = ff_fdct_mlib; | |||||
| s->dsp.fdct = ff_fdct_mlib; | |||||
| } | } | ||||
| if(s->avctx->idct_algo==FF_IDCT_AUTO || s->avctx->idct_algo==FF_IDCT_MLIB){ | if(s->avctx->idct_algo==FF_IDCT_AUTO || s->avctx->idct_algo==FF_IDCT_MLIB){ | ||||
| @@ -45,7 +45,7 @@ | |||||
| #define P_MV1 P[9] | #define P_MV1 P[9] | ||||
| static inline int sad_hpel_motion_search(MpegEncContext * s, | static inline int sad_hpel_motion_search(MpegEncContext * s, | ||||
| int *mx_ptr, int *my_ptr, int dmin, | |||||
| int *mx_ptr, int *my_ptr, int dmin, | |||||
| int src_index, int ref_index, | int src_index, int ref_index, | ||||
| int size, int h); | int size, int h); | ||||
| @@ -293,25 +293,25 @@ static int pix_dev(uint8_t * pix, int line_size, int mean) | |||||
| s = 0; | s = 0; | ||||
| for (i = 0; i < 16; i++) { | for (i = 0; i < 16; i++) { | ||||
| for (j = 0; j < 16; j += 8) { | |||||
| s += ABS(pix[0]-mean); | |||||
| s += ABS(pix[1]-mean); | |||||
| s += ABS(pix[2]-mean); | |||||
| s += ABS(pix[3]-mean); | |||||
| s += ABS(pix[4]-mean); | |||||
| s += ABS(pix[5]-mean); | |||||
| s += ABS(pix[6]-mean); | |||||
| s += ABS(pix[7]-mean); | |||||
| pix += 8; | |||||
| } | |||||
| pix += line_size - 16; | |||||
| for (j = 0; j < 16; j += 8) { | |||||
| s += ABS(pix[0]-mean); | |||||
| s += ABS(pix[1]-mean); | |||||
| s += ABS(pix[2]-mean); | |||||
| s += ABS(pix[3]-mean); | |||||
| s += ABS(pix[4]-mean); | |||||
| s += ABS(pix[5]-mean); | |||||
| s += ABS(pix[6]-mean); | |||||
| s += ABS(pix[7]-mean); | |||||
| pix += 8; | |||||
| } | |||||
| pix += line_size - 16; | |||||
| } | } | ||||
| return s; | return s; | ||||
| } | } | ||||
| #endif | #endif | ||||
| static inline void no_motion_search(MpegEncContext * s, | static inline void no_motion_search(MpegEncContext * s, | ||||
| int *mx_ptr, int *my_ptr) | |||||
| int *mx_ptr, int *my_ptr) | |||||
| { | { | ||||
| *mx_ptr = 16 * s->mb_x; | *mx_ptr = 16 * s->mb_x; | ||||
| *my_ptr = 16 * s->mb_y; | *my_ptr = 16 * s->mb_y; | ||||
| @@ -328,35 +328,35 @@ static int full_motion_search(MpegEncContext * s, | |||||
| xx = 16 * s->mb_x; | xx = 16 * s->mb_x; | ||||
| yy = 16 * s->mb_y; | yy = 16 * s->mb_y; | ||||
| x1 = xx - range + 1; /* we loose one pixel to avoid boundary pb with half pixel pred */ | |||||
| x1 = xx - range + 1; /* we loose one pixel to avoid boundary pb with half pixel pred */ | |||||
| if (x1 < xmin) | if (x1 < xmin) | ||||
| x1 = xmin; | |||||
| x1 = xmin; | |||||
| x2 = xx + range - 1; | x2 = xx + range - 1; | ||||
| if (x2 > xmax) | if (x2 > xmax) | ||||
| x2 = xmax; | |||||
| x2 = xmax; | |||||
| y1 = yy - range + 1; | y1 = yy - range + 1; | ||||
| if (y1 < ymin) | if (y1 < ymin) | ||||
| y1 = ymin; | |||||
| y1 = ymin; | |||||
| y2 = yy + range - 1; | y2 = yy + range - 1; | ||||
| if (y2 > ymax) | if (y2 > ymax) | ||||
| y2 = ymax; | |||||
| y2 = ymax; | |||||
| pix = s->new_picture.data[0] + (yy * s->linesize) + xx; | pix = s->new_picture.data[0] + (yy * s->linesize) + xx; | ||||
| dmin = 0x7fffffff; | dmin = 0x7fffffff; | ||||
| mx = 0; | mx = 0; | ||||
| my = 0; | my = 0; | ||||
| for (y = y1; y <= y2; y++) { | for (y = y1; y <= y2; y++) { | ||||
| for (x = x1; x <= x2; x++) { | |||||
| d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, | |||||
| s->linesize, 16); | |||||
| if (d < dmin || | |||||
| (d == dmin && | |||||
| (abs(x - xx) + abs(y - yy)) < | |||||
| (abs(mx - xx) + abs(my - yy)))) { | |||||
| dmin = d; | |||||
| mx = x; | |||||
| my = y; | |||||
| } | |||||
| } | |||||
| for (x = x1; x <= x2; x++) { | |||||
| d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, | |||||
| s->linesize, 16); | |||||
| if (d < dmin || | |||||
| (d == dmin && | |||||
| (abs(x - xx) + abs(y - yy)) < | |||||
| (abs(mx - xx) + abs(my - yy)))) { | |||||
| dmin = d; | |||||
| mx = x; | |||||
| my = y; | |||||
| } | |||||
| } | |||||
| } | } | ||||
| *mx_ptr = mx; | *mx_ptr = mx; | ||||
| @@ -364,8 +364,8 @@ static int full_motion_search(MpegEncContext * s, | |||||
| #if 0 | #if 0 | ||||
| if (*mx_ptr < -(2 * range) || *mx_ptr >= (2 * range) || | if (*mx_ptr < -(2 * range) || *mx_ptr >= (2 * range) || | ||||
| *my_ptr < -(2 * range) || *my_ptr >= (2 * range)) { | |||||
| fprintf(stderr, "error %d %d\n", *mx_ptr, *my_ptr); | |||||
| *my_ptr < -(2 * range) || *my_ptr >= (2 * range)) { | |||||
| fprintf(stderr, "error %d %d\n", *mx_ptr, *my_ptr); | |||||
| } | } | ||||
| #endif | #endif | ||||
| return dmin; | return dmin; | ||||
| @@ -386,22 +386,22 @@ static int log_motion_search(MpegEncContext * s, | |||||
| /* Left limit */ | /* Left limit */ | ||||
| x1 = xx - range; | x1 = xx - range; | ||||
| if (x1 < xmin) | if (x1 < xmin) | ||||
| x1 = xmin; | |||||
| x1 = xmin; | |||||
| /* Right limit */ | /* Right limit */ | ||||
| x2 = xx + range; | x2 = xx + range; | ||||
| if (x2 > xmax) | if (x2 > xmax) | ||||
| x2 = xmax; | |||||
| x2 = xmax; | |||||
| /* Upper limit */ | /* Upper limit */ | ||||
| y1 = yy - range; | y1 = yy - range; | ||||
| if (y1 < ymin) | if (y1 < ymin) | ||||
| y1 = ymin; | |||||
| y1 = ymin; | |||||
| /* Lower limit */ | /* Lower limit */ | ||||
| y2 = yy + range; | y2 = yy + range; | ||||
| if (y2 > ymax) | if (y2 > ymax) | ||||
| y2 = ymax; | |||||
| y2 = ymax; | |||||
| pix = s->new_picture.data[0] + (yy * s->linesize) + xx; | pix = s->new_picture.data[0] + (yy * s->linesize) + xx; | ||||
| dmin = 0x7fffffff; | dmin = 0x7fffffff; | ||||
| @@ -409,34 +409,34 @@ static int log_motion_search(MpegEncContext * s, | |||||
| my = 0; | my = 0; | ||||
| do { | do { | ||||
| for (y = y1; y <= y2; y += range) { | |||||
| for (x = x1; x <= x2; x += range) { | |||||
| d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16); | |||||
| if (d < dmin || (d == dmin && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) { | |||||
| dmin = d; | |||||
| mx = x; | |||||
| my = y; | |||||
| } | |||||
| } | |||||
| } | |||||
| range = range >> 1; | |||||
| x1 = mx - range; | |||||
| if (x1 < xmin) | |||||
| x1 = xmin; | |||||
| x2 = mx + range; | |||||
| if (x2 > xmax) | |||||
| x2 = xmax; | |||||
| y1 = my - range; | |||||
| if (y1 < ymin) | |||||
| y1 = ymin; | |||||
| y2 = my + range; | |||||
| if (y2 > ymax) | |||||
| y2 = ymax; | |||||
| for (y = y1; y <= y2; y += range) { | |||||
| for (x = x1; x <= x2; x += range) { | |||||
| d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16); | |||||
| if (d < dmin || (d == dmin && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) { | |||||
| dmin = d; | |||||
| mx = x; | |||||
| my = y; | |||||
| } | |||||
| } | |||||
| } | |||||
| range = range >> 1; | |||||
| x1 = mx - range; | |||||
| if (x1 < xmin) | |||||
| x1 = xmin; | |||||
| x2 = mx + range; | |||||
| if (x2 > xmax) | |||||
| x2 = xmax; | |||||
| y1 = my - range; | |||||
| if (y1 < ymin) | |||||
| y1 = ymin; | |||||
| y2 = my + range; | |||||
| if (y2 > ymax) | |||||
| y2 = ymax; | |||||
| } while (range >= 1); | } while (range >= 1); | ||||
| @@ -462,22 +462,22 @@ static int phods_motion_search(MpegEncContext * s, | |||||
| /* Left limit */ | /* Left limit */ | ||||
| x1 = xx - range; | x1 = xx - range; | ||||
| if (x1 < xmin) | if (x1 < xmin) | ||||
| x1 = xmin; | |||||
| x1 = xmin; | |||||
| /* Right limit */ | /* Right limit */ | ||||
| x2 = xx + range; | x2 = xx + range; | ||||
| if (x2 > xmax) | if (x2 > xmax) | ||||
| x2 = xmax; | |||||
| x2 = xmax; | |||||
| /* Upper limit */ | /* Upper limit */ | ||||
| y1 = yy - range; | y1 = yy - range; | ||||
| if (y1 < ymin) | if (y1 < ymin) | ||||
| y1 = ymin; | |||||
| y1 = ymin; | |||||
| /* Lower limit */ | /* Lower limit */ | ||||
| y2 = yy + range; | y2 = yy + range; | ||||
| if (y2 > ymax) | if (y2 > ymax) | ||||
| y2 = ymax; | |||||
| y2 = ymax; | |||||
| pix = s->new_picture.data[0] + (yy * s->linesize) + xx; | pix = s->new_picture.data[0] + (yy * s->linesize) + xx; | ||||
| mx = 0; | mx = 0; | ||||
| @@ -489,43 +489,43 @@ static int phods_motion_search(MpegEncContext * s, | |||||
| dminx = 0x7fffffff; | dminx = 0x7fffffff; | ||||
| dminy = 0x7fffffff; | dminy = 0x7fffffff; | ||||
| lastx = x; | |||||
| for (x = x1; x <= x2; x += range) { | |||||
| d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16); | |||||
| if (d < dminx || (d == dminx && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) { | |||||
| dminx = d; | |||||
| mx = x; | |||||
| } | |||||
| } | |||||
| x = lastx; | |||||
| for (y = y1; y <= y2; y += range) { | |||||
| d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16); | |||||
| if (d < dminy || (d == dminy && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) { | |||||
| dminy = d; | |||||
| my = y; | |||||
| } | |||||
| } | |||||
| range = range >> 1; | |||||
| x = mx; | |||||
| y = my; | |||||
| x1 = mx - range; | |||||
| if (x1 < xmin) | |||||
| x1 = xmin; | |||||
| x2 = mx + range; | |||||
| if (x2 > xmax) | |||||
| x2 = xmax; | |||||
| y1 = my - range; | |||||
| if (y1 < ymin) | |||||
| y1 = ymin; | |||||
| y2 = my + range; | |||||
| if (y2 > ymax) | |||||
| y2 = ymax; | |||||
| lastx = x; | |||||
| for (x = x1; x <= x2; x += range) { | |||||
| d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16); | |||||
| if (d < dminx || (d == dminx && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) { | |||||
| dminx = d; | |||||
| mx = x; | |||||
| } | |||||
| } | |||||
| x = lastx; | |||||
| for (y = y1; y <= y2; y += range) { | |||||
| d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16); | |||||
| if (d < dminy || (d == dminy && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) { | |||||
| dminy = d; | |||||
| my = y; | |||||
| } | |||||
| } | |||||
| range = range >> 1; | |||||
| x = mx; | |||||
| y = my; | |||||
| x1 = mx - range; | |||||
| if (x1 < xmin) | |||||
| x1 = xmin; | |||||
| x2 = mx + range; | |||||
| if (x2 > xmax) | |||||
| x2 = xmax; | |||||
| y1 = my - range; | |||||
| if (y1 < ymin) | |||||
| y1 = ymin; | |||||
| y2 = my + range; | |||||
| if (y2 > ymax) | |||||
| y2 = ymax; | |||||
| } while (range >= 1); | } while (range >= 1); | ||||
| @@ -550,7 +550,7 @@ static int phods_motion_search(MpegEncContext * s, | |||||
| } | } | ||||
| static inline int sad_hpel_motion_search(MpegEncContext * s, | static inline int sad_hpel_motion_search(MpegEncContext * s, | ||||
| int *mx_ptr, int *my_ptr, int dmin, | |||||
| int *mx_ptr, int *my_ptr, int dmin, | |||||
| int src_index, int ref_index, | int src_index, int ref_index, | ||||
| int size, int h) | int size, int h) | ||||
| { | { | ||||
| @@ -1190,24 +1190,24 @@ void ff_estimate_p_frame_motion(MpegEncContext * s, | |||||
| switch(s->me_method) { | switch(s->me_method) { | ||||
| case ME_ZERO: | case ME_ZERO: | ||||
| default: | default: | ||||
| no_motion_search(s, &mx, &my); | |||||
| no_motion_search(s, &mx, &my); | |||||
| mx-= mb_x*16; | mx-= mb_x*16; | ||||
| my-= mb_y*16; | my-= mb_y*16; | ||||
| dmin = 0; | dmin = 0; | ||||
| break; | break; | ||||
| #if 0 | #if 0 | ||||
| case ME_FULL: | case ME_FULL: | ||||
| dmin = full_motion_search(s, &mx, &my, range, ref_picture); | |||||
| dmin = full_motion_search(s, &mx, &my, range, ref_picture); | |||||
| mx-= mb_x*16; | mx-= mb_x*16; | ||||
| my-= mb_y*16; | my-= mb_y*16; | ||||
| break; | break; | ||||
| case ME_LOG: | case ME_LOG: | ||||
| dmin = log_motion_search(s, &mx, &my, range / 2, ref_picture); | |||||
| dmin = log_motion_search(s, &mx, &my, range / 2, ref_picture); | |||||
| mx-= mb_x*16; | mx-= mb_x*16; | ||||
| my-= mb_y*16; | my-= mb_y*16; | ||||
| break; | break; | ||||
| case ME_PHODS: | case ME_PHODS: | ||||
| dmin = phods_motion_search(s, &mx, &my, range / 2, ref_picture); | |||||
| dmin = phods_motion_search(s, &mx, &my, range / 2, ref_picture); | |||||
| mx-= mb_x*16; | mx-= mb_x*16; | ||||
| my-= mb_y*16; | my-= mb_y*16; | ||||
| break; | break; | ||||
| @@ -1264,7 +1264,7 @@ void ff_estimate_p_frame_motion(MpegEncContext * s, | |||||
| #if 0 | #if 0 | ||||
| printf("varc=%4d avg_var=%4d (sum=%4d) vard=%4d mx=%2d my=%2d\n", | printf("varc=%4d avg_var=%4d (sum=%4d) vard=%4d mx=%2d my=%2d\n", | ||||
| varc, s->avg_mb_var, sum, vard, mx - xx, my - yy); | |||||
| varc, s->avg_mb_var, sum, vard, mx - xx, my - yy); | |||||
| #endif | #endif | ||||
| if(mb_type){ | if(mb_type){ | ||||
| if (vard <= 64 || vard < varc) | if (vard <= 64 || vard < varc) | ||||
| @@ -1479,24 +1479,24 @@ static int ff_estimate_motion_b(MpegEncContext * s, | |||||
| switch(s->me_method) { | switch(s->me_method) { | ||||
| case ME_ZERO: | case ME_ZERO: | ||||
| default: | default: | ||||
| no_motion_search(s, &mx, &my); | |||||
| no_motion_search(s, &mx, &my); | |||||
| dmin = 0; | dmin = 0; | ||||
| mx-= mb_x*16; | mx-= mb_x*16; | ||||
| my-= mb_y*16; | my-= mb_y*16; | ||||
| break; | break; | ||||
| #if 0 | #if 0 | ||||
| case ME_FULL: | case ME_FULL: | ||||
| dmin = full_motion_search(s, &mx, &my, range, ref_picture); | |||||
| dmin = full_motion_search(s, &mx, &my, range, ref_picture); | |||||
| mx-= mb_x*16; | mx-= mb_x*16; | ||||
| my-= mb_y*16; | my-= mb_y*16; | ||||
| break; | break; | ||||
| case ME_LOG: | case ME_LOG: | ||||
| dmin = log_motion_search(s, &mx, &my, range / 2, ref_picture); | |||||
| dmin = log_motion_search(s, &mx, &my, range / 2, ref_picture); | |||||
| mx-= mb_x*16; | mx-= mb_x*16; | ||||
| my-= mb_y*16; | my-= mb_y*16; | ||||
| break; | break; | ||||
| case ME_PHODS: | case ME_PHODS: | ||||
| dmin = phods_motion_search(s, &mx, &my, range / 2, ref_picture); | |||||
| dmin = phods_motion_search(s, &mx, &my, range / 2, ref_picture); | |||||
| mx-= mb_x*16; | mx-= mb_x*16; | ||||
| my-= mb_y*16; | my-= mb_y*16; | ||||
| break; | break; | ||||
| @@ -45,7 +45,7 @@ | |||||
| #if 0 | #if 0 | ||||
| static int hpel_motion_search)(MpegEncContext * s, | static int hpel_motion_search)(MpegEncContext * s, | ||||
| int *mx_ptr, int *my_ptr, int dmin, | |||||
| int *mx_ptr, int *my_ptr, int dmin, | |||||
| uint8_t *ref_data[3], | uint8_t *ref_data[3], | ||||
| int size) | int size) | ||||
| { | { | ||||
| @@ -113,7 +113,7 @@ static int hpel_motion_search)(MpegEncContext * s, | |||||
| #else | #else | ||||
| static int hpel_motion_search(MpegEncContext * s, | static int hpel_motion_search(MpegEncContext * s, | ||||
| int *mx_ptr, int *my_ptr, int dmin, | |||||
| int *mx_ptr, int *my_ptr, int dmin, | |||||
| int src_index, int ref_index, | int src_index, int ref_index, | ||||
| int size, int h) | int size, int h) | ||||
| { | { | ||||
| @@ -271,7 +271,7 @@ int inline ff_get_mb_score(MpegEncContext * s, int mx, int my, int src_index, | |||||
| } | } | ||||
| static int qpel_motion_search(MpegEncContext * s, | static int qpel_motion_search(MpegEncContext * s, | ||||
| int *mx_ptr, int *my_ptr, int dmin, | |||||
| int *mx_ptr, int *my_ptr, int dmin, | |||||
| int src_index, int ref_index, | int src_index, int ref_index, | ||||
| int size, int h) | int size, int h) | ||||
| { | { | ||||
| @@ -1005,7 +1005,7 @@ static int epzs_motion_search4(MpegEncContext * s, | |||||
| //printf("%d %d %d %d //",xmin, ymin, xmax, ymax); | //printf("%d %d %d %d //",xmin, ymin, xmax, ymax); | ||||
| /* first line */ | /* first line */ | ||||
| if (s->first_slice_line) { | if (s->first_slice_line) { | ||||
| CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift) | |||||
| CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift) | |||||
| CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16, | CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16, | ||||
| (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16) | (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16) | ||||
| CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift) | CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift) | ||||
| @@ -1067,7 +1067,7 @@ static int epzs_motion_search2(MpegEncContext * s, | |||||
| //printf("%d %d %d %d //",xmin, ymin, xmax, ymax); | //printf("%d %d %d %d //",xmin, ymin, xmax, ymax); | ||||
| /* first line */ | /* first line */ | ||||
| if (s->first_slice_line) { | if (s->first_slice_line) { | ||||
| CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift) | |||||
| CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift) | |||||
| CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16, | CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16, | ||||
| (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16) | (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16) | ||||
| CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift) | CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift) | ||||
| @@ -28,51 +28,51 @@ | |||||
| #define BUFFER_SIZE (2*MPA_FRAME_SIZE) | #define BUFFER_SIZE (2*MPA_FRAME_SIZE) | ||||
| typedef struct Mp3AudioContext { | typedef struct Mp3AudioContext { | ||||
| lame_global_flags *gfp; | |||||
| int stereo; | |||||
| lame_global_flags *gfp; | |||||
| int stereo; | |||||
| uint8_t buffer[BUFFER_SIZE]; | uint8_t buffer[BUFFER_SIZE]; | ||||
| int buffer_index; | int buffer_index; | ||||
| } Mp3AudioContext; | } Mp3AudioContext; | ||||
| static int MP3lame_encode_init(AVCodecContext *avctx) | static int MP3lame_encode_init(AVCodecContext *avctx) | ||||
| { | { | ||||
| Mp3AudioContext *s = avctx->priv_data; | |||||
| if (avctx->channels > 2) | |||||
| return -1; | |||||
| s->stereo = avctx->channels > 1 ? 1 : 0; | |||||
| if ((s->gfp = lame_init()) == NULL) | |||||
| goto err; | |||||
| lame_set_in_samplerate(s->gfp, avctx->sample_rate); | |||||
| lame_set_out_samplerate(s->gfp, avctx->sample_rate); | |||||
| lame_set_num_channels(s->gfp, avctx->channels); | |||||
| /* lame 3.91 dies on quality != 5 */ | |||||
| lame_set_quality(s->gfp, 5); | |||||
| /* lame 3.91 doesn't work in mono */ | |||||
| lame_set_mode(s->gfp, JOINT_STEREO); | |||||
| lame_set_brate(s->gfp, avctx->bit_rate/1000); | |||||
| Mp3AudioContext *s = avctx->priv_data; | |||||
| if (avctx->channels > 2) | |||||
| return -1; | |||||
| s->stereo = avctx->channels > 1 ? 1 : 0; | |||||
| if ((s->gfp = lame_init()) == NULL) | |||||
| goto err; | |||||
| lame_set_in_samplerate(s->gfp, avctx->sample_rate); | |||||
| lame_set_out_samplerate(s->gfp, avctx->sample_rate); | |||||
| lame_set_num_channels(s->gfp, avctx->channels); | |||||
| /* lame 3.91 dies on quality != 5 */ | |||||
| lame_set_quality(s->gfp, 5); | |||||
| /* lame 3.91 doesn't work in mono */ | |||||
| lame_set_mode(s->gfp, JOINT_STEREO); | |||||
| lame_set_brate(s->gfp, avctx->bit_rate/1000); | |||||
| if(avctx->flags & CODEC_FLAG_QSCALE) { | if(avctx->flags & CODEC_FLAG_QSCALE) { | ||||
| lame_set_brate(s->gfp, 0); | lame_set_brate(s->gfp, 0); | ||||
| lame_set_VBR(s->gfp, vbr_default); | lame_set_VBR(s->gfp, vbr_default); | ||||
| lame_set_VBR_q(s->gfp, avctx->global_quality / (float)FF_QP2LAMBDA); | lame_set_VBR_q(s->gfp, avctx->global_quality / (float)FF_QP2LAMBDA); | ||||
| } | } | ||||
| lame_set_bWriteVbrTag(s->gfp,0); | lame_set_bWriteVbrTag(s->gfp,0); | ||||
| if (lame_init_params(s->gfp) < 0) | |||||
| goto err_close; | |||||
| if (lame_init_params(s->gfp) < 0) | |||||
| goto err_close; | |||||
| avctx->frame_size = lame_get_framesize(s->gfp); | |||||
| avctx->frame_size = lame_get_framesize(s->gfp); | |||||
| avctx->coded_frame= avcodec_alloc_frame(); | avctx->coded_frame= avcodec_alloc_frame(); | ||||
| avctx->coded_frame->key_frame= 1; | avctx->coded_frame->key_frame= 1; | ||||
| return 0; | |||||
| return 0; | |||||
| err_close: | err_close: | ||||
| lame_close(s->gfp); | |||||
| lame_close(s->gfp); | |||||
| err: | err: | ||||
| return -1; | |||||
| return -1; | |||||
| } | } | ||||
| static const int sSampleRates[3] = { | static const int sSampleRates[3] = { | ||||
| @@ -136,11 +136,11 @@ static int mp3len(void *data, int *samplesPerFrame, int *sampleRate) | |||||
| int MP3lame_encode_frame(AVCodecContext *avctx, | int MP3lame_encode_frame(AVCodecContext *avctx, | ||||
| unsigned char *frame, int buf_size, void *data) | unsigned char *frame, int buf_size, void *data) | ||||
| { | { | ||||
| Mp3AudioContext *s = avctx->priv_data; | |||||
| int len; | |||||
| int lame_result; | |||||
| Mp3AudioContext *s = avctx->priv_data; | |||||
| int len; | |||||
| int lame_result; | |||||
| /* lame 3.91 dies on '1-channel interleaved' data */ | |||||
| /* lame 3.91 dies on '1-channel interleaved' data */ | |||||
| if(data){ | if(data){ | ||||
| if (s->stereo) { | if (s->stereo) { | ||||
| @@ -198,12 +198,12 @@ int MP3lame_encode_frame(AVCodecContext *avctx, | |||||
| int MP3lame_encode_close(AVCodecContext *avctx) | int MP3lame_encode_close(AVCodecContext *avctx) | ||||
| { | { | ||||
| Mp3AudioContext *s = avctx->priv_data; | |||||
| Mp3AudioContext *s = avctx->priv_data; | |||||
| av_freep(&avctx->coded_frame); | av_freep(&avctx->coded_frame); | ||||
| lame_close(s->gfp); | |||||
| return 0; | |||||
| lame_close(s->gfp); | |||||
| return 0; | |||||
| } | } | ||||
| @@ -35,14 +35,14 @@ | |||||
| /* Start codes. */ | /* Start codes. */ | ||||
| #define SEQ_END_CODE 0x000001b7 | |||||
| #define SEQ_START_CODE 0x000001b3 | |||||
| #define GOP_START_CODE 0x000001b8 | |||||
| #define PICTURE_START_CODE 0x00000100 | |||||
| #define SLICE_MIN_START_CODE 0x00000101 | |||||
| #define SLICE_MAX_START_CODE 0x000001af | |||||
| #define EXT_START_CODE 0x000001b5 | |||||
| #define USER_START_CODE 0x000001b2 | |||||
| #define SEQ_END_CODE 0x000001b7 | |||||
| #define SEQ_START_CODE 0x000001b3 | |||||
| #define GOP_START_CODE 0x000001b8 | |||||
| #define PICTURE_START_CODE 0x00000100 | |||||
| #define SLICE_MIN_START_CODE 0x00000101 | |||||
| #define SLICE_MAX_START_CODE 0x000001af | |||||
| #define EXT_START_CODE 0x000001b5 | |||||
| #define USER_START_CODE 0x000001b2 | |||||
| #define DC_VLC_BITS 9 | #define DC_VLC_BITS 9 | ||||
| #define MV_VLC_BITS 9 | #define MV_VLC_BITS 9 | ||||
| @@ -89,7 +89,7 @@ const enum PixelFormat pixfmt_yuv_444[]= {PIX_FMT_YUV444P,-1}; | |||||
| const enum PixelFormat pixfmt_xvmc_mpg2_420[] = { | const enum PixelFormat pixfmt_xvmc_mpg2_420[] = { | ||||
| PIX_FMT_XVMC_MPEG2_IDCT, | PIX_FMT_XVMC_MPEG2_IDCT, | ||||
| PIX_FMT_XVMC_MPEG2_MC, | PIX_FMT_XVMC_MPEG2_MC, | ||||
| -1}; | |||||
| -1}; | |||||
| #ifdef CONFIG_ENCODERS | #ifdef CONFIG_ENCODERS | ||||
| static uint8_t (*mv_penalty)[MAX_MV*2+1]= NULL; | static uint8_t (*mv_penalty)[MAX_MV*2+1]= NULL; | ||||
| static uint8_t fcode_tab[MAX_MV*2+1]; | static uint8_t fcode_tab[MAX_MV*2+1]; | ||||
| @@ -166,7 +166,7 @@ static void init_uni_ac_vlc(RLTable *rl, uint32_t *uni_ac_vlc_bits, uint8_t *uni | |||||
| code= rl->index_run[0][run] + alevel - 1; | code= rl->index_run[0][run] + alevel - 1; | ||||
| if (code < 111 /* rl->n */) { | if (code < 111 /* rl->n */) { | ||||
| /* store the vlc & sign at once */ | |||||
| /* store the vlc & sign at once */ | |||||
| len= mpeg1_vlc[code][1]+1; | len= mpeg1_vlc[code][1]+1; | ||||
| bits= (mpeg1_vlc[code][0]<<1) + sign; | bits= (mpeg1_vlc[code][0]<<1) + sign; | ||||
| } else { | } else { | ||||
| @@ -764,38 +764,38 @@ void ff_mpeg1_encode_init(MpegEncContext *s) | |||||
| if(!done){ | if(!done){ | ||||
| int f_code; | int f_code; | ||||
| int mv; | int mv; | ||||
| int i; | |||||
| int i; | |||||
| done=1; | done=1; | ||||
| init_rl(&rl_mpeg1, 1); | init_rl(&rl_mpeg1, 1); | ||||
| for(i=0; i<64; i++) | |||||
| { | |||||
| mpeg1_max_level[0][i]= rl_mpeg1.max_level[0][i]; | |||||
| mpeg1_index_run[0][i]= rl_mpeg1.index_run[0][i]; | |||||
| } | |||||
| for(i=0; i<64; i++) | |||||
| { | |||||
| mpeg1_max_level[0][i]= rl_mpeg1.max_level[0][i]; | |||||
| mpeg1_index_run[0][i]= rl_mpeg1.index_run[0][i]; | |||||
| } | |||||
| init_uni_ac_vlc(&rl_mpeg1, uni_mpeg1_ac_vlc_bits, uni_mpeg1_ac_vlc_len); | init_uni_ac_vlc(&rl_mpeg1, uni_mpeg1_ac_vlc_bits, uni_mpeg1_ac_vlc_len); | ||||
| /* build unified dc encoding tables */ | |||||
| for(i=-255; i<256; i++) | |||||
| { | |||||
| int adiff, index; | |||||
| int bits, code; | |||||
| int diff=i; | |||||
| /* build unified dc encoding tables */ | |||||
| for(i=-255; i<256; i++) | |||||
| { | |||||
| int adiff, index; | |||||
| int bits, code; | |||||
| int diff=i; | |||||
| adiff = ABS(diff); | |||||
| if(diff<0) diff--; | |||||
| index = av_log2(2*adiff); | |||||
| adiff = ABS(diff); | |||||
| if(diff<0) diff--; | |||||
| index = av_log2(2*adiff); | |||||
| bits= vlc_dc_lum_bits[index] + index; | |||||
| code= (vlc_dc_lum_code[index]<<index) + (diff & ((1 << index) - 1)); | |||||
| mpeg1_lum_dc_uni[i+255]= bits + (code<<8); | |||||
| bits= vlc_dc_lum_bits[index] + index; | |||||
| code= (vlc_dc_lum_code[index]<<index) + (diff & ((1 << index) - 1)); | |||||
| mpeg1_lum_dc_uni[i+255]= bits + (code<<8); | |||||
| bits= vlc_dc_chroma_bits[index] + index; | |||||
| code= (vlc_dc_chroma_code[index]<<index) + (diff & ((1 << index) - 1)); | |||||
| mpeg1_chr_dc_uni[i+255]= bits + (code<<8); | |||||
| } | |||||
| bits= vlc_dc_chroma_bits[index] + index; | |||||
| code= (vlc_dc_chroma_code[index]<<index) + (diff & ((1 << index) - 1)); | |||||
| mpeg1_chr_dc_uni[i+255]= bits + (code<<8); | |||||
| } | |||||
| mv_penalty= av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) ); | mv_penalty= av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) ); | ||||
| @@ -873,14 +873,14 @@ static inline void encode_dc(MpegEncContext *s, int diff, int component) | |||||
| }else{ | }else{ | ||||
| if (component == 0) { | if (component == 0) { | ||||
| put_bits( | put_bits( | ||||
| &s->pb, | |||||
| mpeg1_lum_dc_uni[diff+255]&0xFF, | |||||
| mpeg1_lum_dc_uni[diff+255]>>8); | |||||
| &s->pb, | |||||
| mpeg1_lum_dc_uni[diff+255]&0xFF, | |||||
| mpeg1_lum_dc_uni[diff+255]>>8); | |||||
| } else { | } else { | ||||
| put_bits( | put_bits( | ||||
| &s->pb, | &s->pb, | ||||
| mpeg1_chr_dc_uni[diff+255]&0xFF, | |||||
| mpeg1_chr_dc_uni[diff+255]>>8); | |||||
| mpeg1_chr_dc_uni[diff+255]&0xFF, | |||||
| mpeg1_chr_dc_uni[diff+255]>>8); | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| @@ -946,10 +946,10 @@ static void mpeg1_encode_block(MpegEncContext *s, | |||||
| // code = get_rl_index(rl, 0, run, alevel); | // code = get_rl_index(rl, 0, run, alevel); | ||||
| if (alevel <= mpeg1_max_level[0][run]){ | if (alevel <= mpeg1_max_level[0][run]){ | ||||
| code= mpeg1_index_run[0][run] + alevel - 1; | code= mpeg1_index_run[0][run] + alevel - 1; | ||||
| /* store the vlc & sign at once */ | |||||
| /* store the vlc & sign at once */ | |||||
| put_bits(&s->pb, mpeg1_vlc[code][1]+1, (mpeg1_vlc[code][0]<<1) + sign); | put_bits(&s->pb, mpeg1_vlc[code][1]+1, (mpeg1_vlc[code][0]<<1) + sign); | ||||
| } else { | } else { | ||||
| /* escape seems to be pretty rare <5% so i dont optimize it */ | |||||
| /* escape seems to be pretty rare <5% so i dont optimize it */ | |||||
| put_bits(&s->pb, mpeg1_vlc[111/*rl->n*/][1], mpeg1_vlc[111/*rl->n*/][0]); | put_bits(&s->pb, mpeg1_vlc[111/*rl->n*/][1], mpeg1_vlc[111/*rl->n*/][0]); | ||||
| /* escape: only clip in this case */ | /* escape: only clip in this case */ | ||||
| put_bits(&s->pb, 6, run); | put_bits(&s->pb, 6, run); | ||||
| @@ -1376,8 +1376,8 @@ static int mpeg_decode_mb(MpegEncContext *s, | |||||
| return -1; | return -1; | ||||
| } | } | ||||
| if(mb_block_count > 6){ | if(mb_block_count > 6){ | ||||
| cbp<<= mb_block_count-6; | |||||
| cbp |= get_bits(&s->gb, mb_block_count-6); | |||||
| cbp<<= mb_block_count-6; | |||||
| cbp |= get_bits(&s->gb, mb_block_count-6); | |||||
| } | } | ||||
| #ifdef HAVE_XVMC | #ifdef HAVE_XVMC | ||||
| @@ -2074,7 +2074,7 @@ static int mpeg_decode_postinit(AVCodecContext *avctx){ | |||||
| uint8_t old_permutation[64]; | uint8_t old_permutation[64]; | ||||
| if ( | if ( | ||||
| (s1->mpeg_enc_ctx_allocated == 0)|| | |||||
| (s1->mpeg_enc_ctx_allocated == 0)|| | |||||
| avctx->coded_width != s->width || | avctx->coded_width != s->width || | ||||
| avctx->coded_height != s->height|| | avctx->coded_height != s->height|| | ||||
| s1->save_aspect_info != s->aspect_ratio_info|| | s1->save_aspect_info != s->aspect_ratio_info|| | ||||
| @@ -2088,8 +2088,8 @@ static int mpeg_decode_postinit(AVCodecContext *avctx){ | |||||
| s->parse_context= pc; | s->parse_context= pc; | ||||
| } | } | ||||
| if( (s->width == 0 )||(s->height == 0)) | |||||
| return -2; | |||||
| if( (s->width == 0 )||(s->height == 0)) | |||||
| return -2; | |||||
| avcodec_set_dimensions(avctx, s->width, s->height); | avcodec_set_dimensions(avctx, s->width, s->height); | ||||
| avctx->bit_rate = s->bit_rate; | avctx->bit_rate = s->bit_rate; | ||||
| @@ -2129,7 +2129,7 @@ static int mpeg_decode_postinit(AVCodecContext *avctx){ | |||||
| mpeg2_aspect[s->aspect_ratio_info], | mpeg2_aspect[s->aspect_ratio_info], | ||||
| (AVRational){s1->pan_scan.width, s1->pan_scan.height} | (AVRational){s1->pan_scan.width, s1->pan_scan.height} | ||||
| ); | ); | ||||
| } | |||||
| } | |||||
| }else{ | }else{ | ||||
| s->avctx->sample_aspect_ratio= | s->avctx->sample_aspect_ratio= | ||||
| mpeg2_aspect[s->aspect_ratio_info]; | mpeg2_aspect[s->aspect_ratio_info]; | ||||
| @@ -2312,16 +2312,16 @@ static void mpeg_decode_picture_display_extension(Mpeg1Context *s1) | |||||
| nofco = 1; | nofco = 1; | ||||
| if(s->progressive_sequence){ | if(s->progressive_sequence){ | ||||
| if(s->repeat_first_field){ | if(s->repeat_first_field){ | ||||
| nofco++; | |||||
| if(s->top_field_first) | |||||
| nofco++; | |||||
| } | |||||
| nofco++; | |||||
| if(s->top_field_first) | |||||
| nofco++; | |||||
| } | |||||
| }else{ | }else{ | ||||
| if(s->picture_structure == PICT_FRAME){ | if(s->picture_structure == PICT_FRAME){ | ||||
| nofco++; | nofco++; | ||||
| if(s->repeat_first_field) | |||||
| nofco++; | |||||
| } | |||||
| if(s->repeat_first_field) | |||||
| nofco++; | |||||
| } | |||||
| } | } | ||||
| for(i=0; i<nofco; i++){ | for(i=0; i<nofco; i++){ | ||||
| s1->pan_scan.position[i][0]= get_sbits(&s->gb, 16); | s1->pan_scan.position[i][0]= get_sbits(&s->gb, 16); | ||||
| @@ -2985,8 +2985,8 @@ static void mpeg_decode_gop(AVCodecContext *avctx, | |||||
| if(s->avctx->debug & FF_DEBUG_PICT_INFO) | if(s->avctx->debug & FF_DEBUG_PICT_INFO) | ||||
| av_log(s->avctx, AV_LOG_DEBUG, "GOP (%2d:%02d:%02d.[%02d]) broken_link=%d\n", | av_log(s->avctx, AV_LOG_DEBUG, "GOP (%2d:%02d:%02d.[%02d]) broken_link=%d\n", | ||||
| time_code_hours, time_code_minutes, time_code_seconds, | |||||
| time_code_pictures, broken_link); | |||||
| time_code_hours, time_code_minutes, time_code_seconds, | |||||
| time_code_pictures, broken_link); | |||||
| } | } | ||||
| /** | /** | ||||
| * finds the end of the current frame in the bitstream. | * finds the end of the current frame in the bitstream. | ||||
| @@ -3044,13 +3044,13 @@ static int mpeg_decode_frame(AVCodecContext *avctx, | |||||
| dprintf("fill_buffer\n"); | dprintf("fill_buffer\n"); | ||||
| if (buf_size == 0) { | if (buf_size == 0) { | ||||
| /* special case for last picture */ | |||||
| if (s2->low_delay==0 && s2->next_picture_ptr) { | |||||
| *picture= *(AVFrame*)s2->next_picture_ptr; | |||||
| s2->next_picture_ptr= NULL; | |||||
| /* special case for last picture */ | |||||
| if (s2->low_delay==0 && s2->next_picture_ptr) { | |||||
| *picture= *(AVFrame*)s2->next_picture_ptr; | |||||
| s2->next_picture_ptr= NULL; | |||||
| *data_size = sizeof(AVFrame); | |||||
| } | |||||
| *data_size = sizeof(AVFrame); | |||||
| } | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| @@ -3111,13 +3111,13 @@ static int mpeg_decode_frame(AVCodecContext *avctx, | |||||
| switch(start_code) { | switch(start_code) { | ||||
| case SEQ_START_CODE: | case SEQ_START_CODE: | ||||
| mpeg1_decode_sequence(avctx, buf_ptr, | mpeg1_decode_sequence(avctx, buf_ptr, | ||||
| input_size); | |||||
| input_size); | |||||
| break; | break; | ||||
| case PICTURE_START_CODE: | case PICTURE_START_CODE: | ||||
| /* we have a complete image : we try to decompress it */ | /* we have a complete image : we try to decompress it */ | ||||
| mpeg1_decode_picture(avctx, | mpeg1_decode_picture(avctx, | ||||
| buf_ptr, input_size); | |||||
| buf_ptr, input_size); | |||||
| break; | break; | ||||
| case EXT_START_CODE: | case EXT_START_CODE: | ||||
| mpeg_decode_extension(avctx, | mpeg_decode_extension(avctx, | ||||
| @@ -4,14 +4,14 @@ | |||||
| */ | */ | ||||
| const int16_t ff_mpeg1_default_intra_matrix[64] = { | const int16_t ff_mpeg1_default_intra_matrix[64] = { | ||||
| 8, 16, 19, 22, 26, 27, 29, 34, | |||||
| 16, 16, 22, 24, 27, 29, 34, 37, | |||||
| 19, 22, 26, 27, 29, 34, 34, 38, | |||||
| 22, 22, 26, 27, 29, 34, 37, 40, | |||||
| 22, 26, 27, 29, 32, 35, 40, 48, | |||||
| 26, 27, 29, 32, 35, 40, 48, 58, | |||||
| 26, 27, 29, 34, 38, 46, 56, 69, | |||||
| 27, 29, 35, 38, 46, 56, 69, 83 | |||||
| 8, 16, 19, 22, 26, 27, 29, 34, | |||||
| 16, 16, 22, 24, 27, 29, 34, 37, | |||||
| 19, 22, 26, 27, 29, 34, 34, 38, | |||||
| 22, 22, 26, 27, 29, 34, 37, 40, | |||||
| 22, 26, 27, 29, 32, 35, 40, 48, | |||||
| 26, 27, 29, 32, 35, 40, 48, 58, | |||||
| 26, 27, 29, 34, 38, 46, 56, 69, | |||||
| 27, 29, 35, 38, 46, 56, 69, 83 | |||||
| }; | }; | ||||
| const int16_t ff_mpeg1_default_non_intra_matrix[64] = { | const int16_t ff_mpeg1_default_non_intra_matrix[64] = { | ||||
| @@ -748,7 +748,7 @@ static void encode_frame(MpegAudioContext *s, | |||||
| } | } | ||||
| static int MPA_encode_frame(AVCodecContext *avctx, | static int MPA_encode_frame(AVCodecContext *avctx, | ||||
| unsigned char *frame, int buf_size, void *data) | |||||
| unsigned char *frame, int buf_size, void *data) | |||||
| { | { | ||||
| MpegAudioContext *s = avctx->priv_data; | MpegAudioContext *s = avctx->priv_data; | ||||
| short *samples = data; | short *samples = data; | ||||
| @@ -55,7 +55,7 @@ int l2_select_table(int bitrate, int nb_channels, int freq, int lsf); | |||||
| int mpa_decode_header(AVCodecContext *avctx, uint32_t head); | int mpa_decode_header(AVCodecContext *avctx, uint32_t head); | ||||
| void ff_mpa_synth_init(MPA_INT *window); | void ff_mpa_synth_init(MPA_INT *window); | ||||
| void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset, | void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset, | ||||
| MPA_INT *window, int *dither_state, | |||||
| MPA_INT *window, int *dither_state, | |||||
| OUT_INT *samples, int incr, | OUT_INT *samples, int incr, | ||||
| int32_t sb_samples[SBLIMIT]); | int32_t sb_samples[SBLIMIT]); | ||||
| @@ -64,7 +64,7 @@ static always_inline int MULH(int a, int b){ | |||||
| struct GranuleDef; | struct GranuleDef; | ||||
| typedef struct MPADecodeContext { | typedef struct MPADecodeContext { | ||||
| uint8_t inbuf1[2][MPA_MAX_CODED_FRAME_SIZE + BACKSTEP_SIZE]; /* input buffer */ | |||||
| uint8_t inbuf1[2][MPA_MAX_CODED_FRAME_SIZE + BACKSTEP_SIZE]; /* input buffer */ | |||||
| int inbuf_index; | int inbuf_index; | ||||
| uint8_t *inbuf_ptr, *inbuf; | uint8_t *inbuf_ptr, *inbuf; | ||||
| int frame_size; | int frame_size; | ||||
| @@ -340,13 +340,13 @@ static int decode_init(AVCodecContext * avctx) | |||||
| scale_factor_mult[i][2]); | scale_factor_mult[i][2]); | ||||
| } | } | ||||
| ff_mpa_synth_init(window); | |||||
| ff_mpa_synth_init(window); | |||||
| /* huffman decode tables */ | /* huffman decode tables */ | ||||
| huff_code_table[0] = NULL; | huff_code_table[0] = NULL; | ||||
| for(i=1;i<16;i++) { | for(i=1;i<16;i++) { | ||||
| const HuffTable *h = &mpa_huff_tables[i]; | const HuffTable *h = &mpa_huff_tables[i]; | ||||
| int xsize, x, y; | |||||
| int xsize, x, y; | |||||
| unsigned int n; | unsigned int n; | ||||
| uint8_t *code_table; | uint8_t *code_table; | ||||
| @@ -378,11 +378,11 @@ static int decode_init(AVCodecContext * avctx) | |||||
| band_index_long[i][22] = k; | band_index_long[i][22] = k; | ||||
| } | } | ||||
| /* compute n ^ (4/3) and store it in mantissa/exp format */ | |||||
| table_4_3_exp= av_mallocz_static(TABLE_4_3_SIZE * sizeof(table_4_3_exp[0])); | |||||
| /* compute n ^ (4/3) and store it in mantissa/exp format */ | |||||
| table_4_3_exp= av_mallocz_static(TABLE_4_3_SIZE * sizeof(table_4_3_exp[0])); | |||||
| if(!table_4_3_exp) | if(!table_4_3_exp) | ||||
| return -1; | |||||
| table_4_3_value= av_mallocz_static(TABLE_4_3_SIZE * sizeof(table_4_3_value[0])); | |||||
| return -1; | |||||
| table_4_3_value= av_mallocz_static(TABLE_4_3_SIZE * sizeof(table_4_3_value[0])); | |||||
| if(!table_4_3_value) | if(!table_4_3_value) | ||||
| return -1; | return -1; | ||||
| @@ -844,7 +844,7 @@ void ff_mpa_synth_init(MPA_INT *window) | |||||
| 32 samples. */ | 32 samples. */ | ||||
| /* XXX: optimize by avoiding ring buffer usage */ | /* XXX: optimize by avoiding ring buffer usage */ | ||||
| void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset, | void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset, | ||||
| MPA_INT *window, int *dither_state, | |||||
| MPA_INT *window, int *dither_state, | |||||
| OUT_INT *samples, int incr, | OUT_INT *samples, int incr, | ||||
| int32_t sb_samples[SBLIMIT]) | int32_t sb_samples[SBLIMIT]) | ||||
| { | { | ||||
| @@ -2440,8 +2440,8 @@ static int mp_decode_frame(MPADecodeContext *s, | |||||
| samples_ptr = samples + ch; | samples_ptr = samples + ch; | ||||
| for(i=0;i<nb_frames;i++) { | for(i=0;i<nb_frames;i++) { | ||||
| ff_mpa_synth_filter(s->synth_buf[ch], &(s->synth_buf_offset[ch]), | ff_mpa_synth_filter(s->synth_buf[ch], &(s->synth_buf_offset[ch]), | ||||
| window, &s->dither_state, | |||||
| samples_ptr, s->nb_channels, | |||||
| window, &s->dither_state, | |||||
| samples_ptr, s->nb_channels, | |||||
| s->sb_samples[ch][i]); | s->sb_samples[ch][i]); | ||||
| samples_ptr += 32 * s->nb_channels; | samples_ptr += 32 * s->nb_channels; | ||||
| } | } | ||||
| @@ -2453,8 +2453,8 @@ static int mp_decode_frame(MPADecodeContext *s, | |||||
| } | } | ||||
| static int decode_frame(AVCodecContext * avctx, | static int decode_frame(AVCodecContext * avctx, | ||||
| void *data, int *data_size, | |||||
| uint8_t * buf, int buf_size) | |||||
| void *data, int *data_size, | |||||
| uint8_t * buf, int buf_size) | |||||
| { | { | ||||
| MPADecodeContext *s = avctx->priv_data; | MPADecodeContext *s = avctx->priv_data; | ||||
| uint32_t header; | uint32_t header; | ||||
| @@ -2464,8 +2464,8 @@ static int decode_frame(AVCodecContext * avctx, | |||||
| buf_ptr = buf; | buf_ptr = buf; | ||||
| while (buf_size > 0) { | while (buf_size > 0) { | ||||
| len = s->inbuf_ptr - s->inbuf; | |||||
| if (s->frame_size == 0) { | |||||
| len = s->inbuf_ptr - s->inbuf; | |||||
| if (s->frame_size == 0) { | |||||
| /* special case for next header for first frame in free | /* special case for next header for first frame in free | ||||
| format case (XXX: find a simpler method) */ | format case (XXX: find a simpler method) */ | ||||
| if (s->free_format_next_header != 0) { | if (s->free_format_next_header != 0) { | ||||
| @@ -2477,34 +2477,34 @@ static int decode_frame(AVCodecContext * avctx, | |||||
| s->free_format_next_header = 0; | s->free_format_next_header = 0; | ||||
| goto got_header; | goto got_header; | ||||
| } | } | ||||
| /* no header seen : find one. We need at least HEADER_SIZE | |||||
| /* no header seen : find one. We need at least HEADER_SIZE | |||||
| bytes to parse it */ | bytes to parse it */ | ||||
| len = HEADER_SIZE - len; | |||||
| if (len > buf_size) | |||||
| len = buf_size; | |||||
| if (len > 0) { | |||||
| memcpy(s->inbuf_ptr, buf_ptr, len); | |||||
| buf_ptr += len; | |||||
| buf_size -= len; | |||||
| s->inbuf_ptr += len; | |||||
| } | |||||
| if ((s->inbuf_ptr - s->inbuf) >= HEADER_SIZE) { | |||||
| len = HEADER_SIZE - len; | |||||
| if (len > buf_size) | |||||
| len = buf_size; | |||||
| if (len > 0) { | |||||
| memcpy(s->inbuf_ptr, buf_ptr, len); | |||||
| buf_ptr += len; | |||||
| buf_size -= len; | |||||
| s->inbuf_ptr += len; | |||||
| } | |||||
| if ((s->inbuf_ptr - s->inbuf) >= HEADER_SIZE) { | |||||
| got_header: | got_header: | ||||
| header = (s->inbuf[0] << 24) | (s->inbuf[1] << 16) | | |||||
| (s->inbuf[2] << 8) | s->inbuf[3]; | |||||
| header = (s->inbuf[0] << 24) | (s->inbuf[1] << 16) | | |||||
| (s->inbuf[2] << 8) | s->inbuf[3]; | |||||
| if (ff_mpa_check_header(header) < 0) { | |||||
| /* no sync found : move by one byte (inefficient, but simple!) */ | |||||
| memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1); | |||||
| s->inbuf_ptr--; | |||||
| if (ff_mpa_check_header(header) < 0) { | |||||
| /* no sync found : move by one byte (inefficient, but simple!) */ | |||||
| memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1); | |||||
| s->inbuf_ptr--; | |||||
| dprintf("skip %x\n", header); | dprintf("skip %x\n", header); | ||||
| /* reset free format frame size to give a chance | /* reset free format frame size to give a chance | ||||
| to get a new bitrate */ | to get a new bitrate */ | ||||
| s->free_format_frame_size = 0; | s->free_format_frame_size = 0; | ||||
| } else { | |||||
| if (decode_header(s, header) == 1) { | |||||
| } else { | |||||
| if (decode_header(s, header) == 1) { | |||||
| /* free format: prepare to compute frame size */ | /* free format: prepare to compute frame size */ | ||||
| s->frame_size = -1; | |||||
| s->frame_size = -1; | |||||
| } | } | ||||
| /* update codec info */ | /* update codec info */ | ||||
| avctx->sample_rate = s->sample_rate; | avctx->sample_rate = s->sample_rate; | ||||
| @@ -2525,18 +2525,18 @@ static int decode_frame(AVCodecContext * avctx, | |||||
| avctx->frame_size = 1152; | avctx->frame_size = 1152; | ||||
| break; | break; | ||||
| } | } | ||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| } else if (s->frame_size == -1) { | } else if (s->frame_size == -1) { | ||||
| /* free format : find next sync to compute frame size */ | /* free format : find next sync to compute frame size */ | ||||
| len = MPA_MAX_CODED_FRAME_SIZE - len; | |||||
| if (len > buf_size) | |||||
| len = buf_size; | |||||
| len = MPA_MAX_CODED_FRAME_SIZE - len; | |||||
| if (len > buf_size) | |||||
| len = buf_size; | |||||
| if (len == 0) { | if (len == 0) { | ||||
| /* frame too long: resync */ | |||||
| /* frame too long: resync */ | |||||
| s->frame_size = 0; | s->frame_size = 0; | ||||
| memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1); | |||||
| s->inbuf_ptr--; | |||||
| memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1); | |||||
| s->inbuf_ptr--; | |||||
| } else { | } else { | ||||
| uint8_t *p, *pend; | uint8_t *p, *pend; | ||||
| uint32_t header1; | uint32_t header1; | ||||
| @@ -2580,17 +2580,17 @@ static int decode_frame(AVCodecContext * avctx, | |||||
| s->inbuf_ptr += len; | s->inbuf_ptr += len; | ||||
| buf_size -= len; | buf_size -= len; | ||||
| } | } | ||||
| } else if (len < s->frame_size) { | |||||
| } else if (len < s->frame_size) { | |||||
| if (s->frame_size > MPA_MAX_CODED_FRAME_SIZE) | if (s->frame_size > MPA_MAX_CODED_FRAME_SIZE) | ||||
| s->frame_size = MPA_MAX_CODED_FRAME_SIZE; | s->frame_size = MPA_MAX_CODED_FRAME_SIZE; | ||||
| len = s->frame_size - len; | |||||
| if (len > buf_size) | |||||
| len = buf_size; | |||||
| memcpy(s->inbuf_ptr, buf_ptr, len); | |||||
| buf_ptr += len; | |||||
| s->inbuf_ptr += len; | |||||
| buf_size -= len; | |||||
| } | |||||
| len = s->frame_size - len; | |||||
| if (len > buf_size) | |||||
| len = buf_size; | |||||
| memcpy(s->inbuf_ptr, buf_ptr, len); | |||||
| buf_ptr += len; | |||||
| s->inbuf_ptr += len; | |||||
| buf_size -= len; | |||||
| } | |||||
| next_data: | next_data: | ||||
| if (s->frame_size > 0 && | if (s->frame_size > 0 && | ||||
| (s->inbuf_ptr - s->inbuf) >= s->frame_size) { | (s->inbuf_ptr - s->inbuf) >= s->frame_size) { | ||||
| @@ -2601,22 +2601,22 @@ static int decode_frame(AVCodecContext * avctx, | |||||
| } else { | } else { | ||||
| out_size = mp_decode_frame(s, out_samples); | out_size = mp_decode_frame(s, out_samples); | ||||
| } | } | ||||
| s->inbuf_ptr = s->inbuf; | |||||
| s->frame_size = 0; | |||||
| s->inbuf_ptr = s->inbuf; | |||||
| s->frame_size = 0; | |||||
| if(out_size>=0) | if(out_size>=0) | ||||
| *data_size = out_size; | |||||
| *data_size = out_size; | |||||
| else | else | ||||
| av_log(avctx, AV_LOG_DEBUG, "Error while decoding mpeg audio frame\n"); //FIXME return -1 / but also return the number of bytes consumed | av_log(avctx, AV_LOG_DEBUG, "Error while decoding mpeg audio frame\n"); //FIXME return -1 / but also return the number of bytes consumed | ||||
| break; | |||||
| } | |||||
| break; | |||||
| } | |||||
| } | } | ||||
| return buf_ptr - buf; | return buf_ptr - buf; | ||||
| } | } | ||||
| static int decode_frame_adu(AVCodecContext * avctx, | static int decode_frame_adu(AVCodecContext * avctx, | ||||
| void *data, int *data_size, | |||||
| uint8_t * buf, int buf_size) | |||||
| void *data, int *data_size, | |||||
| uint8_t * buf, int buf_size) | |||||
| { | { | ||||
| MPADecodeContext *s = avctx->priv_data; | MPADecodeContext *s = avctx->priv_data; | ||||
| uint32_t header; | uint32_t header; | ||||
| @@ -2747,8 +2747,8 @@ static int decode_close_mp3on4(AVCodecContext * avctx) | |||||
| static int decode_frame_mp3on4(AVCodecContext * avctx, | static int decode_frame_mp3on4(AVCodecContext * avctx, | ||||
| void *data, int *data_size, | |||||
| uint8_t * buf, int buf_size) | |||||
| void *data, int *data_size, | |||||
| uint8_t * buf, int buf_size) | |||||
| { | { | ||||
| MP3On4DecodeContext *s = avctx->priv_data; | MP3On4DecodeContext *s = avctx->priv_data; | ||||
| MPADecodeContext *m; | MPADecodeContext *m; | ||||
| @@ -354,7 +354,7 @@ static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){ | |||||
| r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic); | r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic); | ||||
| if(r<0 || !pic->age || !pic->type || !pic->data[0]){ | if(r<0 || !pic->age || !pic->type || !pic->data[0]){ | ||||
| av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]); | |||||
| av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]); | |||||
| return -1; | return -1; | ||||
| } | } | ||||
| @@ -913,7 +913,7 @@ int MPV_encode_init(AVCodecContext *avctx) | |||||
| s->width = avctx->width; | s->width = avctx->width; | ||||
| s->height = avctx->height; | s->height = avctx->height; | ||||
| if(avctx->gop_size > 600){ | if(avctx->gop_size > 600){ | ||||
| av_log(avctx, AV_LOG_ERROR, "Warning keyframe interval too large! reducing it ...\n"); | |||||
| av_log(avctx, AV_LOG_ERROR, "Warning keyframe interval too large! reducing it ...\n"); | |||||
| avctx->gop_size=600; | avctx->gop_size=600; | ||||
| } | } | ||||
| s->gop_size = avctx->gop_size; | s->gop_size = avctx->gop_size; | ||||
| @@ -1120,7 +1120,7 @@ int MPV_encode_init(AVCodecContext *avctx) | |||||
| s->out_format = FMT_MJPEG; | s->out_format = FMT_MJPEG; | ||||
| s->intra_only = 1; /* force intra only for jpeg */ | s->intra_only = 1; /* force intra only for jpeg */ | ||||
| s->mjpeg_write_tables = avctx->codec->id != CODEC_ID_JPEGLS; | s->mjpeg_write_tables = avctx->codec->id != CODEC_ID_JPEGLS; | ||||
| s->mjpeg_data_only_frames = 0; /* write all the needed headers */ | |||||
| s->mjpeg_data_only_frames = 0; /* write all the needed headers */ | |||||
| s->mjpeg_vsample[0] = 1<<chroma_v_shift; | s->mjpeg_vsample[0] = 1<<chroma_v_shift; | ||||
| s->mjpeg_vsample[1] = 1; | s->mjpeg_vsample[1] = 1; | ||||
| s->mjpeg_vsample[2] = 1; | s->mjpeg_vsample[2] = 1; | ||||
| @@ -1143,24 +1143,24 @@ int MPV_encode_init(AVCodecContext *avctx) | |||||
| return -1; | return -1; | ||||
| } | } | ||||
| s->out_format = FMT_H263; | s->out_format = FMT_H263; | ||||
| s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0; | |||||
| s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0; | |||||
| avctx->delay=0; | avctx->delay=0; | ||||
| s->low_delay=1; | s->low_delay=1; | ||||
| break; | break; | ||||
| case CODEC_ID_H263P: | case CODEC_ID_H263P: | ||||
| s->out_format = FMT_H263; | s->out_format = FMT_H263; | ||||
| s->h263_plus = 1; | s->h263_plus = 1; | ||||
| /* Fx */ | |||||
| /* Fx */ | |||||
| s->umvplus = (avctx->flags & CODEC_FLAG_H263P_UMV) ? 1:0; | s->umvplus = (avctx->flags & CODEC_FLAG_H263P_UMV) ? 1:0; | ||||
| s->h263_aic= (avctx->flags & CODEC_FLAG_H263P_AIC) ? 1:0; | |||||
| s->modified_quant= s->h263_aic; | |||||
| s->alt_inter_vlc= (avctx->flags & CODEC_FLAG_H263P_AIV) ? 1:0; | |||||
| s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0; | |||||
| s->loop_filter= (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1:0; | |||||
| s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus; | |||||
| s->h263_aic= (avctx->flags & CODEC_FLAG_H263P_AIC) ? 1:0; | |||||
| s->modified_quant= s->h263_aic; | |||||
| s->alt_inter_vlc= (avctx->flags & CODEC_FLAG_H263P_AIV) ? 1:0; | |||||
| s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0; | |||||
| s->loop_filter= (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1:0; | |||||
| s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus; | |||||
| s->h263_slice_structured= (s->flags & CODEC_FLAG_H263P_SLICE_STRUCT) ? 1:0; | s->h263_slice_structured= (s->flags & CODEC_FLAG_H263P_SLICE_STRUCT) ? 1:0; | ||||
| /* /Fx */ | |||||
| /* /Fx */ | |||||
| /* These are just to be sure */ | /* These are just to be sure */ | ||||
| avctx->delay=0; | avctx->delay=0; | ||||
| s->low_delay=1; | s->low_delay=1; | ||||
| @@ -2473,7 +2473,7 @@ static inline void gmc1_motion(MpegEncContext *s, | |||||
| dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2); | dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2); | ||||
| if (s->no_rounding){ | if (s->no_rounding){ | ||||
| s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16); | |||||
| s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16); | |||||
| }else{ | }else{ | ||||
| s->dsp.put_pixels_tab [0][dxy](dest_y, ptr, linesize, 16); | s->dsp.put_pixels_tab [0][dxy](dest_y, ptr, linesize, 16); | ||||
| } | } | ||||
| @@ -4148,7 +4148,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) | |||||
| } | } | ||||
| } | } | ||||
| s->dsp.get_pixels(s->block[0], ptr_y , wrap_y); | |||||
| s->dsp.get_pixels(s->block[0], ptr_y , wrap_y); | |||||
| s->dsp.get_pixels(s->block[1], ptr_y + 8, wrap_y); | s->dsp.get_pixels(s->block[1], ptr_y + 8, wrap_y); | ||||
| s->dsp.get_pixels(s->block[2], ptr_y + dct_offset , wrap_y); | s->dsp.get_pixels(s->block[2], ptr_y + dct_offset , wrap_y); | ||||
| s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y); | s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y); | ||||
| @@ -4157,7 +4157,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) | |||||
| skip_dct[4]= 1; | skip_dct[4]= 1; | ||||
| skip_dct[5]= 1; | skip_dct[5]= 1; | ||||
| }else{ | }else{ | ||||
| s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c); | |||||
| s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c); | |||||
| s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c); | s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c); | ||||
| } | } | ||||
| }else{ | }else{ | ||||
| @@ -4170,7 +4170,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) | |||||
| dest_cr = s->dest[2]; | dest_cr = s->dest[2]; | ||||
| if ((!s->no_rounding) || s->pict_type==B_TYPE){ | if ((!s->no_rounding) || s->pict_type==B_TYPE){ | ||||
| op_pix = s->dsp.put_pixels_tab; | |||||
| op_pix = s->dsp.put_pixels_tab; | |||||
| op_qpix= s->dsp.put_qpel_pixels_tab; | op_qpix= s->dsp.put_qpel_pixels_tab; | ||||
| }else{ | }else{ | ||||
| op_pix = s->dsp.put_no_rnd_pixels_tab; | op_pix = s->dsp.put_no_rnd_pixels_tab; | ||||
| @@ -4208,7 +4208,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) | |||||
| } | } | ||||
| } | } | ||||
| s->dsp.diff_pixels(s->block[0], ptr_y , dest_y , wrap_y); | |||||
| s->dsp.diff_pixels(s->block[0], ptr_y , dest_y , wrap_y); | |||||
| s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y); | s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y); | ||||
| s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset , dest_y + dct_offset , wrap_y); | s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset , dest_y + dct_offset , wrap_y); | ||||
| s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y); | s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y); | ||||
| @@ -4223,7 +4223,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) | |||||
| /* pre quantization */ | /* pre quantization */ | ||||
| if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){ | if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){ | ||||
| //FIXME optimize | //FIXME optimize | ||||
| if(s->dsp.sad[1](NULL, ptr_y , dest_y , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1; | |||||
| if(s->dsp.sad[1](NULL, ptr_y , dest_y , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1; | |||||
| if(s->dsp.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20*s->qscale) skip_dct[1]= 1; | if(s->dsp.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20*s->qscale) skip_dct[1]= 1; | ||||
| if(s->dsp.sad[1](NULL, ptr_y +dct_offset , dest_y +dct_offset , wrap_y, 8) < 20*s->qscale) skip_dct[2]= 1; | if(s->dsp.sad[1](NULL, ptr_y +dct_offset , dest_y +dct_offset , wrap_y, 8) < 20*s->qscale) skip_dct[2]= 1; | ||||
| if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1; | if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1; | ||||
| @@ -6265,7 +6265,7 @@ static int dct_quantize_c(MpegEncContext *s, | |||||
| /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */ | /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */ | ||||
| if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM) | if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM) | ||||
| ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero); | |||||
| ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero); | |||||
| return last_non_zero; | return last_non_zero; | ||||
| } | } | ||||
| @@ -126,7 +126,7 @@ typedef struct ScanTable{ | |||||
| uint8_t permutated[64]; | uint8_t permutated[64]; | ||||
| uint8_t raster_end[64]; | uint8_t raster_end[64]; | ||||
| #ifdef ARCH_POWERPC | #ifdef ARCH_POWERPC | ||||
| /** Used by dct_quantise_alitvec to find last-non-zero */ | |||||
| /** Used by dct_quantise_alitvec to find last-non-zero */ | |||||
| uint8_t __align8 inverse[64]; | uint8_t __align8 inverse[64]; | ||||
| #endif | #endif | ||||
| } ScanTable; | } ScanTable; | ||||
| @@ -181,7 +181,7 @@ typedef struct Picture{ | |||||
| uint16_t *mb_var; ///< Table for MB variances | uint16_t *mb_var; ///< Table for MB variances | ||||
| uint16_t *mc_mb_var; ///< Table for motion compensated MB variances | uint16_t *mc_mb_var; ///< Table for motion compensated MB variances | ||||
| uint8_t *mb_mean; ///< Table for MB luminance | uint8_t *mb_mean; ///< Table for MB luminance | ||||
| int32_t *mb_cmp_score; ///< Table for MB cmp scores, for mb decision FIXME remove | |||||
| int32_t *mb_cmp_score; ///< Table for MB cmp scores, for mb decision FIXME remove | |||||
| int b_frame_score; /* */ | int b_frame_score; /* */ | ||||
| } Picture; | } Picture; | ||||
| @@ -245,7 +245,7 @@ typedef struct MotionEstContext{ | |||||
| uint8_t (*mv_penalty)[MAX_MV*2+1]; ///< amount of bits needed to encode a MV | uint8_t (*mv_penalty)[MAX_MV*2+1]; ///< amount of bits needed to encode a MV | ||||
| uint8_t *current_mv_penalty; | uint8_t *current_mv_penalty; | ||||
| int (*sub_motion_search)(struct MpegEncContext * s, | int (*sub_motion_search)(struct MpegEncContext * s, | ||||
| int *mx_ptr, int *my_ptr, int dmin, | |||||
| int *mx_ptr, int *my_ptr, int dmin, | |||||
| int src_index, int ref_index, | int src_index, int ref_index, | ||||
| int size, int h); | int size, int h); | ||||
| }MotionEstContext; | }MotionEstContext; | ||||
| @@ -544,24 +544,24 @@ void msmpeg4_encode_mb(MpegEncContext * s, | |||||
| handle_slices(s); | handle_slices(s); | ||||
| if (!s->mb_intra) { | if (!s->mb_intra) { | ||||
| /* compute cbp */ | |||||
| /* compute cbp */ | |||||
| set_stat(ST_INTER_MB); | set_stat(ST_INTER_MB); | ||||
| cbp = 0; | |||||
| for (i = 0; i < 6; i++) { | |||||
| if (s->block_last_index[i] >= 0) | |||||
| cbp |= 1 << (5 - i); | |||||
| } | |||||
| if (s->use_skip_mb_code && (cbp | motion_x | motion_y) == 0) { | |||||
| /* skip macroblock */ | |||||
| put_bits(&s->pb, 1, 1); | |||||
| cbp = 0; | |||||
| for (i = 0; i < 6; i++) { | |||||
| if (s->block_last_index[i] >= 0) | |||||
| cbp |= 1 << (5 - i); | |||||
| } | |||||
| if (s->use_skip_mb_code && (cbp | motion_x | motion_y) == 0) { | |||||
| /* skip macroblock */ | |||||
| put_bits(&s->pb, 1, 1); | |||||
| s->last_bits++; | s->last_bits++; | ||||
| s->misc_bits++; | |||||
| s->misc_bits++; | |||||
| s->skip_count++; | s->skip_count++; | ||||
| return; | |||||
| } | |||||
| return; | |||||
| } | |||||
| if (s->use_skip_mb_code) | if (s->use_skip_mb_code) | ||||
| put_bits(&s->pb, 1, 0); /* mb coded */ | |||||
| put_bits(&s->pb, 1, 0); /* mb coded */ | |||||
| if(s->msmpeg4_version<=2){ | if(s->msmpeg4_version<=2){ | ||||
| put_bits(&s->pb, | put_bits(&s->pb, | ||||
| @@ -599,10 +599,10 @@ void msmpeg4_encode_mb(MpegEncContext * s, | |||||
| } | } | ||||
| s->p_tex_bits += get_bits_diff(s); | s->p_tex_bits += get_bits_diff(s); | ||||
| } else { | } else { | ||||
| /* compute cbp */ | |||||
| cbp = 0; | |||||
| /* compute cbp */ | |||||
| cbp = 0; | |||||
| coded_cbp = 0; | coded_cbp = 0; | ||||
| for (i = 0; i < 6; i++) { | |||||
| for (i = 0; i < 6; i++) { | |||||
| int val, pred; | int val, pred; | ||||
| val = (s->block_last_index[i] >= 1); | val = (s->block_last_index[i] >= 1); | ||||
| cbp |= val << (5 - i); | cbp |= val << (5 - i); | ||||
| @@ -613,7 +613,7 @@ void msmpeg4_encode_mb(MpegEncContext * s, | |||||
| val = val ^ pred; | val = val ^ pred; | ||||
| } | } | ||||
| coded_cbp |= val << (5 - i); | coded_cbp |= val << (5 - i); | ||||
| } | |||||
| } | |||||
| #if 0 | #if 0 | ||||
| if (coded_cbp) | if (coded_cbp) | ||||
| printf("cbp=%x %x\n", cbp, coded_cbp); | printf("cbp=%x %x\n", cbp, coded_cbp); | ||||
| @@ -625,12 +625,12 @@ void msmpeg4_encode_mb(MpegEncContext * s, | |||||
| v2_intra_cbpc[cbp&3][1], v2_intra_cbpc[cbp&3][0]); | v2_intra_cbpc[cbp&3][1], v2_intra_cbpc[cbp&3][0]); | ||||
| } else { | } else { | ||||
| if (s->use_skip_mb_code) | if (s->use_skip_mb_code) | ||||
| put_bits(&s->pb, 1, 0); /* mb coded */ | |||||
| put_bits(&s->pb, 1, 0); /* mb coded */ | |||||
| put_bits(&s->pb, | put_bits(&s->pb, | ||||
| v2_mb_type[(cbp&3) + 4][1], | v2_mb_type[(cbp&3) + 4][1], | ||||
| v2_mb_type[(cbp&3) + 4][0]); | v2_mb_type[(cbp&3) + 4][0]); | ||||
| } | } | ||||
| put_bits(&s->pb, 1, 0); /* no AC prediction yet */ | |||||
| put_bits(&s->pb, 1, 0); /* no AC prediction yet */ | |||||
| put_bits(&s->pb, | put_bits(&s->pb, | ||||
| cbpy_tab[cbp>>2][1], | cbpy_tab[cbp>>2][1], | ||||
| cbpy_tab[cbp>>2][0]); | cbpy_tab[cbp>>2][0]); | ||||
| @@ -641,13 +641,13 @@ void msmpeg4_encode_mb(MpegEncContext * s, | |||||
| ff_msmp4_mb_i_table[coded_cbp][1], ff_msmp4_mb_i_table[coded_cbp][0]); | ff_msmp4_mb_i_table[coded_cbp][1], ff_msmp4_mb_i_table[coded_cbp][0]); | ||||
| } else { | } else { | ||||
| if (s->use_skip_mb_code) | if (s->use_skip_mb_code) | ||||
| put_bits(&s->pb, 1, 0); /* mb coded */ | |||||
| put_bits(&s->pb, 1, 0); /* mb coded */ | |||||
| put_bits(&s->pb, | put_bits(&s->pb, | ||||
| table_mb_non_intra[cbp][1], | table_mb_non_intra[cbp][1], | ||||
| table_mb_non_intra[cbp][0]); | table_mb_non_intra[cbp][0]); | ||||
| } | } | ||||
| set_stat(ST_INTRA_MB); | set_stat(ST_INTRA_MB); | ||||
| put_bits(&s->pb, 1, 0); /* no AC prediction yet */ | |||||
| put_bits(&s->pb, 1, 0); /* no AC prediction yet */ | |||||
| if(s->inter_intra_pred){ | if(s->inter_intra_pred){ | ||||
| s->h263_aic_dir=0; | s->h263_aic_dir=0; | ||||
| put_bits(&s->pb, table_inter_intra[s->h263_aic_dir][1], table_inter_intra[s->h263_aic_dir][0]); | put_bits(&s->pb, table_inter_intra[s->h263_aic_dir][1], table_inter_intra[s->h263_aic_dir][0]); | ||||
| @@ -702,9 +702,9 @@ static inline int msmpeg4_pred_dc(MpegEncContext * s, int n, | |||||
| /* find prediction */ | /* find prediction */ | ||||
| if (n < 4) { | if (n < 4) { | ||||
| scale = s->y_dc_scale; | |||||
| scale = s->y_dc_scale; | |||||
| } else { | } else { | ||||
| scale = s->c_dc_scale; | |||||
| scale = s->c_dc_scale; | |||||
| } | } | ||||
| wrap = s->block_wrap[n]; | wrap = s->block_wrap[n]; | ||||
| @@ -727,22 +727,22 @@ static inline int msmpeg4_pred_dc(MpegEncContext * s, int n, | |||||
| to problems if Q could vary !) */ | to problems if Q could vary !) */ | ||||
| #if (defined(ARCH_X86) || defined(ARCH_X86_64)) && !defined PIC | #if (defined(ARCH_X86) || defined(ARCH_X86_64)) && !defined PIC | ||||
| asm volatile( | asm volatile( | ||||
| "movl %3, %%eax \n\t" | |||||
| "shrl $1, %%eax \n\t" | |||||
| "addl %%eax, %2 \n\t" | |||||
| "addl %%eax, %1 \n\t" | |||||
| "addl %0, %%eax \n\t" | |||||
| "mull %4 \n\t" | |||||
| "movl %%edx, %0 \n\t" | |||||
| "movl %1, %%eax \n\t" | |||||
| "mull %4 \n\t" | |||||
| "movl %%edx, %1 \n\t" | |||||
| "movl %2, %%eax \n\t" | |||||
| "mull %4 \n\t" | |||||
| "movl %%edx, %2 \n\t" | |||||
| : "+b" (a), "+c" (b), "+D" (c) | |||||
| : "g" (scale), "S" (inverse[scale]) | |||||
| : "%eax", "%edx" | |||||
| "movl %3, %%eax \n\t" | |||||
| "shrl $1, %%eax \n\t" | |||||
| "addl %%eax, %2 \n\t" | |||||
| "addl %%eax, %1 \n\t" | |||||
| "addl %0, %%eax \n\t" | |||||
| "mull %4 \n\t" | |||||
| "movl %%edx, %0 \n\t" | |||||
| "movl %1, %%eax \n\t" | |||||
| "mull %4 \n\t" | |||||
| "movl %%edx, %1 \n\t" | |||||
| "movl %2, %%eax \n\t" | |||||
| "mull %4 \n\t" | |||||
| "movl %%edx, %2 \n\t" | |||||
| : "+b" (a), "+c" (b), "+D" (c) | |||||
| : "g" (scale), "S" (inverse[scale]) | |||||
| : "%eax", "%edx" | |||||
| ); | ); | ||||
| #else | #else | ||||
| /* #elif defined (ARCH_ALPHA) */ | /* #elif defined (ARCH_ALPHA) */ | ||||
| @@ -750,13 +750,13 @@ static inline int msmpeg4_pred_dc(MpegEncContext * s, int n, | |||||
| common case. But they are costly everywhere... | common case. But they are costly everywhere... | ||||
| */ | */ | ||||
| if (scale == 8) { | if (scale == 8) { | ||||
| a = (a + (8 >> 1)) / 8; | |||||
| b = (b + (8 >> 1)) / 8; | |||||
| c = (c + (8 >> 1)) / 8; | |||||
| a = (a + (8 >> 1)) / 8; | |||||
| b = (b + (8 >> 1)) / 8; | |||||
| c = (c + (8 >> 1)) / 8; | |||||
| } else { | } else { | ||||
| a = FASTDIV((a + (scale >> 1)), scale); | |||||
| b = FASTDIV((b + (scale >> 1)), scale); | |||||
| c = FASTDIV((c + (scale >> 1)), scale); | |||||
| a = FASTDIV((a + (scale >> 1)), scale); | |||||
| b = FASTDIV((b + (scale >> 1)), scale); | |||||
| c = FASTDIV((c + (scale >> 1)), scale); | |||||
| } | } | ||||
| #endif | #endif | ||||
| /* XXX: WARNING: they did not choose the same test as MPEG4. This | /* XXX: WARNING: they did not choose the same test as MPEG4. This | ||||
| @@ -957,17 +957,17 @@ static inline void msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int | |||||
| /* AC coefs */ | /* AC coefs */ | ||||
| last_non_zero = i - 1; | last_non_zero = i - 1; | ||||
| for (; i <= last_index; i++) { | for (; i <= last_index; i++) { | ||||
| j = scantable[i]; | |||||
| level = block[j]; | |||||
| if (level) { | |||||
| run = i - last_non_zero - 1; | |||||
| last = (i == last_index); | |||||
| sign = 0; | |||||
| slevel = level; | |||||
| if (level < 0) { | |||||
| sign = 1; | |||||
| level = -level; | |||||
| } | |||||
| j = scantable[i]; | |||||
| level = block[j]; | |||||
| if (level) { | |||||
| run = i - last_non_zero - 1; | |||||
| last = (i == last_index); | |||||
| sign = 0; | |||||
| slevel = level; | |||||
| if (level < 0) { | |||||
| sign = 1; | |||||
| level = -level; | |||||
| } | |||||
| if(level<=MAX_LEVEL && run<=MAX_RUN){ | if(level<=MAX_LEVEL && run<=MAX_RUN){ | ||||
| s->ac_stats[s->mb_intra][n>3][level][run][last]++; | s->ac_stats[s->mb_intra][n>3][level][run][last]++; | ||||
| @@ -1030,8 +1030,8 @@ else | |||||
| } else { | } else { | ||||
| put_bits(&s->pb, 1, sign); | put_bits(&s->pb, 1, sign); | ||||
| } | } | ||||
| last_non_zero = i; | |||||
| } | |||||
| last_non_zero = i; | |||||
| } | |||||
| } | } | ||||
| } | } | ||||
| @@ -1064,7 +1064,7 @@ static void init_h263_dc_for_msmpeg4(void) | |||||
| v = abs(level); | v = abs(level); | ||||
| while (v) { | while (v) { | ||||
| v >>= 1; | v >>= 1; | ||||
| size++; | |||||
| size++; | |||||
| } | } | ||||
| if (level < 0) | if (level < 0) | ||||
| @@ -1301,11 +1301,11 @@ return -1; | |||||
| } | } | ||||
| s->no_rounding = 1; | s->no_rounding = 1; | ||||
| if(s->avctx->debug&FF_DEBUG_PICT_INFO) | if(s->avctx->debug&FF_DEBUG_PICT_INFO) | ||||
| av_log(s->avctx, AV_LOG_DEBUG, "qscale:%d rlc:%d rl:%d dc:%d mbrl:%d slice:%d \n", | |||||
| s->qscale, | |||||
| s->rl_chroma_table_index, | |||||
| s->rl_table_index, | |||||
| s->dc_table_index, | |||||
| av_log(s->avctx, AV_LOG_DEBUG, "qscale:%d rlc:%d rl:%d dc:%d mbrl:%d slice:%d \n", | |||||
| s->qscale, | |||||
| s->rl_chroma_table_index, | |||||
| s->rl_table_index, | |||||
| s->dc_table_index, | |||||
| s->per_mb_rl_table, | s->per_mb_rl_table, | ||||
| s->slice_height); | s->slice_height); | ||||
| } else { | } else { | ||||
| @@ -1349,20 +1349,20 @@ return -1; | |||||
| } | } | ||||
| if(s->avctx->debug&FF_DEBUG_PICT_INFO) | if(s->avctx->debug&FF_DEBUG_PICT_INFO) | ||||
| av_log(s->avctx, AV_LOG_DEBUG, "skip:%d rl:%d rlc:%d dc:%d mv:%d mbrl:%d qp:%d \n", | |||||
| s->use_skip_mb_code, | |||||
| s->rl_table_index, | |||||
| s->rl_chroma_table_index, | |||||
| s->dc_table_index, | |||||
| s->mv_table_index, | |||||
| av_log(s->avctx, AV_LOG_DEBUG, "skip:%d rl:%d rlc:%d dc:%d mv:%d mbrl:%d qp:%d \n", | |||||
| s->use_skip_mb_code, | |||||
| s->rl_table_index, | |||||
| s->rl_chroma_table_index, | |||||
| s->dc_table_index, | |||||
| s->mv_table_index, | |||||
| s->per_mb_rl_table, | s->per_mb_rl_table, | ||||
| s->qscale); | s->qscale); | ||||
| if(s->flipflop_rounding){ | |||||
| s->no_rounding ^= 1; | |||||
| }else{ | |||||
| s->no_rounding = 0; | |||||
| } | |||||
| if(s->flipflop_rounding){ | |||||
| s->no_rounding ^= 1; | |||||
| }else{ | |||||
| s->no_rounding = 0; | |||||
| } | |||||
| } | } | ||||
| //printf("%d %d %d %d %d\n", s->pict_type, s->bit_rate, s->inter_intra_pred, s->width, s->height); | //printf("%d %d %d %d %d\n", s->pict_type, s->bit_rate, s->inter_intra_pred, s->width, s->height); | ||||
| @@ -1557,10 +1557,10 @@ static int msmpeg4v12_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) | |||||
| s->dsp.clear_blocks(s->block[0]); | s->dsp.clear_blocks(s->block[0]); | ||||
| for (i = 0; i < 6; i++) { | for (i = 0; i < 6; i++) { | ||||
| if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0) | if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0) | ||||
| { | |||||
| { | |||||
| av_log(s->avctx, AV_LOG_ERROR, "\nerror while decoding block: %d x %d (%d)\n", s->mb_x, s->mb_y, i); | av_log(s->avctx, AV_LOG_ERROR, "\nerror while decoding block: %d x %d (%d)\n", s->mb_x, s->mb_y, i); | ||||
| return -1; | return -1; | ||||
| } | |||||
| } | |||||
| } | } | ||||
| return 0; | return 0; | ||||
| } | } | ||||
| @@ -1593,8 +1593,8 @@ static int msmpeg4v34_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) | |||||
| code = get_vlc2(&s->gb, mb_non_intra_vlc[DEFAULT_INTER_INDEX].table, MB_NON_INTRA_VLC_BITS, 3); | code = get_vlc2(&s->gb, mb_non_intra_vlc[DEFAULT_INTER_INDEX].table, MB_NON_INTRA_VLC_BITS, 3); | ||||
| if (code < 0) | if (code < 0) | ||||
| return -1; | return -1; | ||||
| //s->mb_intra = (code & 0x40) ? 0 : 1; | |||||
| s->mb_intra = (~code & 0x40) >> 6; | |||||
| //s->mb_intra = (code & 0x40) ? 0 : 1; | |||||
| s->mb_intra = (~code & 0x40) >> 6; | |||||
| cbp = code & 0x3f; | cbp = code & 0x3f; | ||||
| } else { | } else { | ||||
| @@ -1650,10 +1650,10 @@ static int msmpeg4v34_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) | |||||
| s->dsp.clear_blocks(s->block[0]); | s->dsp.clear_blocks(s->block[0]); | ||||
| for (i = 0; i < 6; i++) { | for (i = 0; i < 6; i++) { | ||||
| if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0) | if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0) | ||||
| { | |||||
| av_log(s->avctx, AV_LOG_ERROR, "\nerror while decoding block: %d x %d (%d)\n", s->mb_x, s->mb_y, i); | |||||
| return -1; | |||||
| } | |||||
| { | |||||
| av_log(s->avctx, AV_LOG_ERROR, "\nerror while decoding block: %d x %d (%d)\n", s->mb_x, s->mb_y, i); | |||||
| return -1; | |||||
| } | |||||
| } | } | ||||
| return 0; | return 0; | ||||
| @@ -1672,7 +1672,7 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block, | |||||
| qmul=1; | qmul=1; | ||||
| qadd=0; | qadd=0; | ||||
| /* DC coef */ | |||||
| /* DC coef */ | |||||
| set_stat(ST_DC); | set_stat(ST_DC); | ||||
| level = msmpeg4_decode_dc(s, n, &dc_pred_dir); | level = msmpeg4_decode_dc(s, n, &dc_pred_dir); | ||||
| @@ -1808,8 +1808,8 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block, | |||||
| } | } | ||||
| } | } | ||||
| #endif | #endif | ||||
| //level = level * qmul + (level>0) * qadd - (level<=0) * qadd ; | |||||
| if (level>0) level= level * qmul + qadd; | |||||
| //level = level * qmul + (level>0) * qadd - (level<=0) * qadd ; | |||||
| if (level>0) level= level * qmul + qadd; | |||||
| else level= level * qmul - qadd; | else level= level * qmul - qadd; | ||||
| #if 0 // waste of time too :( | #if 0 // waste of time too :( | ||||
| if(level>2048 || level<-2048){ | if(level>2048 || level<-2048){ | ||||
| @@ -45,7 +45,7 @@ Theora_decode_frame(AVCodecContext *ctx, void *outdata, int *outdata_size, | |||||
| thc->op.bytes = buf_size; | thc->op.bytes = buf_size; | ||||
| if(theora_decode_packetin(&thc->state, &thc->op)) | if(theora_decode_packetin(&thc->state, &thc->op)) | ||||
| return -1; | |||||
| return -1; | |||||
| theora_decode_YUVout(&thc->state, &yuv); | theora_decode_YUVout(&thc->state, &yuv); | ||||
| @@ -78,7 +78,7 @@ Theora_decode_init(AVCodecContext *ctx) | |||||
| uint8_t *cdp; | uint8_t *cdp; | ||||
| if(ctx->extradata_size < 6) | if(ctx->extradata_size < 6) | ||||
| return -1; | |||||
| return -1; | |||||
| theora_info_init(&thc->info); | theora_info_init(&thc->info); | ||||
| @@ -87,25 +87,25 @@ Theora_decode_init(AVCodecContext *ctx) | |||||
| size = ctx->extradata_size; | size = ctx->extradata_size; | ||||
| for(i = 0; i < 3; i++){ | for(i = 0; i < 3; i++){ | ||||
| hs = *cdp++ << 8; | |||||
| hs += *cdp++; | |||||
| size -= 2; | |||||
| hs = *cdp++ << 8; | |||||
| hs += *cdp++; | |||||
| size -= 2; | |||||
| if(hs > size){ | |||||
| av_log(ctx, AV_LOG_ERROR, "extradata too small: %i > %i\n", | |||||
| if(hs > size){ | |||||
| av_log(ctx, AV_LOG_ERROR, "extradata too small: %i > %i\n", | |||||
| hs, size); | hs, size); | ||||
| return -1; | |||||
| } | |||||
| op.packet = cdp; | |||||
| op.bytes = hs; | |||||
| op.b_o_s = !i; | |||||
| if(theora_decode_header(&thc->info, &thc->comment, &op)) | |||||
| return -1; | |||||
| op.packetno++; | |||||
| cdp += hs; | |||||
| size -= hs; | |||||
| return -1; | |||||
| } | |||||
| op.packet = cdp; | |||||
| op.bytes = hs; | |||||
| op.b_o_s = !i; | |||||
| if(theora_decode_header(&thc->info, &thc->comment, &op)) | |||||
| return -1; | |||||
| op.packetno++; | |||||
| cdp += hs; | |||||
| size -= hs; | |||||
| } | } | ||||
| theora_decode_init(&thc->state, &thc->info); | theora_decode_init(&thc->state, &thc->info); | ||||
| @@ -40,13 +40,13 @@ static int oggvorbis_init_encoder(vorbis_info *vi, AVCodecContext *avccontext) { | |||||
| return (vorbis_encode_setup_managed(vi, avccontext->channels, | return (vorbis_encode_setup_managed(vi, avccontext->channels, | ||||
| avccontext->sample_rate, -1, avccontext->bit_rate, -1) || | avccontext->sample_rate, -1, avccontext->bit_rate, -1) || | ||||
| vorbis_encode_ctl(vi, OV_ECTL_RATEMANAGE_AVG, NULL) || | |||||
| vorbis_encode_setup_init(vi)) ; | |||||
| vorbis_encode_ctl(vi, OV_ECTL_RATEMANAGE_AVG, NULL) || | |||||
| vorbis_encode_setup_init(vi)) ; | |||||
| #else | #else | ||||
| /* constant bitrate */ | /* constant bitrate */ | ||||
| return vorbis_encode_init(vi, avccontext->channels, | return vorbis_encode_init(vi, avccontext->channels, | ||||
| avccontext->sample_rate, -1, avccontext->bit_rate, -1) ; | |||||
| avccontext->sample_rate, -1, avccontext->bit_rate, -1) ; | |||||
| #endif | #endif | ||||
| } | } | ||||
| @@ -58,8 +58,8 @@ static int oggvorbis_encode_init(AVCodecContext *avccontext) { | |||||
| vorbis_info_init(&context->vi) ; | vorbis_info_init(&context->vi) ; | ||||
| if(oggvorbis_init_encoder(&context->vi, avccontext) < 0) { | if(oggvorbis_init_encoder(&context->vi, avccontext) < 0) { | ||||
| av_log(avccontext, AV_LOG_ERROR, "oggvorbis_encode_init: init_encoder failed") ; | |||||
| return -1 ; | |||||
| av_log(avccontext, AV_LOG_ERROR, "oggvorbis_encode_init: init_encoder failed") ; | |||||
| return -1 ; | |||||
| } | } | ||||
| vorbis_analysis_init(&context->vd, &context->vi) ; | vorbis_analysis_init(&context->vd, &context->vi) ; | ||||
| vorbis_block_init(&context->vd, &context->vb) ; | vorbis_block_init(&context->vd, &context->vb) ; | ||||
| @@ -101,8 +101,8 @@ static int oggvorbis_encode_init(AVCodecContext *avccontext) { | |||||
| static int oggvorbis_encode_frame(AVCodecContext *avccontext, | static int oggvorbis_encode_frame(AVCodecContext *avccontext, | ||||
| unsigned char *packets, | |||||
| int buf_size, void *data) | |||||
| unsigned char *packets, | |||||
| int buf_size, void *data) | |||||
| { | { | ||||
| OggVorbisContext *context = avccontext->priv_data ; | OggVorbisContext *context = avccontext->priv_data ; | ||||
| float **buffer ; | float **buffer ; | ||||
| @@ -113,22 +113,22 @@ static int oggvorbis_encode_frame(AVCodecContext *avccontext, | |||||
| buffer = vorbis_analysis_buffer(&context->vd, samples) ; | buffer = vorbis_analysis_buffer(&context->vd, samples) ; | ||||
| if(context->vi.channels == 1) { | if(context->vi.channels == 1) { | ||||
| for(l = 0 ; l < samples ; l++) | |||||
| buffer[0][l]=audio[l]/32768.f; | |||||
| for(l = 0 ; l < samples ; l++) | |||||
| buffer[0][l]=audio[l]/32768.f; | |||||
| } else { | } else { | ||||
| for(l = 0 ; l < samples ; l++){ | |||||
| buffer[0][l]=audio[l*2]/32768.f; | |||||
| buffer[1][l]=audio[l*2+1]/32768.f; | |||||
| } | |||||
| for(l = 0 ; l < samples ; l++){ | |||||
| buffer[0][l]=audio[l*2]/32768.f; | |||||
| buffer[1][l]=audio[l*2+1]/32768.f; | |||||
| } | |||||
| } | } | ||||
| vorbis_analysis_wrote(&context->vd, samples) ; | vorbis_analysis_wrote(&context->vd, samples) ; | ||||
| while(vorbis_analysis_blockout(&context->vd, &context->vb) == 1) { | while(vorbis_analysis_blockout(&context->vd, &context->vb) == 1) { | ||||
| vorbis_analysis(&context->vb, NULL); | |||||
| vorbis_bitrate_addblock(&context->vb) ; | |||||
| vorbis_analysis(&context->vb, NULL); | |||||
| vorbis_bitrate_addblock(&context->vb) ; | |||||
| while(vorbis_bitrate_flushpacket(&context->vd, &op)) { | |||||
| while(vorbis_bitrate_flushpacket(&context->vd, &op)) { | |||||
| if(op.bytes==1) //id love to say this is a hack, bad sadly its not, appearently the end of stream decission is in libogg | if(op.bytes==1) //id love to say this is a hack, bad sadly its not, appearently the end of stream decission is in libogg | ||||
| continue; | continue; | ||||
| memcpy(context->buffer + context->buffer_index, &op, sizeof(ogg_packet)); | memcpy(context->buffer + context->buffer_index, &op, sizeof(ogg_packet)); | ||||
| @@ -136,7 +136,7 @@ static int oggvorbis_encode_frame(AVCodecContext *avccontext, | |||||
| memcpy(context->buffer + context->buffer_index, op.packet, op.bytes); | memcpy(context->buffer + context->buffer_index, op.packet, op.bytes); | ||||
| context->buffer_index += op.bytes; | context->buffer_index += op.bytes; | ||||
| // av_log(avccontext, AV_LOG_DEBUG, "e%d / %d\n", context->buffer_index, op.bytes); | // av_log(avccontext, AV_LOG_DEBUG, "e%d / %d\n", context->buffer_index, op.bytes); | ||||
| } | |||||
| } | |||||
| } | } | ||||
| l=0; | l=0; | ||||
| @@ -268,19 +268,19 @@ static inline int conv(int samples, float **pcm, char *buf, int channels) { | |||||
| float *mono ; | float *mono ; | ||||
| for(i = 0 ; i < channels ; i++){ | for(i = 0 ; i < channels ; i++){ | ||||
| ptr = &data[i]; | |||||
| mono = pcm[i] ; | |||||
| ptr = &data[i]; | |||||
| mono = pcm[i] ; | |||||
| for(j = 0 ; j < samples ; j++) { | |||||
| for(j = 0 ; j < samples ; j++) { | |||||
| val = mono[j] * 32767.f; | |||||
| val = mono[j] * 32767.f; | |||||
| if(val > 32767) val = 32767 ; | |||||
| if(val < -32768) val = -32768 ; | |||||
| if(val > 32767) val = 32767 ; | |||||
| if(val < -32768) val = -32768 ; | |||||
| *ptr = val ; | |||||
| ptr += channels; | |||||
| } | |||||
| *ptr = val ; | |||||
| ptr += channels; | |||||
| } | |||||
| } | } | ||||
| return 0 ; | return 0 ; | ||||
| @@ -311,15 +311,15 @@ static int oggvorbis_decode_frame(AVCodecContext *avccontext, | |||||
| av_log(avccontext, AV_LOG_DEBUG, "\n");*/ | av_log(avccontext, AV_LOG_DEBUG, "\n");*/ | ||||
| if(vorbis_synthesis(&context->vb, op) == 0) | if(vorbis_synthesis(&context->vb, op) == 0) | ||||
| vorbis_synthesis_blockin(&context->vd, &context->vb) ; | |||||
| vorbis_synthesis_blockin(&context->vd, &context->vb) ; | |||||
| total_samples = 0 ; | total_samples = 0 ; | ||||
| total_bytes = 0 ; | total_bytes = 0 ; | ||||
| while((samples = vorbis_synthesis_pcmout(&context->vd, &pcm)) > 0) { | while((samples = vorbis_synthesis_pcmout(&context->vd, &pcm)) > 0) { | ||||
| conv(samples, pcm, (char*)data + total_bytes, context->vi.channels) ; | |||||
| total_bytes += samples * 2 * context->vi.channels ; | |||||
| total_samples += samples ; | |||||
| conv(samples, pcm, (char*)data + total_bytes, context->vi.channels) ; | |||||
| total_bytes += samples * 2 * context->vi.channels ; | |||||
| total_samples += samples ; | |||||
| vorbis_synthesis_read(&context->vd, samples) ; | vorbis_synthesis_read(&context->vd, samples) ; | ||||
| } | } | ||||
| @@ -191,11 +191,11 @@ void av_parser_close(AVCodecParserContext *s) | |||||
| //#define END_NOT_FOUND (-100) | //#define END_NOT_FOUND (-100) | ||||
| #define PICTURE_START_CODE 0x00000100 | |||||
| #define SEQ_START_CODE 0x000001b3 | |||||
| #define EXT_START_CODE 0x000001b5 | |||||
| #define SLICE_MIN_START_CODE 0x00000101 | |||||
| #define SLICE_MAX_START_CODE 0x000001af | |||||
| #define PICTURE_START_CODE 0x00000100 | |||||
| #define SEQ_START_CODE 0x000001b3 | |||||
| #define EXT_START_CODE 0x000001b5 | |||||
| #define SLICE_MIN_START_CODE 0x00000101 | |||||
| #define SLICE_MAX_START_CODE 0x000001af | |||||
| typedef struct ParseContext1{ | typedef struct ParseContext1{ | ||||
| ParseContext pc; | ParseContext pc; | ||||
| @@ -571,7 +571,7 @@ static int mpeg4video_split(AVCodecContext *avctx, | |||||
| /*************************/ | /*************************/ | ||||
| typedef struct MpegAudioParseContext { | typedef struct MpegAudioParseContext { | ||||
| uint8_t inbuf[MPA_MAX_CODED_FRAME_SIZE]; /* input buffer */ | |||||
| uint8_t inbuf[MPA_MAX_CODED_FRAME_SIZE]; /* input buffer */ | |||||
| uint8_t *inbuf_ptr; | uint8_t *inbuf_ptr; | ||||
| int frame_size; | int frame_size; | ||||
| int free_format_frame_size; | int free_format_frame_size; | ||||
| @@ -608,8 +608,8 @@ static int mpegaudio_parse(AVCodecParserContext *s1, | |||||
| *poutbuf_size = 0; | *poutbuf_size = 0; | ||||
| buf_ptr = buf; | buf_ptr = buf; | ||||
| while (buf_size > 0) { | while (buf_size > 0) { | ||||
| len = s->inbuf_ptr - s->inbuf; | |||||
| if (s->frame_size == 0) { | |||||
| len = s->inbuf_ptr - s->inbuf; | |||||
| if (s->frame_size == 0) { | |||||
| /* special case for next header for first frame in free | /* special case for next header for first frame in free | ||||
| format case (XXX: find a simpler method) */ | format case (XXX: find a simpler method) */ | ||||
| if (s->free_format_next_header != 0) { | if (s->free_format_next_header != 0) { | ||||
| @@ -621,34 +621,34 @@ static int mpegaudio_parse(AVCodecParserContext *s1, | |||||
| s->free_format_next_header = 0; | s->free_format_next_header = 0; | ||||
| goto got_header; | goto got_header; | ||||
| } | } | ||||
| /* no header seen : find one. We need at least MPA_HEADER_SIZE | |||||
| /* no header seen : find one. We need at least MPA_HEADER_SIZE | |||||
| bytes to parse it */ | bytes to parse it */ | ||||
| len = MPA_HEADER_SIZE - len; | |||||
| if (len > buf_size) | |||||
| len = buf_size; | |||||
| if (len > 0) { | |||||
| memcpy(s->inbuf_ptr, buf_ptr, len); | |||||
| buf_ptr += len; | |||||
| buf_size -= len; | |||||
| s->inbuf_ptr += len; | |||||
| } | |||||
| if ((s->inbuf_ptr - s->inbuf) >= MPA_HEADER_SIZE) { | |||||
| len = MPA_HEADER_SIZE - len; | |||||
| if (len > buf_size) | |||||
| len = buf_size; | |||||
| if (len > 0) { | |||||
| memcpy(s->inbuf_ptr, buf_ptr, len); | |||||
| buf_ptr += len; | |||||
| buf_size -= len; | |||||
| s->inbuf_ptr += len; | |||||
| } | |||||
| if ((s->inbuf_ptr - s->inbuf) >= MPA_HEADER_SIZE) { | |||||
| got_header: | got_header: | ||||
| sr= avctx->sample_rate; | sr= avctx->sample_rate; | ||||
| header = (s->inbuf[0] << 24) | (s->inbuf[1] << 16) | | |||||
| (s->inbuf[2] << 8) | s->inbuf[3]; | |||||
| header = (s->inbuf[0] << 24) | (s->inbuf[1] << 16) | | |||||
| (s->inbuf[2] << 8) | s->inbuf[3]; | |||||
| ret = mpa_decode_header(avctx, header); | ret = mpa_decode_header(avctx, header); | ||||
| if (ret < 0) { | if (ret < 0) { | ||||
| s->header_count= -2; | s->header_count= -2; | ||||
| /* no sync found : move by one byte (inefficient, but simple!) */ | |||||
| memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1); | |||||
| s->inbuf_ptr--; | |||||
| /* no sync found : move by one byte (inefficient, but simple!) */ | |||||
| memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1); | |||||
| s->inbuf_ptr--; | |||||
| dprintf("skip %x\n", header); | dprintf("skip %x\n", header); | ||||
| /* reset free format frame size to give a chance | /* reset free format frame size to give a chance | ||||
| to get a new bitrate */ | to get a new bitrate */ | ||||
| s->free_format_frame_size = 0; | s->free_format_frame_size = 0; | ||||
| } else { | |||||
| } else { | |||||
| if((header&SAME_HEADER_MASK) != (s->header&SAME_HEADER_MASK) && s->header) | if((header&SAME_HEADER_MASK) != (s->header&SAME_HEADER_MASK) && s->header) | ||||
| s->header_count= -3; | s->header_count= -3; | ||||
| s->header= header; | s->header= header; | ||||
| @@ -657,26 +657,26 @@ static int mpegaudio_parse(AVCodecParserContext *s1, | |||||
| #if 0 | #if 0 | ||||
| /* free format: prepare to compute frame size */ | /* free format: prepare to compute frame size */ | ||||
| if (decode_header(s, header) == 1) { | |||||
| s->frame_size = -1; | |||||
| if (decode_header(s, header) == 1) { | |||||
| s->frame_size = -1; | |||||
| } | } | ||||
| #endif | #endif | ||||
| } | |||||
| } | |||||
| if(s->header_count <= 0) | if(s->header_count <= 0) | ||||
| avctx->sample_rate= sr; //FIXME ugly | avctx->sample_rate= sr; //FIXME ugly | ||||
| } | |||||
| } | |||||
| } else | } else | ||||
| #if 0 | #if 0 | ||||
| if (s->frame_size == -1) { | if (s->frame_size == -1) { | ||||
| /* free format : find next sync to compute frame size */ | /* free format : find next sync to compute frame size */ | ||||
| len = MPA_MAX_CODED_FRAME_SIZE - len; | |||||
| if (len > buf_size) | |||||
| len = buf_size; | |||||
| len = MPA_MAX_CODED_FRAME_SIZE - len; | |||||
| if (len > buf_size) | |||||
| len = buf_size; | |||||
| if (len == 0) { | if (len == 0) { | ||||
| /* frame too long: resync */ | |||||
| /* frame too long: resync */ | |||||
| s->frame_size = 0; | s->frame_size = 0; | ||||
| memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1); | |||||
| s->inbuf_ptr--; | |||||
| memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1); | |||||
| s->inbuf_ptr--; | |||||
| } else { | } else { | ||||
| uint8_t *p, *pend; | uint8_t *p, *pend; | ||||
| uint32_t header1; | uint32_t header1; | ||||
| @@ -720,19 +720,19 @@ static int mpegaudio_parse(AVCodecParserContext *s1, | |||||
| s->inbuf_ptr += len; | s->inbuf_ptr += len; | ||||
| buf_size -= len; | buf_size -= len; | ||||
| } | } | ||||
| } else | |||||
| } else | |||||
| #endif | #endif | ||||
| if (len < s->frame_size) { | if (len < s->frame_size) { | ||||
| if (s->frame_size > MPA_MAX_CODED_FRAME_SIZE) | if (s->frame_size > MPA_MAX_CODED_FRAME_SIZE) | ||||
| s->frame_size = MPA_MAX_CODED_FRAME_SIZE; | s->frame_size = MPA_MAX_CODED_FRAME_SIZE; | ||||
| len = s->frame_size - len; | |||||
| if (len > buf_size) | |||||
| len = buf_size; | |||||
| memcpy(s->inbuf_ptr, buf_ptr, len); | |||||
| buf_ptr += len; | |||||
| s->inbuf_ptr += len; | |||||
| buf_size -= len; | |||||
| } | |||||
| len = s->frame_size - len; | |||||
| if (len > buf_size) | |||||
| len = buf_size; | |||||
| memcpy(s->inbuf_ptr, buf_ptr, len); | |||||
| buf_ptr += len; | |||||
| s->inbuf_ptr += len; | |||||
| buf_size -= len; | |||||
| } | |||||
| // next_data: | // next_data: | ||||
| if (s->frame_size > 0 && | if (s->frame_size > 0 && | ||||
| (s->inbuf_ptr - s->inbuf) >= s->frame_size) { | (s->inbuf_ptr - s->inbuf) >= s->frame_size) { | ||||
| @@ -740,10 +740,10 @@ static int mpegaudio_parse(AVCodecParserContext *s1, | |||||
| *poutbuf = s->inbuf; | *poutbuf = s->inbuf; | ||||
| *poutbuf_size = s->inbuf_ptr - s->inbuf; | *poutbuf_size = s->inbuf_ptr - s->inbuf; | ||||
| } | } | ||||
| s->inbuf_ptr = s->inbuf; | |||||
| s->frame_size = 0; | |||||
| break; | |||||
| } | |||||
| s->inbuf_ptr = s->inbuf; | |||||
| s->frame_size = 0; | |||||
| break; | |||||
| } | |||||
| } | } | ||||
| return buf_ptr - buf; | return buf_ptr - buf; | ||||
| } | } | ||||
| @@ -783,7 +783,7 @@ static int ac3_parse(AVCodecParserContext *s1, | |||||
| const uint8_t *buf_ptr; | const uint8_t *buf_ptr; | ||||
| int len, sample_rate, bit_rate; | int len, sample_rate, bit_rate; | ||||
| static const int ac3_channels[8] = { | static const int ac3_channels[8] = { | ||||
| 2, 1, 2, 3, 3, 4, 4, 5 | |||||
| 2, 1, 2, 3, 3, 4, 4, 5 | |||||
| }; | }; | ||||
| *poutbuf = NULL; | *poutbuf = NULL; | ||||
| @@ -812,7 +812,7 @@ static int ac3_parse(AVCodecParserContext *s1, | |||||
| memmove(s->inbuf, s->inbuf + 1, AC3_HEADER_SIZE - 1); | memmove(s->inbuf, s->inbuf + 1, AC3_HEADER_SIZE - 1); | ||||
| s->inbuf_ptr--; | s->inbuf_ptr--; | ||||
| } else { | } else { | ||||
| s->frame_size = len; | |||||
| s->frame_size = len; | |||||
| /* update codec info */ | /* update codec info */ | ||||
| avctx->sample_rate = sample_rate; | avctx->sample_rate = sample_rate; | ||||
| /* set channels,except if the user explicitly requests 1 or 2 channels, XXX/FIXME this is a bit ugly */ | /* set channels,except if the user explicitly requests 1 or 2 channels, XXX/FIXME this is a bit ugly */ | ||||
| @@ -821,7 +821,7 @@ static int ac3_parse(AVCodecParserContext *s1, | |||||
| if (s->flags & A52_LFE) | if (s->flags & A52_LFE) | ||||
| avctx->channels++; | avctx->channels++; | ||||
| } | } | ||||
| avctx->bit_rate = bit_rate; | |||||
| avctx->bit_rate = bit_rate; | |||||
| avctx->frame_size = 6 * 256; | avctx->frame_size = 6 * 256; | ||||
| } | } | ||||
| } | } | ||||
| @@ -27,48 +27,48 @@ | |||||
| /* from g711.c by SUN microsystems (unrestricted use) */ | /* from g711.c by SUN microsystems (unrestricted use) */ | ||||
| #define SIGN_BIT (0x80) /* Sign bit for a A-law byte. */ | |||||
| #define QUANT_MASK (0xf) /* Quantization field mask. */ | |||||
| #define NSEGS (8) /* Number of A-law segments. */ | |||||
| #define SEG_SHIFT (4) /* Left shift for segment number. */ | |||||
| #define SEG_MASK (0x70) /* Segment field mask. */ | |||||
| #define SIGN_BIT (0x80) /* Sign bit for a A-law byte. */ | |||||
| #define QUANT_MASK (0xf) /* Quantization field mask. */ | |||||
| #define NSEGS (8) /* Number of A-law segments. */ | |||||
| #define SEG_SHIFT (4) /* Left shift for segment number. */ | |||||
| #define SEG_MASK (0x70) /* Segment field mask. */ | |||||
| #define BIAS (0x84) /* Bias for linear code. */ | |||||
| #define BIAS (0x84) /* Bias for linear code. */ | |||||
| /* | /* | ||||
| * alaw2linear() - Convert an A-law value to 16-bit linear PCM | * alaw2linear() - Convert an A-law value to 16-bit linear PCM | ||||
| * | * | ||||
| */ | */ | ||||
| static int alaw2linear(unsigned char a_val) | |||||
| static int alaw2linear(unsigned char a_val) | |||||
| { | { | ||||
| int t; | |||||
| int seg; | |||||
| int t; | |||||
| int seg; | |||||
| a_val ^= 0x55; | |||||
| a_val ^= 0x55; | |||||
| t = a_val & QUANT_MASK; | |||||
| seg = ((unsigned)a_val & SEG_MASK) >> SEG_SHIFT; | |||||
| if(seg) t= (t + t + 1 + 32) << (seg + 2); | |||||
| else t= (t + t + 1 ) << 3; | |||||
| t = a_val & QUANT_MASK; | |||||
| seg = ((unsigned)a_val & SEG_MASK) >> SEG_SHIFT; | |||||
| if(seg) t= (t + t + 1 + 32) << (seg + 2); | |||||
| else t= (t + t + 1 ) << 3; | |||||
| return ((a_val & SIGN_BIT) ? t : -t); | |||||
| return ((a_val & SIGN_BIT) ? t : -t); | |||||
| } | } | ||||
| static int ulaw2linear(unsigned char u_val) | |||||
| static int ulaw2linear(unsigned char u_val) | |||||
| { | { | ||||
| int t; | |||||
| int t; | |||||
| /* Complement to obtain normal u-law value. */ | |||||
| u_val = ~u_val; | |||||
| /* Complement to obtain normal u-law value. */ | |||||
| u_val = ~u_val; | |||||
| /* | |||||
| * Extract and bias the quantization bits. Then | |||||
| * shift up by the segment number and subtract out the bias. | |||||
| */ | |||||
| t = ((u_val & QUANT_MASK) << 3) + BIAS; | |||||
| t <<= ((unsigned)u_val & SEG_MASK) >> SEG_SHIFT; | |||||
| /* | |||||
| * Extract and bias the quantization bits. Then | |||||
| * shift up by the segment number and subtract out the bias. | |||||
| */ | |||||
| t = ((u_val & QUANT_MASK) << 3) + BIAS; | |||||
| t <<= ((unsigned)u_val & SEG_MASK) >> SEG_SHIFT; | |||||
| return ((u_val & SIGN_BIT) ? (BIAS - t) : (t - BIAS)); | |||||
| return ((u_val & SIGN_BIT) ? (BIAS - t) : (t - BIAS)); | |||||
| } | } | ||||
| /* 16384 entries per table */ | /* 16384 entries per table */ | ||||
| @@ -209,7 +209,7 @@ static inline void encode_from16(int bps, int le, int us, | |||||
| } | } | ||||
| static int pcm_encode_frame(AVCodecContext *avctx, | static int pcm_encode_frame(AVCodecContext *avctx, | ||||
| unsigned char *frame, int buf_size, void *data) | |||||
| unsigned char *frame, int buf_size, void *data) | |||||
| { | { | ||||
| int n, sample_size, v; | int n, sample_size, v; | ||||
| short *samples; | short *samples; | ||||
| @@ -397,8 +397,8 @@ static inline void decode_to16(int bps, int le, int us, | |||||
| } | } | ||||
| static int pcm_decode_frame(AVCodecContext *avctx, | static int pcm_decode_frame(AVCodecContext *avctx, | ||||
| void *data, int *data_size, | |||||
| uint8_t *buf, int buf_size) | |||||
| void *data, int *data_size, | |||||
| uint8_t *buf, int buf_size) | |||||
| { | { | ||||
| PCMDecode *s = avctx->priv_data; | PCMDecode *s = avctx->priv_data; | ||||
| int n; | int n; | ||||
| @@ -509,9 +509,9 @@ AVCodec name ## _encoder = { \ | |||||
| CODEC_TYPE_AUDIO, \ | CODEC_TYPE_AUDIO, \ | ||||
| id, \ | id, \ | ||||
| 0, \ | 0, \ | ||||
| pcm_encode_init, \ | |||||
| pcm_encode_frame, \ | |||||
| pcm_encode_close, \ | |||||
| pcm_encode_init, \ | |||||
| pcm_encode_frame, \ | |||||
| pcm_encode_close, \ | |||||
| NULL, \ | NULL, \ | ||||
| }; \ | }; \ | ||||
| AVCodec name ## _decoder = { \ | AVCodec name ## _decoder = { \ | ||||
| @@ -519,7 +519,7 @@ AVCodec name ## _decoder = { \ | |||||
| CODEC_TYPE_AUDIO, \ | CODEC_TYPE_AUDIO, \ | ||||
| id, \ | id, \ | ||||
| sizeof(PCMDecode), \ | sizeof(PCMDecode), \ | ||||
| pcm_decode_init, \ | |||||
| pcm_decode_init, \ | |||||
| NULL, \ | NULL, \ | ||||
| NULL, \ | NULL, \ | ||||
| pcm_decode_frame, \ | pcm_decode_frame, \ | ||||
| @@ -67,7 +67,7 @@ int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h | |||||
| /* | /* | ||||
| Read unaligned pixels into our vectors. The vectors are as follows: | Read unaligned pixels into our vectors. The vectors are as follows: | ||||
| pix1v: pix1[0]-pix1[15] | pix1v: pix1[0]-pix1[15] | ||||
| pix2v: pix2[0]-pix2[15] pix2iv: pix2[1]-pix2[16] | |||||
| pix2v: pix2[0]-pix2[15] pix2iv: pix2[1]-pix2[16] | |||||
| */ | */ | ||||
| tv = (vector unsigned char *) pix1; | tv = (vector unsigned char *) pix1; | ||||
| pix1v = vec_perm(tv[0], tv[1], vec_lvsl(0, pix1)); | pix1v = vec_perm(tv[0], tv[1], vec_lvsl(0, pix1)); | ||||
| @@ -184,7 +184,7 @@ int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int | |||||
| fact to avoid a potentially expensive unaligned read, as well | fact to avoid a potentially expensive unaligned read, as well | ||||
| as some splitting, and vector addition each time around the loop. | as some splitting, and vector addition each time around the loop. | ||||
| Read unaligned pixels into our vectors. The vectors are as follows: | Read unaligned pixels into our vectors. The vectors are as follows: | ||||
| pix2v: pix2[0]-pix2[15] pix2iv: pix2[1]-pix2[16] | |||||
| pix2v: pix2[0]-pix2[15] pix2iv: pix2[1]-pix2[16] | |||||
| Split the pixel vectors into shorts | Split the pixel vectors into shorts | ||||
| */ | */ | ||||
| tv = (vector unsigned char *) &pix2[0]; | tv = (vector unsigned char *) &pix2[0]; | ||||
| @@ -204,7 +204,7 @@ int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int | |||||
| /* | /* | ||||
| Read unaligned pixels into our vectors. The vectors are as follows: | Read unaligned pixels into our vectors. The vectors are as follows: | ||||
| pix1v: pix1[0]-pix1[15] | pix1v: pix1[0]-pix1[15] | ||||
| pix3v: pix3[0]-pix3[15] pix3iv: pix3[1]-pix3[16] | |||||
| pix3v: pix3[0]-pix3[15] pix3iv: pix3[1]-pix3[16] | |||||
| */ | */ | ||||
| tv = (vector unsigned char *) pix1; | tv = (vector unsigned char *) pix1; | ||||
| pix1v = vec_perm(tv[0], tv[1], vec_lvsl(0, pix1)); | pix1v = vec_perm(tv[0], tv[1], vec_lvsl(0, pix1)); | ||||
| @@ -273,7 +273,7 @@ int sad16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | |||||
| for(i=0;i<h;i++) { | for(i=0;i<h;i++) { | ||||
| /* Read potentially unaligned pixels into t1 and t2 */ | |||||
| /* Read potentially unaligned pixels into t1 and t2 */ | |||||
| perm1 = vec_lvsl(0, pix1); | perm1 = vec_lvsl(0, pix1); | ||||
| pix1v = (vector unsigned char *) pix1; | pix1v = (vector unsigned char *) pix1; | ||||
| perm2 = vec_lvsl(0, pix2); | perm2 = vec_lvsl(0, pix2); | ||||
| @@ -281,12 +281,12 @@ int sad16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | |||||
| t1 = vec_perm(pix1v[0], pix1v[1], perm1); | t1 = vec_perm(pix1v[0], pix1v[1], perm1); | ||||
| t2 = vec_perm(pix2v[0], pix2v[1], perm2); | t2 = vec_perm(pix2v[0], pix2v[1], perm2); | ||||
| /* Calculate a sum of abs differences vector */ | |||||
| /* Calculate a sum of abs differences vector */ | |||||
| t3 = vec_max(t1, t2); | t3 = vec_max(t1, t2); | ||||
| t4 = vec_min(t1, t2); | t4 = vec_min(t1, t2); | ||||
| t5 = vec_sub(t3, t4); | t5 = vec_sub(t3, t4); | ||||
| /* Add each 4 pixel group together and put 4 results into sad */ | |||||
| /* Add each 4 pixel group together and put 4 results into sad */ | |||||
| sad = vec_sum4s(t5, sad); | sad = vec_sum4s(t5, sad); | ||||
| pix1 += line_size; | pix1 += line_size; | ||||
| @@ -316,9 +316,9 @@ int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | |||||
| permclear = (vector unsigned char)AVV(255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0); | permclear = (vector unsigned char)AVV(255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0); | ||||
| for(i=0;i<h;i++) { | for(i=0;i<h;i++) { | ||||
| /* Read potentially unaligned pixels into t1 and t2 | |||||
| Since we're reading 16 pixels, and actually only want 8, | |||||
| mask out the last 8 pixels. The 0s don't change the sum. */ | |||||
| /* Read potentially unaligned pixels into t1 and t2 | |||||
| Since we're reading 16 pixels, and actually only want 8, | |||||
| mask out the last 8 pixels. The 0s don't change the sum. */ | |||||
| perm1 = vec_lvsl(0, pix1); | perm1 = vec_lvsl(0, pix1); | ||||
| pix1v = (vector unsigned char *) pix1; | pix1v = (vector unsigned char *) pix1; | ||||
| perm2 = vec_lvsl(0, pix2); | perm2 = vec_lvsl(0, pix2); | ||||
| @@ -326,12 +326,12 @@ int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | |||||
| t1 = vec_and(vec_perm(pix1v[0], pix1v[1], perm1), permclear); | t1 = vec_and(vec_perm(pix1v[0], pix1v[1], perm1), permclear); | ||||
| t2 = vec_and(vec_perm(pix2v[0], pix2v[1], perm2), permclear); | t2 = vec_and(vec_perm(pix2v[0], pix2v[1], perm2), permclear); | ||||
| /* Calculate a sum of abs differences vector */ | |||||
| /* Calculate a sum of abs differences vector */ | |||||
| t3 = vec_max(t1, t2); | t3 = vec_max(t1, t2); | ||||
| t4 = vec_min(t1, t2); | t4 = vec_min(t1, t2); | ||||
| t5 = vec_sub(t3, t4); | t5 = vec_sub(t3, t4); | ||||
| /* Add each 4 pixel group together and put 4 results into sad */ | |||||
| /* Add each 4 pixel group together and put 4 results into sad */ | |||||
| sad = vec_sum4s(t5, sad); | sad = vec_sum4s(t5, sad); | ||||
| pix1 += line_size; | pix1 += line_size; | ||||
| @@ -398,9 +398,9 @@ int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | |||||
| for(i=0;i<h;i++) { | for(i=0;i<h;i++) { | ||||
| /* Read potentially unaligned pixels into t1 and t2 | |||||
| Since we're reading 16 pixels, and actually only want 8, | |||||
| mask out the last 8 pixels. The 0s don't change the sum. */ | |||||
| /* Read potentially unaligned pixels into t1 and t2 | |||||
| Since we're reading 16 pixels, and actually only want 8, | |||||
| mask out the last 8 pixels. The 0s don't change the sum. */ | |||||
| perm1 = vec_lvsl(0, pix1); | perm1 = vec_lvsl(0, pix1); | ||||
| pix1v = (vector unsigned char *) pix1; | pix1v = (vector unsigned char *) pix1; | ||||
| perm2 = vec_lvsl(0, pix2); | perm2 = vec_lvsl(0, pix2); | ||||
| @@ -413,7 +413,7 @@ int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | |||||
| of the fact that abs(a-b)^2 = (a-b)^2. | of the fact that abs(a-b)^2 = (a-b)^2. | ||||
| */ | */ | ||||
| /* Calculate abs differences vector */ | |||||
| /* Calculate abs differences vector */ | |||||
| t3 = vec_max(t1, t2); | t3 = vec_max(t1, t2); | ||||
| t4 = vec_min(t1, t2); | t4 = vec_min(t1, t2); | ||||
| t5 = vec_sub(t3, t4); | t5 = vec_sub(t3, t4); | ||||
| @@ -451,7 +451,7 @@ int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | |||||
| sum = (vector unsigned int)vec_splat_u32(0); | sum = (vector unsigned int)vec_splat_u32(0); | ||||
| for(i=0;i<h;i++) { | for(i=0;i<h;i++) { | ||||
| /* Read potentially unaligned pixels into t1 and t2 */ | |||||
| /* Read potentially unaligned pixels into t1 and t2 */ | |||||
| perm1 = vec_lvsl(0, pix1); | perm1 = vec_lvsl(0, pix1); | ||||
| pix1v = (vector unsigned char *) pix1; | pix1v = (vector unsigned char *) pix1; | ||||
| perm2 = vec_lvsl(0, pix2); | perm2 = vec_lvsl(0, pix2); | ||||
| @@ -464,7 +464,7 @@ int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | |||||
| of the fact that abs(a-b)^2 = (a-b)^2. | of the fact that abs(a-b)^2 = (a-b)^2. | ||||
| */ | */ | ||||
| /* Calculate abs differences vector */ | |||||
| /* Calculate abs differences vector */ | |||||
| t3 = vec_max(t1, t2); | t3 = vec_max(t1, t2); | ||||
| t4 = vec_min(t1, t2); | t4 = vec_min(t1, t2); | ||||
| t5 = vec_sub(t3, t4); | t5 = vec_sub(t3, t4); | ||||
| @@ -498,12 +498,12 @@ int pix_sum_altivec(uint8_t * pix, int line_size) | |||||
| sad = (vector unsigned int)vec_splat_u32(0); | sad = (vector unsigned int)vec_splat_u32(0); | ||||
| for (i = 0; i < 16; i++) { | for (i = 0; i < 16; i++) { | ||||
| /* Read the potentially unaligned 16 pixels into t1 */ | |||||
| /* Read the potentially unaligned 16 pixels into t1 */ | |||||
| perm = vec_lvsl(0, pix); | perm = vec_lvsl(0, pix); | ||||
| pixv = (vector unsigned char *) pix; | pixv = (vector unsigned char *) pix; | ||||
| t1 = vec_perm(pixv[0], pixv[1], perm); | t1 = vec_perm(pixv[0], pixv[1], perm); | ||||
| /* Add each 4 pixel group together and put 4 results into sad */ | |||||
| /* Add each 4 pixel group together and put 4 results into sad */ | |||||
| sad = vec_sum4s(t1, sad); | sad = vec_sum4s(t1, sad); | ||||
| pix += line_size; | pix += line_size; | ||||
| @@ -1335,32 +1335,32 @@ POWERPC_PERF_START_COUNT(altivec_hadamard8_diff8x8_num, 1); | |||||
| 0x00, 0x01, 0x02, 0x03, | 0x00, 0x01, 0x02, 0x03, | ||||
| 0x04, 0x05, 0x06, 0x07); | 0x04, 0x05, 0x06, 0x07); | ||||
| #define ONEITERBUTTERFLY(i, res) \ | |||||
| { \ | |||||
| register vector unsigned char src1, src2, srcO; \ | |||||
| register vector unsigned char dst1, dst2, dstO; \ | |||||
| src1 = vec_ld(stride * i, src); \ | |||||
| if ((((stride * i) + (unsigned long)src) & 0x0000000F) > 8) \ | |||||
| src2 = vec_ld((stride * i) + 16, src); \ | |||||
| srcO = vec_perm(src1, src2, vec_lvsl(stride * i, src)); \ | |||||
| dst1 = vec_ld(stride * i, dst); \ | |||||
| if ((((stride * i) + (unsigned long)dst) & 0x0000000F) > 8) \ | |||||
| dst2 = vec_ld((stride * i) + 16, dst); \ | |||||
| dstO = vec_perm(dst1, dst2, vec_lvsl(stride * i, dst)); \ | |||||
| /* promote the unsigned chars to signed shorts */ \ | |||||
| /* we're in the 8x8 function, we only care for the first 8 */ \ | |||||
| register vector signed short srcV = \ | |||||
| (vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)srcO); \ | |||||
| register vector signed short dstV = \ | |||||
| (vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)dstO); \ | |||||
| /* substractions inside the first butterfly */ \ | |||||
| register vector signed short but0 = vec_sub(srcV, dstV); \ | |||||
| register vector signed short op1 = vec_perm(but0, but0, perm1); \ | |||||
| register vector signed short but1 = vec_mladd(but0, vprod1, op1); \ | |||||
| register vector signed short op2 = vec_perm(but1, but1, perm2); \ | |||||
| register vector signed short but2 = vec_mladd(but1, vprod2, op2); \ | |||||
| register vector signed short op3 = vec_perm(but2, but2, perm3); \ | |||||
| res = vec_mladd(but2, vprod3, op3); \ | |||||
| #define ONEITERBUTTERFLY(i, res) \ | |||||
| { \ | |||||
| register vector unsigned char src1, src2, srcO; \ | |||||
| register vector unsigned char dst1, dst2, dstO; \ | |||||
| src1 = vec_ld(stride * i, src); \ | |||||
| if ((((stride * i) + (unsigned long)src) & 0x0000000F) > 8) \ | |||||
| src2 = vec_ld((stride * i) + 16, src); \ | |||||
| srcO = vec_perm(src1, src2, vec_lvsl(stride * i, src)); \ | |||||
| dst1 = vec_ld(stride * i, dst); \ | |||||
| if ((((stride * i) + (unsigned long)dst) & 0x0000000F) > 8) \ | |||||
| dst2 = vec_ld((stride * i) + 16, dst); \ | |||||
| dstO = vec_perm(dst1, dst2, vec_lvsl(stride * i, dst)); \ | |||||
| /* promote the unsigned chars to signed shorts */ \ | |||||
| /* we're in the 8x8 function, we only care for the first 8 */ \ | |||||
| register vector signed short srcV = \ | |||||
| (vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)srcO); \ | |||||
| register vector signed short dstV = \ | |||||
| (vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)dstO); \ | |||||
| /* substractions inside the first butterfly */ \ | |||||
| register vector signed short but0 = vec_sub(srcV, dstV); \ | |||||
| register vector signed short op1 = vec_perm(but0, but0, perm1); \ | |||||
| register vector signed short but1 = vec_mladd(but0, vprod1, op1); \ | |||||
| register vector signed short op2 = vec_perm(but1, but1, perm2); \ | |||||
| register vector signed short but2 = vec_mladd(but1, vprod2, op2); \ | |||||
| register vector signed short op3 = vec_perm(but2, but2, perm3); \ | |||||
| res = vec_mladd(but2, vprod3, op3); \ | |||||
| } | } | ||||
| ONEITERBUTTERFLY(0, temp0); | ONEITERBUTTERFLY(0, temp0); | ||||
| ONEITERBUTTERFLY(1, temp1); | ONEITERBUTTERFLY(1, temp1); | ||||
| @@ -1480,26 +1480,26 @@ static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, | |||||
| 0x00, 0x01, 0x02, 0x03, | 0x00, 0x01, 0x02, 0x03, | ||||
| 0x04, 0x05, 0x06, 0x07); | 0x04, 0x05, 0x06, 0x07); | ||||
| #define ONEITERBUTTERFLY(i, res1, res2) \ | |||||
| { \ | |||||
| #define ONEITERBUTTERFLY(i, res1, res2) \ | |||||
| { \ | |||||
| register vector unsigned char src1 asm ("v22"), src2 asm ("v23"); \ | register vector unsigned char src1 asm ("v22"), src2 asm ("v23"); \ | ||||
| register vector unsigned char dst1 asm ("v24"), dst2 asm ("v25"); \ | register vector unsigned char dst1 asm ("v24"), dst2 asm ("v25"); \ | ||||
| src1 = vec_ld(stride * i, src); \ | |||||
| src2 = vec_ld((stride * i) + 16, src); \ | |||||
| src1 = vec_ld(stride * i, src); \ | |||||
| src2 = vec_ld((stride * i) + 16, src); \ | |||||
| register vector unsigned char srcO asm ("v22") = vec_perm(src1, src2, vec_lvsl(stride * i, src)); \ | register vector unsigned char srcO asm ("v22") = vec_perm(src1, src2, vec_lvsl(stride * i, src)); \ | ||||
| dst1 = vec_ld(stride * i, dst); \ | |||||
| dst2 = vec_ld((stride * i) + 16, dst); \ | |||||
| dst1 = vec_ld(stride * i, dst); \ | |||||
| dst2 = vec_ld((stride * i) + 16, dst); \ | |||||
| register vector unsigned char dstO asm ("v23") = vec_perm(dst1, dst2, vec_lvsl(stride * i, dst)); \ | register vector unsigned char dstO asm ("v23") = vec_perm(dst1, dst2, vec_lvsl(stride * i, dst)); \ | ||||
| /* promote the unsigned chars to signed shorts */ \ | |||||
| /* promote the unsigned chars to signed shorts */ \ | |||||
| register vector signed short srcV asm ("v24") = \ | register vector signed short srcV asm ("v24") = \ | ||||
| (vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)srcO); \ | |||||
| (vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)srcO); \ | |||||
| register vector signed short dstV asm ("v25") = \ | register vector signed short dstV asm ("v25") = \ | ||||
| (vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)dstO); \ | |||||
| (vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)dstO); \ | |||||
| register vector signed short srcW asm ("v26") = \ | register vector signed short srcW asm ("v26") = \ | ||||
| (vector signed short)vec_mergel((vector signed char)vzero, (vector signed char)srcO); \ | |||||
| (vector signed short)vec_mergel((vector signed char)vzero, (vector signed char)srcO); \ | |||||
| register vector signed short dstW asm ("v27") = \ | register vector signed short dstW asm ("v27") = \ | ||||
| (vector signed short)vec_mergel((vector signed char)vzero, (vector signed char)dstO); \ | |||||
| /* substractions inside the first butterfly */ \ | |||||
| (vector signed short)vec_mergel((vector signed char)vzero, (vector signed char)dstO); \ | |||||
| /* substractions inside the first butterfly */ \ | |||||
| register vector signed short but0 asm ("v28") = vec_sub(srcV, dstV); \ | register vector signed short but0 asm ("v28") = vec_sub(srcV, dstV); \ | ||||
| register vector signed short but0S asm ("v29") = vec_sub(srcW, dstW); \ | register vector signed short but0S asm ("v29") = vec_sub(srcW, dstW); \ | ||||
| register vector signed short op1 asm ("v30") = vec_perm(but0, but0, perm1); \ | register vector signed short op1 asm ("v30") = vec_perm(but0, but0, perm1); \ | ||||
| @@ -1511,9 +1511,9 @@ static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, | |||||
| register vector signed short op2S asm ("v27") = vec_perm(but1S, but1S, perm2); \ | register vector signed short op2S asm ("v27") = vec_perm(but1S, but1S, perm2); \ | ||||
| register vector signed short but2S asm ("v28") = vec_mladd(but1S, vprod2, op2S); \ | register vector signed short but2S asm ("v28") = vec_mladd(but1S, vprod2, op2S); \ | ||||
| register vector signed short op3 asm ("v29") = vec_perm(but2, but2, perm3); \ | register vector signed short op3 asm ("v29") = vec_perm(but2, but2, perm3); \ | ||||
| res1 = vec_mladd(but2, vprod3, op3); \ | |||||
| res1 = vec_mladd(but2, vprod3, op3); \ | |||||
| register vector signed short op3S asm ("v30") = vec_perm(but2S, but2S, perm3); \ | register vector signed short op3S asm ("v30") = vec_perm(but2S, but2S, perm3); \ | ||||
| res2 = vec_mladd(but2S, vprod3, op3S); \ | |||||
| res2 = vec_mladd(but2S, vprod3, op3S); \ | |||||
| } | } | ||||
| ONEITERBUTTERFLY(0, temp0, temp0S); | ONEITERBUTTERFLY(0, temp0, temp0S); | ||||
| ONEITERBUTTERFLY(1, temp1, temp1S); | ONEITERBUTTERFLY(1, temp1, temp1S); | ||||
| @@ -1623,12 +1623,12 @@ POWERPC_PERF_STOP_COUNT(altivec_hadamard8_diff16_num, 1); | |||||
| int has_altivec(void) | int has_altivec(void) | ||||
| { | { | ||||
| #ifdef __AMIGAOS4__ | #ifdef __AMIGAOS4__ | ||||
| ULONG result = 0; | |||||
| extern struct ExecIFace *IExec; | |||||
| ULONG result = 0; | |||||
| extern struct ExecIFace *IExec; | |||||
| IExec->GetCPUInfoTags(GCIT_VectorUnit, &result, TAG_DONE); | |||||
| if (result == VECTORTYPE_ALTIVEC) return 1; | |||||
| return 0; | |||||
| IExec->GetCPUInfoTags(GCIT_VectorUnit, &result, TAG_DONE); | |||||
| if (result == VECTORTYPE_ALTIVEC) return 1; | |||||
| return 0; | |||||
| #else /* __AMIGAOS4__ */ | #else /* __AMIGAOS4__ */ | ||||
| #ifdef CONFIG_DARWIN | #ifdef CONFIG_DARWIN | ||||
| @@ -191,33 +191,33 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## CODETYPE(uint8_t *dst, uint | |||||
| /* from dsputil.c */ | /* from dsputil.c */ | ||||
| static inline void put_pixels8_l2(uint8_t * dst, const uint8_t * src1, const uint8_t * src2, int dst_stride, int src_stride1, int src_stride2, int h) { | static inline void put_pixels8_l2(uint8_t * dst, const uint8_t * src1, const uint8_t * src2, int dst_stride, int src_stride1, int src_stride2, int h) { | ||||
| int i; | |||||
| for (i = 0; i < h; i++) { | |||||
| uint32_t a, b; | |||||
| a = (((const struct unaligned_32 *) (&src1[i * src_stride1]))->l); | |||||
| b = (((const struct unaligned_32 *) (&src2[i * src_stride2]))->l); | |||||
| *((uint32_t *) & dst[i * dst_stride]) = rnd_avg32(a, b); | |||||
| a = (((const struct unaligned_32 *) (&src1[i * src_stride1 + 4]))->l); | |||||
| b = (((const struct unaligned_32 *) (&src2[i * src_stride2 + 4]))->l); | |||||
| *((uint32_t *) & dst[i * dst_stride + 4]) = rnd_avg32(a, b); | |||||
| } | |||||
| int i; | |||||
| for (i = 0; i < h; i++) { | |||||
| uint32_t a, b; | |||||
| a = (((const struct unaligned_32 *) (&src1[i * src_stride1]))->l); | |||||
| b = (((const struct unaligned_32 *) (&src2[i * src_stride2]))->l); | |||||
| *((uint32_t *) & dst[i * dst_stride]) = rnd_avg32(a, b); | |||||
| a = (((const struct unaligned_32 *) (&src1[i * src_stride1 + 4]))->l); | |||||
| b = (((const struct unaligned_32 *) (&src2[i * src_stride2 + 4]))->l); | |||||
| *((uint32_t *) & dst[i * dst_stride + 4]) = rnd_avg32(a, b); | |||||
| } | |||||
| } static inline void avg_pixels8_l2(uint8_t * dst, const uint8_t * src1, const uint8_t * src2, int dst_stride, int src_stride1, int src_stride2, int h) { | } static inline void avg_pixels8_l2(uint8_t * dst, const uint8_t * src1, const uint8_t * src2, int dst_stride, int src_stride1, int src_stride2, int h) { | ||||
| int i; | |||||
| for (i = 0; i < h; i++) { | |||||
| uint32_t a, b; | |||||
| a = (((const struct unaligned_32 *) (&src1[i * src_stride1]))->l); | |||||
| b = (((const struct unaligned_32 *) (&src2[i * src_stride2]))->l); | |||||
| *((uint32_t *) & dst[i * dst_stride]) = rnd_avg32(*((uint32_t *) & dst[i * dst_stride]), rnd_avg32(a, b)); | |||||
| a = (((const struct unaligned_32 *) (&src1[i * src_stride1 + 4]))->l); | |||||
| b = (((const struct unaligned_32 *) (&src2[i * src_stride2 + 4]))->l); | |||||
| *((uint32_t *) & dst[i * dst_stride + 4]) = rnd_avg32(*((uint32_t *) & dst[i * dst_stride + 4]), rnd_avg32(a, b)); | |||||
| } | |||||
| int i; | |||||
| for (i = 0; i < h; i++) { | |||||
| uint32_t a, b; | |||||
| a = (((const struct unaligned_32 *) (&src1[i * src_stride1]))->l); | |||||
| b = (((const struct unaligned_32 *) (&src2[i * src_stride2]))->l); | |||||
| *((uint32_t *) & dst[i * dst_stride]) = rnd_avg32(*((uint32_t *) & dst[i * dst_stride]), rnd_avg32(a, b)); | |||||
| a = (((const struct unaligned_32 *) (&src1[i * src_stride1 + 4]))->l); | |||||
| b = (((const struct unaligned_32 *) (&src2[i * src_stride2 + 4]))->l); | |||||
| *((uint32_t *) & dst[i * dst_stride + 4]) = rnd_avg32(*((uint32_t *) & dst[i * dst_stride + 4]), rnd_avg32(a, b)); | |||||
| } | |||||
| } static inline void put_pixels16_l2(uint8_t * dst, const uint8_t * src1, const uint8_t * src2, int dst_stride, int src_stride1, int src_stride2, int h) { | } static inline void put_pixels16_l2(uint8_t * dst, const uint8_t * src1, const uint8_t * src2, int dst_stride, int src_stride1, int src_stride2, int h) { | ||||
| put_pixels8_l2(dst, src1, src2, dst_stride, src_stride1, src_stride2, h); | |||||
| put_pixels8_l2(dst + 8, src1 + 8, src2 + 8, dst_stride, src_stride1, src_stride2, h); | |||||
| put_pixels8_l2(dst, src1, src2, dst_stride, src_stride1, src_stride2, h); | |||||
| put_pixels8_l2(dst + 8, src1 + 8, src2 + 8, dst_stride, src_stride1, src_stride2, h); | |||||
| } static inline void avg_pixels16_l2(uint8_t * dst, const uint8_t * src1, const uint8_t * src2, int dst_stride, int src_stride1, int src_stride2, int h) { | } static inline void avg_pixels16_l2(uint8_t * dst, const uint8_t * src1, const uint8_t * src2, int dst_stride, int src_stride1, int src_stride2, int h) { | ||||
| avg_pixels8_l2(dst, src1, src2, dst_stride, src_stride1, src_stride2, h); | |||||
| avg_pixels8_l2(dst + 8, src1 + 8, src2 + 8, dst_stride, src_stride1, src_stride2, h); | |||||
| avg_pixels8_l2(dst, src1, src2, dst_stride, src_stride1, src_stride2, h); | |||||
| avg_pixels8_l2(dst + 8, src1 + 8, src2 + 8, dst_stride, src_stride1, src_stride2, h); | |||||
| } | } | ||||
| /* UNIMPLEMENTED YET !! */ | /* UNIMPLEMENTED YET !! */ | ||||
| @@ -87,16 +87,16 @@ void powerpc_display_perf_report(void) | |||||
| { | { | ||||
| for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++) | for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++) | ||||
| { | { | ||||
| if (perfdata[j][i][powerpc_data_num] != (unsigned long long)0) | |||||
| av_log(NULL, AV_LOG_INFO, | |||||
| " Function \"%s\" (pmc%d):\n\tmin: %llu\n\tmax: %llu\n\tavg: %1.2lf (%llu)\n", | |||||
| perfname[i], | |||||
| j+1, | |||||
| perfdata[j][i][powerpc_data_min], | |||||
| perfdata[j][i][powerpc_data_max], | |||||
| (double)perfdata[j][i][powerpc_data_sum] / | |||||
| (double)perfdata[j][i][powerpc_data_num], | |||||
| perfdata[j][i][powerpc_data_num]); | |||||
| if (perfdata[j][i][powerpc_data_num] != (unsigned long long)0) | |||||
| av_log(NULL, AV_LOG_INFO, | |||||
| " Function \"%s\" (pmc%d):\n\tmin: %llu\n\tmax: %llu\n\tavg: %1.2lf (%llu)\n", | |||||
| perfname[i], | |||||
| j+1, | |||||
| perfdata[j][i][powerpc_data_min], | |||||
| perfdata[j][i][powerpc_data_max], | |||||
| (double)perfdata[j][i][powerpc_data_sum] / | |||||
| (double)perfdata[j][i][powerpc_data_num], | |||||
| perfdata[j][i][powerpc_data_num]); | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| @@ -179,7 +179,7 @@ POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz128, 1); | |||||
| } | } | ||||
| else | else | ||||
| for ( ; i < sizeof(DCTELEM)*6*64 ; i += 128) { | for ( ; i < sizeof(DCTELEM)*6*64 ; i += 128) { | ||||
| asm volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory"); | |||||
| asm volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory"); | |||||
| } | } | ||||
| #else | #else | ||||
| memset(blocks, 0, sizeof(DCTELEM)*6*64); | memset(blocks, 0, sizeof(DCTELEM)*6*64); | ||||
| @@ -284,25 +284,25 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx) | |||||
| c->put_no_rnd_pixels_tab[0][0] = put_pixels16_altivec; | c->put_no_rnd_pixels_tab[0][0] = put_pixels16_altivec; | ||||
| c->avg_pixels_tab[0][0] = avg_pixels16_altivec; | c->avg_pixels_tab[0][0] = avg_pixels16_altivec; | ||||
| c->avg_pixels_tab[1][0] = avg_pixels8_altivec; | c->avg_pixels_tab[1][0] = avg_pixels8_altivec; | ||||
| c->avg_pixels_tab[1][3] = avg_pixels8_xy2_altivec; | |||||
| c->avg_pixels_tab[1][3] = avg_pixels8_xy2_altivec; | |||||
| c->put_pixels_tab[1][3] = put_pixels8_xy2_altivec; | c->put_pixels_tab[1][3] = put_pixels8_xy2_altivec; | ||||
| c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_altivec; | c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_altivec; | ||||
| c->put_pixels_tab[0][3] = put_pixels16_xy2_altivec; | c->put_pixels_tab[0][3] = put_pixels16_xy2_altivec; | ||||
| c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_altivec; | c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_altivec; | ||||
| c->gmc1 = gmc1_altivec; | |||||
| c->gmc1 = gmc1_altivec; | |||||
| #ifdef CONFIG_DARWIN // ATM gcc-3.3 and gcc-3.4 fail to compile these in linux... | #ifdef CONFIG_DARWIN // ATM gcc-3.3 and gcc-3.4 fail to compile these in linux... | ||||
| c->hadamard8_diff[0] = hadamard8_diff16_altivec; | |||||
| c->hadamard8_diff[1] = hadamard8_diff8x8_altivec; | |||||
| c->hadamard8_diff[0] = hadamard8_diff16_altivec; | |||||
| c->hadamard8_diff[1] = hadamard8_diff8x8_altivec; | |||||
| #endif | #endif | ||||
| #ifdef CONFIG_ENCODERS | #ifdef CONFIG_ENCODERS | ||||
| if (avctx->dct_algo == FF_DCT_AUTO || | |||||
| avctx->dct_algo == FF_DCT_ALTIVEC) | |||||
| { | |||||
| c->fdct = fdct_altivec; | |||||
| } | |||||
| if (avctx->dct_algo == FF_DCT_AUTO || | |||||
| avctx->dct_algo == FF_DCT_ALTIVEC) | |||||
| { | |||||
| c->fdct = fdct_altivec; | |||||
| } | |||||
| #endif //CONFIG_ENCODERS | #endif //CONFIG_ENCODERS | ||||
| if (avctx->lowres==0) | if (avctx->lowres==0) | ||||
| @@ -325,14 +325,14 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx) | |||||
| int i, j; | int i, j; | ||||
| for (i = 0 ; i < powerpc_perf_total ; i++) | for (i = 0 ; i < powerpc_perf_total ; i++) | ||||
| { | { | ||||
| for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++) | |||||
| { | |||||
| perfdata[j][i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFFULL; | |||||
| perfdata[j][i][powerpc_data_max] = 0x0000000000000000ULL; | |||||
| perfdata[j][i][powerpc_data_sum] = 0x0000000000000000ULL; | |||||
| perfdata[j][i][powerpc_data_num] = 0x0000000000000000ULL; | |||||
| } | |||||
| } | |||||
| for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++) | |||||
| { | |||||
| perfdata[j][i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFFULL; | |||||
| perfdata[j][i][powerpc_data_max] = 0x0000000000000000ULL; | |||||
| perfdata[j][i][powerpc_data_sum] = 0x0000000000000000ULL; | |||||
| perfdata[j][i][powerpc_data_num] = 0x0000000000000000ULL; | |||||
| } | |||||
| } | |||||
| } | } | ||||
| #endif /* POWERPC_PERFORMANCE_REPORT */ | #endif /* POWERPC_PERFORMANCE_REPORT */ | ||||
| } else | } else | ||||
| @@ -114,10 +114,10 @@ extern unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][ | |||||
| #define POWERPC_GET_PMC6(a) do {} while (0) | #define POWERPC_GET_PMC6(a) do {} while (0) | ||||
| #endif | #endif | ||||
| #endif /* POWERPC_MODE_64BITS */ | #endif /* POWERPC_MODE_64BITS */ | ||||
| #define POWERPC_PERF_DECLARE(a, cond) \ | |||||
| POWERP_PMC_DATATYPE \ | |||||
| pmc_start[POWERPC_NUM_PMC_ENABLED], \ | |||||
| pmc_stop[POWERPC_NUM_PMC_ENABLED], \ | |||||
| #define POWERPC_PERF_DECLARE(a, cond) \ | |||||
| POWERP_PMC_DATATYPE \ | |||||
| pmc_start[POWERPC_NUM_PMC_ENABLED], \ | |||||
| pmc_stop[POWERPC_NUM_PMC_ENABLED], \ | |||||
| pmc_loop_index; | pmc_loop_index; | ||||
| #define POWERPC_PERF_START_COUNT(a, cond) do { \ | #define POWERPC_PERF_START_COUNT(a, cond) do { \ | ||||
| POWERPC_GET_PMC6(pmc_start[5]); \ | POWERPC_GET_PMC6(pmc_start[5]); \ | ||||
| @@ -141,8 +141,8 @@ extern unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][ | |||||
| pmc_loop_index++) \ | pmc_loop_index++) \ | ||||
| { \ | { \ | ||||
| if (pmc_stop[pmc_loop_index] >= pmc_start[pmc_loop_index]) \ | if (pmc_stop[pmc_loop_index] >= pmc_start[pmc_loop_index]) \ | ||||
| { \ | |||||
| POWERP_PMC_DATATYPE diff = \ | |||||
| { \ | |||||
| POWERP_PMC_DATATYPE diff = \ | |||||
| pmc_stop[pmc_loop_index] - pmc_start[pmc_loop_index]; \ | pmc_stop[pmc_loop_index] - pmc_start[pmc_loop_index]; \ | ||||
| if (diff < perfdata[pmc_loop_index][a][powerpc_data_min]) \ | if (diff < perfdata[pmc_loop_index][a][powerpc_data_min]) \ | ||||
| perfdata[pmc_loop_index][a][powerpc_data_min] = diff; \ | perfdata[pmc_loop_index][a][powerpc_data_min] = diff; \ | ||||
| @@ -65,8 +65,8 @@ void ff_fft_calc_altivec(FFTContext *s, FFTComplex *z) | |||||
| POWERPC_PERF_DECLARE(altivec_fft_num, s->nbits >= 6); | POWERPC_PERF_DECLARE(altivec_fft_num, s->nbits >= 6); | ||||
| #ifdef ALTIVEC_USE_REFERENCE_C_CODE | #ifdef ALTIVEC_USE_REFERENCE_C_CODE | ||||
| int ln = s->nbits; | int ln = s->nbits; | ||||
| int j, np, np2; | |||||
| int nblocks, nloops; | |||||
| int j, np, np2; | |||||
| int nblocks, nloops; | |||||
| register FFTComplex *p, *q; | register FFTComplex *p, *q; | ||||
| FFTComplex *exptab = s->exptab; | FFTComplex *exptab = s->exptab; | ||||
| int l; | int l; | ||||
| @@ -147,8 +147,8 @@ POWERPC_PERF_STOP_COUNT(altivec_fft_num, s->nbits >= 6); | |||||
| #endif | #endif | ||||
| int ln = s->nbits; | int ln = s->nbits; | ||||
| int j, np, np2; | |||||
| int nblocks, nloops; | |||||
| int j, np, np2; | |||||
| int nblocks, nloops; | |||||
| register FFTComplex *p, *q; | register FFTComplex *p, *q; | ||||
| FFTComplex *cptr, *cptr1; | FFTComplex *cptr, *cptr1; | ||||
| int k; | int k; | ||||
| @@ -30,31 +30,31 @@ | |||||
| */ | */ | ||||
| static inline vector signed char ff_vmrglb (vector signed char const A, | static inline vector signed char ff_vmrglb (vector signed char const A, | ||||
| vector signed char const B) | |||||
| vector signed char const B) | |||||
| { | { | ||||
| static const vector unsigned char lowbyte = { | static const vector unsigned char lowbyte = { | ||||
| 0x08, 0x18, 0x09, 0x19, 0x0a, 0x1a, 0x0b, 0x1b, | |||||
| 0x0c, 0x1c, 0x0d, 0x1d, 0x0e, 0x1e, 0x0f, 0x1f | |||||
| 0x08, 0x18, 0x09, 0x19, 0x0a, 0x1a, 0x0b, 0x1b, | |||||
| 0x0c, 0x1c, 0x0d, 0x1d, 0x0e, 0x1e, 0x0f, 0x1f | |||||
| }; | }; | ||||
| return vec_perm (A, B, lowbyte); | return vec_perm (A, B, lowbyte); | ||||
| } | } | ||||
| static inline vector signed short ff_vmrglh (vector signed short const A, | static inline vector signed short ff_vmrglh (vector signed short const A, | ||||
| vector signed short const B) | |||||
| vector signed short const B) | |||||
| { | { | ||||
| static const vector unsigned char lowhalf = { | static const vector unsigned char lowhalf = { | ||||
| 0x08, 0x09, 0x18, 0x19, 0x0a, 0x0b, 0x1a, 0x1b, | |||||
| 0x0c, 0x0d, 0x1c, 0x1d, 0x0e, 0x0f, 0x1e, 0x1f | |||||
| 0x08, 0x09, 0x18, 0x19, 0x0a, 0x0b, 0x1a, 0x1b, | |||||
| 0x0c, 0x0d, 0x1c, 0x1d, 0x0e, 0x0f, 0x1e, 0x1f | |||||
| }; | }; | ||||
| return vec_perm (A, B, lowhalf); | return vec_perm (A, B, lowhalf); | ||||
| } | } | ||||
| static inline vector signed int ff_vmrglw (vector signed int const A, | static inline vector signed int ff_vmrglw (vector signed int const A, | ||||
| vector signed int const B) | |||||
| vector signed int const B) | |||||
| { | { | ||||
| static const vector unsigned char lowword = { | static const vector unsigned char lowword = { | ||||
| 0x08, 0x09, 0x0a, 0x0b, 0x18, 0x19, 0x1a, 0x1b, | |||||
| 0x0c, 0x0d, 0x0e, 0x0f, 0x1c, 0x1d, 0x1e, 0x1f | |||||
| 0x08, 0x09, 0x0a, 0x0b, 0x18, 0x19, 0x1a, 0x1b, | |||||
| 0x0c, 0x0d, 0x0e, 0x0f, 0x1c, 0x1d, 0x1e, 0x1f | |||||
| }; | }; | ||||
| return vec_perm (A, B, lowword); | return vec_perm (A, B, lowword); | ||||
| } | } | ||||
| @@ -51,108 +51,108 @@ | |||||
| #define vector_s32_t vector signed int | #define vector_s32_t vector signed int | ||||
| #define vector_u32_t vector unsigned int | #define vector_u32_t vector unsigned int | ||||
| #define IDCT_HALF \ | |||||
| /* 1st stage */ \ | |||||
| t1 = vec_mradds (a1, vx7, vx1 ); \ | |||||
| t8 = vec_mradds (a1, vx1, vec_subs (zero, vx7)); \ | |||||
| t7 = vec_mradds (a2, vx5, vx3); \ | |||||
| t3 = vec_mradds (ma2, vx3, vx5); \ | |||||
| \ | |||||
| /* 2nd stage */ \ | |||||
| t5 = vec_adds (vx0, vx4); \ | |||||
| t0 = vec_subs (vx0, vx4); \ | |||||
| t2 = vec_mradds (a0, vx6, vx2); \ | |||||
| t4 = vec_mradds (a0, vx2, vec_subs (zero, vx6)); \ | |||||
| t6 = vec_adds (t8, t3); \ | |||||
| t3 = vec_subs (t8, t3); \ | |||||
| t8 = vec_subs (t1, t7); \ | |||||
| t1 = vec_adds (t1, t7); \ | |||||
| \ | |||||
| /* 3rd stage */ \ | |||||
| t7 = vec_adds (t5, t2); \ | |||||
| t2 = vec_subs (t5, t2); \ | |||||
| t5 = vec_adds (t0, t4); \ | |||||
| t0 = vec_subs (t0, t4); \ | |||||
| t4 = vec_subs (t8, t3); \ | |||||
| t3 = vec_adds (t8, t3); \ | |||||
| \ | |||||
| /* 4th stage */ \ | |||||
| vy0 = vec_adds (t7, t1); \ | |||||
| vy7 = vec_subs (t7, t1); \ | |||||
| vy1 = vec_mradds (c4, t3, t5); \ | |||||
| vy6 = vec_mradds (mc4, t3, t5); \ | |||||
| vy2 = vec_mradds (c4, t4, t0); \ | |||||
| vy5 = vec_mradds (mc4, t4, t0); \ | |||||
| vy3 = vec_adds (t2, t6); \ | |||||
| #define IDCT_HALF \ | |||||
| /* 1st stage */ \ | |||||
| t1 = vec_mradds (a1, vx7, vx1 ); \ | |||||
| t8 = vec_mradds (a1, vx1, vec_subs (zero, vx7)); \ | |||||
| t7 = vec_mradds (a2, vx5, vx3); \ | |||||
| t3 = vec_mradds (ma2, vx3, vx5); \ | |||||
| \ | |||||
| /* 2nd stage */ \ | |||||
| t5 = vec_adds (vx0, vx4); \ | |||||
| t0 = vec_subs (vx0, vx4); \ | |||||
| t2 = vec_mradds (a0, vx6, vx2); \ | |||||
| t4 = vec_mradds (a0, vx2, vec_subs (zero, vx6)); \ | |||||
| t6 = vec_adds (t8, t3); \ | |||||
| t3 = vec_subs (t8, t3); \ | |||||
| t8 = vec_subs (t1, t7); \ | |||||
| t1 = vec_adds (t1, t7); \ | |||||
| \ | |||||
| /* 3rd stage */ \ | |||||
| t7 = vec_adds (t5, t2); \ | |||||
| t2 = vec_subs (t5, t2); \ | |||||
| t5 = vec_adds (t0, t4); \ | |||||
| t0 = vec_subs (t0, t4); \ | |||||
| t4 = vec_subs (t8, t3); \ | |||||
| t3 = vec_adds (t8, t3); \ | |||||
| \ | |||||
| /* 4th stage */ \ | |||||
| vy0 = vec_adds (t7, t1); \ | |||||
| vy7 = vec_subs (t7, t1); \ | |||||
| vy1 = vec_mradds (c4, t3, t5); \ | |||||
| vy6 = vec_mradds (mc4, t3, t5); \ | |||||
| vy2 = vec_mradds (c4, t4, t0); \ | |||||
| vy5 = vec_mradds (mc4, t4, t0); \ | |||||
| vy3 = vec_adds (t2, t6); \ | |||||
| vy4 = vec_subs (t2, t6); | vy4 = vec_subs (t2, t6); | ||||
| #define IDCT \ | |||||
| vector_s16_t vx0, vx1, vx2, vx3, vx4, vx5, vx6, vx7; \ | |||||
| vector_s16_t vy0, vy1, vy2, vy3, vy4, vy5, vy6, vy7; \ | |||||
| vector_s16_t a0, a1, a2, ma2, c4, mc4, zero, bias; \ | |||||
| vector_s16_t t0, t1, t2, t3, t4, t5, t6, t7, t8; \ | |||||
| vector_u16_t shift; \ | |||||
| \ | |||||
| c4 = vec_splat (constants[0], 0); \ | |||||
| a0 = vec_splat (constants[0], 1); \ | |||||
| a1 = vec_splat (constants[0], 2); \ | |||||
| a2 = vec_splat (constants[0], 3); \ | |||||
| mc4 = vec_splat (constants[0], 4); \ | |||||
| ma2 = vec_splat (constants[0], 5); \ | |||||
| bias = (vector_s16_t)vec_splat ((vector_s32_t)constants[0], 3); \ | |||||
| \ | |||||
| zero = vec_splat_s16 (0); \ | |||||
| shift = vec_splat_u16 (4); \ | |||||
| \ | |||||
| vx0 = vec_mradds (vec_sl (block[0], shift), constants[1], zero); \ | |||||
| vx1 = vec_mradds (vec_sl (block[1], shift), constants[2], zero); \ | |||||
| vx2 = vec_mradds (vec_sl (block[2], shift), constants[3], zero); \ | |||||
| vx3 = vec_mradds (vec_sl (block[3], shift), constants[4], zero); \ | |||||
| vx4 = vec_mradds (vec_sl (block[4], shift), constants[1], zero); \ | |||||
| vx5 = vec_mradds (vec_sl (block[5], shift), constants[4], zero); \ | |||||
| vx6 = vec_mradds (vec_sl (block[6], shift), constants[3], zero); \ | |||||
| vx7 = vec_mradds (vec_sl (block[7], shift), constants[2], zero); \ | |||||
| \ | |||||
| IDCT_HALF \ | |||||
| \ | |||||
| vx0 = vec_mergeh (vy0, vy4); \ | |||||
| vx1 = vec_mergel (vy0, vy4); \ | |||||
| vx2 = vec_mergeh (vy1, vy5); \ | |||||
| vx3 = vec_mergel (vy1, vy5); \ | |||||
| vx4 = vec_mergeh (vy2, vy6); \ | |||||
| vx5 = vec_mergel (vy2, vy6); \ | |||||
| vx6 = vec_mergeh (vy3, vy7); \ | |||||
| vx7 = vec_mergel (vy3, vy7); \ | |||||
| \ | |||||
| vy0 = vec_mergeh (vx0, vx4); \ | |||||
| vy1 = vec_mergel (vx0, vx4); \ | |||||
| vy2 = vec_mergeh (vx1, vx5); \ | |||||
| vy3 = vec_mergel (vx1, vx5); \ | |||||
| vy4 = vec_mergeh (vx2, vx6); \ | |||||
| vy5 = vec_mergel (vx2, vx6); \ | |||||
| vy6 = vec_mergeh (vx3, vx7); \ | |||||
| vy7 = vec_mergel (vx3, vx7); \ | |||||
| \ | |||||
| vx0 = vec_adds (vec_mergeh (vy0, vy4), bias); \ | |||||
| vx1 = vec_mergel (vy0, vy4); \ | |||||
| vx2 = vec_mergeh (vy1, vy5); \ | |||||
| vx3 = vec_mergel (vy1, vy5); \ | |||||
| vx4 = vec_mergeh (vy2, vy6); \ | |||||
| vx5 = vec_mergel (vy2, vy6); \ | |||||
| vx6 = vec_mergeh (vy3, vy7); \ | |||||
| vx7 = vec_mergel (vy3, vy7); \ | |||||
| \ | |||||
| IDCT_HALF \ | |||||
| \ | |||||
| shift = vec_splat_u16 (6); \ | |||||
| vx0 = vec_sra (vy0, shift); \ | |||||
| vx1 = vec_sra (vy1, shift); \ | |||||
| vx2 = vec_sra (vy2, shift); \ | |||||
| vx3 = vec_sra (vy3, shift); \ | |||||
| vx4 = vec_sra (vy4, shift); \ | |||||
| vx5 = vec_sra (vy5, shift); \ | |||||
| vx6 = vec_sra (vy6, shift); \ | |||||
| #define IDCT \ | |||||
| vector_s16_t vx0, vx1, vx2, vx3, vx4, vx5, vx6, vx7; \ | |||||
| vector_s16_t vy0, vy1, vy2, vy3, vy4, vy5, vy6, vy7; \ | |||||
| vector_s16_t a0, a1, a2, ma2, c4, mc4, zero, bias; \ | |||||
| vector_s16_t t0, t1, t2, t3, t4, t5, t6, t7, t8; \ | |||||
| vector_u16_t shift; \ | |||||
| \ | |||||
| c4 = vec_splat (constants[0], 0); \ | |||||
| a0 = vec_splat (constants[0], 1); \ | |||||
| a1 = vec_splat (constants[0], 2); \ | |||||
| a2 = vec_splat (constants[0], 3); \ | |||||
| mc4 = vec_splat (constants[0], 4); \ | |||||
| ma2 = vec_splat (constants[0], 5); \ | |||||
| bias = (vector_s16_t)vec_splat ((vector_s32_t)constants[0], 3); \ | |||||
| \ | |||||
| zero = vec_splat_s16 (0); \ | |||||
| shift = vec_splat_u16 (4); \ | |||||
| \ | |||||
| vx0 = vec_mradds (vec_sl (block[0], shift), constants[1], zero); \ | |||||
| vx1 = vec_mradds (vec_sl (block[1], shift), constants[2], zero); \ | |||||
| vx2 = vec_mradds (vec_sl (block[2], shift), constants[3], zero); \ | |||||
| vx3 = vec_mradds (vec_sl (block[3], shift), constants[4], zero); \ | |||||
| vx4 = vec_mradds (vec_sl (block[4], shift), constants[1], zero); \ | |||||
| vx5 = vec_mradds (vec_sl (block[5], shift), constants[4], zero); \ | |||||
| vx6 = vec_mradds (vec_sl (block[6], shift), constants[3], zero); \ | |||||
| vx7 = vec_mradds (vec_sl (block[7], shift), constants[2], zero); \ | |||||
| \ | |||||
| IDCT_HALF \ | |||||
| \ | |||||
| vx0 = vec_mergeh (vy0, vy4); \ | |||||
| vx1 = vec_mergel (vy0, vy4); \ | |||||
| vx2 = vec_mergeh (vy1, vy5); \ | |||||
| vx3 = vec_mergel (vy1, vy5); \ | |||||
| vx4 = vec_mergeh (vy2, vy6); \ | |||||
| vx5 = vec_mergel (vy2, vy6); \ | |||||
| vx6 = vec_mergeh (vy3, vy7); \ | |||||
| vx7 = vec_mergel (vy3, vy7); \ | |||||
| \ | |||||
| vy0 = vec_mergeh (vx0, vx4); \ | |||||
| vy1 = vec_mergel (vx0, vx4); \ | |||||
| vy2 = vec_mergeh (vx1, vx5); \ | |||||
| vy3 = vec_mergel (vx1, vx5); \ | |||||
| vy4 = vec_mergeh (vx2, vx6); \ | |||||
| vy5 = vec_mergel (vx2, vx6); \ | |||||
| vy6 = vec_mergeh (vx3, vx7); \ | |||||
| vy7 = vec_mergel (vx3, vx7); \ | |||||
| \ | |||||
| vx0 = vec_adds (vec_mergeh (vy0, vy4), bias); \ | |||||
| vx1 = vec_mergel (vy0, vy4); \ | |||||
| vx2 = vec_mergeh (vy1, vy5); \ | |||||
| vx3 = vec_mergel (vy1, vy5); \ | |||||
| vx4 = vec_mergeh (vy2, vy6); \ | |||||
| vx5 = vec_mergel (vy2, vy6); \ | |||||
| vx6 = vec_mergeh (vy3, vy7); \ | |||||
| vx7 = vec_mergel (vy3, vy7); \ | |||||
| \ | |||||
| IDCT_HALF \ | |||||
| \ | |||||
| shift = vec_splat_u16 (6); \ | |||||
| vx0 = vec_sra (vy0, shift); \ | |||||
| vx1 = vec_sra (vy1, shift); \ | |||||
| vx2 = vec_sra (vy2, shift); \ | |||||
| vx3 = vec_sra (vy3, shift); \ | |||||
| vx4 = vec_sra (vy4, shift); \ | |||||
| vx5 = vec_sra (vy5, shift); \ | |||||
| vx6 = vec_sra (vy6, shift); \ | |||||
| vx7 = vec_sra (vy7, shift); | vx7 = vec_sra (vy7, shift); | ||||
| @@ -180,18 +180,18 @@ POWERPC_PERF_START_COUNT(altivec_idct_put_num, 1); | |||||
| #endif | #endif | ||||
| IDCT | IDCT | ||||
| #define COPY(dest,src) \ | |||||
| tmp = vec_packsu (src, src); \ | |||||
| vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); \ | |||||
| #define COPY(dest,src) \ | |||||
| tmp = vec_packsu (src, src); \ | |||||
| vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); \ | |||||
| vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | ||||
| COPY (dest, vx0) dest += stride; | |||||
| COPY (dest, vx1) dest += stride; | |||||
| COPY (dest, vx2) dest += stride; | |||||
| COPY (dest, vx3) dest += stride; | |||||
| COPY (dest, vx4) dest += stride; | |||||
| COPY (dest, vx5) dest += stride; | |||||
| COPY (dest, vx6) dest += stride; | |||||
| COPY (dest, vx0) dest += stride; | |||||
| COPY (dest, vx1) dest += stride; | |||||
| COPY (dest, vx2) dest += stride; | |||||
| COPY (dest, vx3) dest += stride; | |||||
| COPY (dest, vx4) dest += stride; | |||||
| COPY (dest, vx5) dest += stride; | |||||
| COPY (dest, vx6) dest += stride; | |||||
| COPY (dest, vx7) | COPY (dest, vx7) | ||||
| POWERPC_PERF_STOP_COUNT(altivec_idct_put_num, 1); | POWERPC_PERF_STOP_COUNT(altivec_idct_put_num, 1); | ||||
| @@ -225,22 +225,22 @@ POWERPC_PERF_START_COUNT(altivec_idct_add_num, 1); | |||||
| perm0 = vec_mergeh (p, p0); | perm0 = vec_mergeh (p, p0); | ||||
| perm1 = vec_mergeh (p, p1); | perm1 = vec_mergeh (p, p1); | ||||
| #define ADD(dest,src,perm) \ | |||||
| /* *(uint64_t *)&tmp = *(uint64_t *)dest; */ \ | |||||
| tmp = vec_ld (0, dest); \ | |||||
| tmp2 = (vector_s16_t)vec_perm (tmp, (vector_u8_t)zero, perm); \ | |||||
| tmp3 = vec_adds (tmp2, src); \ | |||||
| tmp = vec_packsu (tmp3, tmp3); \ | |||||
| vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); \ | |||||
| #define ADD(dest,src,perm) \ | |||||
| /* *(uint64_t *)&tmp = *(uint64_t *)dest; */ \ | |||||
| tmp = vec_ld (0, dest); \ | |||||
| tmp2 = (vector_s16_t)vec_perm (tmp, (vector_u8_t)zero, perm); \ | |||||
| tmp3 = vec_adds (tmp2, src); \ | |||||
| tmp = vec_packsu (tmp3, tmp3); \ | |||||
| vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); \ | |||||
| vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | ||||
| ADD (dest, vx0, perm0) dest += stride; | |||||
| ADD (dest, vx1, perm1) dest += stride; | |||||
| ADD (dest, vx2, perm0) dest += stride; | |||||
| ADD (dest, vx3, perm1) dest += stride; | |||||
| ADD (dest, vx4, perm0) dest += stride; | |||||
| ADD (dest, vx5, perm1) dest += stride; | |||||
| ADD (dest, vx6, perm0) dest += stride; | |||||
| ADD (dest, vx0, perm0) dest += stride; | |||||
| ADD (dest, vx1, perm1) dest += stride; | |||||
| ADD (dest, vx2, perm0) dest += stride; | |||||
| ADD (dest, vx3, perm1) dest += stride; | |||||
| ADD (dest, vx4, perm0) dest += stride; | |||||
| ADD (dest, vx5, perm1) dest += stride; | |||||
| ADD (dest, vx6, perm0) dest += stride; | |||||
| ADD (dest, vx7, perm1) | ADD (dest, vx7, perm1) | ||||
| POWERPC_PERF_STOP_COUNT(altivec_idct_add_num, 1); | POWERPC_PERF_STOP_COUNT(altivec_idct_add_num, 1); | ||||
| @@ -152,9 +152,9 @@ int dct_quantize_altivec(MpegEncContext* s, | |||||
| } | } | ||||
| // The following block could exist as a separate an altivec dct | // The following block could exist as a separate an altivec dct | ||||
| // function. However, if we put it inline, the DCT data can remain | |||||
| // in the vector local variables, as floats, which we'll use during the | |||||
| // quantize step... | |||||
| // function. However, if we put it inline, the DCT data can remain | |||||
| // in the vector local variables, as floats, which we'll use during the | |||||
| // quantize step... | |||||
| { | { | ||||
| const vector float vec_0_298631336 = (vector float)FOUROF(0.298631336f); | const vector float vec_0_298631336 = (vector float)FOUROF(0.298631336f); | ||||
| const vector float vec_0_390180644 = (vector float)FOUROF(-0.390180644f); | const vector float vec_0_390180644 = (vector float)FOUROF(-0.390180644f); | ||||
| @@ -206,11 +206,11 @@ int dct_quantize_altivec(MpegEncContext* s, | |||||
| z1 = vec_madd(vec_add(tmp12, tmp13), vec_0_541196100, (vector float)zero); | z1 = vec_madd(vec_add(tmp12, tmp13), vec_0_541196100, (vector float)zero); | ||||
| // dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), | // dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), | ||||
| // CONST_BITS-PASS1_BITS); | |||||
| // CONST_BITS-PASS1_BITS); | |||||
| row2 = vec_madd(tmp13, vec_0_765366865, z1); | row2 = vec_madd(tmp13, vec_0_765366865, z1); | ||||
| // dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), | // dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), | ||||
| // CONST_BITS-PASS1_BITS); | |||||
| // CONST_BITS-PASS1_BITS); | |||||
| row6 = vec_madd(tmp12, vec_1_847759065, z1); | row6 = vec_madd(tmp12, vec_1_847759065, z1); | ||||
| z1 = vec_add(tmp4, tmp7); // z1 = tmp4 + tmp7; | z1 = vec_add(tmp4, tmp7); // z1 = tmp4 + tmp7; | ||||
| @@ -315,7 +315,7 @@ int dct_quantize_altivec(MpegEncContext* s, | |||||
| } | } | ||||
| // Load the bias vector (We add 0.5 to the bias so that we're | // Load the bias vector (We add 0.5 to the bias so that we're | ||||
| // rounding when we convert to int, instead of flooring.) | |||||
| // rounding when we convert to int, instead of flooring.) | |||||
| { | { | ||||
| vector signed int biasInt; | vector signed int biasInt; | ||||
| const vector float negOneFloat = (vector float)FOUROF(-1.0f); | const vector float negOneFloat = (vector float)FOUROF(-1.0f); | ||||
| @@ -80,7 +80,7 @@ static void get_pixels_mmi(DCTELEM *block, const uint8_t *pixels, int line_size) | |||||
| "pextlb $10, $0, $10 \n\t" | "pextlb $10, $0, $10 \n\t" | ||||
| "sq $10, 80(%1) \n\t" | "sq $10, 80(%1) \n\t" | ||||
| "pextlb $8, $0, $8 \n\t" | "pextlb $8, $0, $8 \n\t" | ||||
| "sq $8, 96(%1) \n\t" | |||||
| "sq $8, 96(%1) \n\t" | |||||
| "pextlb $9, $0, $9 \n\t" | "pextlb $9, $0, $9 \n\t" | ||||
| "sq $9, 112(%1) \n\t" | "sq $9, 112(%1) \n\t" | ||||
| ".set pop \n\t" | ".set pop \n\t" | ||||
| @@ -112,7 +112,7 @@ static void put_pixels16_mmi(uint8_t *block, const uint8_t *pixels, int line_siz | |||||
| asm volatile ( | asm volatile ( | ||||
| ".set push \n\t" | ".set push \n\t" | ||||
| ".set mips3 \n\t" | ".set mips3 \n\t" | ||||
| "1: \n\t" | |||||
| "1: \n\t" | |||||
| "ldr $8, 0(%1) \n\t" | "ldr $8, 0(%1) \n\t" | ||||
| "add $11, %1, %3 \n\t" | "add $11, %1, %3 \n\t" | ||||
| "ldl $8, 7(%1) \n\t" | "ldl $8, 7(%1) \n\t" | ||||
| @@ -133,7 +133,7 @@ static void put_pixels16_mmi(uint8_t *block, const uint8_t *pixels, int line_siz | |||||
| "bgtz %2, 1b \n\t" | "bgtz %2, 1b \n\t" | ||||
| ".set pop \n\t" | ".set pop \n\t" | ||||
| : "+r" (block), "+r" (pixels), "+r" (h) : "r" (line_size) | : "+r" (block), "+r" (pixels), "+r" (h) : "r" (line_size) | ||||
| : "$8", "$9", "$10", "$11", "$12", "$13", "memory" ); | |||||
| : "$8", "$9", "$10", "$11", "$12", "$13", "memory" ); | |||||
| } | } | ||||
| @@ -15,32 +15,32 @@ | |||||
| #include "../dsputil.h" | #include "../dsputil.h" | ||||
| #include "mmi.h" | #include "mmi.h" | ||||
| #define BITS_INV_ACC 5 // 4 or 5 for IEEE | |||||
| #define SHIFT_INV_ROW (16 - BITS_INV_ACC) | |||||
| #define BITS_INV_ACC 5 // 4 or 5 for IEEE | |||||
| #define SHIFT_INV_ROW (16 - BITS_INV_ACC) | |||||
| #define SHIFT_INV_COL (1 + BITS_INV_ACC) | #define SHIFT_INV_COL (1 + BITS_INV_ACC) | ||||
| #define TG1 6518 | |||||
| #define TG2 13573 | |||||
| #define TG3 21895 | |||||
| #define CS4 23170 | |||||
| #define TG1 6518 | |||||
| #define TG2 13573 | |||||
| #define TG3 21895 | |||||
| #define CS4 23170 | |||||
| #define ROUNDER_0 0 | |||||
| #define ROUNDER_1 16 | |||||
| #define ROUNDER_0 0 | |||||
| #define ROUNDER_1 16 | |||||
| #define TAB_i_04 (32+0) | |||||
| #define TAB_i_17 (32+64) | |||||
| #define TAB_i_26 (32+128) | |||||
| #define TAB_i_35 (32+192) | |||||
| #define TAB_i_04 (32+0) | |||||
| #define TAB_i_17 (32+64) | |||||
| #define TAB_i_26 (32+128) | |||||
| #define TAB_i_35 (32+192) | |||||
| #define TG_1_16 (32+256+0) | |||||
| #define TG_2_16 (32+256+16) | |||||
| #define TG_3_16 (32+256+32) | |||||
| #define COS_4_16 (32+256+48) | |||||
| #define TG_1_16 (32+256+0) | |||||
| #define TG_2_16 (32+256+16) | |||||
| #define TG_3_16 (32+256+32) | |||||
| #define COS_4_16 (32+256+48) | |||||
| #define CLIPMAX (32+256+64+0) | |||||
| #define CLIPMAX (32+256+64+0) | |||||
| static short consttable[] align16 = { | static short consttable[] align16 = { | ||||
| /* rounder 0*/ // assume SHIFT_INV_ROW == 11 | |||||
| /* rounder 0*/ // assume SHIFT_INV_ROW == 11 | |||||
| 0x3ff, 1, 0x3ff, 1, 0x3ff, 1, 0x3ff, 1, | 0x3ff, 1, 0x3ff, 1, 0x3ff, 1, 0x3ff, 1, | ||||
| /* rounder 1*/ | /* rounder 1*/ | ||||
| 0x3ff, 0, 0x3ff, 0, 0x3ff, 0, 0x3ff, 0, | 0x3ff, 0, 0x3ff, 0, 0x3ff, 0, 0x3ff, 0, | ||||
| @@ -75,274 +75,274 @@ static short consttable[] align16 = { | |||||
| #define DCT_8_INV_ROW1(blk, rowoff, taboff, rnd, outreg) { \ | #define DCT_8_INV_ROW1(blk, rowoff, taboff, rnd, outreg) { \ | ||||
| lq(blk, rowoff, $16); /* r16 = x7 x5 x3 x1 x6 x4 x2 x0 */ \ | |||||
| /*slot*/ \ | |||||
| lq($24, 0+taboff, $17); /* r17 = w */ \ | |||||
| /*delay slot $16*/ \ | |||||
| lq($24, 16+taboff, $18);/* r18 = w */ \ | |||||
| prevh($16, $2); /* r2 = x1 x3 x5 x7 x0 x2 x4 x6 */ \ | |||||
| lq($24, 32+taboff, $19);/* r19 = w */ \ | |||||
| phmadh($17, $16, $17); /* r17 = b1"b0'a1"a0' */ \ | |||||
| lq($24, 48+taboff, $20);/* r20 = w */ \ | |||||
| phmadh($18, $2, $18); /* r18 = b1'b0"a1'a0" */ \ | |||||
| phmadh($19, $16, $19); /* r19 = b3"b2'a3"a2' */ \ | |||||
| phmadh($20, $2, $20); /* r20 = b3'b2"a3'a2" */ \ | |||||
| paddw($17, $18, $17); /* r17 = (b1)(b0)(a1)(a0) */ \ | |||||
| paddw($19, $20, $19); /* r19 = (b3)(b2)(a3)(a2) */ \ | |||||
| pcpyld($19, $17, $18); /* r18 = (a3)(a2)(a1)(a0) */ \ | |||||
| pcpyud($17, $19, $20); /* r20 = (b3)(b2)(b1)(b0) */ \ | |||||
| paddw($18, rnd, $18); /* r18 = (a3)(a2)(a1)(a0) */\ | |||||
| paddw($18, $20, $17); /* r17 = ()()()(a0+b0) */ \ | |||||
| psubw($18, $20, $20); /* r20 = ()()()(a0-b0) */ \ | |||||
| psraw($17, SHIFT_INV_ROW, $17); /* r17 = (y3 y2 y1 y0) */ \ | |||||
| psraw($20, SHIFT_INV_ROW, $20); /* r20 = (y4 y5 y6 y7) */ \ | |||||
| ppach($20, $17, outreg);/* out = y4 y5 y6 y7 y3 y2 y1 y0 Note order */ \ | |||||
| lq(blk, rowoff, $16); /* r16 = x7 x5 x3 x1 x6 x4 x2 x0 */ \ | |||||
| /*slot*/ \ | |||||
| lq($24, 0+taboff, $17); /* r17 = w */ \ | |||||
| /*delay slot $16*/ \ | |||||
| lq($24, 16+taboff, $18);/* r18 = w */ \ | |||||
| prevh($16, $2); /* r2 = x1 x3 x5 x7 x0 x2 x4 x6 */ \ | |||||
| lq($24, 32+taboff, $19);/* r19 = w */ \ | |||||
| phmadh($17, $16, $17); /* r17 = b1"b0'a1"a0' */ \ | |||||
| lq($24, 48+taboff, $20);/* r20 = w */ \ | |||||
| phmadh($18, $2, $18); /* r18 = b1'b0"a1'a0" */ \ | |||||
| phmadh($19, $16, $19); /* r19 = b3"b2'a3"a2' */ \ | |||||
| phmadh($20, $2, $20); /* r20 = b3'b2"a3'a2" */ \ | |||||
| paddw($17, $18, $17); /* r17 = (b1)(b0)(a1)(a0) */ \ | |||||
| paddw($19, $20, $19); /* r19 = (b3)(b2)(a3)(a2) */ \ | |||||
| pcpyld($19, $17, $18); /* r18 = (a3)(a2)(a1)(a0) */ \ | |||||
| pcpyud($17, $19, $20); /* r20 = (b3)(b2)(b1)(b0) */ \ | |||||
| paddw($18, rnd, $18); /* r18 = (a3)(a2)(a1)(a0) */\ | |||||
| paddw($18, $20, $17); /* r17 = ()()()(a0+b0) */ \ | |||||
| psubw($18, $20, $20); /* r20 = ()()()(a0-b0) */ \ | |||||
| psraw($17, SHIFT_INV_ROW, $17); /* r17 = (y3 y2 y1 y0) */ \ | |||||
| psraw($20, SHIFT_INV_ROW, $20); /* r20 = (y4 y5 y6 y7) */ \ | |||||
| ppach($20, $17, outreg);/* out = y4 y5 y6 y7 y3 y2 y1 y0 Note order */ \ | |||||
| \ | \ | ||||
| prevh(outreg, $2); \ | |||||
| pcpyud($2, $2, $2); \ | |||||
| pcpyld($2, outreg, outreg); \ | |||||
| prevh(outreg, $2); \ | |||||
| pcpyud($2, $2, $2); \ | |||||
| pcpyld($2, outreg, outreg); \ | |||||
| } | } | ||||
| #define DCT_8_INV_COL8() \ | #define DCT_8_INV_COL8() \ | ||||
| \ | \ | ||||
| lq($24, TG_3_16, $2); /* r2 = tn3 */ \ | |||||
| lq($24, TG_3_16, $2); /* r2 = tn3 */ \ | |||||
| \ | \ | ||||
| pmulth($11, $2, $17); /* r17 = x3 * tn3 (6420) */ \ | |||||
| psraw($17, 15, $17); \ | |||||
| pmfhl_uw($3); /* r3 = 7531 */ \ | |||||
| psraw($3, 15, $3); \ | |||||
| pinteh($3, $17, $17); /* r17 = x3 * tn3 */ \ | |||||
| psubh($17, $13, $17); /* r17 = tm35 */ \ | |||||
| pmulth($11, $2, $17); /* r17 = x3 * tn3 (6420) */ \ | |||||
| psraw($17, 15, $17); \ | |||||
| pmfhl_uw($3); /* r3 = 7531 */ \ | |||||
| psraw($3, 15, $3); \ | |||||
| pinteh($3, $17, $17); /* r17 = x3 * tn3 */ \ | |||||
| psubh($17, $13, $17); /* r17 = tm35 */ \ | |||||
| \ | \ | ||||
| pmulth($13, $2, $18); /* r18 = x5 * tn3 (6420) */ \ | |||||
| psraw($18, 15, $18); \ | |||||
| pmfhl_uw($3); /* r3 = 7531 */ \ | |||||
| psraw($3, 15, $3); \ | |||||
| pinteh($3, $18, $18); /* r18 = x5 * tn3 */ \ | |||||
| paddh($18, $11, $18); /* r18 = tp35 */ \ | |||||
| pmulth($13, $2, $18); /* r18 = x5 * tn3 (6420) */ \ | |||||
| psraw($18, 15, $18); \ | |||||
| pmfhl_uw($3); /* r3 = 7531 */ \ | |||||
| psraw($3, 15, $3); \ | |||||
| pinteh($3, $18, $18); /* r18 = x5 * tn3 */ \ | |||||
| paddh($18, $11, $18); /* r18 = tp35 */ \ | |||||
| \ | \ | ||||
| lq($24, TG_1_16, $2); /* r2 = tn1 */ \ | |||||
| lq($24, TG_1_16, $2); /* r2 = tn1 */ \ | |||||
| \ | \ | ||||
| pmulth($15, $2, $19); /* r19 = x7 * tn1 (6420) */ \ | |||||
| psraw($19, 15, $19); \ | |||||
| pmfhl_uw($3); /* r3 = 7531 */ \ | |||||
| psraw($3, 15, $3); \ | |||||
| pinteh($3, $19, $19); /* r19 = x7 * tn1 */ \ | |||||
| paddh($19, $9, $19); /* r19 = tp17 */ \ | |||||
| pmulth($15, $2, $19); /* r19 = x7 * tn1 (6420) */ \ | |||||
| psraw($19, 15, $19); \ | |||||
| pmfhl_uw($3); /* r3 = 7531 */ \ | |||||
| psraw($3, 15, $3); \ | |||||
| pinteh($3, $19, $19); /* r19 = x7 * tn1 */ \ | |||||
| paddh($19, $9, $19); /* r19 = tp17 */ \ | |||||
| \ | \ | ||||
| pmulth($9, $2, $20); /* r20 = x1 * tn1 (6420) */ \ | |||||
| psraw($20, 15, $20); \ | |||||
| pmfhl_uw($3); /* r3 = 7531 */ \ | |||||
| psraw($3, 15, $3); \ | |||||
| pinteh($3, $20, $20); /* r20 = x1 * tn1 */ \ | |||||
| psubh($20, $15, $20); /* r20 = tm17 */ \ | |||||
| pmulth($9, $2, $20); /* r20 = x1 * tn1 (6420) */ \ | |||||
| psraw($20, 15, $20); \ | |||||
| pmfhl_uw($3); /* r3 = 7531 */ \ | |||||
| psraw($3, 15, $3); \ | |||||
| pinteh($3, $20, $20); /* r20 = x1 * tn1 */ \ | |||||
| psubh($20, $15, $20); /* r20 = tm17 */ \ | |||||
| \ | \ | ||||
| psubh($19, $18, $3); /* r3 = t1 */ \ | |||||
| paddh($20, $17, $16); /* r16 = t2 */ \ | |||||
| psubh($20, $17, $23); /* r23 = b3 */ \ | |||||
| paddh($19, $18, $20); /* r20 = b0 */ \ | |||||
| psubh($19, $18, $3); /* r3 = t1 */ \ | |||||
| paddh($20, $17, $16); /* r16 = t2 */ \ | |||||
| psubh($20, $17, $23); /* r23 = b3 */ \ | |||||
| paddh($19, $18, $20); /* r20 = b0 */ \ | |||||
| \ | \ | ||||
| lq($24, COS_4_16, $2); /* r2 = cs4 */ \ | |||||
| lq($24, COS_4_16, $2); /* r2 = cs4 */ \ | |||||
| \ | \ | ||||
| paddh($3, $16, $21); /* r21 = t1+t2 */ \ | |||||
| psubh($3, $16, $22); /* r22 = t1-t2 */ \ | |||||
| paddh($3, $16, $21); /* r21 = t1+t2 */ \ | |||||
| psubh($3, $16, $22); /* r22 = t1-t2 */ \ | |||||
| \ | \ | ||||
| pmulth($21, $2, $21); /* r21 = cs4 * (t1+t2) 6420 */ \ | |||||
| psraw($21, 15, $21); \ | |||||
| pmfhl_uw($3); /* r3 = 7531 */ \ | |||||
| psraw($3, 15, $3); \ | |||||
| pinteh($3, $21, $21); /* r21 = b1 */ \ | |||||
| pmulth($21, $2, $21); /* r21 = cs4 * (t1+t2) 6420 */ \ | |||||
| psraw($21, 15, $21); \ | |||||
| pmfhl_uw($3); /* r3 = 7531 */ \ | |||||
| psraw($3, 15, $3); \ | |||||
| pinteh($3, $21, $21); /* r21 = b1 */ \ | |||||
| \ | \ | ||||
| pmulth($22, $2, $22); /* r22 = cs4 * (t1-t2) 6420 */ \ | |||||
| psraw($22, 15, $22); \ | |||||
| pmfhl_uw($3); /* r3 = 7531 */ \ | |||||
| psraw($3, 15, $3); \ | |||||
| pinteh($3, $22, $22); /* r22 = b2 */ \ | |||||
| pmulth($22, $2, $22); /* r22 = cs4 * (t1-t2) 6420 */ \ | |||||
| psraw($22, 15, $22); \ | |||||
| pmfhl_uw($3); /* r3 = 7531 */ \ | |||||
| psraw($3, 15, $3); \ | |||||
| pinteh($3, $22, $22); /* r22 = b2 */ \ | |||||
| \ | \ | ||||
| lq($24, TG_2_16, $2); /* r2 = tn2 */ \ | |||||
| lq($24, TG_2_16, $2); /* r2 = tn2 */ \ | |||||
| \ | \ | ||||
| pmulth($10, $2, $17); /* r17 = x2 * tn2 (6420) */ \ | |||||
| psraw($17, 15, $17); \ | |||||
| pmfhl_uw($3); /* r3 = 7531 */ \ | |||||
| psraw($3, 15, $3); \ | |||||
| pinteh($3, $17, $17); /* r17 = x2 * tn2 */ \ | |||||
| psubh($17, $14, $17); /* r17 = tm26 */ \ | |||||
| pmulth($10, $2, $17); /* r17 = x2 * tn2 (6420) */ \ | |||||
| psraw($17, 15, $17); \ | |||||
| pmfhl_uw($3); /* r3 = 7531 */ \ | |||||
| psraw($3, 15, $3); \ | |||||
| pinteh($3, $17, $17); /* r17 = x2 * tn2 */ \ | |||||
| psubh($17, $14, $17); /* r17 = tm26 */ \ | |||||
| \ | \ | ||||
| pmulth($14, $2, $18); /* r18 = x6 * tn2 (6420) */ \ | |||||
| psraw($18, 15, $18); \ | |||||
| pmfhl_uw($3); /* r3 = 7531 */ \ | |||||
| psraw($3, 15, $3); \ | |||||
| pinteh($3, $18, $18); /* r18 = x6 * tn2 */ \ | |||||
| paddh($18, $10, $18); /* r18 = tp26 */ \ | |||||
| pmulth($14, $2, $18); /* r18 = x6 * tn2 (6420) */ \ | |||||
| psraw($18, 15, $18); \ | |||||
| pmfhl_uw($3); /* r3 = 7531 */ \ | |||||
| psraw($3, 15, $3); \ | |||||
| pinteh($3, $18, $18); /* r18 = x6 * tn2 */ \ | |||||
| paddh($18, $10, $18); /* r18 = tp26 */ \ | |||||
| \ | \ | ||||
| paddh($8, $12, $2); /* r2 = tp04 */ \ | |||||
| psubh($8, $12, $3); /* r3 = tm04 */ \ | |||||
| paddh($8, $12, $2); /* r2 = tp04 */ \ | |||||
| psubh($8, $12, $3); /* r3 = tm04 */ \ | |||||
| \ | \ | ||||
| paddh($2, $18, $16); /* r16 = a0 */ \ | |||||
| psubh($2, $18, $19); /* r19 = a3 */ \ | |||||
| psubh($3, $17, $18); /* r18 = a2 */ \ | |||||
| paddh($3, $17, $17); /* r17 = a1 */ | |||||
| paddh($2, $18, $16); /* r16 = a0 */ \ | |||||
| psubh($2, $18, $19); /* r19 = a3 */ \ | |||||
| psubh($3, $17, $18); /* r18 = a2 */ \ | |||||
| paddh($3, $17, $17); /* r17 = a1 */ | |||||
| #define DCT_8_INV_COL8_STORE(blk) \ | #define DCT_8_INV_COL8_STORE(blk) \ | ||||
| \ | \ | ||||
| paddh($16, $20, $2); /* y0 a0+b0 */ \ | |||||
| psubh($16, $20, $16); /* y7 a0-b0 */ \ | |||||
| psrah($2, SHIFT_INV_COL, $2); \ | |||||
| psrah($16, SHIFT_INV_COL, $16); \ | |||||
| sq($2, 0, blk); \ | |||||
| sq($16, 112, blk); \ | |||||
| paddh($16, $20, $2); /* y0 a0+b0 */ \ | |||||
| psubh($16, $20, $16); /* y7 a0-b0 */ \ | |||||
| psrah($2, SHIFT_INV_COL, $2); \ | |||||
| psrah($16, SHIFT_INV_COL, $16); \ | |||||
| sq($2, 0, blk); \ | |||||
| sq($16, 112, blk); \ | |||||
| \ | \ | ||||
| paddh($17, $21, $3); /* y1 a1+b1 */ \ | |||||
| psubh($17, $21, $17); /* y6 a1-b1 */ \ | |||||
| psrah($3, SHIFT_INV_COL, $3); \ | |||||
| psrah($17, SHIFT_INV_COL, $17); \ | |||||
| sq($3, 16, blk); \ | |||||
| sq($17, 96, blk); \ | |||||
| paddh($17, $21, $3); /* y1 a1+b1 */ \ | |||||
| psubh($17, $21, $17); /* y6 a1-b1 */ \ | |||||
| psrah($3, SHIFT_INV_COL, $3); \ | |||||
| psrah($17, SHIFT_INV_COL, $17); \ | |||||
| sq($3, 16, blk); \ | |||||
| sq($17, 96, blk); \ | |||||
| \ | \ | ||||
| paddh($18, $22, $2); /* y2 a2+b2 */ \ | |||||
| psubh($18, $22, $18); /* y5 a2-b2 */ \ | |||||
| psrah($2, SHIFT_INV_COL, $2); \ | |||||
| psrah($18, SHIFT_INV_COL, $18); \ | |||||
| sq($2, 32, blk); \ | |||||
| sq($18, 80, blk); \ | |||||
| paddh($18, $22, $2); /* y2 a2+b2 */ \ | |||||
| psubh($18, $22, $18); /* y5 a2-b2 */ \ | |||||
| psrah($2, SHIFT_INV_COL, $2); \ | |||||
| psrah($18, SHIFT_INV_COL, $18); \ | |||||
| sq($2, 32, blk); \ | |||||
| sq($18, 80, blk); \ | |||||
| \ | \ | ||||
| paddh($19, $23, $3); /* y3 a3+b3 */ \ | |||||
| psubh($19, $23, $19); /* y4 a3-b3 */ \ | |||||
| psrah($3, SHIFT_INV_COL, $3); \ | |||||
| psrah($19, SHIFT_INV_COL, $19); \ | |||||
| sq($3, 48, blk); \ | |||||
| sq($19, 64, blk); | |||||
| paddh($19, $23, $3); /* y3 a3+b3 */ \ | |||||
| psubh($19, $23, $19); /* y4 a3-b3 */ \ | |||||
| psrah($3, SHIFT_INV_COL, $3); \ | |||||
| psrah($19, SHIFT_INV_COL, $19); \ | |||||
| sq($3, 48, blk); \ | |||||
| sq($19, 64, blk); | |||||
| #define DCT_8_INV_COL8_PMS() \ | #define DCT_8_INV_COL8_PMS() \ | ||||
| paddh($16, $20, $2); /* y0 a0+b0 */ \ | |||||
| psubh($16, $20, $20); /* y7 a0-b0 */ \ | |||||
| psrah($2, SHIFT_INV_COL, $16); \ | |||||
| psrah($20, SHIFT_INV_COL, $20); \ | |||||
| paddh($16, $20, $2); /* y0 a0+b0 */ \ | |||||
| psubh($16, $20, $20); /* y7 a0-b0 */ \ | |||||
| psrah($2, SHIFT_INV_COL, $16); \ | |||||
| psrah($20, SHIFT_INV_COL, $20); \ | |||||
| \ | \ | ||||
| paddh($17, $21, $3); /* y1 a1+b1 */ \ | |||||
| psubh($17, $21, $21); /* y6 a1-b1 */ \ | |||||
| psrah($3, SHIFT_INV_COL, $17); \ | |||||
| psrah($21, SHIFT_INV_COL, $21); \ | |||||
| paddh($17, $21, $3); /* y1 a1+b1 */ \ | |||||
| psubh($17, $21, $21); /* y6 a1-b1 */ \ | |||||
| psrah($3, SHIFT_INV_COL, $17); \ | |||||
| psrah($21, SHIFT_INV_COL, $21); \ | |||||
| \ | \ | ||||
| paddh($18, $22, $2); /* y2 a2+b2 */ \ | |||||
| psubh($18, $22, $22); /* y5 a2-b2 */ \ | |||||
| psrah($2, SHIFT_INV_COL, $18); \ | |||||
| psrah($22, SHIFT_INV_COL, $22); \ | |||||
| paddh($18, $22, $2); /* y2 a2+b2 */ \ | |||||
| psubh($18, $22, $22); /* y5 a2-b2 */ \ | |||||
| psrah($2, SHIFT_INV_COL, $18); \ | |||||
| psrah($22, SHIFT_INV_COL, $22); \ | |||||
| \ | \ | ||||
| paddh($19, $23, $3); /* y3 a3+b3 */ \ | |||||
| psubh($19, $23, $23); /* y4 a3-b3 */ \ | |||||
| psrah($3, SHIFT_INV_COL, $19); \ | |||||
| psrah($23, SHIFT_INV_COL, $23); | |||||
| #define PUT(rs) \ | |||||
| pminh(rs, $11, $2); \ | |||||
| pmaxh($2, $0, $2); \ | |||||
| ppacb($0, $2, $2); \ | |||||
| sd3(2, 0, 4); \ | |||||
| __asm__ __volatile__ ("add $4, $5, $4"); | |||||
| paddh($19, $23, $3); /* y3 a3+b3 */ \ | |||||
| psubh($19, $23, $23); /* y4 a3-b3 */ \ | |||||
| psrah($3, SHIFT_INV_COL, $19); \ | |||||
| psrah($23, SHIFT_INV_COL, $23); | |||||
| #define PUT(rs) \ | |||||
| pminh(rs, $11, $2); \ | |||||
| pmaxh($2, $0, $2); \ | |||||
| ppacb($0, $2, $2); \ | |||||
| sd3(2, 0, 4); \ | |||||
| __asm__ __volatile__ ("add $4, $5, $4"); | |||||
| #define DCT_8_INV_COL8_PUT() \ | #define DCT_8_INV_COL8_PUT() \ | ||||
| PUT($16); \ | |||||
| PUT($17); \ | |||||
| PUT($18); \ | |||||
| PUT($19); \ | |||||
| PUT($23); \ | |||||
| PUT($22); \ | |||||
| PUT($21); \ | |||||
| PUT($20); | |||||
| #define ADD(rs) \ | |||||
| ld3(4, 0, 2); \ | |||||
| pextlb($0, $2, $2); \ | |||||
| paddh($2, rs, $2); \ | |||||
| pminh($2, $11, $2); \ | |||||
| pmaxh($2, $0, $2); \ | |||||
| ppacb($0, $2, $2); \ | |||||
| sd3(2, 0, 4); \ | |||||
| __asm__ __volatile__ ("add $4, $5, $4"); | |||||
| PUT($16); \ | |||||
| PUT($17); \ | |||||
| PUT($18); \ | |||||
| PUT($19); \ | |||||
| PUT($23); \ | |||||
| PUT($22); \ | |||||
| PUT($21); \ | |||||
| PUT($20); | |||||
| #define ADD(rs) \ | |||||
| ld3(4, 0, 2); \ | |||||
| pextlb($0, $2, $2); \ | |||||
| paddh($2, rs, $2); \ | |||||
| pminh($2, $11, $2); \ | |||||
| pmaxh($2, $0, $2); \ | |||||
| ppacb($0, $2, $2); \ | |||||
| sd3(2, 0, 4); \ | |||||
| __asm__ __volatile__ ("add $4, $5, $4"); | |||||
| /*fixme: schedule*/ | /*fixme: schedule*/ | ||||
| #define DCT_8_INV_COL8_ADD() \ | #define DCT_8_INV_COL8_ADD() \ | ||||
| ADD($16); \ | |||||
| ADD($17); \ | |||||
| ADD($18); \ | |||||
| ADD($19); \ | |||||
| ADD($23); \ | |||||
| ADD($22); \ | |||||
| ADD($21); \ | |||||
| ADD($20); | |||||
| ADD($16); \ | |||||
| ADD($17); \ | |||||
| ADD($18); \ | |||||
| ADD($19); \ | |||||
| ADD($23); \ | |||||
| ADD($22); \ | |||||
| ADD($21); \ | |||||
| ADD($20); | |||||
| void ff_mmi_idct(int16_t * block) | void ff_mmi_idct(int16_t * block) | ||||
| { | { | ||||
| /* $4 = block */ | |||||
| __asm__ __volatile__("la $24, %0"::"m"(consttable[0])); | |||||
| lq($24, ROUNDER_0, $8); | |||||
| lq($24, ROUNDER_1, $7); | |||||
| DCT_8_INV_ROW1($4, 0, TAB_i_04, $8, $8); | |||||
| DCT_8_INV_ROW1($4, 16, TAB_i_17, $7, $9); | |||||
| DCT_8_INV_ROW1($4, 32, TAB_i_26, $7, $10); | |||||
| DCT_8_INV_ROW1($4, 48, TAB_i_35, $7, $11); | |||||
| DCT_8_INV_ROW1($4, 64, TAB_i_04, $7, $12); | |||||
| DCT_8_INV_ROW1($4, 80, TAB_i_35, $7, $13); | |||||
| DCT_8_INV_ROW1($4, 96, TAB_i_26, $7, $14); | |||||
| DCT_8_INV_ROW1($4, 112, TAB_i_17, $7, $15); | |||||
| DCT_8_INV_COL8(); | |||||
| DCT_8_INV_COL8_STORE($4); | |||||
| //clobber $16-$23 so the compiler saves and restores these saved-temp registers | |||||
| __asm__ __volatile__(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23"); | |||||
| /* $4 = block */ | |||||
| __asm__ __volatile__("la $24, %0"::"m"(consttable[0])); | |||||
| lq($24, ROUNDER_0, $8); | |||||
| lq($24, ROUNDER_1, $7); | |||||
| DCT_8_INV_ROW1($4, 0, TAB_i_04, $8, $8); | |||||
| DCT_8_INV_ROW1($4, 16, TAB_i_17, $7, $9); | |||||
| DCT_8_INV_ROW1($4, 32, TAB_i_26, $7, $10); | |||||
| DCT_8_INV_ROW1($4, 48, TAB_i_35, $7, $11); | |||||
| DCT_8_INV_ROW1($4, 64, TAB_i_04, $7, $12); | |||||
| DCT_8_INV_ROW1($4, 80, TAB_i_35, $7, $13); | |||||
| DCT_8_INV_ROW1($4, 96, TAB_i_26, $7, $14); | |||||
| DCT_8_INV_ROW1($4, 112, TAB_i_17, $7, $15); | |||||
| DCT_8_INV_COL8(); | |||||
| DCT_8_INV_COL8_STORE($4); | |||||
| //clobber $16-$23 so the compiler saves and restores these saved-temp registers | |||||
| __asm__ __volatile__(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23"); | |||||
| } | } | ||||
| void ff_mmi_idct_put(uint8_t *dest, int line_size, DCTELEM *block) | void ff_mmi_idct_put(uint8_t *dest, int line_size, DCTELEM *block) | ||||
| { | { | ||||
| /* $4 = dest, $5 = line_size, $6 = block */ | |||||
| __asm__ __volatile__("la $24, %0"::"m"(consttable[0])); | |||||
| lq($24, ROUNDER_0, $8); | |||||
| lq($24, ROUNDER_1, $7); | |||||
| DCT_8_INV_ROW1($6, 0, TAB_i_04, $8, $8); | |||||
| DCT_8_INV_ROW1($6, 16, TAB_i_17, $7, $9); | |||||
| DCT_8_INV_ROW1($6, 32, TAB_i_26, $7, $10); | |||||
| DCT_8_INV_ROW1($6, 48, TAB_i_35, $7, $11); | |||||
| DCT_8_INV_ROW1($6, 64, TAB_i_04, $7, $12); | |||||
| DCT_8_INV_ROW1($6, 80, TAB_i_35, $7, $13); | |||||
| DCT_8_INV_ROW1($6, 96, TAB_i_26, $7, $14); | |||||
| DCT_8_INV_ROW1($6, 112, TAB_i_17, $7, $15); | |||||
| DCT_8_INV_COL8(); | |||||
| lq($24, CLIPMAX, $11); | |||||
| DCT_8_INV_COL8_PMS(); | |||||
| DCT_8_INV_COL8_PUT(); | |||||
| //clobber $16-$23 so the compiler saves and restores these saved-temp registers | |||||
| __asm__ __volatile__(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23"); | |||||
| /* $4 = dest, $5 = line_size, $6 = block */ | |||||
| __asm__ __volatile__("la $24, %0"::"m"(consttable[0])); | |||||
| lq($24, ROUNDER_0, $8); | |||||
| lq($24, ROUNDER_1, $7); | |||||
| DCT_8_INV_ROW1($6, 0, TAB_i_04, $8, $8); | |||||
| DCT_8_INV_ROW1($6, 16, TAB_i_17, $7, $9); | |||||
| DCT_8_INV_ROW1($6, 32, TAB_i_26, $7, $10); | |||||
| DCT_8_INV_ROW1($6, 48, TAB_i_35, $7, $11); | |||||
| DCT_8_INV_ROW1($6, 64, TAB_i_04, $7, $12); | |||||
| DCT_8_INV_ROW1($6, 80, TAB_i_35, $7, $13); | |||||
| DCT_8_INV_ROW1($6, 96, TAB_i_26, $7, $14); | |||||
| DCT_8_INV_ROW1($6, 112, TAB_i_17, $7, $15); | |||||
| DCT_8_INV_COL8(); | |||||
| lq($24, CLIPMAX, $11); | |||||
| DCT_8_INV_COL8_PMS(); | |||||
| DCT_8_INV_COL8_PUT(); | |||||
| //clobber $16-$23 so the compiler saves and restores these saved-temp registers | |||||
| __asm__ __volatile__(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23"); | |||||
| } | } | ||||
| void ff_mmi_idct_add(uint8_t *dest, int line_size, DCTELEM *block) | void ff_mmi_idct_add(uint8_t *dest, int line_size, DCTELEM *block) | ||||
| { | { | ||||
| /* $4 = dest, $5 = line_size, $6 = block */ | |||||
| __asm__ __volatile__("la $24, %0"::"m"(consttable[0])); | |||||
| lq($24, ROUNDER_0, $8); | |||||
| lq($24, ROUNDER_1, $7); | |||||
| DCT_8_INV_ROW1($6, 0, TAB_i_04, $8, $8); | |||||
| DCT_8_INV_ROW1($6, 16, TAB_i_17, $7, $9); | |||||
| DCT_8_INV_ROW1($6, 32, TAB_i_26, $7, $10); | |||||
| DCT_8_INV_ROW1($6, 48, TAB_i_35, $7, $11); | |||||
| DCT_8_INV_ROW1($6, 64, TAB_i_04, $7, $12); | |||||
| DCT_8_INV_ROW1($6, 80, TAB_i_35, $7, $13); | |||||
| DCT_8_INV_ROW1($6, 96, TAB_i_26, $7, $14); | |||||
| DCT_8_INV_ROW1($6, 112, TAB_i_17, $7, $15); | |||||
| DCT_8_INV_COL8(); | |||||
| lq($24, CLIPMAX, $11); | |||||
| DCT_8_INV_COL8_PMS(); | |||||
| DCT_8_INV_COL8_ADD(); | |||||
| //clobber $16-$23 so the compiler saves and restores these saved-temp registers | |||||
| __asm__ __volatile__(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23"); | |||||
| /* $4 = dest, $5 = line_size, $6 = block */ | |||||
| __asm__ __volatile__("la $24, %0"::"m"(consttable[0])); | |||||
| lq($24, ROUNDER_0, $8); | |||||
| lq($24, ROUNDER_1, $7); | |||||
| DCT_8_INV_ROW1($6, 0, TAB_i_04, $8, $8); | |||||
| DCT_8_INV_ROW1($6, 16, TAB_i_17, $7, $9); | |||||
| DCT_8_INV_ROW1($6, 32, TAB_i_26, $7, $10); | |||||
| DCT_8_INV_ROW1($6, 48, TAB_i_35, $7, $11); | |||||
| DCT_8_INV_ROW1($6, 64, TAB_i_04, $7, $12); | |||||
| DCT_8_INV_ROW1($6, 80, TAB_i_35, $7, $13); | |||||
| DCT_8_INV_ROW1($6, 96, TAB_i_26, $7, $14); | |||||
| DCT_8_INV_ROW1($6, 112, TAB_i_17, $7, $15); | |||||
| DCT_8_INV_COL8(); | |||||
| lq($24, CLIPMAX, $11); | |||||
| DCT_8_INV_COL8_PMS(); | |||||
| DCT_8_INV_COL8_ADD(); | |||||
| //clobber $16-$23 so the compiler saves and restores these saved-temp registers | |||||
| __asm__ __volatile__(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23"); | |||||
| } | } | ||||
| @@ -5,148 +5,148 @@ | |||||
| /* | /* | ||||
| #define r0 $zero | #define r0 $zero | ||||
| #define r1 $at //assembler! | |||||
| #define r2 $v0 //return | |||||
| #define r3 $v1 //return | |||||
| #define r4 $a0 //arg | |||||
| #define r5 $a1 //arg | |||||
| #define r6 $a2 //arg | |||||
| #define r7 $a3 //arg | |||||
| #define r8 $t0 //temp | |||||
| #define r9 $t1 //temp | |||||
| #define r10 $t2 //temp | |||||
| #define r11 $t3 //temp | |||||
| #define r12 $t4 //temp | |||||
| #define r13 $t5 //temp | |||||
| #define r14 $t6 //temp | |||||
| #define r15 $t7 //temp | |||||
| #define r16 $s0 //saved temp | |||||
| #define r17 $s1 //saved temp | |||||
| #define r18 $s2 //saved temp | |||||
| #define r19 $s3 //saved temp | |||||
| #define r20 $s4 //saved temp | |||||
| #define r21 $s5 //saved temp | |||||
| #define r22 $s6 //saved temp | |||||
| #define r23 $s7 //saved temp | |||||
| #define r24 $t8 //temp | |||||
| #define r25 $t9 //temp | |||||
| #define r26 $k0 //kernel | |||||
| #define r27 $k1 //kernel | |||||
| #define r28 $gp //global ptr | |||||
| #define r29 $sp //stack ptr | |||||
| #define r30 $fp //frame ptr | |||||
| #define r31 $ra //return addr | |||||
| #define r1 $at //assembler! | |||||
| #define r2 $v0 //return | |||||
| #define r3 $v1 //return | |||||
| #define r4 $a0 //arg | |||||
| #define r5 $a1 //arg | |||||
| #define r6 $a2 //arg | |||||
| #define r7 $a3 //arg | |||||
| #define r8 $t0 //temp | |||||
| #define r9 $t1 //temp | |||||
| #define r10 $t2 //temp | |||||
| #define r11 $t3 //temp | |||||
| #define r12 $t4 //temp | |||||
| #define r13 $t5 //temp | |||||
| #define r14 $t6 //temp | |||||
| #define r15 $t7 //temp | |||||
| #define r16 $s0 //saved temp | |||||
| #define r17 $s1 //saved temp | |||||
| #define r18 $s2 //saved temp | |||||
| #define r19 $s3 //saved temp | |||||
| #define r20 $s4 //saved temp | |||||
| #define r21 $s5 //saved temp | |||||
| #define r22 $s6 //saved temp | |||||
| #define r23 $s7 //saved temp | |||||
| #define r24 $t8 //temp | |||||
| #define r25 $t9 //temp | |||||
| #define r26 $k0 //kernel | |||||
| #define r27 $k1 //kernel | |||||
| #define r28 $gp //global ptr | |||||
| #define r29 $sp //stack ptr | |||||
| #define r30 $fp //frame ptr | |||||
| #define r31 $ra //return addr | |||||
| */ | */ | ||||
| #define lq(base, off, reg) \ | |||||
| __asm__ __volatile__ ("lq " #reg ", %0("#base ")" : : "i" (off) ) | |||||
| #define lq(base, off, reg) \ | |||||
| __asm__ __volatile__ ("lq " #reg ", %0("#base ")" : : "i" (off) ) | |||||
| #define lq2(mem, reg) \ | |||||
| __asm__ __volatile__ ("lq " #reg ", %0" : : "r" (mem)) | |||||
| #define lq2(mem, reg) \ | |||||
| __asm__ __volatile__ ("lq " #reg ", %0" : : "r" (mem)) | |||||
| #define sq(reg, off, base) \ | |||||
| __asm__ __volatile__ ("sq " #reg ", %0("#base ")" : : "i" (off) ) | |||||
| #define sq(reg, off, base) \ | |||||
| __asm__ __volatile__ ("sq " #reg ", %0("#base ")" : : "i" (off) ) | |||||
| /* | /* | ||||
| #define ld(base, off, reg) \ | |||||
| __asm__ __volatile__ ("ld " #reg ", " #off "("#base ")") | |||||
| #define ld(base, off, reg) \ | |||||
| __asm__ __volatile__ ("ld " #reg ", " #off "("#base ")") | |||||
| */ | */ | ||||
| #define ld3(base, off, reg) \ | |||||
| __asm__ __volatile__ (".word %0" : : "i" ( 0xdc000000 | (base<<21) | (reg<<16) | (off))) | |||||
| #define ld3(base, off, reg) \ | |||||
| __asm__ __volatile__ (".word %0" : : "i" ( 0xdc000000 | (base<<21) | (reg<<16) | (off))) | |||||
| #define ldr3(base, off, reg) \ | |||||
| __asm__ __volatile__ (".word %0" : : "i" ( 0x6c000000 | (base<<21) | (reg<<16) | (off))) | |||||
| #define ldr3(base, off, reg) \ | |||||
| __asm__ __volatile__ (".word %0" : : "i" ( 0x6c000000 | (base<<21) | (reg<<16) | (off))) | |||||
| #define ldl3(base, off, reg) \ | |||||
| __asm__ __volatile__ (".word %0" : : "i" ( 0x68000000 | (base<<21) | (reg<<16) | (off))) | |||||
| #define ldl3(base, off, reg) \ | |||||
| __asm__ __volatile__ (".word %0" : : "i" ( 0x68000000 | (base<<21) | (reg<<16) | (off))) | |||||
| /* | /* | ||||
| #define sd(reg, off, base) \ | |||||
| __asm__ __volatile__ ("sd " #reg ", " #off "("#base ")") | |||||
| #define sd(reg, off, base) \ | |||||
| __asm__ __volatile__ ("sd " #reg ", " #off "("#base ")") | |||||
| */ | */ | ||||
| //the assembler seems to mis-encode the 'sd' mnemonic, so build the instruction word by hand | //the assembler seems to mis-encode the 'sd' mnemonic, so build the instruction word by hand | ||||
| #define sd3(reg, off, base) \ | |||||
| __asm__ __volatile__ (".word %0" : : "i" ( 0xfc000000 | (base<<21) | (reg<<16) | (off))) | |||||
| #define sd3(reg, off, base) \ | |||||
| __asm__ __volatile__ (".word %0" : : "i" ( 0xfc000000 | (base<<21) | (reg<<16) | (off))) | |||||
| #define sw(reg, off, base) \ | |||||
| __asm__ __volatile__ ("sw " #reg ", " #off "("#base ")") | |||||
| #define sw(reg, off, base) \ | |||||
| __asm__ __volatile__ ("sw " #reg ", " #off "("#base ")") | |||||
| #define sq2(reg, mem) \ | |||||
| __asm__ __volatile__ ("sq " #reg ", %0" : : "m" (*(mem))) | |||||
| #define sq2(reg, mem) \ | |||||
| __asm__ __volatile__ ("sq " #reg ", %0" : : "m" (*(mem))) | |||||
| #define pinth(rs, rt, rd) \ | |||||
| __asm__ __volatile__ ("pinth " #rd ", " #rs ", " #rt ) | |||||
| #define pinth(rs, rt, rd) \ | |||||
| __asm__ __volatile__ ("pinth " #rd ", " #rs ", " #rt ) | |||||
| #define phmadh(rs, rt, rd) \ | |||||
| __asm__ __volatile__ ("phmadh " #rd ", " #rs ", " #rt ) | |||||
| #define phmadh(rs, rt, rd) \ | |||||
| __asm__ __volatile__ ("phmadh " #rd ", " #rs ", " #rt ) | |||||
| #define pcpyud(rs, rt, rd) \ | |||||
| __asm__ __volatile__ ("pcpyud " #rd ", " #rs ", " #rt ) | |||||
| #define pcpyud(rs, rt, rd) \ | |||||
| __asm__ __volatile__ ("pcpyud " #rd ", " #rs ", " #rt ) | |||||
| #define pcpyld(rs, rt, rd) \ | |||||
| __asm__ __volatile__ ("pcpyld " #rd ", " #rs ", " #rt ) | |||||
| #define pcpyld(rs, rt, rd) \ | |||||
| __asm__ __volatile__ ("pcpyld " #rd ", " #rs ", " #rt ) | |||||
| #define pcpyh(rt, rd) \ | |||||
| __asm__ __volatile__ ("pcpyh " #rd ", " #rt ) | |||||
| #define pcpyh(rt, rd) \ | |||||
| __asm__ __volatile__ ("pcpyh " #rd ", " #rt ) | |||||
| #define paddw(rs, rt, rd) \ | |||||
| __asm__ __volatile__ ("paddw " #rd ", " #rs ", " #rt ) | |||||
| #define paddw(rs, rt, rd) \ | |||||
| __asm__ __volatile__ ("paddw " #rd ", " #rs ", " #rt ) | |||||
| #define pextlw(rs, rt, rd) \ | |||||
| __asm__ __volatile__ ("pextlw " #rd ", " #rs ", " #rt ) | |||||
| #define pextlw(rs, rt, rd) \ | |||||
| __asm__ __volatile__ ("pextlw " #rd ", " #rs ", " #rt ) | |||||
| #define pextuw(rs, rt, rd) \ | |||||
| __asm__ __volatile__ ("pextuw " #rd ", " #rs ", " #rt ) | |||||
| #define pextuw(rs, rt, rd) \ | |||||
| __asm__ __volatile__ ("pextuw " #rd ", " #rs ", " #rt ) | |||||
| #define pextlh(rs, rt, rd) \ | |||||
| __asm__ __volatile__ ("pextlh " #rd ", " #rs ", " #rt ) | |||||
| #define pextlh(rs, rt, rd) \ | |||||
| __asm__ __volatile__ ("pextlh " #rd ", " #rs ", " #rt ) | |||||
| #define pextuh(rs, rt, rd) \ | |||||
| __asm__ __volatile__ ("pextuh " #rd ", " #rs ", " #rt ) | |||||
| #define pextuh(rs, rt, rd) \ | |||||
| __asm__ __volatile__ ("pextuh " #rd ", " #rs ", " #rt ) | |||||
| #define psubw(rs, rt, rd) \ | |||||
| __asm__ __volatile__ ("psubw " #rd ", " #rs ", " #rt ) | |||||
| #define psubw(rs, rt, rd) \ | |||||
| __asm__ __volatile__ ("psubw " #rd ", " #rs ", " #rt ) | |||||
| #define psraw(rt, sa, rd) \ | |||||
| __asm__ __volatile__ ("psraw " #rd ", " #rt ", %0" : : "i"(sa) ) | |||||
| #define psraw(rt, sa, rd) \ | |||||
| __asm__ __volatile__ ("psraw " #rd ", " #rt ", %0" : : "i"(sa) ) | |||||
| #define ppach(rs, rt, rd) \ | |||||
| __asm__ __volatile__ ("ppach " #rd ", " #rs ", " #rt ) | |||||
| #define ppach(rs, rt, rd) \ | |||||
| __asm__ __volatile__ ("ppach " #rd ", " #rs ", " #rt ) | |||||
| #define ppacb(rs, rt, rd) \ | |||||
| __asm__ __volatile__ ("ppacb " #rd ", " #rs ", " #rt ) | |||||
| #define ppacb(rs, rt, rd) \ | |||||
| __asm__ __volatile__ ("ppacb " #rd ", " #rs ", " #rt ) | |||||
| #define prevh(rt, rd) \ | |||||
| __asm__ __volatile__ ("prevh " #rd ", " #rt ) | |||||
| #define prevh(rt, rd) \ | |||||
| __asm__ __volatile__ ("prevh " #rd ", " #rt ) | |||||
| #define pmulth(rs, rt, rd) \ | |||||
| __asm__ __volatile__ ("pmulth " #rd ", " #rs ", " #rt ) | |||||
| #define pmulth(rs, rt, rd) \ | |||||
| __asm__ __volatile__ ("pmulth " #rd ", " #rs ", " #rt ) | |||||
| #define pmaxh(rs, rt, rd) \ | |||||
| __asm__ __volatile__ ("pmaxh " #rd ", " #rs ", " #rt ) | |||||
| #define pmaxh(rs, rt, rd) \ | |||||
| __asm__ __volatile__ ("pmaxh " #rd ", " #rs ", " #rt ) | |||||
| #define pminh(rs, rt, rd) \ | |||||
| __asm__ __volatile__ ("pminh " #rd ", " #rs ", " #rt ) | |||||
| #define pminh(rs, rt, rd) \ | |||||
| __asm__ __volatile__ ("pminh " #rd ", " #rs ", " #rt ) | |||||
| #define pinteh(rs, rt, rd) \ | |||||
| __asm__ __volatile__ ("pinteh " #rd ", " #rs ", " #rt ) | |||||
| #define pinteh(rs, rt, rd) \ | |||||
| __asm__ __volatile__ ("pinteh " #rd ", " #rs ", " #rt ) | |||||
| #define paddh(rs, rt, rd) \ | |||||
| __asm__ __volatile__ ("paddh " #rd ", " #rs ", " #rt ) | |||||
| #define paddh(rs, rt, rd) \ | |||||
| __asm__ __volatile__ ("paddh " #rd ", " #rs ", " #rt ) | |||||
| #define psubh(rs, rt, rd) \ | |||||
| __asm__ __volatile__ ("psubh " #rd ", " #rs ", " #rt ) | |||||
| #define psubh(rs, rt, rd) \ | |||||
| __asm__ __volatile__ ("psubh " #rd ", " #rs ", " #rt ) | |||||
| #define psrah(rt, sa, rd) \ | |||||
| __asm__ __volatile__ ("psrah " #rd ", " #rt ", %0" : : "i"(sa) ) | |||||
| #define psrah(rt, sa, rd) \ | |||||
| __asm__ __volatile__ ("psrah " #rd ", " #rt ", %0" : : "i"(sa) ) | |||||
| #define pmfhl_uw(rd) \ | |||||
| __asm__ __volatile__ ("pmfhl.uw " #rd) | |||||
| #define pmfhl_uw(rd) \ | |||||
| __asm__ __volatile__ ("pmfhl.uw " #rd) | |||||
| #define pextlb(rs, rt, rd) \ | |||||
| __asm__ __volatile__ ("pextlb " #rd ", " #rs ", " #rt ) | |||||
| #define pextlb(rs, rt, rd) \ | |||||
| __asm__ __volatile__ ("pextlb " #rd ", " #rs ", " #rt ) | |||||
| #endif | #endif | ||||
| @@ -41,7 +41,7 @@ static void dct_unquantize_h263_mmi(MpegEncContext *s, | |||||
| level = block[0] * s->c_dc_scale; | level = block[0] * s->c_dc_scale; | ||||
| }else { | }else { | ||||
| qadd = 0; | qadd = 0; | ||||
| level = block[0]; | |||||
| level = block[0]; | |||||
| } | } | ||||
| nCoeffs= 63; //does not always use zigzag table | nCoeffs= 63; //does not always use zigzag table | ||||
| } else { | } else { | ||||
| @@ -49,29 +49,29 @@ static void dct_unquantize_h263_mmi(MpegEncContext *s, | |||||
| } | } | ||||
| asm volatile( | asm volatile( | ||||
| "add $14, $0, %3 \n\t" | |||||
| "pcpyld $8, %0, %0 \n\t" | |||||
| "pcpyh $8, $8 \n\t" //r8 = qmul | |||||
| "pcpyld $9, %1, %1 \n\t" | |||||
| "pcpyh $9, $9 \n\t" //r9 = qadd | |||||
| "add $14, $0, %3 \n\t" | |||||
| "pcpyld $8, %0, %0 \n\t" | |||||
| "pcpyh $8, $8 \n\t" //r8 = qmul | |||||
| "pcpyld $9, %1, %1 \n\t" | |||||
| "pcpyh $9, $9 \n\t" //r9 = qadd | |||||
| ".p2align 2 \n\t" | ".p2align 2 \n\t" | ||||
| "1: \n\t" | |||||
| "lq $10, 0($14) \n\t" //r10 = level | |||||
| "addi $14, $14, 16 \n\t" //block+=8 | |||||
| "addi %2, %2, -8 \n\t" | |||||
| "pcgth $11, $0, $10 \n\t" //r11 = level < 0 ? -1 : 0 | |||||
| "pcgth $12, $10, $0 \n\t" //r12 = level > 0 ? -1 : 0 | |||||
| "por $12, $11, $12 \n\t" | |||||
| "pmulth $10, $10, $8 \n\t" | |||||
| "paddh $13, $9, $11 \n\t" | |||||
| "1: \n\t" | |||||
| "lq $10, 0($14) \n\t" //r10 = level | |||||
| "addi $14, $14, 16 \n\t" //block+=8 | |||||
| "addi %2, %2, -8 \n\t" | |||||
| "pcgth $11, $0, $10 \n\t" //r11 = level < 0 ? -1 : 0 | |||||
| "pcgth $12, $10, $0 \n\t" //r12 = level > 0 ? -1 : 0 | |||||
| "por $12, $11, $12 \n\t" | |||||
| "pmulth $10, $10, $8 \n\t" | |||||
| "paddh $13, $9, $11 \n\t" | |||||
| "pxor $13, $13, $11 \n\t" //r13 = level < 0 ? -qadd : qadd | "pxor $13, $13, $11 \n\t" //r13 = level < 0 ? -qadd : qadd | ||||
| "pmfhl.uw $11 \n\t" | |||||
| "pinteh $10, $11, $10 \n\t" //r10 = level * qmul | |||||
| "paddh $10, $10, $13 \n\t" | |||||
| "pmfhl.uw $11 \n\t" | |||||
| "pinteh $10, $11, $10 \n\t" //r10 = level * qmul | |||||
| "paddh $10, $10, $13 \n\t" | |||||
| "pand $10, $10, $12 \n\t" | "pand $10, $10, $12 \n\t" | ||||
| "sq $10, -16($14) \n\t" | |||||
| "bgez %2, 1b \n\t" | |||||
| :: "r"(qmul), "r" (qadd), "r" (nCoeffs), "r" (block) : "$8", "$9", "$10", "$11", "$12", "$13", "$14", "memory" ); | |||||
| "sq $10, -16($14) \n\t" | |||||
| "bgez %2, 1b \n\t" | |||||
| :: "r"(qmul), "r" (qadd), "r" (nCoeffs), "r" (block) : "$8", "$9", "$10", "$11", "$12", "$13", "$14", "memory" ); | |||||
| if(s->mb_intra) | if(s->mb_intra) | ||||
| block[0]= level; | block[0]= level; | ||||