Originally committed as revision 4764 to svn://svn.ffmpeg.org/ffmpeg/trunk
@@ -96,7 +96,7 @@ unknown_opt:
if(po->u.func2_arg(opt+1, arg)<0)
goto unknown_opt;
} else {
po->u.func_arg(arg);
po->u.func_arg(arg);
}
} else {
parse_arg_file(opt);
@@ -122,8 +122,8 @@ void print_error(const char *filename, int err)
break;
case AVERROR_IO:
fprintf(stderr, "%s: I/O error occured\n"
"Usually that means that input file is truncated and/or corrupted.\n",
filename);
"Usually that means that input file is truncated and/or corrupted.\n",
filename);
break;
case AVERROR_NOMEM:
fprintf(stderr, "%s: memory allocation error occured\n", filename);
@@ -688,26 +688,26 @@ fi
needmdynamicnopic="no"
if test $targetos = Darwin; then
if test -n "`$cc -v 2>&1 | grep xlc`"; then
CFLAGS="$CFLAGS -qpdf2 -qlanglvl=extc99 -qmaxmem=-1 -qarch=auto -qtune=auto"
CFLAGS="$CFLAGS -qpdf2 -qlanglvl=extc99 -qmaxmem=-1 -qarch=auto -qtune=auto"
else
gcc_version="`$cc -v 2>&1 | grep version | cut -d ' ' -f3-`"
case "$gcc_version" in
*2.95*)
CFLAGS="$CFLAGS -no-cpp-precomp -pipe -fomit-frame-pointer"
;;
*[34].*)
CFLAGS="$CFLAGS -no-cpp-precomp -pipe -fomit-frame-pointer -force_cpusubtype_ALL -Wno-sign-compare"
if test "$lshared" = no; then
needmdynamicnopic="yes"
fi
;;
*)
CFLAGS="$CFLAGS -no-cpp-precomp -pipe -fomit-frame-pointer"
if test "$lshared" = no; then
needmdynamicnopic="yes"
fi
;;
esac
gcc_version="`$cc -v 2>&1 | grep version | cut -d ' ' -f3-`"
case "$gcc_version" in
*2.95*)
CFLAGS="$CFLAGS -no-cpp-precomp -pipe -fomit-frame-pointer"
;;
*[34].*)
CFLAGS="$CFLAGS -no-cpp-precomp -pipe -fomit-frame-pointer -force_cpusubtype_ALL -Wno-sign-compare"
if test "$lshared" = no; then
needmdynamicnopic="yes"
fi
;;
*)
CFLAGS="$CFLAGS -no-cpp-precomp -pipe -fomit-frame-pointer"
if test "$lshared" = no; then
needmdynamicnopic="yes"
fi
;;
esac
fi
fi
@@ -725,62 +725,62 @@ TUNECPU="generic"
POWERPCMODE="32bits"
if test $tune != "generic"; then
case $tune in
601|ppc601|PowerPC601)
CFLAGS="$CFLAGS -mcpu=601"
if test $altivec = "yes"; then
echo "WARNING: Tuning for PPC601 but AltiVec enabled!";
fi
TUNECPU=ppc601
;;
603*|ppc603*|PowerPC603*)
CFLAGS="$CFLAGS -mcpu=603"
if test $altivec = "yes"; then
echo "WARNING: Tuning for PPC603 but AltiVec enabled!";
fi
TUNECPU=ppc603
;;
604*|ppc604*|PowerPC604*)
CFLAGS="$CFLAGS -mcpu=604"
if test $altivec = "yes"; then
echo "WARNING: Tuning for PPC604 but AltiVec enabled!";
fi
TUNECPU=ppc604
;;
G3|g3|75*|ppc75*|PowerPC75*)
CFLAGS="$CFLAGS -mcpu=750 -mtune=750 -mpowerpc-gfxopt"
if test $altivec = "yes"; then
echo "WARNING: Tuning for PPC75x but AltiVec enabled!";
fi
TUNECPU=ppc750
;;
G4|g4|745*|ppc745*|PowerPC745*)
CFLAGS="$CFLAGS -mcpu=7450 -mtune=7450 -mpowerpc-gfxopt"
if test $altivec = "no"; then
echo "WARNING: Tuning for PPC745x but AltiVec disabled!";
fi
TUNECPU=ppc7450
;;
74*|ppc74*|PowerPC74*)
CFLAGS="$CFLAGS -mcpu=7400 -mtune=7400 -mpowerpc-gfxopt"
if test $altivec = "no"; then
echo "WARNING: Tuning for PPC74xx but AltiVec disabled!";
fi
TUNECPU=ppc7400
;;
G5|g5|970|ppc970|PowerPC970|power4*|Power4*)
CFLAGS="$CFLAGS -mcpu=970 -mtune=970 -mpowerpc-gfxopt -mpowerpc64"
if test $altivec = "no"; then
echo "WARNING: Tuning for PPC970 but AltiVec disabled!";
fi
TUNECPU=ppc970
601|ppc601|PowerPC601)
CFLAGS="$CFLAGS -mcpu=601"
if test $altivec = "yes"; then
echo "WARNING: Tuning for PPC601 but AltiVec enabled!";
fi
TUNECPU=ppc601
;;
603*|ppc603*|PowerPC603*)
CFLAGS="$CFLAGS -mcpu=603"
if test $altivec = "yes"; then
echo "WARNING: Tuning for PPC603 but AltiVec enabled!";
fi
TUNECPU=ppc603
;;
604*|ppc604*|PowerPC604*)
CFLAGS="$CFLAGS -mcpu=604"
if test $altivec = "yes"; then
echo "WARNING: Tuning for PPC604 but AltiVec enabled!";
fi
TUNECPU=ppc604
;;
G3|g3|75*|ppc75*|PowerPC75*)
CFLAGS="$CFLAGS -mcpu=750 -mtune=750 -mpowerpc-gfxopt"
if test $altivec = "yes"; then
echo "WARNING: Tuning for PPC75x but AltiVec enabled!";
fi
TUNECPU=ppc750
;;
G4|g4|745*|ppc745*|PowerPC745*)
CFLAGS="$CFLAGS -mcpu=7450 -mtune=7450 -mpowerpc-gfxopt"
if test $altivec = "no"; then
echo "WARNING: Tuning for PPC745x but AltiVec disabled!";
fi
TUNECPU=ppc7450
;;
74*|ppc74*|PowerPC74*)
CFLAGS="$CFLAGS -mcpu=7400 -mtune=7400 -mpowerpc-gfxopt"
if test $altivec = "no"; then
echo "WARNING: Tuning for PPC74xx but AltiVec disabled!";
fi
TUNECPU=ppc7400
;;
G5|g5|970|ppc970|PowerPC970|power4*|Power4*)
CFLAGS="$CFLAGS -mcpu=970 -mtune=970 -mpowerpc-gfxopt -mpowerpc64"
if test $altivec = "no"; then
echo "WARNING: Tuning for PPC970 but AltiVec disabled!";
fi
TUNECPU=ppc970
POWERPCMODE="64bits"
;;
i[3456]86|pentium|pentiumpro|pentium-mmx|pentium[234]|prescott|k6|k6-[23]|athlon|athlon-tbird|athlon-4|athlon-[mx]p|winchip-c6|winchip2|c3|nocona|athlon64|k8|opteron|athlon-fx)
CFLAGS="$CFLAGS -march=$tune"
;;
*)
echo "WARNING: Unknown CPU \"$tune\", ignored."
;;
;;
i[3456]86|pentium|pentiumpro|pentium-mmx|pentium[234]|prescott|k6|k6-[23]|athlon|athlon-tbird|athlon-4|athlon-[mx]p|winchip-c6|winchip2|c3|nocona|athlon64|k8|opteron|athlon-fx)
CFLAGS="$CFLAGS -march=$tune"
;;
*)
echo "WARNING: Unknown CPU \"$tune\", ignored."
;;
esac
fi
@@ -876,8 +876,8 @@ if test -z "$cross_prefix" ; then
cat > $TMPC << EOF
#include <inttypes.h>
int main(int argc, char ** argv){
volatile uint32_t i=0x01234567;
return (*((uint8_t*)(&i))) == 0x67;
volatile uint32_t i=0x01234567;
return (*((uint8_t*)(&i))) == 0x67;
}
EOF
@@ -912,8 +912,8 @@ $cc -o $TMPE $TMPC 2>/dev/null || inttypes="no"
cat > $TMPC << EOF
#include <inttypes.h>
int main(int argc, char ** argv){
volatile uint_fast64_t i=0x01234567;
return 0;
volatile uint_fast64_t i=0x01234567;
return 0;
}
EOF
@@ -1152,10 +1152,10 @@ fi
case "`$cc -v 2>&1 | grep version`" in
*gcc*)
CFLAGS="-Wall -Wno-switch $CFLAGS"
;;
CFLAGS="-Wall -Wno-switch $CFLAGS"
;;
*)
;;
;;
esac
if test "$sdl" = "no" ; then
@@ -1163,7 +1163,7 @@ if test "$sdl" = "no" ; then
fi
if test "$debug" = "yes"; then
CFLAGS="-g $CFLAGS"
CFLAGS="-g $CFLAGS"
fi
if test "$optimize" = "small"; then
@@ -1173,10 +1173,10 @@ fi
if test "$optimize" = "yes"; then
if test -n "`$cc -v 2>&1 | grep xlc`"; then
CFLAGS="$CFLAGS -O5"
LDFLAGS="$LDFLAGS -O5"
CFLAGS="$CFLAGS -O5"
LDFLAGS="$LDFLAGS -O5"
else
CFLAGS="-O3 $CFLAGS"
CFLAGS="-O3 $CFLAGS"
fi
fi
@@ -1793,9 +1793,9 @@ done
diff $TMPH config.h >/dev/null 2>&1
if test $? -ne 0 ; then
mv -f $TMPH config.h
mv -f $TMPH config.h
else
echo "config.h is unchanged"
echo "config.h is unchanged"
fi
rm -f $TMPO $TMPC $TMPE $TMPS $TMPH
| @@ -25,37 +25,37 @@ main(int argc, char *argv[]) | |||
| if (argc < 3) | |||
| { | |||
| printf("Usage: %s <infile.swf> <outfile.swf>\n", argv[0]); | |||
| exit(1); | |||
| printf("Usage: %s <infile.swf> <outfile.swf>\n", argv[0]); | |||
| exit(1); | |||
| } | |||
| fd_in = open(argv[1], O_RDONLY); | |||
| if (fd_in < 0) | |||
| { | |||
| perror("Error while opening: "); | |||
| exit(1); | |||
| perror("Error while opening: "); | |||
| exit(1); | |||
| } | |||
| fd_out = open(argv[2], O_WRONLY|O_CREAT, 00644); | |||
| if (fd_out < 0) | |||
| { | |||
| perror("Error while opening: "); | |||
| close(fd_in); | |||
| exit(1); | |||
| perror("Error while opening: "); | |||
| close(fd_in); | |||
| exit(1); | |||
| } | |||
| if (read(fd_in, &buf_in, 8) != 8) | |||
| { | |||
| printf("Header error\n"); | |||
| close(fd_in); | |||
| close(fd_out); | |||
| exit(1); | |||
| printf("Header error\n"); | |||
| close(fd_in); | |||
| close(fd_out); | |||
| exit(1); | |||
| } | |||
| if (buf_in[0] != 'C' || buf_in[1] != 'W' || buf_in[2] != 'S') | |||
| { | |||
| printf("Not a compressed flash file\n"); | |||
| exit(1); | |||
| printf("Not a compressed flash file\n"); | |||
| exit(1); | |||
| } | |||
| fstat(fd_in, &statbuf); | |||
| @@ -75,48 +75,48 @@ main(int argc, char *argv[]) | |||
| for (i = 0; i < comp_len-4;) | |||
| { | |||
| int ret, len = read(fd_in, &buf_in, 1024); | |||
| int ret, len = read(fd_in, &buf_in, 1024); | |||
| dbgprintf("read %d bytes\n", len); | |||
| dbgprintf("read %d bytes\n", len); | |||
| last_out = zstream.total_out; | |||
| last_out = zstream.total_out; | |||
| zstream.next_in = &buf_in[0]; | |||
| zstream.avail_in = len; | |||
| zstream.next_out = &buf_out[0]; | |||
| zstream.avail_out = 1024; | |||
| zstream.next_in = &buf_in[0]; | |||
| zstream.avail_in = len; | |||
| zstream.next_out = &buf_out[0]; | |||
| zstream.avail_out = 1024; | |||
| ret = inflate(&zstream, Z_SYNC_FLUSH); | |||
| if (ret == Z_STREAM_END || ret == Z_BUF_ERROR) | |||
| break; | |||
| if (ret != Z_OK) | |||
| { | |||
| printf("Error while decompressing: %d\n", ret); | |||
| inflateEnd(&zstream); | |||
| exit(1); | |||
| } | |||
| ret = inflate(&zstream, Z_SYNC_FLUSH); | |||
| if (ret == Z_STREAM_END || ret == Z_BUF_ERROR) | |||
| break; | |||
| if (ret != Z_OK) | |||
| { | |||
| printf("Error while decompressing: %d\n", ret); | |||
| inflateEnd(&zstream); | |||
| exit(1); | |||
| } | |||
| dbgprintf("a_in: %d t_in: %d a_out: %d t_out: %d -- %d out\n", | |||
| zstream.avail_in, zstream.total_in, zstream.avail_out, zstream.total_out, | |||
| zstream.total_out-last_out); | |||
| dbgprintf("a_in: %d t_in: %d a_out: %d t_out: %d -- %d out\n", | |||
| zstream.avail_in, zstream.total_in, zstream.avail_out, zstream.total_out, | |||
| zstream.total_out-last_out); | |||
| write(fd_out, &buf_out, zstream.total_out-last_out); | |||
| write(fd_out, &buf_out, zstream.total_out-last_out); | |||
| i += len; | |||
| i += len; | |||
| } | |||
| if (zstream.total_out != uncomp_len-8) | |||
| { | |||
| printf("Size mismatch (%d != %d), updating header...\n", | |||
| zstream.total_out, uncomp_len-8); | |||
| printf("Size mismatch (%d != %d), updating header...\n", | |||
| zstream.total_out, uncomp_len-8); | |||
| buf_in[0] = (zstream.total_out+8) & 0xff; | |||
| buf_in[1] = (zstream.total_out+8 >> 8) & 0xff; | |||
| buf_in[2] = (zstream.total_out+8 >> 16) & 0xff; | |||
| buf_in[3] = (zstream.total_out+8 >> 24) & 0xff; | |||
| buf_in[0] = (zstream.total_out+8) & 0xff; | |||
| buf_in[1] = (zstream.total_out+8 >> 8) & 0xff; | |||
| buf_in[2] = (zstream.total_out+8 >> 16) & 0xff; | |||
| buf_in[3] = (zstream.total_out+8 >> 24) & 0xff; | |||
| lseek(fd_out, 4, SEEK_SET); | |||
| write(fd_out, &buf_in, 4); | |||
| lseek(fd_out, 4, SEEK_SET); | |||
| write(fd_out, &buf_in, 4); | |||
| } | |||
| inflateEnd(&zstream); | |||
| @@ -39,24 +39,24 @@ $ibase = ""; | |||
| while ($_ = shift) { | |||
| if (/^-D(.*)$/) { | |||
| if ($1 ne "") { | |||
| $flag = $1; | |||
| } else { | |||
| $flag = shift; | |||
| } | |||
| $value = ""; | |||
| ($flag, $value) = ($flag =~ /^([^=]+)(?:=(.+))?/); | |||
| die "no flag specified for -D\n" | |||
| unless $flag ne ""; | |||
| die "flags may only contain letters, digits, hyphens, dashes and underscores\n" | |||
| unless $flag =~ /^[a-zA-Z0-9_-]+$/; | |||
| $defs{$flag} = $value; | |||
| if ($1 ne "") { | |||
| $flag = $1; | |||
| } else { | |||
| $flag = shift; | |||
| } | |||
| $value = ""; | |||
| ($flag, $value) = ($flag =~ /^([^=]+)(?:=(.+))?/); | |||
| die "no flag specified for -D\n" | |||
| unless $flag ne ""; | |||
| die "flags may only contain letters, digits, hyphens, dashes and underscores\n" | |||
| unless $flag =~ /^[a-zA-Z0-9_-]+$/; | |||
| $defs{$flag} = $value; | |||
| } elsif (/^-/) { | |||
| usage(); | |||
| usage(); | |||
| } else { | |||
| $in = $_, next unless defined $in; | |||
| $out = $_, next unless defined $out; | |||
| usage(); | |||
| $in = $_, next unless defined $in; | |||
| $out = $_, next unless defined $out; | |||
| usage(); | |||
| } | |||
| } | |||
| @@ -76,13 +76,13 @@ while(defined $inf) { | |||
| while(<$inf>) { | |||
| # Certain commands are discarded without further processing. | |||
| /^\@(?: | |||
| [a-z]+index # @*index: useful only in complete manual | |||
| |need # @need: useful only in printed manual | |||
| |(?:end\s+)?group # @group .. @end group: ditto | |||
| |page # @page: ditto | |||
| |node # @node: useful only in .info file | |||
| |(?:end\s+)?ifnottex # @ifnottex .. @end ifnottex: use contents | |||
| )\b/x and next; | |||
| [a-z]+index # @*index: useful only in complete manual | |||
| |need # @need: useful only in printed manual | |||
| |(?:end\s+)?group # @group .. @end group: ditto | |||
| |page # @page: ditto | |||
| |node # @node: useful only in .info file | |||
| |(?:end\s+)?ifnottex # @ifnottex .. @end ifnottex: use contents | |||
| )\b/x and next; | |||
| chomp; | |||
| @@ -92,38 +92,38 @@ while(<$inf>) { | |||
| # Identify a man title but keep only the one we are interested in. | |||
| /^\@c\s+man\s+title\s+([A-Za-z0-9-]+)\s+(.+)/ and do { | |||
| if (exists $defs{$1}) { | |||
| $fn = $1; | |||
| $tl = postprocess($2); | |||
| } | |||
| next; | |||
| if (exists $defs{$1}) { | |||
| $fn = $1; | |||
| $tl = postprocess($2); | |||
| } | |||
| next; | |||
| }; | |||
| # Look for blocks surrounded by @c man begin SECTION ... @c man end. | |||
| # This really oughta be @ifman ... @end ifman and the like, but such | |||
| # would require rev'ing all other Texinfo translators. | |||
| /^\@c\s+man\s+begin\s+([A-Z]+)\s+([A-Za-z0-9-]+)/ and do { | |||
| $output = 1 if exists $defs{$2}; | |||
| $output = 1 if exists $defs{$2}; | |||
| $sect = $1; | |||
| next; | |||
| next; | |||
| }; | |||
| /^\@c\s+man\s+begin\s+([A-Z]+)/ and $sect = $1, $output = 1, next; | |||
| /^\@c\s+man\s+end/ and do { | |||
| $sects{$sect} = "" unless exists $sects{$sect}; | |||
| $sects{$sect} .= postprocess($section); | |||
| $section = ""; | |||
| $output = 0; | |||
| next; | |||
| $sects{$sect} = "" unless exists $sects{$sect}; | |||
| $sects{$sect} .= postprocess($section); | |||
| $section = ""; | |||
| $output = 0; | |||
| next; | |||
| }; | |||
| # handle variables | |||
| /^\@set\s+([a-zA-Z0-9_-]+)\s*(.*)$/ and do { | |||
| $defs{$1} = $2; | |||
| next; | |||
| $defs{$1} = $2; | |||
| next; | |||
| }; | |||
| /^\@clear\s+([a-zA-Z0-9_-]+)/ and do { | |||
| delete $defs{$1}; | |||
| next; | |||
| delete $defs{$1}; | |||
| next; | |||
| }; | |||
| next unless $output; | |||
| @@ -135,55 +135,55 @@ while(<$inf>) { | |||
| # End-block handler goes up here because it needs to operate even | |||
| # if we are skipping. | |||
| /^\@end\s+([a-z]+)/ and do { | |||
| # Ignore @end foo, where foo is not an operation which may | |||
| # cause us to skip, if we are presently skipping. | |||
| my $ended = $1; | |||
| next if $skipping && $ended !~ /^(?:ifset|ifclear|ignore|menu|iftex)$/; | |||
| die "\@end $ended without \@$ended at line $.\n" unless defined $endw; | |||
| die "\@$endw ended by \@end $ended at line $.\n" unless $ended eq $endw; | |||
| $endw = pop @endwstack; | |||
| if ($ended =~ /^(?:ifset|ifclear|ignore|menu|iftex)$/) { | |||
| $skipping = pop @skstack; | |||
| next; | |||
| } elsif ($ended =~ /^(?:example|smallexample|display)$/) { | |||
| $shift = ""; | |||
| $_ = ""; # need a paragraph break | |||
| } elsif ($ended =~ /^(?:itemize|enumerate|[fv]?table)$/) { | |||
| $_ = "\n=back\n"; | |||
| $ic = pop @icstack; | |||
| } else { | |||
| die "unknown command \@end $ended at line $.\n"; | |||
| } | |||
| # Ignore @end foo, where foo is not an operation which may | |||
| # cause us to skip, if we are presently skipping. | |||
| my $ended = $1; | |||
| next if $skipping && $ended !~ /^(?:ifset|ifclear|ignore|menu|iftex)$/; | |||
| die "\@end $ended without \@$ended at line $.\n" unless defined $endw; | |||
| die "\@$endw ended by \@end $ended at line $.\n" unless $ended eq $endw; | |||
| $endw = pop @endwstack; | |||
| if ($ended =~ /^(?:ifset|ifclear|ignore|menu|iftex)$/) { | |||
| $skipping = pop @skstack; | |||
| next; | |||
| } elsif ($ended =~ /^(?:example|smallexample|display)$/) { | |||
| $shift = ""; | |||
| $_ = ""; # need a paragraph break | |||
| } elsif ($ended =~ /^(?:itemize|enumerate|[fv]?table)$/) { | |||
| $_ = "\n=back\n"; | |||
| $ic = pop @icstack; | |||
| } else { | |||
| die "unknown command \@end $ended at line $.\n"; | |||
| } | |||
| }; | |||
| # We must handle commands which can cause skipping even while we | |||
| # are skipping, otherwise we will not process nested conditionals | |||
| # correctly. | |||
| /^\@ifset\s+([a-zA-Z0-9_-]+)/ and do { | |||
| push @endwstack, $endw; | |||
| push @skstack, $skipping; | |||
| $endw = "ifset"; | |||
| $skipping = 1 unless exists $defs{$1}; | |||
| next; | |||
| push @endwstack, $endw; | |||
| push @skstack, $skipping; | |||
| $endw = "ifset"; | |||
| $skipping = 1 unless exists $defs{$1}; | |||
| next; | |||
| }; | |||
| /^\@ifclear\s+([a-zA-Z0-9_-]+)/ and do { | |||
| push @endwstack, $endw; | |||
| push @skstack, $skipping; | |||
| $endw = "ifclear"; | |||
| $skipping = 1 if exists $defs{$1}; | |||
| next; | |||
| push @endwstack, $endw; | |||
| push @skstack, $skipping; | |||
| $endw = "ifclear"; | |||
| $skipping = 1 if exists $defs{$1}; | |||
| next; | |||
| }; | |||
| /^\@(ignore|menu|iftex)\b/ and do { | |||
| push @endwstack, $endw; | |||
| push @skstack, $skipping; | |||
| $endw = $1; | |||
| $skipping = 1; | |||
| next; | |||
| push @endwstack, $endw; | |||
| push @skstack, $skipping; | |||
| $endw = $1; | |||
| $skipping = 1; | |||
| next; | |||
| }; | |||
| next if $skipping; | |||
| @@ -210,85 +210,85 @@ while(<$inf>) { | |||
# Inside a verbatim block, handle @var specially.
if ($shift ne "") {
s/\@var\{([^\}]*)\}/<$1>/g;
s/\@var\{([^\}]*)\}/<$1>/g;
}
# POD doesn't interpret E<> inside a verbatim block.
if ($shift eq "") {
s/</&lt;/g;
s/>/&gt;/g;
s/</&lt;/g;
s/>/&gt;/g;
} else {
s/</&LT;/g;
s/>/&GT;/g;
s/</&LT;/g;
s/>/&GT;/g;
}
| # Single line command handlers. | |||
| /^\@include\s+(.+)$/ and do { | |||
| push @instack, $inf; | |||
| $inf = gensym(); | |||
| # Try cwd and $ibase. | |||
| open($inf, "<" . $1) | |||
| or open($inf, "<" . $ibase . "/" . $1) | |||
| or die "cannot open $1 or $ibase/$1: $!\n"; | |||
| next; | |||
| push @instack, $inf; | |||
| $inf = gensym(); | |||
| # Try cwd and $ibase. | |||
| open($inf, "<" . $1) | |||
| or open($inf, "<" . $ibase . "/" . $1) | |||
| or die "cannot open $1 or $ibase/$1: $!\n"; | |||
| next; | |||
| }; | |||
| /^\@(?:section|unnumbered|unnumberedsec|center)\s+(.+)$/ | |||
| and $_ = "\n=head2 $1\n"; | |||
| and $_ = "\n=head2 $1\n"; | |||
| /^\@subsection\s+(.+)$/ | |||
| and $_ = "\n=head3 $1\n"; | |||
| and $_ = "\n=head3 $1\n"; | |||
| # Block command handlers: | |||
| /^\@itemize\s+(\@[a-z]+|\*|-)/ and do { | |||
| push @endwstack, $endw; | |||
| push @icstack, $ic; | |||
| $ic = $1; | |||
| $_ = "\n=over 4\n"; | |||
| $endw = "itemize"; | |||
| push @endwstack, $endw; | |||
| push @icstack, $ic; | |||
| $ic = $1; | |||
| $_ = "\n=over 4\n"; | |||
| $endw = "itemize"; | |||
| }; | |||
| /^\@enumerate(?:\s+([a-zA-Z0-9]+))?/ and do { | |||
| push @endwstack, $endw; | |||
| push @icstack, $ic; | |||
| if (defined $1) { | |||
| $ic = $1 . "."; | |||
| } else { | |||
| $ic = "1."; | |||
| } | |||
| $_ = "\n=over 4\n"; | |||
| $endw = "enumerate"; | |||
| push @endwstack, $endw; | |||
| push @icstack, $ic; | |||
| if (defined $1) { | |||
| $ic = $1 . "."; | |||
| } else { | |||
| $ic = "1."; | |||
| } | |||
| $_ = "\n=over 4\n"; | |||
| $endw = "enumerate"; | |||
| }; | |||
| /^\@([fv]?table)\s+(\@[a-z]+)/ and do { | |||
| push @endwstack, $endw; | |||
| push @icstack, $ic; | |||
| $endw = $1; | |||
| $ic = $2; | |||
| $ic =~ s/\@(?:samp|strong|key|gcctabopt|option|env)/B/; | |||
| $ic =~ s/\@(?:code|kbd)/C/; | |||
| $ic =~ s/\@(?:dfn|var|emph|cite|i)/I/; | |||
| $ic =~ s/\@(?:file)/F/; | |||
| $_ = "\n=over 4\n"; | |||
| push @endwstack, $endw; | |||
| push @icstack, $ic; | |||
| $endw = $1; | |||
| $ic = $2; | |||
| $ic =~ s/\@(?:samp|strong|key|gcctabopt|option|env)/B/; | |||
| $ic =~ s/\@(?:code|kbd)/C/; | |||
| $ic =~ s/\@(?:dfn|var|emph|cite|i)/I/; | |||
| $ic =~ s/\@(?:file)/F/; | |||
| $_ = "\n=over 4\n"; | |||
| }; | |||
| /^\@((?:small)?example|display)/ and do { | |||
| push @endwstack, $endw; | |||
| $endw = $1; | |||
| $shift = "\t"; | |||
| $_ = ""; # need a paragraph break | |||
| push @endwstack, $endw; | |||
| $endw = $1; | |||
| $shift = "\t"; | |||
| $_ = ""; # need a paragraph break | |||
| }; | |||
/^\@itemx?\s*(.+)?$/ and do {
if (defined $1) {
# Entity escapes prevent munging by the <> processing below.
$_ = "\n=item $ic\&LT;$1\&GT;\n";
} else {
$_ = "\n=item $ic\n";
$ic =~ y/A-Ya-y/B-Zb-z/;
$ic =~ s/(\d+)/$1 + 1/eg;
}
if (defined $1) {
# Entity escapes prevent munging by the <> processing below.
$_ = "\n=item $ic\&LT;$1\&GT;\n";
} else {
$_ = "\n=item $ic\n";
$ic =~ y/A-Ya-y/B-Zb-z/;
$ic =~ s/(\d+)/$1 + 1/eg;
}
};
| $section .= $shift.$_."\n"; | |||
| @@ -304,13 +304,13 @@ $sects{NAME} = "$fn \- $tl\n"; | |||
| $sects{FOOTNOTES} .= "=back\n" if exists $sects{FOOTNOTES}; | |||
| for $sect (qw(NAME SYNOPSIS DESCRIPTION OPTIONS EXAMPLES ENVIRONMENT FILES | |||
| BUGS NOTES FOOTNOTES SEEALSO AUTHOR COPYRIGHT)) { | |||
| BUGS NOTES FOOTNOTES SEEALSO AUTHOR COPYRIGHT)) { | |||
| if(exists $sects{$sect}) { | |||
| $head = $sect; | |||
| $head =~ s/SEEALSO/SEE ALSO/; | |||
| print "=head1 $head\n\n"; | |||
| print scalar unmunge ($sects{$sect}); | |||
| print "\n"; | |||
| $head = $sect; | |||
| $head =~ s/SEEALSO/SEE ALSO/; | |||
| print "=head1 $head\n\n"; | |||
| print scalar unmunge ($sects{$sect}); | |||
| print "\n"; | |||
| } | |||
| } | |||
| @@ -325,13 +325,13 @@ sub postprocess | |||
| # @value{foo} is replaced by whatever 'foo' is defined as. | |||
| while (m/(\@value\{([a-zA-Z0-9_-]+)\})/g) { | |||
| if (! exists $defs{$2}) { | |||
| print STDERR "Option $2 not defined\n"; | |||
| s/\Q$1\E//; | |||
| } else { | |||
| $value = $defs{$2}; | |||
| s/\Q$1\E/$value/; | |||
| } | |||
| if (! exists $defs{$2}) { | |||
| print STDERR "Option $2 not defined\n"; | |||
| s/\Q$1\E//; | |||
| } else { | |||
| $value = $defs{$2}; | |||
| s/\Q$1\E/$value/; | |||
| } | |||
| } | |||
| # Formatting commands. | |||
| @@ -381,9 +381,9 @@ sub postprocess | |||
| # processing because otherwise the regexp will choke on formatting | |||
| # inside @footnote. | |||
| while (/\@footnote/g) { | |||
| s/\@footnote\{([^\}]+)\}/[$fnno]/; | |||
| add_footnote($1, $fnno); | |||
| $fnno++; | |||
| s/\@footnote\{([^\}]+)\}/[$fnno]/; | |||
| add_footnote($1, $fnno); | |||
| $fnno++; | |||
| } | |||
| return $_; | |||
| @@ -406,7 +406,7 @@ sub unmunge | |||
| sub add_footnote | |||
| { | |||
| unless (exists $sects{FOOTNOTES}) { | |||
| $sects{FOOTNOTES} = "\n=over 4\n\n"; | |||
| $sects{FOOTNOTES} = "\n=over 4\n\n"; | |||
| } | |||
| $sects{FOOTNOTES} .= "=item $fnno.\n\n"; $fnno++; | |||
| @@ -419,9 +419,9 @@ sub add_footnote | |||
| my $genseq = 0; | |||
| sub gensym | |||
| { | |||
| my $name = "GEN" . $genseq++; | |||
| my $ref = \*{$name}; | |||
| delete $::{$name}; | |||
| return $ref; | |||
| my $name = "GEN" . $genseq++; | |||
| my $ref = \*{$name}; | |||
| delete $::{$name}; | |||
| return $ref; | |||
| } | |||
| } | |||
| @@ -579,7 +579,7 @@ static void do_audio_out(AVFormatContext *s, | |||
| break; | |||
| } | |||
| ret = avcodec_encode_audio(enc, audio_out, size_out, | |||
| (short *)buftmp); | |||
| (short *)buftmp); | |||
| audio_size += ret; | |||
| pkt.stream_index= ost->index; | |||
| pkt.data= audio_out; | |||
| @@ -821,10 +821,10 @@ static void do_video_out(AVFormatContext *s, | |||
| padcolor); | |||
| } | |||
| if (enc->pix_fmt != PIX_FMT_YUV420P) { | |||
| if (enc->pix_fmt != PIX_FMT_YUV420P) { | |||
| int size; | |||
| av_free(buf); | |||
| av_free(buf); | |||
| /* create temporary picture */ | |||
| size = avpicture_get_size(enc->pix_fmt, enc->width, enc->height); | |||
| buf = av_malloc(size); | |||
| @@ -842,7 +842,7 @@ static void do_video_out(AVFormatContext *s, | |||
| goto the_end; | |||
| } | |||
| } | |||
| } | |||
| } else if (ost->video_crop) { | |||
| picture_crop_temp.data[0] = formatted_picture->data[0] + | |||
| (ost->topBand * formatted_picture->linesize[0]) + ost->leftBand; | |||
| @@ -921,7 +921,7 @@ static void do_video_out(AVFormatContext *s, | |||
| avoid any copies. We support temorarily the older | |||
| method. */ | |||
| AVFrame* old_frame = enc->coded_frame; | |||
| enc->coded_frame = dec->coded_frame; //FIXME/XXX remove this hack | |||
| enc->coded_frame = dec->coded_frame; //FIXME/XXX remove this hack | |||
| pkt.data= (uint8_t *)final_picture; | |||
| pkt.size= sizeof(AVPicture); | |||
| if(dec->coded_frame && enc->coded_frame->pts != AV_NOPTS_VALUE) | |||
| @@ -930,7 +930,7 @@ static void do_video_out(AVFormatContext *s, | |||
| pkt.flags |= PKT_FLAG_KEY; | |||
| av_interleaved_write_frame(s, &pkt); | |||
| enc->coded_frame = old_frame; | |||
| enc->coded_frame = old_frame; | |||
| } else { | |||
| AVFrame big_picture; | |||
| @@ -1044,8 +1044,8 @@ static void do_video_stats(AVFormatContext *os, AVOutputStream *ost, | |||
| } | |||
| static void print_report(AVFormatContext **output_files, | |||
| AVOutputStream **ost_table, int nb_ostreams, | |||
| int is_last_report) | |||
| AVOutputStream **ost_table, int nb_ostreams, | |||
| int is_last_report) | |||
| { | |||
| char buf[1024]; | |||
| AVOutputStream *ost; | |||
| @@ -1138,9 +1138,9 @@ static void print_report(AVFormatContext **output_files, | |||
| "size=%8.0fkB time=%0.1f bitrate=%6.1fkbits/s", | |||
| (double)total_size / 1024, ti1, bitrate); | |||
| if (verbose > 1) | |||
| snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), " dup=%d drop=%d", | |||
| nb_frames_dup, nb_frames_drop); | |||
| if (verbose > 1) | |||
| snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), " dup=%d drop=%d", | |||
| nb_frames_dup, nb_frames_drop); | |||
| if (verbose >= 0) | |||
| fprintf(stderr, "%s \r", buf); | |||
| @@ -1323,7 +1323,7 @@ static int output_packet(AVInputStream *ist, int ist_index, | |||
| } | |||
| #endif | |||
| /* if output time reached then transcode raw format, | |||
| encode packets and output them */ | |||
| encode packets and output them */ | |||
| if (start_time == 0 || ist->pts >= start_time) | |||
| for(i=0;i<nb_ostreams;i++) { | |||
| int frame_size; | |||
| @@ -1898,7 +1898,7 @@ static int av_encode(AVFormatContext **output_files, | |||
| /* init pts */ | |||
| for(i=0;i<nb_istreams;i++) { | |||
| ist = ist_table[i]; | |||
| is = input_files[ist->file_index]; | |||
| is = input_files[ist->file_index]; | |||
| ist->pts = 0; | |||
| ist->next_pts = av_rescale_q(ist->st->start_time, ist->st->time_base, AV_TIME_BASE_Q); | |||
| if(ist->st->start_time == AV_NOPTS_VALUE) | |||
| @@ -2273,7 +2273,7 @@ static void opt_frame_rate(const char *arg) | |||
| { | |||
| if (parse_frame_rate(&frame_rate, &frame_rate_base, arg) < 0) { | |||
| fprintf(stderr, "Incorrect frame rate\n"); | |||
| exit(1); | |||
| exit(1); | |||
| } | |||
| } | |||
| @@ -2289,7 +2289,7 @@ static void opt_frame_crop_top(const char *arg) | |||
| exit(1); | |||
| } | |||
| if ((frame_topBand) >= frame_height){ | |||
| fprintf(stderr, "Vertical crop dimensions are outside the range of the original image.\nRemember to crop first and scale second.\n"); | |||
| fprintf(stderr, "Vertical crop dimensions are outside the range of the original image.\nRemember to crop first and scale second.\n"); | |||
| exit(1); | |||
| } | |||
| frame_height -= frame_topBand; | |||
| @@ -2307,7 +2307,7 @@ static void opt_frame_crop_bottom(const char *arg) | |||
| exit(1); | |||
| } | |||
| if ((frame_bottomBand) >= frame_height){ | |||
| fprintf(stderr, "Vertical crop dimensions are outside the range of the original image.\nRemember to crop first and scale second.\n"); | |||
| fprintf(stderr, "Vertical crop dimensions are outside the range of the original image.\nRemember to crop first and scale second.\n"); | |||
| exit(1); | |||
| } | |||
| frame_height -= frame_bottomBand; | |||
| @@ -2325,7 +2325,7 @@ static void opt_frame_crop_left(const char *arg) | |||
| exit(1); | |||
| } | |||
| if ((frame_leftBand) >= frame_width){ | |||
| fprintf(stderr, "Horizontal crop dimensions are outside the range of the original image.\nRemember to crop first and scale second.\n"); | |||
| fprintf(stderr, "Horizontal crop dimensions are outside the range of the original image.\nRemember to crop first and scale second.\n"); | |||
| exit(1); | |||
| } | |||
| frame_width -= frame_leftBand; | |||
| @@ -2343,7 +2343,7 @@ static void opt_frame_crop_right(const char *arg) | |||
| exit(1); | |||
| } | |||
| if ((frame_rightBand) >= frame_width){ | |||
| fprintf(stderr, "Horizontal crop dimensions are outside the range of the original image.\nRemember to crop first and scale second.\n"); | |||
| fprintf(stderr, "Horizontal crop dimensions are outside the range of the original image.\nRemember to crop first and scale second.\n"); | |||
| exit(1); | |||
| } | |||
| frame_width -= frame_rightBand; | |||
| @@ -2364,7 +2364,7 @@ static void opt_frame_size(const char *arg) | |||
| #define SCALEBITS 10 | |||
| #define ONE_HALF (1 << (SCALEBITS - 1)) | |||
| #define FIX(x) ((int) ((x) * (1<<SCALEBITS) + 0.5)) | |||
| #define FIX(x) ((int) ((x) * (1<<SCALEBITS) + 0.5)) | |||
| #define RGB_TO_Y(r, g, b) \ | |||
| ((FIX(0.29900) * (r) + FIX(0.58700) * (g) + \ | |||
| @@ -2462,16 +2462,16 @@ static void opt_frame_aspect_ratio(const char *arg) | |||
| p = strchr(arg, ':'); | |||
| if (p) { | |||
| x = strtol(arg, (char **)&arg, 10); | |||
| if (arg == p) | |||
| y = strtol(arg+1, (char **)&arg, 10); | |||
| if (x > 0 && y > 0) | |||
| ar = (double)x / (double)y; | |||
| if (arg == p) | |||
| y = strtol(arg+1, (char **)&arg, 10); | |||
| if (x > 0 && y > 0) | |||
| ar = (double)x / (double)y; | |||
| } else | |||
| ar = strtod(arg, (char **)&arg); | |||
| if (!ar) { | |||
| fprintf(stderr, "Incorrect aspect ratio specification.\n"); | |||
| exit(1); | |||
| exit(1); | |||
| } | |||
| frame_aspect_ratio = ar; | |||
| } | |||
| @@ -2957,8 +2957,8 @@ static void opt_input_file(const char *filename) | |||
| } | |||
| frame_height = enc->height; | |||
| frame_width = enc->width; | |||
| frame_aspect_ratio = av_q2d(enc->sample_aspect_ratio) * enc->width / enc->height; | |||
| frame_pix_fmt = enc->pix_fmt; | |||
| frame_aspect_ratio = av_q2d(enc->sample_aspect_ratio) * enc->width / enc->height; | |||
| frame_pix_fmt = enc->pix_fmt; | |||
| rfps = ic->streams[i]->r_frame_rate.num; | |||
| rfps_base = ic->streams[i]->r_frame_rate.den; | |||
| enc->workaround_bugs = workaround_bugs; | |||
| @@ -3454,7 +3454,7 @@ static void opt_output_file(const char *filename) | |||
| oc->timestamp = rec_timestamp; | |||
| if (str_title) | |||
| if (str_title) | |||
| pstrcpy(oc->title, sizeof(oc->title), str_title); | |||
| if (str_author) | |||
| pstrcpy(oc->author, sizeof(oc->author), str_author); | |||
| @@ -3490,11 +3490,11 @@ static void opt_output_file(const char *filename) | |||
| fprintf(stderr, "Not overwriting - exiting\n"); | |||
| exit(1); | |||
| } | |||
| } | |||
| else { | |||
| } | |||
| else { | |||
| fprintf(stderr,"File '%s' already exists. Exiting.\n", filename); | |||
| exit(1); | |||
| } | |||
| } | |||
| } | |||
| } | |||
| @@ -3579,14 +3579,14 @@ static void prepare_grab(void) | |||
| fmt1 = av_find_input_format(video_grab_format); | |||
| vp->device = video_device; | |||
| vp->channel = video_channel; | |||
| vp->standard = video_standard; | |||
| vp->standard = video_standard; | |||
| if (av_open_input_file(&ic, "", fmt1, 0, vp) < 0) { | |||
| fprintf(stderr, "Could not find video grab device\n"); | |||
| exit(1); | |||
| } | |||
| /* If not enough info to get the stream parameters, we decode the | |||
| first frames to get it. */ | |||
| if ((ic->ctx_flags & AVFMTCTX_NOHEADER) && av_find_stream_info(ic) < 0) { | |||
| if ((ic->ctx_flags & AVFMTCTX_NOHEADER) && av_find_stream_info(ic) < 0) { | |||
| fprintf(stderr, "Could not find video grab parameters\n"); | |||
| exit(1); | |||
| } | |||
| @@ -4276,11 +4276,11 @@ int main(int argc, char **argv) | |||
| for(i=0;i<nb_output_files;i++) { | |||
| /* maybe av_close_output_file ??? */ | |||
| AVFormatContext *s = output_files[i]; | |||
| int j; | |||
| int j; | |||
| if (!(s->oformat->flags & AVFMT_NOFILE)) | |||
| url_fclose(&s->pb); | |||
| for(j=0;j<s->nb_streams;j++) | |||
| av_free(s->streams[j]); | |||
| url_fclose(&s->pb); | |||
| for(j=0;j<s->nb_streams;j++) | |||
| av_free(s->streams[j]); | |||
| av_free(s); | |||
| } | |||
| for(i=0;i<nb_input_files;i++) | |||
| @@ -1649,7 +1649,7 @@ static int stream_component_open(VideoState *is, int stream_index) | |||
| memset(&is->audio_pkt, 0, sizeof(is->audio_pkt)); | |||
| packet_queue_init(&is->audioq); | |||
| SDL_PauseAudio(0); | |||
| SDL_PauseAudio(0); | |||
| break; | |||
| case CODEC_TYPE_VIDEO: | |||
| is->video_stream = stream_index; | |||
| @@ -1937,11 +1937,11 @@ static int decode_thread(void *arg) | |||
| } | |||
| ret = av_read_frame(ic, pkt); | |||
| if (ret < 0) { | |||
| if (url_ferror(&ic->pb) == 0) { | |||
| if (url_ferror(&ic->pb) == 0) { | |||
| SDL_Delay(100); /* wait for user event */ | |||
| continue; | |||
| } else | |||
| break; | |||
| continue; | |||
| } else | |||
| break; | |||
| } | |||
| if (pkt->stream_index == is->audio_stream) { | |||
| packet_queue_put(&is->audioq, pkt); | |||
| @@ -2224,23 +2224,23 @@ void event_loop(void) | |||
| } | |||
| break; | |||
| case SDL_MOUSEBUTTONDOWN: | |||
| if (cur_stream) { | |||
| int ns, hh, mm, ss; | |||
| int tns, thh, tmm, tss; | |||
| tns = cur_stream->ic->duration/1000000LL; | |||
| thh = tns/3600; | |||
| tmm = (tns%3600)/60; | |||
| tss = (tns%60); | |||
| frac = (double)event.button.x/(double)cur_stream->width; | |||
| ns = frac*tns; | |||
| hh = ns/3600; | |||
| mm = (ns%3600)/60; | |||
| ss = (ns%60); | |||
| fprintf(stderr, "Seek to %2.0f%% (%2d:%02d:%02d) of total duration (%2d:%02d:%02d) \n", frac*100, | |||
| hh, mm, ss, thh, tmm, tss); | |||
| stream_seek(cur_stream, (int64_t)(cur_stream->ic->start_time+frac*cur_stream->ic->duration), 0); | |||
| } | |||
| break; | |||
| if (cur_stream) { | |||
| int ns, hh, mm, ss; | |||
| int tns, thh, tmm, tss; | |||
| tns = cur_stream->ic->duration/1000000LL; | |||
| thh = tns/3600; | |||
| tmm = (tns%3600)/60; | |||
| tss = (tns%60); | |||
| frac = (double)event.button.x/(double)cur_stream->width; | |||
| ns = frac*tns; | |||
| hh = ns/3600; | |||
| mm = (ns%3600)/60; | |||
| ss = (ns%60); | |||
| fprintf(stderr, "Seek to %2.0f%% (%2d:%02d:%02d) of total duration (%2d:%02d:%02d) \n", frac*100, | |||
| hh, mm, ss, thh, tmm, tss); | |||
| stream_seek(cur_stream, (int64_t)(cur_stream->ic->start_time+frac*cur_stream->ic->duration), 0); | |||
| } | |||
| break; | |||
| case SDL_VIDEORESIZE: | |||
| if (cur_stream) { | |||
| screen = SDL_SetVideoMode(event.resize.w, event.resize.h, 0, | |||
| @@ -2452,7 +2452,7 @@ int main(int argc, char **argv) | |||
| if (dpy) { | |||
| fs_screen_width = DisplayWidth(dpy, DefaultScreen(dpy)); | |||
| fs_screen_height = DisplayHeight(dpy, DefaultScreen(dpy)); | |||
| XCloseDisplay(dpy); | |||
| XCloseDisplay(dpy); | |||
| } | |||
| } | |||
| #endif | |||
| @@ -1204,7 +1204,7 @@ static int http_parse_request(HTTPContext *c) | |||
| pstrcpy(c->protocol, sizeof(c->protocol), protocol); | |||
| if (ffserver_debug) | |||
| http_log("New connection: %s %s\n", cmd, url); | |||
| http_log("New connection: %s %s\n", cmd, url); | |||
| /* find the filename and the optional info string in the request */ | |||
| p = url; | |||
| @@ -2001,7 +2001,7 @@ static int http_prepare_data(HTTPContext *c) | |||
| c->fmt_ctx.nb_streams = c->stream->nb_streams; | |||
| for(i=0;i<c->fmt_ctx.nb_streams;i++) { | |||
| AVStream *st; | |||
| AVStream *src; | |||
| AVStream *src; | |||
| st = av_mallocz(sizeof(AVStream)); | |||
| st->codec= avcodec_alloc_context(); | |||
| c->fmt_ctx.streams[i] = st; | |||
| @@ -2012,8 +2012,8 @@ static int http_prepare_data(HTTPContext *c) | |||
| else | |||
| src = c->stream->feed->streams[c->stream->feed_streams[i]]; | |||
| *st = *src; | |||
| st->priv_data = 0; | |||
| *st = *src; | |||
| st->priv_data = 0; | |||
| st->codec->frame_number = 0; /* XXX: should be done in | |||
| AVStream, not in codec */ | |||
| /* I'm pretty sure that this is not correct... | |||
| @@ -2452,8 +2452,8 @@ static int http_receive_data(HTTPContext *c) | |||
| s.priv_data = av_mallocz(fmt_in->priv_data_size); | |||
| if (!s.priv_data) | |||
| goto fail; | |||
| } else | |||
| s.priv_data = NULL; | |||
| } else | |||
| s.priv_data = NULL; | |||
| if (fmt_in->read_header(&s, 0) < 0) { | |||
| av_freep(&s.priv_data); | |||
| @@ -3868,20 +3868,20 @@ static int parse_ffconfig(const char *filename) | |||
| feed->child_argv[i] = av_malloc(30 + strlen(feed->filename)); | |||
| snprintf(feed->child_argv[i], 30+strlen(feed->filename), | |||
| "http://%s:%d/%s", | |||
| (my_http_addr.sin_addr.s_addr == INADDR_ANY) ? "127.0.0.1" : | |||
| inet_ntoa(my_http_addr.sin_addr), | |||
| ntohs(my_http_addr.sin_port), feed->filename); | |||
| if (ffserver_debug) | |||
| { | |||
| int j; | |||
| fprintf(stdout, "Launch commandline: "); | |||
| for (j = 0; j <= i; j++) | |||
| fprintf(stdout, "%s ", feed->child_argv[j]); | |||
| fprintf(stdout, "\n"); | |||
| } | |||
| snprintf(feed->child_argv[i], 30+strlen(feed->filename), | |||
| "http://%s:%d/%s", | |||
| (my_http_addr.sin_addr.s_addr == INADDR_ANY) ? "127.0.0.1" : | |||
| inet_ntoa(my_http_addr.sin_addr), | |||
| ntohs(my_http_addr.sin_port), feed->filename); | |||
| if (ffserver_debug) | |||
| { | |||
| int j; | |||
| fprintf(stdout, "Launch commandline: "); | |||
| for (j = 0; j <= i; j++) | |||
| fprintf(stdout, "%s ", feed->child_argv[j]); | |||
| fprintf(stdout, "\n"); | |||
| } | |||
| } | |||
| } else if (!strcasecmp(cmd, "ReadOnlyFile")) { | |||
| if (feed) { | |||
| @@ -4074,8 +4074,8 @@ static int parse_ffconfig(const char *filename) | |||
| if (stream) { | |||
| audio_enc.sample_rate = atoi(arg); | |||
| } | |||
| } else if (!strcasecmp(cmd, "AudioQuality")) { | |||
| get_arg(arg, sizeof(arg), &p); | |||
| } else if (!strcasecmp(cmd, "AudioQuality")) { | |||
| get_arg(arg, sizeof(arg), &p); | |||
| if (stream) { | |||
| // audio_enc.quality = atof(arg) * 1000; | |||
| } | |||
| @@ -44,11 +44,11 @@ const enum PixelFormat pixfmt_rgb24[] = {PIX_FMT_BGR24, PIX_FMT_RGBA32, -1}; | |||
| */ | |||
| typedef struct EightBpsContext { | |||
| AVCodecContext *avctx; | |||
| AVFrame pic; | |||
| AVCodecContext *avctx; | |||
| AVFrame pic; | |||
| unsigned char planes; | |||
| unsigned char planemap[4]; | |||
| unsigned char planes; | |||
| unsigned char planemap[4]; | |||
| } EightBpsContext; | |||
| @@ -59,87 +59,87 @@ typedef struct EightBpsContext { | |||
| */ | |||
| static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size) | |||
| { | |||
| EightBpsContext * const c = (EightBpsContext *)avctx->priv_data; | |||
| unsigned char *encoded = (unsigned char *)buf; | |||
| unsigned char *pixptr, *pixptr_end; | |||
| unsigned int height = avctx->height; // Real image height | |||
| unsigned int dlen, p, row; | |||
| unsigned char *lp, *dp; | |||
| unsigned char count; | |||
| unsigned int px_inc; | |||
| unsigned int planes = c->planes; | |||
| unsigned char *planemap = c->planemap; | |||
| if(c->pic.data[0]) | |||
| avctx->release_buffer(avctx, &c->pic); | |||
| c->pic.reference = 0; | |||
| c->pic.buffer_hints = FF_BUFFER_HINTS_VALID; | |||
| if(avctx->get_buffer(avctx, &c->pic) < 0){ | |||
| av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); | |||
| return -1; | |||
| } | |||
| /* Set data pointer after line lengths */ | |||
| dp = encoded + planes * (height << 1); | |||
| /* Ignore alpha plane, don't know what to do with it */ | |||
| if (planes == 4) | |||
| planes--; | |||
| px_inc = planes + (avctx->pix_fmt == PIX_FMT_RGBA32); | |||
| for (p = 0; p < planes; p++) { | |||
| /* Lines length pointer for this plane */ | |||
| lp = encoded + p * (height << 1); | |||
| /* Decode a plane */ | |||
| for(row = 0; row < height; row++) { | |||
| pixptr = c->pic.data[0] + row * c->pic.linesize[0] + planemap[p]; | |||
| pixptr_end = pixptr + c->pic.linesize[0]; | |||
| dlen = be2me_16(*(unsigned short *)(lp+row*2)); | |||
| /* Decode a row of this plane */ | |||
| while(dlen > 0) { | |||
| if(dp + 1 >= buf+buf_size) return -1; | |||
| if ((count = *dp++) <= 127) { | |||
| count++; | |||
| dlen -= count + 1; | |||
| if (pixptr + count * px_inc > pixptr_end) | |||
| break; | |||
| if(dp + count > buf+buf_size) return -1; | |||
| while(count--) { | |||
| *pixptr = *dp++; | |||
| pixptr += px_inc; | |||
| } | |||
| } else { | |||
| count = 257 - count; | |||
| if (pixptr + count * px_inc > pixptr_end) | |||
| break; | |||
| while(count--) { | |||
| *pixptr = *dp; | |||
| pixptr += px_inc; | |||
| } | |||
| dp++; | |||
| dlen -= 2; | |||
| } | |||
| } | |||
| } | |||
| } | |||
| if (avctx->palctrl) { | |||
| memcpy (c->pic.data[1], avctx->palctrl->palette, AVPALETTE_SIZE); | |||
| if (avctx->palctrl->palette_changed) { | |||
| c->pic.palette_has_changed = 1; | |||
| avctx->palctrl->palette_changed = 0; | |||
| } else | |||
| c->pic.palette_has_changed = 0; | |||
| } | |||
| *data_size = sizeof(AVFrame); | |||
| *(AVFrame*)data = c->pic; | |||
| /* always report that the buffer was completely consumed */ | |||
| return buf_size; | |||
| EightBpsContext * const c = (EightBpsContext *)avctx->priv_data; | |||
| unsigned char *encoded = (unsigned char *)buf; | |||
| unsigned char *pixptr, *pixptr_end; | |||
| unsigned int height = avctx->height; // Real image height | |||
| unsigned int dlen, p, row; | |||
| unsigned char *lp, *dp; | |||
| unsigned char count; | |||
| unsigned int px_inc; | |||
| unsigned int planes = c->planes; | |||
| unsigned char *planemap = c->planemap; | |||
| if(c->pic.data[0]) | |||
| avctx->release_buffer(avctx, &c->pic); | |||
| c->pic.reference = 0; | |||
| c->pic.buffer_hints = FF_BUFFER_HINTS_VALID; | |||
| if(avctx->get_buffer(avctx, &c->pic) < 0){ | |||
| av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); | |||
| return -1; | |||
| } | |||
| /* Set data pointer after line lengths */ | |||
| dp = encoded + planes * (height << 1); | |||
| /* Ignore alpha plane, don't know what to do with it */ | |||
| if (planes == 4) | |||
| planes--; | |||
| px_inc = planes + (avctx->pix_fmt == PIX_FMT_RGBA32); | |||
| for (p = 0; p < planes; p++) { | |||
| /* Lines length pointer for this plane */ | |||
| lp = encoded + p * (height << 1); | |||
| /* Decode a plane */ | |||
| for(row = 0; row < height; row++) { | |||
| pixptr = c->pic.data[0] + row * c->pic.linesize[0] + planemap[p]; | |||
| pixptr_end = pixptr + c->pic.linesize[0]; | |||
| dlen = be2me_16(*(unsigned short *)(lp+row*2)); | |||
| /* Decode a row of this plane */ | |||
| while(dlen > 0) { | |||
| if(dp + 1 >= buf+buf_size) return -1; | |||
| if ((count = *dp++) <= 127) { | |||
| count++; | |||
| dlen -= count + 1; | |||
| if (pixptr + count * px_inc > pixptr_end) | |||
| break; | |||
| if(dp + count > buf+buf_size) return -1; | |||
| while(count--) { | |||
| *pixptr = *dp++; | |||
| pixptr += px_inc; | |||
| } | |||
| } else { | |||
| count = 257 - count; | |||
| if (pixptr + count * px_inc > pixptr_end) | |||
| break; | |||
| while(count--) { | |||
| *pixptr = *dp; | |||
| pixptr += px_inc; | |||
| } | |||
| dp++; | |||
| dlen -= 2; | |||
| } | |||
| } | |||
| } | |||
| } | |||
| if (avctx->palctrl) { | |||
| memcpy (c->pic.data[1], avctx->palctrl->palette, AVPALETTE_SIZE); | |||
| if (avctx->palctrl->palette_changed) { | |||
| c->pic.palette_has_changed = 1; | |||
| avctx->palctrl->palette_changed = 0; | |||
| } else | |||
| c->pic.palette_has_changed = 0; | |||
| } | |||
| *data_size = sizeof(AVFrame); | |||
| *(AVFrame*)data = c->pic; | |||
| /* always report that the buffer was completely consumed */ | |||
| return buf_size; | |||
| } | |||
| @@ -150,53 +150,53 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8 | |||
| */ | |||
| static int decode_init(AVCodecContext *avctx) | |||
| { | |||
| EightBpsContext * const c = (EightBpsContext *)avctx->priv_data; | |||
| EightBpsContext * const c = (EightBpsContext *)avctx->priv_data; | |||
| c->avctx = avctx; | |||
| avctx->has_b_frames = 0; | |||
| c->avctx = avctx; | |||
| avctx->has_b_frames = 0; | |||
| c->pic.data[0] = NULL; | |||
| c->pic.data[0] = NULL; | |||
| if (avcodec_check_dimensions(avctx, avctx->width, avctx->height) < 0) { | |||
| return 1; | |||
| } | |||
| switch (avctx->bits_per_sample) { | |||
| case 8: | |||
| avctx->pix_fmt = PIX_FMT_PAL8; | |||
| c->planes = 1; | |||
| c->planemap[0] = 0; // 1st plane is palette indexes | |||
| if (avctx->palctrl == NULL) { | |||
| av_log(avctx, AV_LOG_ERROR, "Error: PAL8 format but no palette from demuxer.\n"); | |||
| return -1; | |||
| } | |||
| break; | |||
| case 24: | |||
| avctx->pix_fmt = avctx->get_format(avctx, pixfmt_rgb24); | |||
| c->planes = 3; | |||
| c->planemap[0] = 2; // 1st plane is red | |||
| c->planemap[1] = 1; // 2nd plane is green | |||
| c->planemap[2] = 0; // 3rd plane is blue | |||
| break; | |||
| case 32: | |||
| avctx->pix_fmt = PIX_FMT_RGBA32; | |||
| c->planes = 4; | |||
| switch (avctx->bits_per_sample) { | |||
| case 8: | |||
| avctx->pix_fmt = PIX_FMT_PAL8; | |||
| c->planes = 1; | |||
| c->planemap[0] = 0; // 1st plane is palette indexes | |||
| if (avctx->palctrl == NULL) { | |||
| av_log(avctx, AV_LOG_ERROR, "Error: PAL8 format but no palette from demuxer.\n"); | |||
| return -1; | |||
| } | |||
| break; | |||
| case 24: | |||
| avctx->pix_fmt = avctx->get_format(avctx, pixfmt_rgb24); | |||
| c->planes = 3; | |||
| c->planemap[0] = 2; // 1st plane is red | |||
| c->planemap[1] = 1; // 2nd plane is green | |||
| c->planemap[2] = 0; // 3rd plane is blue | |||
| break; | |||
| case 32: | |||
| avctx->pix_fmt = PIX_FMT_RGBA32; | |||
| c->planes = 4; | |||
| #ifdef WORDS_BIGENDIAN | |||
| c->planemap[0] = 1; // 1st plane is red | |||
| c->planemap[1] = 2; // 2nd plane is green | |||
| c->planemap[2] = 3; // 3rd plane is blue | |||
| c->planemap[3] = 0; // 4th plane is alpha??? | |||
| c->planemap[0] = 1; // 1st plane is red | |||
| c->planemap[1] = 2; // 2nd plane is green | |||
| c->planemap[2] = 3; // 3rd plane is blue | |||
| c->planemap[3] = 0; // 4th plane is alpha??? | |||
| #else | |||
| c->planemap[0] = 2; // 1st plane is red | |||
| c->planemap[1] = 1; // 2nd plane is green | |||
| c->planemap[2] = 0; // 3rd plane is blue | |||
| c->planemap[3] = 3; // 4th plane is alpha??? | |||
| c->planemap[0] = 2; // 1st plane is red | |||
| c->planemap[1] = 1; // 2nd plane is green | |||
| c->planemap[2] = 0; // 3rd plane is blue | |||
| c->planemap[3] = 3; // 4th plane is alpha??? | |||
| #endif | |||
| break; | |||
| default: | |||
| av_log(avctx, AV_LOG_ERROR, "Error: Unsupported color depth: %u.\n", avctx->bits_per_sample); | |||
| return -1; | |||
| } | |||
| break; | |||
| default: | |||
| av_log(avctx, AV_LOG_ERROR, "Error: Unsupported color depth: %u.\n", avctx->bits_per_sample); | |||
| return -1; | |||
| } | |||
| return 0; | |||
| } | |||
| @@ -211,24 +211,24 @@ static int decode_init(AVCodecContext *avctx) | |||
| */ | |||
| static int decode_end(AVCodecContext *avctx) | |||
| { | |||
| EightBpsContext * const c = (EightBpsContext *)avctx->priv_data; | |||
| EightBpsContext * const c = (EightBpsContext *)avctx->priv_data; | |||
| if (c->pic.data[0]) | |||
| avctx->release_buffer(avctx, &c->pic); | |||
| if (c->pic.data[0]) | |||
| avctx->release_buffer(avctx, &c->pic); | |||
| return 0; | |||
| return 0; | |||
| } | |||
| AVCodec eightbps_decoder = { | |||
| "8bps", | |||
| CODEC_TYPE_VIDEO, | |||
| CODEC_ID_8BPS, | |||
| sizeof(EightBpsContext), | |||
| decode_init, | |||
| NULL, | |||
| decode_end, | |||
| decode_frame, | |||
| CODEC_CAP_DR1, | |||
| "8bps", | |||
| CODEC_TYPE_VIDEO, | |||
| CODEC_ID_8BPS, | |||
| sizeof(EightBpsContext), | |||
| decode_init, | |||
| NULL, | |||
| decode_end, | |||
| decode_frame, | |||
| CODEC_CAP_DR1, | |||
| }; | |||
| @@ -58,11 +58,11 @@ typedef struct AC3DecodeState { | |||
| a52_state_t* (*a52_init)(uint32_t mm_accel); | |||
| sample_t* (*a52_samples)(a52_state_t * state); | |||
| int (*a52_syncinfo)(uint8_t * buf, int * flags, | |||
| int * sample_rate, int * bit_rate); | |||
| int * sample_rate, int * bit_rate); | |||
| int (*a52_frame)(a52_state_t * state, uint8_t * buf, int * flags, | |||
| sample_t * level, sample_t bias); | |||
| sample_t * level, sample_t bias); | |||
| void (*a52_dynrng)(a52_state_t * state, | |||
| sample_t (* call) (sample_t, void *), void * data); | |||
| sample_t (* call) (sample_t, void *), void * data); | |||
| int (*a52_block)(a52_state_t * state); | |||
| void (*a52_free)(a52_state_t * state); | |||
| @@ -105,7 +105,7 @@ static int a52_decode_init(AVCodecContext *avctx) | |||
| if (!s->a52_init || !s->a52_samples || !s->a52_syncinfo | |||
| || !s->a52_frame || !s->a52_block || !s->a52_free) | |||
| { | |||
| dlclose(s->handle); | |||
| dlclose(s->handle); | |||
| return -1; | |||
| } | |||
| #else | |||
| @@ -130,22 +130,22 @@ static int a52_decode_init(AVCodecContext *avctx) | |||
| static inline int blah (int32_t i) | |||
| { | |||
| if (i > 0x43c07fff) | |||
| return 32767; | |||
| return 32767; | |||
| else if (i < 0x43bf8000) | |||
| return -32768; | |||
| return -32768; | |||
| return i - 0x43c00000; | |||
| } | |||
| static inline void float_to_int (float * _f, int16_t * s16, int nchannels) | |||
| { | |||
| int i, j, c; | |||
| int32_t * f = (int32_t *) _f; // XXX assumes IEEE float format | |||
| int32_t * f = (int32_t *) _f; // XXX assumes IEEE float format | |||
| j = 0; | |||
| nchannels *= 256; | |||
| for (i = 0; i < 256; i++) { | |||
| for (c = 0; c < nchannels; c += 256) | |||
| s16[j++] = blah (f[i + c]); | |||
| for (c = 0; c < nchannels; c += 256) | |||
| s16[j++] = blah (f[i + c]); | |||
| } | |||
| } | |||
| @@ -164,7 +164,7 @@ static int a52_decode_frame(AVCodecContext *avctx, | |||
| short *out_samples = data; | |||
| float level; | |||
| static const int ac3_channels[8] = { | |||
| 2, 1, 2, 3, 3, 4, 4, 5 | |||
| 2, 1, 2, 3, 3, 4, 4, 5 | |||
| }; | |||
| buf_ptr = buf; | |||
| @@ -186,20 +186,20 @@ static int a52_decode_frame(AVCodecContext *avctx, | |||
| memcpy(s->inbuf, s->inbuf + 1, HEADER_SIZE - 1); | |||
| s->inbuf_ptr--; | |||
| } else { | |||
| s->frame_size = len; | |||
| s->frame_size = len; | |||
| /* update codec info */ | |||
| avctx->sample_rate = sample_rate; | |||
| s->channels = ac3_channels[s->flags & 7]; | |||
| if (s->flags & A52_LFE) | |||
| s->channels++; | |||
| if (avctx->channels == 0) | |||
| /* No specific number of channel requested */ | |||
| avctx->channels = s->channels; | |||
| else if (s->channels < avctx->channels) { | |||
| av_log(avctx, AV_LOG_ERROR, "ac3dec: AC3 Source channels are less than specified: output to %d channels.. (frmsize: %d)\n", s->channels, len); | |||
| avctx->channels = s->channels; | |||
| } | |||
| avctx->bit_rate = bit_rate; | |||
| s->channels++; | |||
| if (avctx->channels == 0) | |||
| /* No specific number of channel requested */ | |||
| avctx->channels = s->channels; | |||
| else if (s->channels < avctx->channels) { | |||
| av_log(avctx, AV_LOG_ERROR, "ac3dec: AC3 Source channels are less than specified: output to %d channels.. (frmsize: %d)\n", s->channels, len); | |||
| avctx->channels = s->channels; | |||
| } | |||
| avctx->bit_rate = bit_rate; | |||
| } | |||
| } | |||
| } else if (len < s->frame_size) { | |||
| @@ -54,23 +54,23 @@ stream_samples_t samples; | |||
| static inline int blah (int32_t i) | |||
| { | |||
| if (i > 0x43c07fff) | |||
| return 32767; | |||
| return 32767; | |||
| else if (i < 0x43bf8000) | |||
| return -32768; | |||
| return -32768; | |||
| else | |||
| return i - 0x43c00000; | |||
| return i - 0x43c00000; | |||
| } | |||
| static inline void float_to_int (float * _f, int16_t * s16, int nchannels) | |||
| { | |||
| int i, j, c; | |||
| int32_t * f = (int32_t *) _f; // XXX assumes IEEE float format | |||
| int32_t * f = (int32_t *) _f; // XXX assumes IEEE float format | |||
| j = 0; | |||
| nchannels *= 256; | |||
| for (i = 0; i < 256; i++) { | |||
| for (c = 0; c < nchannels; c += 256) | |||
| s16[j++] = blah (f[i + c]); | |||
| for (c = 0; c < nchannels; c += 256) | |||
| s16[j++] = blah (f[i + c]); | |||
| } | |||
| } | |||
| @@ -89,7 +89,7 @@ static int ac3_decode_frame(AVCodecContext *avctx, | |||
| short *out_samples = data; | |||
| float level; | |||
| static const int ac3_channels[8] = { | |||
| 2, 1, 2, 3, 3, 4, 4, 5 | |||
| 2, 1, 2, 3, 3, 4, 4, 5 | |||
| }; | |||
| buf_ptr = buf; | |||
| @@ -111,20 +111,20 @@ static int ac3_decode_frame(AVCodecContext *avctx, | |||
| memcpy(s->inbuf, s->inbuf + 1, HEADER_SIZE - 1); | |||
| s->inbuf_ptr--; | |||
| } else { | |||
| s->frame_size = len; | |||
| s->frame_size = len; | |||
| /* update codec info */ | |||
| avctx->sample_rate = sample_rate; | |||
| s->channels = ac3_channels[s->flags & 7]; | |||
| if (s->flags & AC3_LFE) | |||
| s->channels++; | |||
| if (avctx->channels == 0) | |||
| /* No specific number of channel requested */ | |||
| avctx->channels = s->channels; | |||
| else if (s->channels < avctx->channels) { | |||
| s->channels++; | |||
| if (avctx->channels == 0) | |||
| /* No specific number of channel requested */ | |||
| avctx->channels = s->channels; | |||
| else if (s->channels < avctx->channels) { | |||
| av_log( avctx, AV_LOG_INFO, "ac3dec: AC3 Source channels are less than specified: output to %d channels.. (frmsize: %d)\n", s->channels, len); | |||
| avctx->channels = s->channels; | |||
| } | |||
| avctx->bit_rate = bit_rate; | |||
| avctx->channels = s->channels; | |||
| } | |||
| avctx->bit_rate = bit_rate; | |||
| } | |||
| } | |||
| } else if (len < s->frame_size) { | |||
| @@ -337,8 +337,8 @@ static void fft_init(int ln) | |||
| /* do a 2^n point complex fft on 2^ln points. */ | |||
| static void fft(IComplex *z, int ln) | |||
| { | |||
| int j, l, np, np2; | |||
| int nblocks, nloops; | |||
| int j, l, np, np2; | |||
| int nblocks, nloops; | |||
| register IComplex *p,*q; | |||
| int tmp_re, tmp_im; | |||
| @@ -472,7 +472,7 @@ static void compute_exp_strategy(uint8_t exp_strategy[NB_BLOCKS][AC3_MAX_CHANNEL | |||
| exp_strategy[i][ch] = EXP_REUSE; | |||
| } | |||
| if (is_lfe) | |||
| return; | |||
| return; | |||
| /* now select the encoding strategy type : if exponents are often | |||
| recoded, we use a coarse encoding */ | |||
| @@ -493,7 +493,7 @@ static void compute_exp_strategy(uint8_t exp_strategy[NB_BLOCKS][AC3_MAX_CHANNEL | |||
| exp_strategy[i][ch] = EXP_D15; | |||
| break; | |||
| } | |||
| i = j; | |||
| i = j; | |||
| } | |||
| } | |||
| @@ -553,9 +553,9 @@ static int encode_exp(uint8_t encoded_exp[N/2], | |||
| /* Decrease the delta between each groups to within 2 | |||
| * so that they can be differentially encoded */ | |||
| for (i=1;i<=nb_groups;i++) | |||
| exp1[i] = FFMIN(exp1[i], exp1[i-1] + 2); | |||
| exp1[i] = FFMIN(exp1[i], exp1[i-1] + 2); | |||
| for (i=nb_groups-1;i>=0;i--) | |||
| exp1[i] = FFMIN(exp1[i], exp1[i+1] + 2); | |||
| exp1[i] = FFMIN(exp1[i], exp1[i+1] + 2); | |||
| /* now we have the exponent values the decoder will see */ | |||
| encoded_exp[0] = exp1[0]; | |||
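The two FFMIN passes bound each coded exponent by its neighbours so that consecutive values never differ by more than 2, which is what allows them to be written as small deltas further down. A toy, self-contained run of just those two passes (values invented):

    #include <stdio.h>

    #define FFMIN(a, b) ((a) < (b) ? (a) : (b))

    int main(void)
    {
        int exp1[] = { 2, 8, 3, 3, 9, 1 };
        int nb_groups = sizeof(exp1) / sizeof(exp1[0]) - 1;
        int i;

        for (i = 1; i <= nb_groups; i++)          /* forward pass */
            exp1[i] = FFMIN(exp1[i], exp1[i - 1] + 2);
        for (i = nb_groups - 1; i >= 0; i--)      /* backward pass */
            exp1[i] = FFMIN(exp1[i], exp1[i + 1] + 2);

        for (i = 0; i <= nb_groups; i++)
            printf("%d ", exp1[i]);               /* prints: 2 4 3 3 3 1 */
        printf("\n");
        return 0;
    }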
| @@ -708,8 +708,8 @@ static int compute_bit_allocation(AC3EncodeContext *s, | |||
| if(i==0) frame_bits += 4; | |||
| } | |||
| frame_bits += 2 * s->nb_channels; /* chexpstr[2] * c */ | |||
| if (s->lfe) | |||
| frame_bits++; /* lfeexpstr */ | |||
| if (s->lfe) | |||
| frame_bits++; /* lfeexpstr */ | |||
| for(ch=0;ch<s->nb_channels;ch++) { | |||
| if (exp_strategy[i][ch] != EXP_REUSE) | |||
| frame_bits += 6 + 2; /* chbwcod[6], gainrng[2] */ | |||
| @@ -736,11 +736,11 @@ static int compute_bit_allocation(AC3EncodeContext *s, | |||
| csnroffst = s->csnroffst; | |||
| while (csnroffst >= 0 && | |||
| bit_alloc(s, bap, encoded_exp, exp_strategy, frame_bits, csnroffst, 0) < 0) | |||
| csnroffst -= SNR_INC1; | |||
| bit_alloc(s, bap, encoded_exp, exp_strategy, frame_bits, csnroffst, 0) < 0) | |||
| csnroffst -= SNR_INC1; | |||
| if (csnroffst < 0) { | |||
| av_log(NULL, AV_LOG_ERROR, "Yack, Error !!!\n"); | |||
| return -1; | |||
| av_log(NULL, AV_LOG_ERROR, "Yack, Error !!!\n"); | |||
| return -1; | |||
| } | |||
| while ((csnroffst + SNR_INC1) <= 63 && | |||
| bit_alloc(s, bap1, encoded_exp, exp_strategy, frame_bits, | |||
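The search here is a coarse walk on the SNR offset: step down by SNR_INC1 until the bit allocation fits the frame, bail out if even offset 0 does not fit, then step back up while the next coarse step still fits. A minimal sketch of that control flow, with frame_fits() standing in for bit_alloc(), an assumed SNR_INC1 of 4 (the real constant lives elsewhere in ac3enc.c), and the initial offset of 40 set in AC3_encode_init further down:

    #include <stdio.h>

    #define SNR_INC1 4                      /* assumed coarse step */

    /* Stand-in predicate: pretend the frame fits whenever the offset is
     * at most some budget-dependent limit. */
    static int frame_fits(int snroffst) { return snroffst <= 37; }

    static int find_coarse_snr_offset(int start)
    {
        int csnroffst = start;
        while (csnroffst >= 0 && !frame_fits(csnroffst))
            csnroffst -= SNR_INC1;
        if (csnroffst < 0)
            return -1;                      /* cannot fit even at offset 0 */
        while (csnroffst + SNR_INC1 <= 63 && frame_fits(csnroffst + SNR_INC1))
            csnroffst += SNR_INC1;
        return csnroffst;
    }

    int main(void)
    {
        printf("%d\n", find_coarse_snr_offset(40));   /* 36 with this toy predicate */
        return 0;
    }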
| @@ -815,19 +815,19 @@ static int AC3_encode_init(AVCodecContext *avctx) | |||
| int i, j, ch; | |||
| float alpha; | |||
| static const uint8_t acmod_defs[6] = { | |||
| 0x01, /* C */ | |||
| 0x02, /* L R */ | |||
| 0x03, /* L C R */ | |||
| 0x06, /* L R SL SR */ | |||
| 0x07, /* L C R SL SR */ | |||
| 0x07, /* L C R SL SR (+LFE) */ | |||
| 0x01, /* C */ | |||
| 0x02, /* L R */ | |||
| 0x03, /* L C R */ | |||
| 0x06, /* L R SL SR */ | |||
| 0x07, /* L C R SL SR */ | |||
| 0x07, /* L C R SL SR (+LFE) */ | |||
| }; | |||
| avctx->frame_size = AC3_FRAME_SIZE; | |||
| /* number of channels */ | |||
| if (channels < 1 || channels > 6) | |||
| return -1; | |||
| return -1; | |||
| s->acmod = acmod_defs[channels - 1]; | |||
| s->lfe = (channels == 6) ? 1 : 0; | |||
| s->nb_all_channels = channels; | |||
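(No further comment needed on the channel table itself: six input layouts map onto five acmod codes, and the sixth entry reuses 0x07 because the LFE channel is signalled by the separate lfe flag rather than by acmod.)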
| @@ -871,7 +871,7 @@ static int AC3_encode_init(AVCodecContext *avctx) | |||
| s->nb_coefs[ch] = ((s->chbwcod[ch] + 12) * 3) + 37; | |||
| } | |||
| if (s->lfe) { | |||
| s->nb_coefs[s->lfe_channel] = 7; /* fixed */ | |||
| s->nb_coefs[s->lfe_channel] = 7; /* fixed */ | |||
| } | |||
| /* initial snr offset */ | |||
| s->csnroffst = 40; | |||
| @@ -907,9 +907,9 @@ static void output_frame_header(AC3EncodeContext *s, unsigned char *frame) | |||
| put_bits(&s->pb, 3, s->bsmod); | |||
| put_bits(&s->pb, 3, s->acmod); | |||
| if ((s->acmod & 0x01) && s->acmod != 0x01) | |||
| put_bits(&s->pb, 2, 1); /* XXX -4.5 dB */ | |||
| put_bits(&s->pb, 2, 1); /* XXX -4.5 dB */ | |||
| if (s->acmod & 0x04) | |||
| put_bits(&s->pb, 2, 1); /* XXX -6 dB */ | |||
| put_bits(&s->pb, 2, 1); /* XXX -6 dB */ | |||
| if (s->acmod == 0x02) | |||
| put_bits(&s->pb, 2, 0); /* surround not indicated */ | |||
| put_bits(&s->pb, 1, s->lfe); /* LFE */ | |||
| @@ -995,20 +995,20 @@ static void output_audio_block(AC3EncodeContext *s, | |||
| if (s->acmod == 2) | |||
| { | |||
| if(block_num==0) | |||
| { | |||
| /* first block must define rematrixing (rematstr) */ | |||
| put_bits(&s->pb, 1, 1); | |||
| /* dummy rematrixing rematflg(1:4)=0 */ | |||
| for (rbnd=0;rbnd<4;rbnd++) | |||
| put_bits(&s->pb, 1, 0); | |||
| } | |||
| else | |||
| { | |||
| /* no matrixing (but should be used in the future) */ | |||
| put_bits(&s->pb, 1, 0); | |||
| } | |||
| if(block_num==0) | |||
| { | |||
| /* first block must define rematrixing (rematstr) */ | |||
| put_bits(&s->pb, 1, 1); | |||
| /* dummy rematrixing rematflg(1:4)=0 */ | |||
| for (rbnd=0;rbnd<4;rbnd++) | |||
| put_bits(&s->pb, 1, 0); | |||
| } | |||
| else | |||
| { | |||
| /* no matrixing (but should be used in the future) */ | |||
| put_bits(&s->pb, 1, 0); | |||
| } | |||
| } | |||
| #if defined(DEBUG) | |||
| @@ -1023,7 +1023,7 @@ static void output_audio_block(AC3EncodeContext *s, | |||
| } | |||
| if (s->lfe) { | |||
| put_bits(&s->pb, 1, exp_strategy[s->lfe_channel]); | |||
| put_bits(&s->pb, 1, exp_strategy[s->lfe_channel]); | |||
| } | |||
| for(ch=0;ch<s->nb_channels;ch++) { | |||
| @@ -1047,7 +1047,7 @@ static void output_audio_block(AC3EncodeContext *s, | |||
| group_size = 4; | |||
| break; | |||
| } | |||
| nb_groups = (s->nb_coefs[ch] + (group_size * 3) - 4) / (3 * group_size); | |||
| nb_groups = (s->nb_coefs[ch] + (group_size * 3) - 4) / (3 * group_size); | |||
| p = encoded_exp[ch]; | |||
| /* first exponent */ | |||
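Each 7-bit exponent group written a few lines below packs three deltas in base 5, and with strategies D15/D25/D45 each delta covers 1, 2 or 4 coefficients, so the expression above yields fewer groups as the strategy gets coarser. Evaluating it for a hypothetical channel with 50 coefficients (number invented for illustration):

    #include <stdio.h>

    int main(void)
    {
        int nb_coefs = 50;
        int group_sizes[] = { 1, 2, 4 };   /* D15, D25, D45 */
        int i;

        for (i = 0; i < 3; i++) {
            int group_size = group_sizes[i];
            int nb_groups = (nb_coefs + (group_size * 3) - 4) / (3 * group_size);
            printf("group_size=%d -> nb_groups=%d\n", group_size, nb_groups);
        }
        return 0;                          /* 16, 8 and 4 groups respectively */
    }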
| @@ -1075,8 +1075,8 @@ static void output_audio_block(AC3EncodeContext *s, | |||
| put_bits(&s->pb, 7, ((delta0 * 5 + delta1) * 5) + delta2); | |||
| } | |||
| if (ch != s->lfe_channel) | |||
| put_bits(&s->pb, 2, 0); /* no gain range info */ | |||
| if (ch != s->lfe_channel) | |||
| put_bits(&s->pb, 2, 0); /* no gain range info */ | |||
| } | |||
| /* bit allocation info */ | |||
| @@ -300,7 +300,7 @@ static inline unsigned char adpcm_yamaha_compress_sample(ADPCMChannelStatus *c, | |||
| } | |||
| static int adpcm_encode_frame(AVCodecContext *avctx, | |||
| unsigned char *frame, int buf_size, void *data) | |||
| unsigned char *frame, int buf_size, void *data) | |||
| { | |||
| int n, i, st; | |||
| short *samples; | |||
| @@ -431,8 +431,8 @@ static int adpcm_decode_init(AVCodecContext * avctx) | |||
| switch(avctx->codec->id) { | |||
| case CODEC_ID_ADPCM_CT: | |||
| c->status[0].step = c->status[1].step = 511; | |||
| break; | |||
| c->status[0].step = c->status[1].step = 511; | |||
| break; | |||
| default: | |||
| break; | |||
| } | |||
| @@ -498,16 +498,16 @@ static inline short adpcm_ct_expand_nibble(ADPCMChannelStatus *c, char nibble) | |||
| predictor = c->predictor; | |||
| /* predictor update is not so trivial: predictor is multiplied on 254/256 before updating */ | |||
| if(sign) | |||
| predictor = ((predictor * 254) >> 8) - diff; | |||
| predictor = ((predictor * 254) >> 8) - diff; | |||
| else | |||
| predictor = ((predictor * 254) >> 8) + diff; | |||
| predictor = ((predictor * 254) >> 8) + diff; | |||
| /* calculate new step and clamp it to range 511..32767 */ | |||
| new_step = (ct_adpcm_table[nibble & 7] * c->step) >> 8; | |||
| c->step = new_step; | |||
| if(c->step < 511) | |||
| c->step = 511; | |||
| c->step = 511; | |||
| if(c->step > 32767) | |||
| c->step = 32767; | |||
| c->step = 32767; | |||
| CLAMP_TO_SHORT(predictor); | |||
| c->predictor = predictor; | |||
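In words: this Creative Technology variant first decays the predictor by 254/256 and only then applies the decoded difference, and it keeps the step size inside [511, 32767]. A toy run of just that update rule (numbers invented; the nibble decoding and ct_adpcm_table lookup are not reproduced here):

    #include <stdio.h>

    int main(void)
    {
        int predictor = 20000;
        int diff = 300;      /* pretend this was derived from the nibble */
        int new_step = 400;  /* pretend the table lookup came out this small */

        predictor = ((predictor * 254) >> 8) + diff;   /* sign bit clear -> add */

        if (new_step < 511)
            new_step = 511;
        if (new_step > 32767)
            new_step = 32767;

        printf("predictor=%d step=%d\n", predictor, new_step);  /* 20143 511 */
        return 0;
    }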
| @@ -612,8 +612,8 @@ static void xa_decode(short *out, const unsigned char *in, | |||
| } | |||
| static int adpcm_decode_frame(AVCodecContext *avctx, | |||
| void *data, int *data_size, | |||
| uint8_t *buf, int buf_size) | |||
| void *data, int *data_size, | |||
| uint8_t *buf, int buf_size) | |||
| { | |||
| ADPCMContext *c = avctx->priv_data; | |||
| ADPCMChannelStatus *cs; | |||
| @@ -701,7 +701,7 @@ static int adpcm_decode_frame(AVCodecContext *avctx, | |||
| cs->predictor -= 0x10000; | |||
| CLAMP_TO_SHORT(cs->predictor); | |||
| // XXX: is this correct ??: *samples++ = cs->predictor; | |||
| // XXX: is this correct ??: *samples++ = cs->predictor; | |||
| cs->step_index = *src++; | |||
| if (cs->step_index < 0) cs->step_index = 0; | |||
| @@ -710,19 +710,19 @@ static int adpcm_decode_frame(AVCodecContext *avctx, | |||
| } | |||
| for(m=4; src < (buf + buf_size);) { | |||
| *samples++ = adpcm_ima_expand_nibble(&c->status[0], src[0] & 0x0F, 3); | |||
| *samples++ = adpcm_ima_expand_nibble(&c->status[0], src[0] & 0x0F, 3); | |||
| if (st) | |||
| *samples++ = adpcm_ima_expand_nibble(&c->status[1], src[4] & 0x0F, 3); | |||
| *samples++ = adpcm_ima_expand_nibble(&c->status[0], (src[0] >> 4) & 0x0F, 3); | |||
| if (st) { | |||
| if (st) { | |||
| *samples++ = adpcm_ima_expand_nibble(&c->status[1], (src[4] >> 4) & 0x0F, 3); | |||
| if (!--m) { | |||
| m=4; | |||
| src+=4; | |||
| } | |||
| } | |||
| src++; | |||
| } | |||
| if (!--m) { | |||
| m=4; | |||
| src+=4; | |||
| } | |||
| } | |||
| src++; | |||
| } | |||
| break; | |||
| case CODEC_ID_ADPCM_4XM: | |||
| cs = &(c->status[0]); | |||
| @@ -739,13 +739,13 @@ static int adpcm_decode_frame(AVCodecContext *avctx, | |||
| m= (buf_size - (src - buf))>>st; | |||
| for(i=0; i<m; i++) { | |||
| *samples++ = adpcm_ima_expand_nibble(&c->status[0], src[i] & 0x0F, 4); | |||
| *samples++ = adpcm_ima_expand_nibble(&c->status[0], src[i] & 0x0F, 4); | |||
| if (st) | |||
| *samples++ = adpcm_ima_expand_nibble(&c->status[1], src[i+m] & 0x0F, 4); | |||
| *samples++ = adpcm_ima_expand_nibble(&c->status[0], src[i] >> 4, 4); | |||
| if (st) | |||
| if (st) | |||
| *samples++ = adpcm_ima_expand_nibble(&c->status[1], src[i+m] >> 4, 4); | |||
| } | |||
| } | |||
| src += m<<st; | |||
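The 4XM case above walks two planar nibble streams: the left channel's m bytes come first, the right channel's m bytes follow, and within each byte the low nibble is decoded before the high one. A small sketch that only prints the resulting visiting order, using a dummy expander in place of adpcm_ima_expand_nibble() and invented data:

    #include <stdio.h>

    static void expand(int channel, int nibble)
    {
        printf("ch%d nibble 0x%X\n", channel, nibble);
    }

    int main(void)
    {
        unsigned char buf[] = { 0x21, 0x43, 0x65, 0x87 };
        int st = 1;                        /* stereo */
        int m = sizeof(buf) >> st;         /* bytes per channel: 2 */
        int i;

        for (i = 0; i < m; i++) {
            expand(0, buf[i] & 0x0F);      /* left, low nibble  */
            if (st)
                expand(1, buf[i + m] & 0x0F);
            expand(0, buf[i] >> 4);        /* left, high nibble */
            if (st)
                expand(1, buf[i + m] >> 4);
        }
        return 0;                          /* order: 1,5,2,6,3,7,4,8 */
    }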
| @@ -958,7 +958,7 @@ static int adpcm_decode_frame(AVCodecContext *avctx, | |||
| } | |||
| break; | |||
| case CODEC_ID_ADPCM_CT: | |||
| while (src < buf + buf_size) { | |||
| while (src < buf + buf_size) { | |||
| if (st) { | |||
| *samples++ = adpcm_ct_expand_nibble(&c->status[0], | |||
| (src[0] >> 4) & 0x0F); | |||
| @@ -970,78 +970,78 @@ static int adpcm_decode_frame(AVCodecContext *avctx, | |||
| *samples++ = adpcm_ct_expand_nibble(&c->status[0], | |||
| src[0] & 0x0F); | |||
| } | |||
| src++; | |||
| src++; | |||
| } | |||
| break; | |||
| case CODEC_ID_ADPCM_SWF: | |||
| { | |||
| GetBitContext gb; | |||
| const int *table; | |||
| int k0, signmask; | |||
| int size = buf_size*8; | |||
| init_get_bits(&gb, buf, size); | |||
| // first frame, read bits & initial values | |||
| if (!c->nb_bits) | |||
| { | |||
| c->nb_bits = get_bits(&gb, 2)+2; | |||
| // av_log(NULL,AV_LOG_INFO,"nb_bits: %d\n", c->nb_bits); | |||
| } | |||
| table = swf_index_tables[c->nb_bits-2]; | |||
| k0 = 1 << (c->nb_bits-2); | |||
| signmask = 1 << (c->nb_bits-1); | |||
| while (get_bits_count(&gb) <= size) | |||
| { | |||
| int i; | |||
| c->nb_samples++; | |||
| // wrap around at every 4096 samples... | |||
| if ((c->nb_samples & 0xfff) == 1) | |||
| { | |||
| for (i = 0; i <= st; i++) | |||
| { | |||
| *samples++ = c->status[i].predictor = get_sbits(&gb, 16); | |||
| c->status[i].step_index = get_bits(&gb, 6); | |||
| } | |||
| } | |||
| // similar to IMA adpcm | |||
| for (i = 0; i <= st; i++) | |||
| { | |||
| int delta = get_bits(&gb, c->nb_bits); | |||
| int step = step_table[c->status[i].step_index]; | |||
| long vpdiff = 0; // vpdiff = (delta+0.5)*step/4 | |||
| int k = k0; | |||
| do { | |||
| if (delta & k) | |||
| vpdiff += step; | |||
| step >>= 1; | |||
| k >>= 1; | |||
| } while(k); | |||
| vpdiff += step; | |||
| if (delta & signmask) | |||
| c->status[i].predictor -= vpdiff; | |||
| else | |||
| c->status[i].predictor += vpdiff; | |||
| c->status[i].step_index += table[delta & (~signmask)]; | |||
| c->status[i].step_index = clip(c->status[i].step_index, 0, 88); | |||
| c->status[i].predictor = clip(c->status[i].predictor, -32768, 32767); | |||
| *samples++ = c->status[i].predictor; | |||
| } | |||
| } | |||
| // src += get_bits_count(&gb)*8; | |||
| src += size; | |||
| break; | |||
| GetBitContext gb; | |||
| const int *table; | |||
| int k0, signmask; | |||
| int size = buf_size*8; | |||
| init_get_bits(&gb, buf, size); | |||
| // first frame, read bits & initial values | |||
| if (!c->nb_bits) | |||
| { | |||
| c->nb_bits = get_bits(&gb, 2)+2; | |||
| // av_log(NULL,AV_LOG_INFO,"nb_bits: %d\n", c->nb_bits); | |||
| } | |||
| table = swf_index_tables[c->nb_bits-2]; | |||
| k0 = 1 << (c->nb_bits-2); | |||
| signmask = 1 << (c->nb_bits-1); | |||
| while (get_bits_count(&gb) <= size) | |||
| { | |||
| int i; | |||
| c->nb_samples++; | |||
| // wrap around at every 4096 samples... | |||
| if ((c->nb_samples & 0xfff) == 1) | |||
| { | |||
| for (i = 0; i <= st; i++) | |||
| { | |||
| *samples++ = c->status[i].predictor = get_sbits(&gb, 16); | |||
| c->status[i].step_index = get_bits(&gb, 6); | |||
| } | |||
| } | |||
| // similar to IMA adpcm | |||
| for (i = 0; i <= st; i++) | |||
| { | |||
| int delta = get_bits(&gb, c->nb_bits); | |||
| int step = step_table[c->status[i].step_index]; | |||
| long vpdiff = 0; // vpdiff = (delta+0.5)*step/4 | |||
| int k = k0; | |||
| do { | |||
| if (delta & k) | |||
| vpdiff += step; | |||
| step >>= 1; | |||
| k >>= 1; | |||
| } while(k); | |||
| vpdiff += step; | |||
| if (delta & signmask) | |||
| c->status[i].predictor -= vpdiff; | |||
| else | |||
| c->status[i].predictor += vpdiff; | |||
| c->status[i].step_index += table[delta & (~signmask)]; | |||
| c->status[i].step_index = clip(c->status[i].step_index, 0, 88); | |||
| c->status[i].predictor = clip(c->status[i].predictor, -32768, 32767); | |||
| *samples++ = c->status[i].predictor; | |||
| } | |||
| } | |||
| // src += get_bits_count(&gb)*8; | |||
| src += size; | |||
| break; | |||
| } | |||
| case CODEC_ID_ADPCM_YAMAHA: | |||
| while (src < buf + buf_size) { | |||
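The inner do/while in the SWF case accumulates vpdiff one magnitude bit at a time; together with the trailing vpdiff += step it is an integer approximation of (delta + 0.5) * step / 2^(nb_bits-2), which for 4-bit data reduces to the (delta+0.5)*step/4 noted in the comment. A standalone comparison of the bit loop against that closed form (made-up step size):

    #include <stdio.h>

    int main(void)
    {
        int nb_bits = 4, step0 = 1000, delta;
        int k0 = 1 << (nb_bits - 2);

        for (delta = 0; delta < (1 << (nb_bits - 1)); delta++) {
            int step = step0, vpdiff = 0, k = k0;
            do {
                if (delta & k)
                    vpdiff += step;
                step >>= 1;
                k >>= 1;
            } while (k);
            vpdiff += step;                /* the 0.5 LSB rounding term */
            printf("delta=%d bit-loop=%d closed-form=%d\n",
                   delta, vpdiff, (2 * delta + 1) * step0 / (2 * k0));
        }
        return 0;
    }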
| @@ -35,7 +35,7 @@ void avcodec_register_all(void) | |||
| static int inited = 0; | |||
| if (inited != 0) | |||
| return; | |||
| return; | |||
| inited = 1; | |||
| /* encoders */ | |||
| @@ -84,24 +84,24 @@ static inline uint64_t WORD_VEC(uint64_t x) | |||
| } *) (p))->__l) = l; \ | |||
| } while (0) | |||
| struct unaligned_long { uint64_t l; } __attribute__((packed)); | |||
| #define ldq_u(p) (*(const uint64_t *) (((uint64_t) (p)) & ~7ul)) | |||
| #define uldq(a) (((const struct unaligned_long *) (a))->l) | |||
| #define ldq_u(p) (*(const uint64_t *) (((uint64_t) (p)) & ~7ul)) | |||
| #define uldq(a) (((const struct unaligned_long *) (a))->l) | |||
| #if GNUC_PREREQ(3,3) | |||
| #define prefetch(p) __builtin_prefetch((p), 0, 1) | |||
| #define prefetch_en(p) __builtin_prefetch((p), 0, 0) | |||
| #define prefetch_m(p) __builtin_prefetch((p), 1, 1) | |||
| #define prefetch_men(p) __builtin_prefetch((p), 1, 0) | |||
| #define cmpbge __builtin_alpha_cmpbge | |||
| #define cmpbge __builtin_alpha_cmpbge | |||
| /* Avoid warnings. */ | |||
| #define extql(a, b) __builtin_alpha_extql(a, (uint64_t) (b)) | |||
| #define extwl(a, b) __builtin_alpha_extwl(a, (uint64_t) (b)) | |||
| #define extqh(a, b) __builtin_alpha_extqh(a, (uint64_t) (b)) | |||
| #define zap __builtin_alpha_zap | |||
| #define zapnot __builtin_alpha_zapnot | |||
| #define amask __builtin_alpha_amask | |||
| #define implver __builtin_alpha_implver | |||
| #define rpcc __builtin_alpha_rpcc | |||
| #define extql(a, b) __builtin_alpha_extql(a, (uint64_t) (b)) | |||
| #define extwl(a, b) __builtin_alpha_extwl(a, (uint64_t) (b)) | |||
| #define extqh(a, b) __builtin_alpha_extqh(a, (uint64_t) (b)) | |||
| #define zap __builtin_alpha_zap | |||
| #define zapnot __builtin_alpha_zapnot | |||
| #define amask __builtin_alpha_amask | |||
| #define implver __builtin_alpha_implver | |||
| #define rpcc __builtin_alpha_rpcc | |||
| #else | |||
| #define prefetch(p) asm volatile("ldl $31,%0" : : "m"(*(const char *) (p)) : "memory") | |||
| #define prefetch_en(p) asm volatile("ldq $31,%0" : : "m"(*(const char *) (p)) : "memory") | |||
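ldq_u fetches the aligned quadword that contains the given address (the low three address bits are masked off), and uldq performs an unaligned load; on Alpha the unaligned load is classically composed from two such aligned loads plus the extract instructions (extql/extqh). A portable little-endian emulation of that composition, purely for illustration:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    static uint64_t uldq_emulated(const uint8_t *base, size_t offset)
    {
        uint64_t lo, hi;
        size_t aligned = offset & ~(size_t)7;
        unsigned shift = (unsigned)(offset & 7) * 8;

        memcpy(&lo, base + aligned, 8);            /* ldq_u(p)     */
        if (shift == 0)
            return lo;
        memcpy(&hi, base + aligned + 8, 8);        /* ldq_u(p + 7) */
        return (lo >> shift) | (hi << (64 - shift));
    }

    int main(void)
    {
        uint8_t buf[16];
        int i;

        for (i = 0; i < 16; i++)
            buf[i] = (uint8_t)i;
        /* bytes 3..10, little-endian: prints a09080706050403 */
        printf("%llx\n", (unsigned long long)uldq_emulated(buf, 3));
        return 0;
    }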
| @@ -113,26 +113,26 @@ struct unaligned_long { uint64_t l; } __attribute__((packed)); | |||
| #define extqh(a, b) ({ uint64_t __r; asm ("extqh %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) | |||
| #define zap(a, b) ({ uint64_t __r; asm ("zap %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) | |||
| #define zapnot(a, b) ({ uint64_t __r; asm ("zapnot %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) | |||
| #define amask(a) ({ uint64_t __r; asm ("amask %1,%0" : "=r" (__r) : "rI" (a)); __r; }) | |||
| #define implver() ({ uint64_t __r; asm ("implver %0" : "=r" (__r)); __r; }) | |||
| #define rpcc() ({ uint64_t __r; asm volatile ("rpcc %0" : "=r" (__r)); __r; }) | |||
| #define amask(a) ({ uint64_t __r; asm ("amask %1,%0" : "=r" (__r) : "rI" (a)); __r; }) | |||
| #define implver() ({ uint64_t __r; asm ("implver %0" : "=r" (__r)); __r; }) | |||
| #define rpcc() ({ uint64_t __r; asm volatile ("rpcc %0" : "=r" (__r)); __r; }) | |||
| #endif | |||
| #define wh64(p) asm volatile("wh64 (%0)" : : "r"(p) : "memory") | |||
| #if GNUC_PREREQ(3,3) && defined(__alpha_max__) | |||
| #define minub8 __builtin_alpha_minub8 | |||
| #define minsb8 __builtin_alpha_minsb8 | |||
| #define minuw4 __builtin_alpha_minuw4 | |||
| #define minsw4 __builtin_alpha_minsw4 | |||
| #define maxub8 __builtin_alpha_maxub8 | |||
| #define maxsb8 __builtin_alpha_maxsb8 | |||
| #define maxuw4 __builtin_alpha_maxuw4 | |||
| #define maxsw4 __builtin_alpha_maxsw4 | |||
| #define perr __builtin_alpha_perr | |||
| #define pklb __builtin_alpha_pklb | |||
| #define pkwb __builtin_alpha_pkwb | |||
| #define unpkbl __builtin_alpha_unpkbl | |||
| #define unpkbw __builtin_alpha_unpkbw | |||
| #define minub8 __builtin_alpha_minub8 | |||
| #define minsb8 __builtin_alpha_minsb8 | |||
| #define minuw4 __builtin_alpha_minuw4 | |||
| #define minsw4 __builtin_alpha_minsw4 | |||
| #define maxub8 __builtin_alpha_maxub8 | |||
| #define maxsb8 __builtin_alpha_maxsb8 | |||
| #define maxuw4 __builtin_alpha_maxuw4 | |||
| #define maxsw4 __builtin_alpha_maxsw4 | |||
| #define perr __builtin_alpha_perr | |||
| #define pklb __builtin_alpha_pklb | |||
| #define pkwb __builtin_alpha_pkwb | |||
| #define unpkbl __builtin_alpha_unpkbl | |||
| #define unpkbw __builtin_alpha_unpkbw | |||
| #else | |||
| #define minub8(a, b) ({ uint64_t __r; asm (".arch ev6; minub8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) | |||
| #define minsb8(a, b) ({ uint64_t __r; asm (".arch ev6; minsb8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) | |||
| @@ -143,13 +143,13 @@ struct unaligned_long { uint64_t l; } __attribute__((packed)); | |||
| #define maxuw4(a, b) ({ uint64_t __r; asm (".arch ev6; maxuw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) | |||
| #define maxsw4(a, b) ({ uint64_t __r; asm (".arch ev6; maxsw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) | |||
| #define perr(a, b) ({ uint64_t __r; asm (".arch ev6; perr %r1,%r2,%0" : "=r" (__r) : "%rJ" (a), "rJ" (b)); __r; }) | |||
| #define pklb(a) ({ uint64_t __r; asm (".arch ev6; pklb %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) | |||
| #define pkwb(a) ({ uint64_t __r; asm (".arch ev6; pkwb %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) | |||
| #define unpkbl(a) ({ uint64_t __r; asm (".arch ev6; unpkbl %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) | |||
| #define unpkbw(a) ({ uint64_t __r; asm (".arch ev6; unpkbw %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) | |||
| #define pklb(a) ({ uint64_t __r; asm (".arch ev6; pklb %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) | |||
| #define pkwb(a) ({ uint64_t __r; asm (".arch ev6; pkwb %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) | |||
| #define unpkbl(a) ({ uint64_t __r; asm (".arch ev6; unpkbl %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) | |||
| #define unpkbw(a) ({ uint64_t __r; asm (".arch ev6; unpkbw %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) | |||
| #endif | |||
| #elif defined(__DECC) /* Digital/Compaq/hp "ccc" compiler */ | |||
| #elif defined(__DECC) /* Digital/Compaq/hp "ccc" compiler */ | |||
| #include <c_asm.h> | |||
| #define ldq(p) (*(const uint64_t *) (p)) | |||
| @@ -157,7 +157,7 @@ struct unaligned_long { uint64_t l; } __attribute__((packed)); | |||
| #define stq(l, p) do { *(uint64_t *) (p) = (l); } while (0) | |||
| #define stl(l, p) do { *(int32_t *) (p) = (l); } while (0) | |||
| #define ldq_u(a) asm ("ldq_u %v0,0(%a0)", a) | |||
| #define uldq(a) (*(const __unaligned uint64_t *) (a)) | |||
| #define uldq(a) (*(const __unaligned uint64_t *) (a)) | |||
| #define cmpbge(a, b) asm ("cmpbge %a0,%a1,%v0", a, b) | |||
| #define extql(a, b) asm ("extql %a0,%a1,%v0", a, b) | |||
| #define extwl(a, b) asm ("extwl %a0,%a1,%v0", a, b) | |||
| @@ -166,7 +166,7 @@ struct unaligned_long { uint64_t l; } __attribute__((packed)); | |||
| #define zapnot(a, b) asm ("zapnot %a0,%a1,%v0", a, b) | |||
| #define amask(a) asm ("amask %a0,%v0", a) | |||
| #define implver() asm ("implver %v0") | |||
| #define rpcc() asm ("rpcc %v0") | |||
| #define rpcc() asm ("rpcc %v0") | |||
| #define minub8(a, b) asm ("minub8 %a0,%a1,%v0", a, b) | |||
| #define minsb8(a, b) asm ("minsb8 %a0,%a1,%v0", a, b) | |||
| #define minuw4(a, b) asm ("minuw4 %a0,%a1,%v0", a, b) | |||
| @@ -71,7 +71,7 @@ $unaligned: | |||
| addq a1, a2, a1 | |||
| nop | |||
| ldq_u t4, 0(a1) | |||
| ldq_u t4, 0(a1) | |||
| ldq_u t5, 8(a1) | |||
| addq a1, a2, a1 | |||
| nop | |||
| @@ -120,20 +120,20 @@ $aligned: | |||
| addq a1, a2, a1 | |||
| ldq t3, 0(a1) | |||
| addq a0, a2, t4 | |||
| addq a1, a2, a1 | |||
| addq t4, a2, t5 | |||
| subq a3, 4, a3 | |||
| addq a0, a2, t4 | |||
| addq a1, a2, a1 | |||
| addq t4, a2, t5 | |||
| subq a3, 4, a3 | |||
| stq t0, 0(a0) | |||
| addq t5, a2, t6 | |||
| stq t1, 0(t4) | |||
| addq t6, a2, a0 | |||
| stq t0, 0(a0) | |||
| addq t5, a2, t6 | |||
| stq t1, 0(t4) | |||
| addq t6, a2, a0 | |||
| stq t2, 0(t5) | |||
| stq t3, 0(t6) | |||
| stq t2, 0(t5) | |||
| stq t3, 0(t6) | |||
| bne a3, $aligned | |||
| bne a3, $aligned | |||
| ret | |||
| .end put_pixels_axp_asm | |||
| @@ -116,7 +116,7 @@ int pix_abs8x8_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | |||
| return result; | |||
| } | |||
| #if 0 /* now done in assembly */ | |||
| #if 0 /* now done in assembly */ | |||
| int pix_abs16x16_mvi(uint8_t *pix1, uint8_t *pix2, int line_size) | |||
| { | |||
| int result = 0; | |||
| @@ -285,7 +285,7 @@ void simple_idct_axp(DCTELEM *block) | |||
| stq(v, block + 1 * 4); | |||
| stq(w, block + 2 * 4); | |||
| stq(w, block + 3 * 4); | |||
| block += 4 * 4; | |||
| block += 4 * 4; | |||
| } | |||
| } else { | |||
| for (i = 0; i < 8; i++) | |||
| @@ -301,7 +301,7 @@ static int amr_nb_decode_frame(AVCodecContext * avctx, | |||
| static int amr_nb_encode_frame(AVCodecContext *avctx, | |||
| unsigned char *frame/*out*/, int buf_size, void *data/*in*/) | |||
| unsigned char *frame/*out*/, int buf_size, void *data/*in*/) | |||
| { | |||
| short serial_data[250] = {0}; | |||
| @@ -440,7 +440,7 @@ static int amr_nb_decode_frame(AVCodecContext * avctx, | |||
| } | |||
| static int amr_nb_encode_frame(AVCodecContext *avctx, | |||
| unsigned char *frame/*out*/, int buf_size, void *data/*in*/) | |||
| unsigned char *frame/*out*/, int buf_size, void *data/*in*/) | |||
| { | |||
| AMRContext *s = (AMRContext*)avctx->priv_data; | |||
| int written; | |||
| @@ -584,7 +584,7 @@ static int amr_wb_encode_close(AVCodecContext * avctx) | |||
| } | |||
| static int amr_wb_encode_frame(AVCodecContext *avctx, | |||
| unsigned char *frame/*out*/, int buf_size, void *data/*in*/) | |||
| unsigned char *frame/*out*/, int buf_size, void *data/*in*/) | |||
| { | |||
| AMRWBContext *s = (AMRWBContext*) avctx->priv_data; | |||
| int size = E_IF_encode(s->state, s->mode, data, frame, s->allow_dtx); | |||
| @@ -205,13 +205,13 @@ void dsputil_init_armv4l(DSPContext* c, AVCodecContext *avctx) | |||
| #endif | |||
| c->idct_put= j_rev_dct_ARM_put; | |||
| c->idct_add= j_rev_dct_ARM_add; | |||
| c->idct = j_rev_dct_ARM; | |||
| c->idct = j_rev_dct_ARM; | |||
| c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;/* FF_NO_IDCT_PERM */ | |||
| } else if (idct_algo==FF_IDCT_SIMPLEARM){ | |||
| c->idct_put= simple_idct_ARM_put; | |||
| c->idct_add= simple_idct_ARM_add; | |||
| c->idct = simple_idct_ARM; | |||
| c->idct_permutation_type= FF_NO_IDCT_PERM; | |||
| c->idct_put= simple_idct_ARM_put; | |||
| c->idct_add= simple_idct_ARM_add; | |||
| c->idct = simple_idct_ARM; | |||
| c->idct_permutation_type= FF_NO_IDCT_PERM; | |||
| #ifdef HAVE_IPP | |||
| } else if (idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_IPP){ | |||
| #else | |||
| @@ -138,10 +138,10 @@ void dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx) | |||
| mm_flags = mm_support(); | |||
| if (avctx->dsp_mask) { | |||
| if (avctx->dsp_mask & FF_MM_FORCE) | |||
| mm_flags |= (avctx->dsp_mask & 0xffff); | |||
| else | |||
| mm_flags &= ~(avctx->dsp_mask & 0xffff); | |||
| if (avctx->dsp_mask & FF_MM_FORCE) | |||
| mm_flags |= (avctx->dsp_mask & 0xffff); | |||
| else | |||
| mm_flags &= ~(avctx->dsp_mask & 0xffff); | |||
| } | |||
| if (!(mm_flags & MM_IWMMXT)) return; | |||
| @@ -1,6 +1,6 @@ | |||
| /* | |||
| C-like prototype : | |||
| void j_rev_dct_ARM(DCTBLOCK data) | |||
| void j_rev_dct_ARM(DCTBLOCK data) | |||
| With DCTBLOCK being a pointer to an array of 64 'signed shorts' | |||
| @@ -51,336 +51,336 @@ | |||
| #define FIX_M_1_961570560_ID 40 | |||
| #define FIX_M_2_562915447_ID 44 | |||
| #define FIX_0xFFFF_ID 48 | |||
| .text | |||
| .align | |||
| .text | |||
| .align | |||
| .global j_rev_dct_ARM | |||
| .global j_rev_dct_ARM | |||
| j_rev_dct_ARM: | |||
| stmdb sp!, { r4 - r12, lr } @ all callee saved regs | |||
| stmdb sp!, { r4 - r12, lr } @ all callee saved regs | |||
| sub sp, sp, #4 @ reserve some space on the stack | |||
| str r0, [ sp ] @ save the DCT pointer to the stack | |||
| sub sp, sp, #4 @ reserve some space on the stack | |||
| str r0, [ sp ] @ save the DCT pointer to the stack | |||
| mov lr, r0 @ lr = pointer to the current row | |||
| mov r12, #8 @ r12 = row-counter | |||
| add r11, pc, #(const_array-.-8) @ r11 = base pointer to the constants array | |||
| mov lr, r0 @ lr = pointer to the current row | |||
| mov r12, #8 @ r12 = row-counter | |||
| add r11, pc, #(const_array-.-8) @ r11 = base pointer to the constants array | |||
| row_loop: | |||
| ldrsh r0, [lr, # 0] @ r0 = 'd0' | |||
| ldrsh r1, [lr, # 8] @ r1 = 'd1' | |||
| @ Optimization for row that have all items except the first set to 0 | |||
| @ (this works as the DCTELEMS are always 4-byte aligned) | |||
| ldr r5, [lr, # 0] | |||
| ldr r2, [lr, # 4] | |||
| ldr r3, [lr, # 8] | |||
| ldr r4, [lr, #12] | |||
| orr r3, r3, r4 | |||
| orr r3, r3, r2 | |||
| orrs r5, r3, r5 | |||
| beq end_of_row_loop @ nothing to be done as ALL of them are '0' | |||
| orrs r2, r3, r1 | |||
| beq empty_row | |||
| ldrsh r2, [lr, # 2] @ r2 = 'd2' | |||
| ldrsh r4, [lr, # 4] @ r4 = 'd4' | |||
| ldrsh r6, [lr, # 6] @ r6 = 'd6' | |||
| ldr r3, [r11, #FIX_0_541196100_ID] | |||
| add r7, r2, r6 | |||
| ldr r5, [r11, #FIX_M_1_847759065_ID] | |||
| mul r7, r3, r7 @ r7 = z1 | |||
| ldr r3, [r11, #FIX_0_765366865_ID] | |||
| mla r6, r5, r6, r7 @ r6 = tmp2 | |||
| add r5, r0, r4 @ r5 = tmp0 | |||
| mla r2, r3, r2, r7 @ r2 = tmp3 | |||
| sub r3, r0, r4 @ r3 = tmp1 | |||
| add r0, r2, r5, lsl #13 @ r0 = tmp10 | |||
| rsb r2, r2, r5, lsl #13 @ r2 = tmp13 | |||
| add r4, r6, r3, lsl #13 @ r4 = tmp11 | |||
| rsb r3, r6, r3, lsl #13 @ r3 = tmp12 | |||
| stmdb sp!, { r0, r2, r3, r4 } @ save on the stack tmp10, tmp13, tmp12, tmp11 | |||
| ldrsh r3, [lr, #10] @ r3 = 'd3' | |||
| ldrsh r5, [lr, #12] @ r5 = 'd5' | |||
| ldrsh r7, [lr, #14] @ r7 = 'd7' | |||
| add r0, r3, r5 @ r0 = 'z2' | |||
| add r2, r1, r7 @ r2 = 'z1' | |||
| add r4, r3, r7 @ r4 = 'z3' | |||
| add r6, r1, r5 @ r6 = 'z4' | |||
| ldr r9, [r11, #FIX_1_175875602_ID] | |||
| add r8, r4, r6 @ r8 = z3 + z4 | |||
| ldr r10, [r11, #FIX_M_0_899976223_ID] | |||
| mul r8, r9, r8 @ r8 = 'z5' | |||
| ldr r9, [r11, #FIX_M_2_562915447_ID] | |||
| mul r2, r10, r2 @ r2 = 'z1' | |||
| ldr r10, [r11, #FIX_M_1_961570560_ID] | |||
| mul r0, r9, r0 @ r0 = 'z2' | |||
| ldr r9, [r11, #FIX_M_0_390180644_ID] | |||
| mla r4, r10, r4, r8 @ r4 = 'z3' | |||
| ldr r10, [r11, #FIX_0_298631336_ID] | |||
| mla r6, r9, r6, r8 @ r6 = 'z4' | |||
| ldr r9, [r11, #FIX_2_053119869_ID] | |||
| mla r7, r10, r7, r2 @ r7 = tmp0 + z1 | |||
| ldr r10, [r11, #FIX_3_072711026_ID] | |||
| mla r5, r9, r5, r0 @ r5 = tmp1 + z2 | |||
| ldr r9, [r11, #FIX_1_501321110_ID] | |||
| mla r3, r10, r3, r0 @ r3 = tmp2 + z2 | |||
| add r7, r7, r4 @ r7 = tmp0 | |||
| mla r1, r9, r1, r2 @ r1 = tmp3 + z1 | |||
| add r5, r5, r6 @ r5 = tmp1 | |||
| add r3, r3, r4 @ r3 = tmp2 | |||
| add r1, r1, r6 @ r1 = tmp3 | |||
| ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp12 / r6 = tmp11 | |||
| @ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0 | |||
| @ Compute DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS) | |||
| add r8, r0, r1 | |||
| add r8, r8, #(1<<10) | |||
| mov r8, r8, asr #11 | |||
| strh r8, [lr, # 0] | |||
| @ Compute DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS) | |||
| sub r8, r0, r1 | |||
| add r8, r8, #(1<<10) | |||
| mov r8, r8, asr #11 | |||
| strh r8, [lr, #14] | |||
| @ Compute DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS) | |||
| add r8, r6, r3 | |||
| add r8, r8, #(1<<10) | |||
| mov r8, r8, asr #11 | |||
| strh r8, [lr, # 2] | |||
| @ Compute DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS) | |||
| sub r8, r6, r3 | |||
| add r8, r8, #(1<<10) | |||
| mov r8, r8, asr #11 | |||
| strh r8, [lr, #12] | |||
| @ Compute DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS) | |||
| add r8, r4, r5 | |||
| add r8, r8, #(1<<10) | |||
| mov r8, r8, asr #11 | |||
| strh r8, [lr, # 4] | |||
| @ Compute DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS) | |||
| sub r8, r4, r5 | |||
| add r8, r8, #(1<<10) | |||
| mov r8, r8, asr #11 | |||
| strh r8, [lr, #10] | |||
| @ Compute DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS) | |||
| add r8, r2, r7 | |||
| add r8, r8, #(1<<10) | |||
| mov r8, r8, asr #11 | |||
| strh r8, [lr, # 6] | |||
| @ Compute DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS) | |||
| sub r8, r2, r7 | |||
| add r8, r8, #(1<<10) | |||
| mov r8, r8, asr #11 | |||
| strh r8, [lr, # 8] | |||
| @ End of row loop | |||
| add lr, lr, #16 | |||
| subs r12, r12, #1 | |||
| bne row_loop | |||
| beq start_column_loop | |||
| ldrsh r0, [lr, # 0] @ r0 = 'd0' | |||
| ldrsh r1, [lr, # 8] @ r1 = 'd1' | |||
| @ Optimization for row that have all items except the first set to 0 | |||
| @ (this works as the DCTELEMS are always 4-byte aligned) | |||
| ldr r5, [lr, # 0] | |||
| ldr r2, [lr, # 4] | |||
| ldr r3, [lr, # 8] | |||
| ldr r4, [lr, #12] | |||
| orr r3, r3, r4 | |||
| orr r3, r3, r2 | |||
| orrs r5, r3, r5 | |||
| beq end_of_row_loop @ nothing to be done as ALL of them are '0' | |||
| orrs r2, r3, r1 | |||
| beq empty_row | |||
| ldrsh r2, [lr, # 2] @ r2 = 'd2' | |||
| ldrsh r4, [lr, # 4] @ r4 = 'd4' | |||
| ldrsh r6, [lr, # 6] @ r6 = 'd6' | |||
| ldr r3, [r11, #FIX_0_541196100_ID] | |||
| add r7, r2, r6 | |||
| ldr r5, [r11, #FIX_M_1_847759065_ID] | |||
| mul r7, r3, r7 @ r7 = z1 | |||
| ldr r3, [r11, #FIX_0_765366865_ID] | |||
| mla r6, r5, r6, r7 @ r6 = tmp2 | |||
| add r5, r0, r4 @ r5 = tmp0 | |||
| mla r2, r3, r2, r7 @ r2 = tmp3 | |||
| sub r3, r0, r4 @ r3 = tmp1 | |||
| add r0, r2, r5, lsl #13 @ r0 = tmp10 | |||
| rsb r2, r2, r5, lsl #13 @ r2 = tmp13 | |||
| add r4, r6, r3, lsl #13 @ r4 = tmp11 | |||
| rsb r3, r6, r3, lsl #13 @ r3 = tmp12 | |||
| stmdb sp!, { r0, r2, r3, r4 } @ save on the stack tmp10, tmp13, tmp12, tmp11 | |||
| ldrsh r3, [lr, #10] @ r3 = 'd3' | |||
| ldrsh r5, [lr, #12] @ r5 = 'd5' | |||
| ldrsh r7, [lr, #14] @ r7 = 'd7' | |||
| add r0, r3, r5 @ r0 = 'z2' | |||
| add r2, r1, r7 @ r2 = 'z1' | |||
| add r4, r3, r7 @ r4 = 'z3' | |||
| add r6, r1, r5 @ r6 = 'z4' | |||
| ldr r9, [r11, #FIX_1_175875602_ID] | |||
| add r8, r4, r6 @ r8 = z3 + z4 | |||
| ldr r10, [r11, #FIX_M_0_899976223_ID] | |||
| mul r8, r9, r8 @ r8 = 'z5' | |||
| ldr r9, [r11, #FIX_M_2_562915447_ID] | |||
| mul r2, r10, r2 @ r2 = 'z1' | |||
| ldr r10, [r11, #FIX_M_1_961570560_ID] | |||
| mul r0, r9, r0 @ r0 = 'z2' | |||
| ldr r9, [r11, #FIX_M_0_390180644_ID] | |||
| mla r4, r10, r4, r8 @ r4 = 'z3' | |||
| ldr r10, [r11, #FIX_0_298631336_ID] | |||
| mla r6, r9, r6, r8 @ r6 = 'z4' | |||
| ldr r9, [r11, #FIX_2_053119869_ID] | |||
| mla r7, r10, r7, r2 @ r7 = tmp0 + z1 | |||
| ldr r10, [r11, #FIX_3_072711026_ID] | |||
| mla r5, r9, r5, r0 @ r5 = tmp1 + z2 | |||
| ldr r9, [r11, #FIX_1_501321110_ID] | |||
| mla r3, r10, r3, r0 @ r3 = tmp2 + z2 | |||
| add r7, r7, r4 @ r7 = tmp0 | |||
| mla r1, r9, r1, r2 @ r1 = tmp3 + z1 | |||
| add r5, r5, r6 @ r5 = tmp1 | |||
| add r3, r3, r4 @ r3 = tmp2 | |||
| add r1, r1, r6 @ r1 = tmp3 | |||
| ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp12 / r6 = tmp11 | |||
| @ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0 | |||
| @ Compute DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS) | |||
| add r8, r0, r1 | |||
| add r8, r8, #(1<<10) | |||
| mov r8, r8, asr #11 | |||
| strh r8, [lr, # 0] | |||
| @ Compute DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS) | |||
| sub r8, r0, r1 | |||
| add r8, r8, #(1<<10) | |||
| mov r8, r8, asr #11 | |||
| strh r8, [lr, #14] | |||
| @ Compute DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS) | |||
| add r8, r6, r3 | |||
| add r8, r8, #(1<<10) | |||
| mov r8, r8, asr #11 | |||
| strh r8, [lr, # 2] | |||
| @ Compute DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS) | |||
| sub r8, r6, r3 | |||
| add r8, r8, #(1<<10) | |||
| mov r8, r8, asr #11 | |||
| strh r8, [lr, #12] | |||
| @ Compute DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS) | |||
| add r8, r4, r5 | |||
| add r8, r8, #(1<<10) | |||
| mov r8, r8, asr #11 | |||
| strh r8, [lr, # 4] | |||
| @ Compute DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS) | |||
| sub r8, r4, r5 | |||
| add r8, r8, #(1<<10) | |||
| mov r8, r8, asr #11 | |||
| strh r8, [lr, #10] | |||
| @ Compute DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS) | |||
| add r8, r2, r7 | |||
| add r8, r8, #(1<<10) | |||
| mov r8, r8, asr #11 | |||
| strh r8, [lr, # 6] | |||
| @ Compute DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS) | |||
| sub r8, r2, r7 | |||
| add r8, r8, #(1<<10) | |||
| mov r8, r8, asr #11 | |||
| strh r8, [lr, # 8] | |||
| @ End of row loop | |||
| add lr, lr, #16 | |||
| subs r12, r12, #1 | |||
| bne row_loop | |||
| beq start_column_loop | |||
| empty_row: | |||
| ldr r1, [r11, #FIX_0xFFFF_ID] | |||
| mov r0, r0, lsl #2 | |||
| and r0, r0, r1 | |||
| add r0, r0, r0, lsl #16 | |||
| str r0, [lr, # 0] | |||
| str r0, [lr, # 4] | |||
| str r0, [lr, # 8] | |||
| str r0, [lr, #12] | |||
| ldr r1, [r11, #FIX_0xFFFF_ID] | |||
| mov r0, r0, lsl #2 | |||
| and r0, r0, r1 | |||
| add r0, r0, r0, lsl #16 | |||
| str r0, [lr, # 0] | |||
| str r0, [lr, # 4] | |||
| str r0, [lr, # 8] | |||
| str r0, [lr, #12] | |||
| end_of_row_loop: | |||
| @ End of loop | |||
| add lr, lr, #16 | |||
| subs r12, r12, #1 | |||
| bne row_loop | |||
| @ End of loop | |||
| add lr, lr, #16 | |||
| subs r12, r12, #1 | |||
| bne row_loop | |||
| start_column_loop: | |||
| @ Start of column loop | |||
| ldr lr, [ sp ] | |||
| mov r12, #8 | |||
| @ Start of column loop | |||
| ldr lr, [ sp ] | |||
| mov r12, #8 | |||
| column_loop: | |||
| ldrsh r0, [lr, #( 0*8)] @ r0 = 'd0' | |||
| ldrsh r2, [lr, #( 4*8)] @ r2 = 'd2' | |||
| ldrsh r4, [lr, #( 8*8)] @ r4 = 'd4' | |||
| ldrsh r6, [lr, #(12*8)] @ r6 = 'd6' | |||
| ldr r3, [r11, #FIX_0_541196100_ID] | |||
| add r1, r2, r6 | |||
| ldr r5, [r11, #FIX_M_1_847759065_ID] | |||
| mul r1, r3, r1 @ r1 = z1 | |||
| ldr r3, [r11, #FIX_0_765366865_ID] | |||
| mla r6, r5, r6, r1 @ r6 = tmp2 | |||
| add r5, r0, r4 @ r5 = tmp0 | |||
| mla r2, r3, r2, r1 @ r2 = tmp3 | |||
| sub r3, r0, r4 @ r3 = tmp1 | |||
| add r0, r2, r5, lsl #13 @ r0 = tmp10 | |||
| rsb r2, r2, r5, lsl #13 @ r2 = tmp13 | |||
| add r4, r6, r3, lsl #13 @ r4 = tmp11 | |||
| rsb r6, r6, r3, lsl #13 @ r6 = tmp12 | |||
| ldrsh r1, [lr, #( 2*8)] @ r1 = 'd1' | |||
| ldrsh r3, [lr, #( 6*8)] @ r3 = 'd3' | |||
| ldrsh r5, [lr, #(10*8)] @ r5 = 'd5' | |||
| ldrsh r7, [lr, #(14*8)] @ r7 = 'd7' | |||
| @ Check for empty odd column (happens about 20 to 25 % of the time according to my stats) | |||
| orr r9, r1, r3 | |||
| orr r10, r5, r7 | |||
| orrs r10, r9, r10 | |||
| beq empty_odd_column | |||
| stmdb sp!, { r0, r2, r4, r6 } @ save on the stack tmp10, tmp13, tmp12, tmp11 | |||
| add r0, r3, r5 @ r0 = 'z2' | |||
| add r2, r1, r7 @ r2 = 'z1' | |||
| add r4, r3, r7 @ r4 = 'z3' | |||
| add r6, r1, r5 @ r6 = 'z4' | |||
| ldr r9, [r11, #FIX_1_175875602_ID] | |||
| add r8, r4, r6 | |||
| ldr r10, [r11, #FIX_M_0_899976223_ID] | |||
| mul r8, r9, r8 @ r8 = 'z5' | |||
| ldr r9, [r11, #FIX_M_2_562915447_ID] | |||
| mul r2, r10, r2 @ r2 = 'z1' | |||
| ldr r10, [r11, #FIX_M_1_961570560_ID] | |||
| mul r0, r9, r0 @ r0 = 'z2' | |||
| ldr r9, [r11, #FIX_M_0_390180644_ID] | |||
| mla r4, r10, r4, r8 @ r4 = 'z3' | |||
| ldr r10, [r11, #FIX_0_298631336_ID] | |||
| mla r6, r9, r6, r8 @ r6 = 'z4' | |||
| ldr r9, [r11, #FIX_2_053119869_ID] | |||
| mla r7, r10, r7, r2 @ r7 = tmp0 + z1 | |||
| ldr r10, [r11, #FIX_3_072711026_ID] | |||
| mla r5, r9, r5, r0 @ r5 = tmp1 + z2 | |||
| ldr r9, [r11, #FIX_1_501321110_ID] | |||
| mla r3, r10, r3, r0 @ r3 = tmp2 + z2 | |||
| add r7, r7, r4 @ r7 = tmp0 | |||
| mla r1, r9, r1, r2 @ r1 = tmp3 + z1 | |||
| add r5, r5, r6 @ r5 = tmp1 | |||
| add r3, r3, r4 @ r3 = tmp2 | |||
| add r1, r1, r6 @ r1 = tmp3 | |||
| ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp11 / r6 = tmp12 | |||
| @ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0 | |||
| @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3) | |||
| add r8, r0, r1 | |||
| add r8, r8, #(1<<17) | |||
| mov r8, r8, asr #18 | |||
| strh r8, [lr, #( 0*8)] | |||
| @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3) | |||
| sub r8, r0, r1 | |||
| add r8, r8, #(1<<17) | |||
| mov r8, r8, asr #18 | |||
| strh r8, [lr, #(14*8)] | |||
| @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3) | |||
| add r8, r4, r3 | |||
| add r8, r8, #(1<<17) | |||
| mov r8, r8, asr #18 | |||
| strh r8, [lr, #( 2*8)] | |||
| @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3) | |||
| sub r8, r4, r3 | |||
| add r8, r8, #(1<<17) | |||
| mov r8, r8, asr #18 | |||
| strh r8, [lr, #(12*8)] | |||
| @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3) | |||
| add r8, r6, r5 | |||
| add r8, r8, #(1<<17) | |||
| mov r8, r8, asr #18 | |||
| strh r8, [lr, #( 4*8)] | |||
| @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3) | |||
| sub r8, r6, r5 | |||
| add r8, r8, #(1<<17) | |||
| mov r8, r8, asr #18 | |||
| strh r8, [lr, #(10*8)] | |||
| @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3) | |||
| add r8, r2, r7 | |||
| add r8, r8, #(1<<17) | |||
| mov r8, r8, asr #18 | |||
| strh r8, [lr, #( 6*8)] | |||
| @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3) | |||
| sub r8, r2, r7 | |||
| add r8, r8, #(1<<17) | |||
| mov r8, r8, asr #18 | |||
| strh r8, [lr, #( 8*8)] | |||
| @ End of row loop | |||
| add lr, lr, #2 | |||
| subs r12, r12, #1 | |||
| bne column_loop | |||
| beq the_end | |||
| ldrsh r0, [lr, #( 0*8)] @ r0 = 'd0' | |||
| ldrsh r2, [lr, #( 4*8)] @ r2 = 'd2' | |||
| ldrsh r4, [lr, #( 8*8)] @ r4 = 'd4' | |||
| ldrsh r6, [lr, #(12*8)] @ r6 = 'd6' | |||
| ldr r3, [r11, #FIX_0_541196100_ID] | |||
| add r1, r2, r6 | |||
| ldr r5, [r11, #FIX_M_1_847759065_ID] | |||
| mul r1, r3, r1 @ r1 = z1 | |||
| ldr r3, [r11, #FIX_0_765366865_ID] | |||
| mla r6, r5, r6, r1 @ r6 = tmp2 | |||
| add r5, r0, r4 @ r5 = tmp0 | |||
| mla r2, r3, r2, r1 @ r2 = tmp3 | |||
| sub r3, r0, r4 @ r3 = tmp1 | |||
| add r0, r2, r5, lsl #13 @ r0 = tmp10 | |||
| rsb r2, r2, r5, lsl #13 @ r2 = tmp13 | |||
| add r4, r6, r3, lsl #13 @ r4 = tmp11 | |||
| rsb r6, r6, r3, lsl #13 @ r6 = tmp12 | |||
| ldrsh r1, [lr, #( 2*8)] @ r1 = 'd1' | |||
| ldrsh r3, [lr, #( 6*8)] @ r3 = 'd3' | |||
| ldrsh r5, [lr, #(10*8)] @ r5 = 'd5' | |||
| ldrsh r7, [lr, #(14*8)] @ r7 = 'd7' | |||
| @ Check for empty odd column (happens about 20 to 25 % of the time according to my stats) | |||
| orr r9, r1, r3 | |||
| orr r10, r5, r7 | |||
| orrs r10, r9, r10 | |||
| beq empty_odd_column | |||
| stmdb sp!, { r0, r2, r4, r6 } @ save on the stack tmp10, tmp13, tmp12, tmp11 | |||
| add r0, r3, r5 @ r0 = 'z2' | |||
| add r2, r1, r7 @ r2 = 'z1' | |||
| add r4, r3, r7 @ r4 = 'z3' | |||
| add r6, r1, r5 @ r6 = 'z4' | |||
| ldr r9, [r11, #FIX_1_175875602_ID] | |||
| add r8, r4, r6 | |||
| ldr r10, [r11, #FIX_M_0_899976223_ID] | |||
| mul r8, r9, r8 @ r8 = 'z5' | |||
| ldr r9, [r11, #FIX_M_2_562915447_ID] | |||
| mul r2, r10, r2 @ r2 = 'z1' | |||
| ldr r10, [r11, #FIX_M_1_961570560_ID] | |||
| mul r0, r9, r0 @ r0 = 'z2' | |||
| ldr r9, [r11, #FIX_M_0_390180644_ID] | |||
| mla r4, r10, r4, r8 @ r4 = 'z3' | |||
| ldr r10, [r11, #FIX_0_298631336_ID] | |||
| mla r6, r9, r6, r8 @ r6 = 'z4' | |||
| ldr r9, [r11, #FIX_2_053119869_ID] | |||
| mla r7, r10, r7, r2 @ r7 = tmp0 + z1 | |||
| ldr r10, [r11, #FIX_3_072711026_ID] | |||
| mla r5, r9, r5, r0 @ r5 = tmp1 + z2 | |||
| ldr r9, [r11, #FIX_1_501321110_ID] | |||
| mla r3, r10, r3, r0 @ r3 = tmp2 + z2 | |||
| add r7, r7, r4 @ r7 = tmp0 | |||
| mla r1, r9, r1, r2 @ r1 = tmp3 + z1 | |||
| add r5, r5, r6 @ r5 = tmp1 | |||
| add r3, r3, r4 @ r3 = tmp2 | |||
| add r1, r1, r6 @ r1 = tmp3 | |||
| ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp11 / r6 = tmp12 | |||
| @ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0 | |||
| @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3) | |||
| add r8, r0, r1 | |||
| add r8, r8, #(1<<17) | |||
| mov r8, r8, asr #18 | |||
| strh r8, [lr, #( 0*8)] | |||
| @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3) | |||
| sub r8, r0, r1 | |||
| add r8, r8, #(1<<17) | |||
| mov r8, r8, asr #18 | |||
| strh r8, [lr, #(14*8)] | |||
| @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3) | |||
| add r8, r4, r3 | |||
| add r8, r8, #(1<<17) | |||
| mov r8, r8, asr #18 | |||
| strh r8, [lr, #( 2*8)] | |||
| @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3) | |||
| sub r8, r4, r3 | |||
| add r8, r8, #(1<<17) | |||
| mov r8, r8, asr #18 | |||
| strh r8, [lr, #(12*8)] | |||
| @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3) | |||
| add r8, r6, r5 | |||
| add r8, r8, #(1<<17) | |||
| mov r8, r8, asr #18 | |||
| strh r8, [lr, #( 4*8)] | |||
| @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3) | |||
| sub r8, r6, r5 | |||
| add r8, r8, #(1<<17) | |||
| mov r8, r8, asr #18 | |||
| strh r8, [lr, #(10*8)] | |||
| @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3) | |||
| add r8, r2, r7 | |||
| add r8, r8, #(1<<17) | |||
| mov r8, r8, asr #18 | |||
| strh r8, [lr, #( 6*8)] | |||
| @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3) | |||
| sub r8, r2, r7 | |||
| add r8, r8, #(1<<17) | |||
| mov r8, r8, asr #18 | |||
| strh r8, [lr, #( 8*8)] | |||
| @ End of row loop | |||
| add lr, lr, #2 | |||
| subs r12, r12, #1 | |||
| bne column_loop | |||
| beq the_end | |||
| empty_odd_column: | |||
| @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3) | |||
| @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3) | |||
| add r0, r0, #(1<<17) | |||
| mov r0, r0, asr #18 | |||
| strh r0, [lr, #( 0*8)] | |||
| strh r0, [lr, #(14*8)] | |||
| @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3) | |||
| @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3) | |||
| add r4, r4, #(1<<17) | |||
| mov r4, r4, asr #18 | |||
| strh r4, [lr, #( 2*8)] | |||
| strh r4, [lr, #(12*8)] | |||
| @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3) | |||
| @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3) | |||
| add r6, r6, #(1<<17) | |||
| mov r6, r6, asr #18 | |||
| strh r6, [lr, #( 4*8)] | |||
| strh r6, [lr, #(10*8)] | |||
| @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3) | |||
| @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3) | |||
| add r2, r2, #(1<<17) | |||
| mov r2, r2, asr #18 | |||
| strh r2, [lr, #( 6*8)] | |||
| strh r2, [lr, #( 8*8)] | |||
| @ End of row loop | |||
| add lr, lr, #2 | |||
| subs r12, r12, #1 | |||
| bne column_loop | |||
| @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3) | |||
| @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3) | |||
| add r0, r0, #(1<<17) | |||
| mov r0, r0, asr #18 | |||
| strh r0, [lr, #( 0*8)] | |||
| strh r0, [lr, #(14*8)] | |||
| @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3) | |||
| @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3) | |||
| add r4, r4, #(1<<17) | |||
| mov r4, r4, asr #18 | |||
| strh r4, [lr, #( 2*8)] | |||
| strh r4, [lr, #(12*8)] | |||
| @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3) | |||
| @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3) | |||
| add r6, r6, #(1<<17) | |||
| mov r6, r6, asr #18 | |||
| strh r6, [lr, #( 4*8)] | |||
| strh r6, [lr, #(10*8)] | |||
| @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3) | |||
| @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3) | |||
| add r2, r2, #(1<<17) | |||
| mov r2, r2, asr #18 | |||
| strh r2, [lr, #( 6*8)] | |||
| strh r2, [lr, #( 8*8)] | |||
| @ End of row loop | |||
| add lr, lr, #2 | |||
| subs r12, r12, #1 | |||
| bne column_loop | |||
| the_end: | |||
| @ The end.... | |||
| add sp, sp, #4 | |||
| ldmia sp!, { r4 - r12, pc } @ restore callee saved regs and return | |||
| @ The end.... | |||
| add sp, sp, #4 | |||
| ldmia sp!, { r4 - r12, pc } @ restore callee saved regs and return | |||
| const_array: | |||
| .align | |||
| .word FIX_0_298631336 | |||
| .word FIX_0_541196100 | |||
| .word FIX_0_765366865 | |||
| .word FIX_1_175875602 | |||
| .word FIX_1_501321110 | |||
| .word FIX_2_053119869 | |||
| .word FIX_3_072711026 | |||
| .word FIX_M_0_390180644 | |||
| .word FIX_M_0_899976223 | |||
| .word FIX_M_1_847759065 | |||
| .word FIX_M_1_961570560 | |||
| .word FIX_M_2_562915447 | |||
| .word FIX_0xFFFF | |||
| .align | |||
| .word FIX_0_298631336 | |||
| .word FIX_0_541196100 | |||
| .word FIX_0_765366865 | |||
| .word FIX_1_175875602 | |||
| .word FIX_1_501321110 | |||
| .word FIX_2_053119869 | |||
| .word FIX_3_072711026 | |||
| .word FIX_M_0_390180644 | |||
| .word FIX_M_0_899976223 | |||
| .word FIX_M_1_847759065 | |||
| .word FIX_M_1_961570560 | |||
| .word FIX_M_2_562915447 | |||
| .word FIX_0xFFFF | |||
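Every add #(1<<10) / asr #11 pair in the row pass above (and the #(1<<17) / asr #18 pairs in the column pass) is the DESCALE step of the fixed-point IDCT: a rounding division by a power of two. The same operation in C, relying on arithmetic right shift for negative values just as the assembly does:

    #include <stdio.h>

    static int descale(int x, int n)
    {
        /* Add half of the divisor, then shift: round-to-nearest division
         * by 2^n (implementation-defined for negative x in ISO C, but an
         * arithmetic shift in practice, matching the ARM asr). */
        return (x + (1 << (n - 1))) >> n;
    }

    int main(void)
    {
        printf("%d %d\n", descale(3000, 11), descale(-3000, 11));  /* 1 -1 */
        printf("%d\n", descale(123456789, 18));                    /* 471 */
        return 0;
    }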
| @@ -51,9 +51,9 @@ | |||
| #define COL_SHIFTED_1 524288 /* 1<< (COL_SHIFT-1) */ | |||
| .text | |||
| .align | |||
| .global simple_idct_ARM | |||
| .text | |||
| .align | |||
| .global simple_idct_ARM | |||
| simple_idct_ARM: | |||
| @@ void simple_idct_ARM(int16_t *block) | |||
| @@ -120,8 +120,8 @@ __b_evaluation: | |||
| ldr r11, [r12, #offW7] @ R11=W7 | |||
| mul r5, r10, r7 @ R5=W5*ROWr16[1]=b2 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle) | |||
| mul r7, r11, r7 @ R7=W7*ROWr16[1]=b3 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle) | |||
| teq r2, #0 @ if null avoid muls | |||
| mlane r0, r9, r2, r0 @ R0+=W3*ROWr16[3]=b0 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle) | |||
| teq r2, #0 @ if null avoid muls | |||
| mlane r0, r9, r2, r0 @ R0+=W3*ROWr16[3]=b0 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle) | |||
| rsbne r2, r2, #0 @ R2=-ROWr16[3] | |||
| mlane r1, r11, r2, r1 @ R1-=W7*ROWr16[3]=b1 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle) | |||
| mlane r5, r8, r2, r5 @ R5-=W1*ROWr16[3]=b2 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle) | |||
| @@ -147,7 +147,7 @@ __b_evaluation: | |||
| @@ MAC16(b3, -W1, row[7]); | |||
| @@ MAC16(b1, -W5, row[7]); | |||
| mov r3, r3, asr #16 @ R3=ROWr16[5] | |||
| teq r3, #0 @ if null avoid muls | |||
| teq r3, #0 @ if null avoid muls | |||
| mlane r0, r10, r3, r0 @ R0+=W5*ROWr16[5]=b0 | |||
| mov r4, r4, asr #16 @ R4=ROWr16[7] | |||
| mlane r5, r11, r3, r5 @ R5+=W7*ROWr16[5]=b2 | |||
| @@ -155,7 +155,7 @@ __b_evaluation: | |||
| rsbne r3, r3, #0 @ R3=-ROWr16[5] | |||
| mlane r1, r8, r3, r1 @ R7-=W1*ROWr16[5]=b1 | |||
| @@ R3 is free now | |||
| teq r4, #0 @ if null avoid muls | |||
| teq r4, #0 @ if null avoid muls | |||
| mlane r0, r11, r4, r0 @ R0+=W7*ROWr16[7]=b0 | |||
| mlane r5, r9, r4, r5 @ R5+=W3*ROWr16[7]=b2 | |||
| rsbne r4, r4, #0 @ R4=-ROWr16[7] | |||
| @@ -187,7 +187,7 @@ __a_evaluation: | |||
| teq r2, #0 | |||
| beq __end_bef_a_evaluation | |||
| add r2, r6, r11 @ R2=a0+W6*ROWr16[2] (a1) | |||
| add r2, r6, r11 @ R2=a0+W6*ROWr16[2] (a1) | |||
| mul r11, r8, r4 @ R11=W2*ROWr16[2] | |||
| sub r4, r6, r11 @ R4=a0-W2*ROWr16[2] (a3) | |||
| add r6, r6, r11 @ R6=a0+W2*ROWr16[2] (a0) | |||
| @@ -203,7 +203,7 @@ __a_evaluation: | |||
| @@ a2 -= W4*row[4] | |||
| @@ a3 += W4*row[4] | |||
| ldrsh r11, [r14, #8] @ R11=ROWr16[4] | |||
| teq r11, #0 @ if null avoid muls | |||
| teq r11, #0 @ if null avoid muls | |||
| mulne r11, r9, r11 @ R11=W4*ROWr16[4] | |||
| @@ R9 is free now | |||
| ldrsh r9, [r14, #12] @ R9=ROWr16[6] | |||
| @@ -212,7 +212,7 @@ __a_evaluation: | |||
| subne r3, r3, r11 @ R3-=W4*ROWr16[4] (a2) | |||
| addne r4, r4, r11 @ R4+=W4*ROWr16[4] (a3) | |||
| @@ W6 alone is no more useful, save W2*ROWr16[6] in it instead | |||
| teq r9, #0 @ if null avoid muls | |||
| teq r9, #0 @ if null avoid muls | |||
| mulne r11, r10, r9 @ R11=W6*ROWr16[6] | |||
| addne r6, r6, r11 @ R6+=W6*ROWr16[6] (a0) | |||
| mulne r10, r8, r9 @ R10=W2*ROWr16[6] | |||
| @@ -294,165 +294,165 @@ __end_row_loop: | |||
| @@ at this point, R0=block, R1-R11 (free) | |||
| @@ R12=__const_ptr_, R14=&block[n] | |||
| add r14, r0, #14 @ R14=&block[7], better start from the last col, and decrease the value until col=0, i.e. R14=block. | |||
| @@ at this point, R0=block, R1-R11 (free) | |||
| @@ R12=__const_ptr_, R14=&block[n] | |||
| add r14, r0, #14 @ R14=&block[7], better start from the last col, and decrease the value until col=0, i.e. R14=block. | |||
| __col_loop: | |||
| __b_evaluation2: | |||
| @@ at this point, R0=block (temp), R1-R11 (free) | |||
| @@ R12=__const_ptr_, R14=&block[n] | |||
| @@ proceed with b0-b3 first, followed by a0-a3 | |||
| @@ MUL16(b0, W1, col[8x1]); | |||
| @@ MUL16(b1, W3, col[8x1]); | |||
| @@ MUL16(b2, W5, col[8x1]); | |||
| @@ MUL16(b3, W7, col[8x1]); | |||
| @@ MAC16(b0, W3, col[8x3]); | |||
| @@ MAC16(b1, -W7, col[8x3]); | |||
| @@ MAC16(b2, -W1, col[8x3]); | |||
| @@ MAC16(b3, -W5, col[8x3]); | |||
| ldr r8, [r12, #offW1] @ R8=W1 | |||
| ldrsh r7, [r14, #16] | |||
| mul r0, r8, r7 @ R0=W1*ROWr16[1]=b0 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle) | |||
| ldr r9, [r12, #offW3] @ R9=W3 | |||
| ldr r10, [r12, #offW5] @ R10=W5 | |||
| mul r1, r9, r7 @ R1=W3*ROWr16[1]=b1 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle) | |||
| ldr r11, [r12, #offW7] @ R11=W7 | |||
| mul r5, r10, r7 @ R5=W5*ROWr16[1]=b2 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle) | |||
| ldrsh r2, [r14, #48] | |||
| mul r7, r11, r7 @ R7=W7*ROWr16[1]=b3 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle) | |||
| teq r2, #0 @ if 0, then avoid muls | |||
| mlane r0, r9, r2, r0 @ R0+=W3*ROWr16[3]=b0 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle) | |||
| rsbne r2, r2, #0 @ R2=-ROWr16[3] | |||
| mlane r1, r11, r2, r1 @ R1-=W7*ROWr16[3]=b1 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle) | |||
| mlane r5, r8, r2, r5 @ R5-=W1*ROWr16[3]=b2 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle) | |||
| mlane r7, r10, r2, r7 @ R7-=W5*ROWr16[3]=b3 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle) | |||
| @@ at this point, R0=b0, R1=b1, R2 (free), R3 (free), R4 (free), | |||
| @@ R5=b2, R6 (free), R7=b3, R8=W1, R9=W3, R10=W5, R11=W7, | |||
| @@ R12=__const_ptr_, R14=&block[n] | |||
| @@ MAC16(b0, W5, col[5x8]); | |||
| @@ MAC16(b2, W7, col[5x8]); | |||
| @@ MAC16(b3, W3, col[5x8]); | |||
| @@ MAC16(b1, -W1, col[5x8]); | |||
| @@ MAC16(b0, W7, col[7x8]); | |||
| @@ MAC16(b2, W3, col[7x8]); | |||
| @@ MAC16(b3, -W1, col[7x8]); | |||
| @@ MAC16(b1, -W5, col[7x8]); | |||
| ldrsh r3, [r14, #80] @ R3=COLr16[5x8] | |||
| teq r3, #0 @ if 0 then avoid muls | |||
| mlane r0, r10, r3, r0 @ R0+=W5*ROWr16[5x8]=b0 | |||
| mlane r5, r11, r3, r5 @ R5+=W7*ROWr16[5x8]=b2 | |||
| mlane r7, r9, r3, r7 @ R7+=W3*ROWr16[5x8]=b3 | |||
| rsbne r3, r3, #0 @ R3=-ROWr16[5x8] | |||
| ldrsh r4, [r14, #112] @ R4=COLr16[7x8] | |||
| mlane r1, r8, r3, r1 @ R7-=W1*ROWr16[5x8]=b1 | |||
| @@ R3 is free now | |||
| teq r4, #0 @ if 0 then avoid muls | |||
| mlane r0, r11, r4, r0 @ R0+=W7*ROWr16[7x8]=b0 | |||
| mlane r5, r9, r4, r5 @ R5+=W3*ROWr16[7x8]=b2 | |||
| rsbne r4, r4, #0 @ R4=-ROWr16[7x8] | |||
| mlane r7, r8, r4, r7 @ R7-=W1*ROWr16[7x8]=b3 | |||
| mlane r1, r10, r4, r1 @ R1-=W5*ROWr16[7x8]=b1 | |||
| @@ R4 is free now | |||
| @@ at this point, R0=block (temp), R1-R11 (free) | |||
| @@ R12=__const_ptr_, R14=&block[n] | |||
| @@ proceed with b0-b3 first, followed by a0-a3 | |||
| @@ MUL16(b0, W1, col[8x1]); | |||
| @@ MUL16(b1, W3, col[8x1]); | |||
| @@ MUL16(b2, W5, col[8x1]); | |||
| @@ MUL16(b3, W7, col[8x1]); | |||
| @@ MAC16(b0, W3, col[8x3]); | |||
| @@ MAC16(b1, -W7, col[8x3]); | |||
| @@ MAC16(b2, -W1, col[8x3]); | |||
| @@ MAC16(b3, -W5, col[8x3]); | |||
| ldr r8, [r12, #offW1] @ R8=W1 | |||
| ldrsh r7, [r14, #16] | |||
| mul r0, r8, r7 @ R0=W1*ROWr16[1]=b0 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle) | |||
| ldr r9, [r12, #offW3] @ R9=W3 | |||
| ldr r10, [r12, #offW5] @ R10=W5 | |||
| mul r1, r9, r7 @ R1=W3*ROWr16[1]=b1 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle) | |||
| ldr r11, [r12, #offW7] @ R11=W7 | |||
| mul r5, r10, r7 @ R5=W5*ROWr16[1]=b2 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle) | |||
| ldrsh r2, [r14, #48] | |||
| mul r7, r11, r7 @ R7=W7*ROWr16[1]=b3 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle) | |||
| teq r2, #0 @ if 0, then avoid muls | |||
| mlane r0, r9, r2, r0 @ R0+=W3*ROWr16[3]=b0 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle) | |||
| rsbne r2, r2, #0 @ R2=-ROWr16[3] | |||
| mlane r1, r11, r2, r1 @ R1-=W7*ROWr16[3]=b1 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle) | |||
| mlane r5, r8, r2, r5 @ R5-=W1*ROWr16[3]=b2 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle) | |||
| mlane r7, r10, r2, r7 @ R7-=W5*ROWr16[3]=b3 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle) | |||
| @@ at this point, R0=b0, R1=b1, R2 (free), R3 (free), R4 (free), | |||
| @@ R5=b2, R6 (free), R7=b3, R8=W1, R9=W3, R10=W5, R11=W7, | |||
| @@ R12=__const_ptr_, R14=&block[n] | |||
| @@ MAC16(b0, W5, col[5x8]); | |||
| @@ MAC16(b2, W7, col[5x8]); | |||
| @@ MAC16(b3, W3, col[5x8]); | |||
| @@ MAC16(b1, -W1, col[5x8]); | |||
| @@ MAC16(b0, W7, col[7x8]); | |||
| @@ MAC16(b2, W3, col[7x8]); | |||
| @@ MAC16(b3, -W1, col[7x8]); | |||
| @@ MAC16(b1, -W5, col[7x8]); | |||
| ldrsh r3, [r14, #80] @ R3=COLr16[5x8] | |||
| teq r3, #0 @ if 0 then avoid muls | |||
| mlane r0, r10, r3, r0 @ R0+=W5*ROWr16[5x8]=b0 | |||
| mlane r5, r11, r3, r5 @ R5+=W7*ROWr16[5x8]=b2 | |||
| mlane r7, r9, r3, r7 @ R7+=W3*ROWr16[5x8]=b3 | |||
| rsbne r3, r3, #0 @ R3=-ROWr16[5x8] | |||
| ldrsh r4, [r14, #112] @ R4=COLr16[7x8] | |||
| mlane r1, r8, r3, r1 @ R7-=W1*ROWr16[5x8]=b1 | |||
| @@ R3 is free now | |||
| teq r4, #0 @ if 0 then avoid muls | |||
| mlane r0, r11, r4, r0 @ R0+=W7*ROWr16[7x8]=b0 | |||
| mlane r5, r9, r4, r5 @ R5+=W3*ROWr16[7x8]=b2 | |||
| rsbne r4, r4, #0 @ R4=-ROWr16[7x8] | |||
| mlane r7, r8, r4, r7 @ R7-=W1*ROWr16[7x8]=b3 | |||
| mlane r1, r10, r4, r1 @ R1-=W5*ROWr16[7x8]=b1 | |||
| @@ R4 is free now | |||
| __end_b_evaluation2: | |||
| @@ at this point, R0=b0, R1=b1, R2 (free), R3 (free), R4 (free), | |||
| @@ R5=b2, R6 (free), R7=b3, R8 (free), R9 (free), R10 (free), R11 (free), | |||
| @@ R12=__const_ptr_, R14=&block[n] | |||
| @@ at this point, R0=b0, R1=b1, R2 (free), R3 (free), R4 (free), | |||
| @@ R5=b2, R6 (free), R7=b3, R8 (free), R9 (free), R10 (free), R11 (free), | |||
| @@ R12=__const_ptr_, R14=&block[n] | |||
| __a_evaluation2: | |||
| @@ a0 = (W4 * col[8x0]) + (1 << (COL_SHIFT - 1)); | |||
| @@ a1 = a0 + W6 * row[2]; | |||
| @@ a2 = a0 - W6 * row[2]; | |||
| @@ a3 = a0 - W2 * row[2]; | |||
| @@ a0 = a0 + W2 * row[2]; | |||
| ldrsh r6, [r14, #0] | |||
| ldr r9, [r12, #offW4] @ R9=W4 | |||
| mul r6, r9, r6 @ R6=W4*ROWr16[0] | |||
| ldr r10, [r12, #offW6] @ R10=W6 | |||
| ldrsh r4, [r14, #32] @ R4=ROWr16[2] (a3 not defined yet) | |||
| add r6, r6, #COL_SHIFTED_1 @ R6=W4*ROWr16[0] + 1<<(COL_SHIFT-1) (a0) | |||
| mul r11, r10, r4 @ R11=W6*ROWr16[2] | |||
| ldr r8, [r12, #offW2] @ R8=W2 | |||
| add r2, r6, r11 @ R2=a0+W6*ROWr16[2] (a1) | |||
| sub r3, r6, r11 @ R3=a0-W6*ROWr16[2] (a2) | |||
| mul r11, r8, r4 @ R11=W2*ROWr16[2] | |||
| sub r4, r6, r11 @ R4=a0-W2*ROWr16[2] (a3) | |||
| add r6, r6, r11 @ R6=a0+W2*ROWr16[2] (a0) | |||
| @@ at this point, R0=b0, R1=b1, R2=a1, R3=a2, R4=a3, | |||
| @@ R5=b2, R6=a0, R7=b3, R8=W2, R9=W4, R10=W6, R11 (free), | |||
| @@ R12=__const_ptr_, R14=&block[n] | |||
| @@ a0 += W4*row[4] | |||
| @@ a1 -= W4*row[4] | |||
| @@ a2 -= W4*row[4] | |||
| @@ a3 += W4*row[4] | |||
| ldrsh r11, [r14, #64] @ R11=ROWr16[4] | |||
| teq r11, #0 @ if null avoid muls | |||
| mulne r11, r9, r11 @ R11=W4*ROWr16[4] | |||
| @@ R9 is free now | |||
| addne r6, r6, r11 @ R6+=W4*ROWr16[4] (a0) | |||
| subne r2, r2, r11 @ R2-=W4*ROWr16[4] (a1) | |||
| subne r3, r3, r11 @ R3-=W4*ROWr16[4] (a2) | |||
| ldrsh r9, [r14, #96] @ R9=ROWr16[6] | |||
| addne r4, r4, r11 @ R4+=W4*ROWr16[4] (a3) | |||
| @@ W6 alone is no longer useful; save W2*ROWr16[6] in it instead | |||
| teq r9, #0 @ if null avoid muls | |||
| mulne r11, r10, r9 @ R11=W6*ROWr16[6] | |||
| addne r6, r6, r11 @ R6+=W6*ROWr16[6] (a0) | |||
| mulne r10, r8, r9 @ R10=W2*ROWr16[6] | |||
| @@ a0 += W6*row[6]; | |||
| @@ a3 -= W6*row[6]; | |||
| @@ a1 -= W2*row[6]; | |||
| @@ a2 += W2*row[6]; | |||
| subne r4, r4, r11 @ R4-=W6*ROWr16[6] (a3) | |||
| subne r2, r2, r10 @ R2-=W2*ROWr16[6] (a1) | |||
| addne r3, r3, r10 @ R3+=W2*ROWr16[6] (a2) | |||
| @@ a0 = (W4 * col[8x0]) + (1 << (COL_SHIFT - 1)); | |||
| @@ a1 = a0 + W6 * row[2]; | |||
| @@ a2 = a0 - W6 * row[2]; | |||
| @@ a3 = a0 - W2 * row[2]; | |||
| @@ a0 = a0 + W2 * row[2]; | |||
| ldrsh r6, [r14, #0] | |||
| ldr r9, [r12, #offW4] @ R9=W4 | |||
| mul r6, r9, r6 @ R6=W4*ROWr16[0] | |||
| ldr r10, [r12, #offW6] @ R10=W6 | |||
| ldrsh r4, [r14, #32] @ R4=ROWr16[2] (a3 not defined yet) | |||
| add r6, r6, #COL_SHIFTED_1 @ R6=W4*ROWr16[0] + 1<<(COL_SHIFT-1) (a0) | |||
| mul r11, r10, r4 @ R11=W6*ROWr16[2] | |||
| ldr r8, [r12, #offW2] @ R8=W2 | |||
| add r2, r6, r11 @ R2=a0+W6*ROWr16[2] (a1) | |||
| sub r3, r6, r11 @ R3=a0-W6*ROWr16[2] (a2) | |||
| mul r11, r8, r4 @ R11=W2*ROWr16[2] | |||
| sub r4, r6, r11 @ R4=a0-W2*ROWr16[2] (a3) | |||
| add r6, r6, r11 @ R6=a0+W2*ROWr16[2] (a0) | |||
| @@ at this point, R0=b0, R1=b1, R2=a1, R3=a2, R4=a3, | |||
| @@ R5=b2, R6=a0, R7=b3, R8=W2, R9=W4, R10=W6, R11 (free), | |||
| @@ R12=__const_ptr_, R14=&block[n] | |||
| @@ a0 += W4*row[4] | |||
| @@ a1 -= W4*row[4] | |||
| @@ a2 -= W4*row[4] | |||
| @@ a3 += W4*row[4] | |||
| ldrsh r11, [r14, #64] @ R11=ROWr16[4] | |||
| teq r11, #0 @ if null avoid muls | |||
| mulne r11, r9, r11 @ R11=W4*ROWr16[4] | |||
| @@ R9 is free now | |||
| addne r6, r6, r11 @ R6+=W4*ROWr16[4] (a0) | |||
| subne r2, r2, r11 @ R2-=W4*ROWr16[4] (a1) | |||
| subne r3, r3, r11 @ R3-=W4*ROWr16[4] (a2) | |||
| ldrsh r9, [r14, #96] @ R9=ROWr16[6] | |||
| addne r4, r4, r11 @ R4+=W4*ROWr16[4] (a3) | |||
| @@ W6 alone is no longer useful; save W2*ROWr16[6] in it instead | |||
| teq r9, #0 @ if null avoid muls | |||
| mulne r11, r10, r9 @ R11=W6*ROWr16[6] | |||
| addne r6, r6, r11 @ R6+=W6*ROWr16[6] (a0) | |||
| mulne r10, r8, r9 @ R10=W2*ROWr16[6] | |||
| @@ a0 += W6*row[6]; | |||
| @@ a3 -= W6*row[6]; | |||
| @@ a1 -= W2*row[6]; | |||
| @@ a2 += W2*row[6]; | |||
| subne r4, r4, r11 @ R4-=W6*ROWr16[6] (a3) | |||
| subne r2, r2, r10 @ R2-=W2*ROWr16[6] (a1) | |||
| addne r3, r3, r10 @ R3+=W2*ROWr16[6] (a2) | |||
| __end_a_evaluation2: | |||
| @@ at this point, R0=b0, R1=b1, R2=a1, R3=a2, R4=a3, | |||
| @@ R5=b2, R6=a0, R7=b3, R8 (free), R9 (free), R10 (free), R11 (free), | |||
| @@ R12=__const_ptr_, R14=&block[n] | |||
| @@ col[0 ] = ((a0 + b0) >> COL_SHIFT); | |||
| @@ col[8 ] = ((a1 + b1) >> COL_SHIFT); | |||
| @@ col[16] = ((a2 + b2) >> COL_SHIFT); | |||
| @@ col[24] = ((a3 + b3) >> COL_SHIFT); | |||
| @@ col[32] = ((a3 - b3) >> COL_SHIFT); | |||
| @@ col[40] = ((a2 - b2) >> COL_SHIFT); | |||
| @@ col[48] = ((a1 - b1) >> COL_SHIFT); | |||
| @@ col[56] = ((a0 - b0) >> COL_SHIFT); | |||
| @@@@@ no optimisation here @@@@@ | |||
| add r8, r6, r0 @ R8=a0+b0 | |||
| add r9, r2, r1 @ R9=a1+b1 | |||
| mov r8, r8, asr #COL_SHIFT | |||
| mov r9, r9, asr #COL_SHIFT | |||
| strh r8, [r14, #0] | |||
| strh r9, [r14, #16] | |||
| add r8, r3, r5 @ R8=a2+b2 | |||
| add r9, r4, r7 @ R9=a3+b3 | |||
| mov r8, r8, asr #COL_SHIFT | |||
| mov r9, r9, asr #COL_SHIFT | |||
| strh r8, [r14, #32] | |||
| strh r9, [r14, #48] | |||
| sub r8, r4, r7 @ R8=a3-b3 | |||
| sub r9, r3, r5 @ R9=a2-b2 | |||
| mov r8, r8, asr #COL_SHIFT | |||
| mov r9, r9, asr #COL_SHIFT | |||
| strh r8, [r14, #64] | |||
| strh r9, [r14, #80] | |||
| sub r8, r2, r1 @ R8=a1-b1 | |||
| sub r9, r6, r0 @ R9=a0-b0 | |||
| mov r8, r8, asr #COL_SHIFT | |||
| mov r9, r9, asr #COL_SHIFT | |||
| strh r8, [r14, #96] | |||
| strh r9, [r14, #112] | |||
| @@ at this point, R0=b0, R1=b1, R2=a1, R3=a2, R4=a3, | |||
| @@ R5=b2, R6=a0, R7=b3, R8 (free), R9 (free), R10 (free), R11 (free), | |||
| @@ R12=__const_ptr_, R14=&block[n] | |||
| @@ col[0 ] = ((a0 + b0) >> COL_SHIFT); | |||
| @@ col[8 ] = ((a1 + b1) >> COL_SHIFT); | |||
| @@ col[16] = ((a2 + b2) >> COL_SHIFT); | |||
| @@ col[24] = ((a3 + b3) >> COL_SHIFT); | |||
| @@ col[32] = ((a3 - b3) >> COL_SHIFT); | |||
| @@ col[40] = ((a2 - b2) >> COL_SHIFT); | |||
| @@ col[48] = ((a1 - b1) >> COL_SHIFT); | |||
| @@ col[56] = ((a0 - b0) >> COL_SHIFT); | |||
| @@@@@ no optimisation here @@@@@ | |||
| add r8, r6, r0 @ R8=a0+b0 | |||
| add r9, r2, r1 @ R9=a1+b1 | |||
| mov r8, r8, asr #COL_SHIFT | |||
| mov r9, r9, asr #COL_SHIFT | |||
| strh r8, [r14, #0] | |||
| strh r9, [r14, #16] | |||
| add r8, r3, r5 @ R8=a2+b2 | |||
| add r9, r4, r7 @ R9=a3+b3 | |||
| mov r8, r8, asr #COL_SHIFT | |||
| mov r9, r9, asr #COL_SHIFT | |||
| strh r8, [r14, #32] | |||
| strh r9, [r14, #48] | |||
| sub r8, r4, r7 @ R8=a3-b3 | |||
| sub r9, r3, r5 @ R9=a2-b2 | |||
| mov r8, r8, asr #COL_SHIFT | |||
| mov r9, r9, asr #COL_SHIFT | |||
| strh r8, [r14, #64] | |||
| strh r9, [r14, #80] | |||
| sub r8, r2, r1 @ R8=a1-b1 | |||
| sub r9, r6, r0 @ R9=a0-b0 | |||
| mov r8, r8, asr #COL_SHIFT | |||
| mov r9, r9, asr #COL_SHIFT | |||
| strh r8, [r14, #96] | |||
| strh r9, [r14, #112] | |||
| __end_col_loop: | |||
| @@ at this point, R0-R11 (free) | |||
| @@ R12=__const_ptr_, R14=&block[n] | |||
| ldr r0, [sp, #0] @ R0=block | |||
| teq r0, r14 @ compare current &block[n] to block, when block is reached, the loop is finished. | |||
| sub r14, r14, #2 | |||
| bne __col_loop | |||
| @@ at this point, R0-R11 (free) | |||
| @@ R12=__const_ptr_, R14=&block[n] | |||
| ldr r0, [sp, #0] @ R0=block | |||
| teq r0, r14 @ compare current &block[n] to block, when block is reached, the loop is finished. | |||
| sub r14, r14, #2 | |||
| bne __col_loop | |||
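The pseudocode comments above (col[0] = ((a0 + b0) >> COL_SHIFT) ... col[56] = ((a0 - b0) >> COL_SHIFT)) describe the butterfly that closes each column: the four even partial sums a0..a3 are combined with the four odd ones b0..b3, once added and once subtracted, in mirrored order. A minimal C sketch of that output stage, assuming a0 already carries the 1<<(COL_SHIFT-1) rounding bias as in the code above, and taking COL_SHIFT = 20 from the C simple_idct (an assumption; the value is not visible in this hunk):

```c
#include <stdint.h>

#define COL_SHIFT 20  /* assumed: value used by the C simple_idct column pass */

/* Write one column of an 8x8 block of int16_t coefficients from its even (a)
 * and odd (b) partial sums. col points at the top element of the column;
 * successive rows are 8 elements apart. a0 is expected to already contain the
 * 1 << (COL_SHIFT - 1) rounding term, as in the assembly above. */
static void idct_col_output(int16_t *col, int a0, int a1, int a2, int a3,
                            int b0, int b1, int b2, int b3)
{
    col[8 * 0] = (int16_t)((a0 + b0) >> COL_SHIFT);
    col[8 * 1] = (int16_t)((a1 + b1) >> COL_SHIFT);
    col[8 * 2] = (int16_t)((a2 + b2) >> COL_SHIFT);
    col[8 * 3] = (int16_t)((a3 + b3) >> COL_SHIFT);
    col[8 * 4] = (int16_t)((a3 - b3) >> COL_SHIFT);
    col[8 * 5] = (int16_t)((a2 - b2) >> COL_SHIFT);
    col[8 * 6] = (int16_t)((a1 - b1) >> COL_SHIFT);
    col[8 * 7] = (int16_t)((a0 - b0) >> COL_SHIFT);
}
```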
| @@ -466,15 +466,15 @@ __end_simple_idct_ARM: | |||
| @@ kind of sub-function, here not to overload the common case. | |||
| __end_bef_a_evaluation: | |||
| add r2, r6, r11 @ R2=a0+W6*ROWr16[2] (a1) | |||
| add r2, r6, r11 @ R2=a0+W6*ROWr16[2] (a1) | |||
| mul r11, r8, r4 @ R11=W2*ROWr16[2] | |||
| sub r4, r6, r11 @ R4=a0-W2*ROWr16[2] (a3) | |||
| add r6, r6, r11 @ R6=a0+W2*ROWr16[2] (a0) | |||
| bal __end_a_evaluation | |||
| bal __end_a_evaluation | |||
| __constant_ptr__: @@ see #defines at the beginning of the source code for values. | |||
| .align | |||
| .align | |||
| .word W1 | |||
| .word W2 | |||
| .word W3 | |||
| @@ -15,21 +15,21 @@ extern "C" { | |||
| #include <sys/types.h> /* size_t */ | |||
| //FIXME the following 2 really don't belong in here | |||
| #define FFMPEG_VERSION_INT 0x000409 | |||
| #define FFMPEG_VERSION "CVS" | |||
| #define FFMPEG_VERSION_INT 0x000409 | |||
| #define FFMPEG_VERSION "CVS" | |||
| #define AV_STRINGIFY(s) AV_TOSTRING(s) | |||
| #define AV_STRINGIFY(s) AV_TOSTRING(s) | |||
| #define AV_TOSTRING(s) #s | |||
| #define LIBAVCODEC_VERSION_INT ((51<<16)+(0<<8)+0) | |||
| #define LIBAVCODEC_VERSION 51.0.0 | |||
| #define LIBAVCODEC_BUILD LIBAVCODEC_VERSION_INT | |||
| #define LIBAVCODEC_VERSION_INT ((51<<16)+(0<<8)+0) | |||
| #define LIBAVCODEC_VERSION 51.0.0 | |||
| #define LIBAVCODEC_BUILD LIBAVCODEC_VERSION_INT | |||
| #define LIBAVCODEC_IDENT "Lavc" AV_STRINGIFY(LIBAVCODEC_VERSION) | |||
| #define LIBAVCODEC_IDENT "Lavc" AV_STRINGIFY(LIBAVCODEC_VERSION) | |||
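LIBAVCODEC_VERSION_INT packs major, minor and micro into one integer (major<<16 | minor<<8 | micro), so version checks reduce to a plain integer comparison. A small standalone illustration; the unpacking macros are mine, not part of avcodec.h:

```c
#include <stdio.h>

#define LIBAVCODEC_VERSION_INT ((51<<16)+(0<<8)+0)

/* hypothetical helpers for pulling the fields back out of the packed value */
#define VERSION_MAJOR(v) ((v) >> 16)
#define VERSION_MINOR(v) (((v) >> 8) & 0xff)
#define VERSION_MICRO(v) ((v) & 0xff)

int main(void)
{
    int v = LIBAVCODEC_VERSION_INT;
    printf("libavcodec %d.%d.%d\n",
           VERSION_MAJOR(v), VERSION_MINOR(v), VERSION_MICRO(v));
    /* integer compare works because higher-order fields sit in higher bits */
    if (v >= ((51<<16)+(0<<8)+0))
        printf("new enough\n");
    return 0;
}
```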
| #define AV_NOPTS_VALUE int64_t_C(0x8000000000000000) | |||
| #define AV_TIME_BASE 1000000 | |||
| #define AV_TIME_BASE_Q (AVRational){1, AV_TIME_BASE} | |||
| #define AV_NOPTS_VALUE int64_t_C(0x8000000000000000) | |||
| #define AV_TIME_BASE 1000000 | |||
| #define AV_TIME_BASE_Q (AVRational){1, AV_TIME_BASE} | |||
| enum CodecID { | |||
| CODEC_ID_NONE, | |||
| @@ -362,9 +362,9 @@ extern int motion_estimation_method; | |||
| #define CODEC_FLAG2_LOCAL_HEADER 0x00000008 ///< place global headers at every keyframe instead of in extradata | |||
| /* Unsupported options : | |||
| * Syntax Arithmetic coding (SAC) | |||
| * Reference Picture Selection | |||
| * Independent Segment Decoding */ | |||
| * Syntax Arithmetic coding (SAC) | |||
| * Reference Picture Selection | |||
| * Independent Segment Decoding */ | |||
| /* /Fx */ | |||
| /* codec capabilities */ | |||
| @@ -646,9 +646,9 @@ typedef struct AVPanScan{ | |||
| */\ | |||
| int8_t *ref_index[2]; | |||
| #define FF_QSCALE_TYPE_MPEG1 0 | |||
| #define FF_QSCALE_TYPE_MPEG2 1 | |||
| #define FF_QSCALE_TYPE_H264 2 | |||
| #define FF_QSCALE_TYPE_MPEG1 0 | |||
| #define FF_QSCALE_TYPE_MPEG2 1 | |||
| #define FF_QSCALE_TYPE_H264 2 | |||
| #define FF_BUFFER_TYPE_INTERNAL 1 | |||
| #define FF_BUFFER_TYPE_USER 2 ///< Direct rendering buffers (image is (de)allocated by user) | |||
| @@ -684,9 +684,9 @@ typedef struct AVCLASS AVClass; | |||
| struct AVCLASS { | |||
| const char* class_name; | |||
| const char* (*item_name)(void*); /* actually passing a pointer to an AVCodecContext | |||
| or AVFormatContext, which begin with an AVClass. | |||
| Needed because av_log is in libavcodec and has no visibility | |||
| of AVIn/OutputFormat */ | |||
| or AVFormatContext, which begin with an AVClass. | |||
| Needed because av_log is in libavcodec and has no visibility | |||
| of AVIn/OutputFormat */ | |||
| struct AVOption *option; | |||
| }; | |||
| @@ -1252,18 +1252,18 @@ typedef struct AVCodecContext { | |||
| * result into program crash) | |||
| */ | |||
| unsigned dsp_mask; | |||
| #define FF_MM_FORCE 0x80000000 /* force usage of selected flags (OR) */ | |||
| #define FF_MM_FORCE 0x80000000 /* force usage of selected flags (OR) */ | |||
| /* lower 16 bits - CPU features */ | |||
| #ifdef HAVE_MMX | |||
| #define FF_MM_MMX 0x0001 /* standard MMX */ | |||
| #define FF_MM_3DNOW 0x0004 /* AMD 3DNOW */ | |||
| #define FF_MM_MMXEXT 0x0002 /* SSE integer functions or AMD MMX ext */ | |||
| #define FF_MM_SSE 0x0008 /* SSE functions */ | |||
| #define FF_MM_SSE2 0x0010 /* PIV SSE2 functions */ | |||
| #define FF_MM_3DNOWEXT 0x0020 /* AMD 3DNowExt */ | |||
| #define FF_MM_MMX 0x0001 /* standard MMX */ | |||
| #define FF_MM_3DNOW 0x0004 /* AMD 3DNOW */ | |||
| #define FF_MM_MMXEXT 0x0002 /* SSE integer functions or AMD MMX ext */ | |||
| #define FF_MM_SSE 0x0008 /* SSE functions */ | |||
| #define FF_MM_SSE2 0x0010 /* PIV SSE2 functions */ | |||
| #define FF_MM_3DNOWEXT 0x0020 /* AMD 3DNowExt */ | |||
| #endif /* HAVE_MMX */ | |||
| #ifdef HAVE_IWMMXT | |||
| #define FF_MM_IWMMXT 0x0100 /* XScale IWMMXT */ | |||
| #define FF_MM_IWMMXT 0x0100 /* XScale IWMMXT */ | |||
| #endif /* HAVE_IWMMXT */ | |||
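dsp_mask lets an application override CPU-feature autodetection for dsputil; per the comment on FF_MM_FORCE, setting that bit forces usage of the selected low-bit flags (OR) instead of leaving them to detection. A standalone sketch that only builds such a mask value, re-declaring the constants with the values shown above (how dsputil_init consumes the low 16 bits is outside this hunk):

```c
#include <stdio.h>

#define FF_MM_FORCE    0x80000000u  /* force usage of selected flags (OR) */
#define FF_MM_MMX      0x0001u      /* standard MMX */
#define FF_MM_MMXEXT   0x0002u      /* SSE integer functions or AMD MMX ext */

int main(void)
{
    /* What an application would place in AVCodecContext.dsp_mask to force
     * MMX/MMXEXT code paths on. */
    unsigned dsp_mask = FF_MM_FORCE | FF_MM_MMX | FF_MM_MMXEXT;

    printf("force bit set: %s\n", (dsp_mask & FF_MM_FORCE) ? "yes" : "no");
    printf("low 16 bits (CPU features): 0x%04x\n", dsp_mask & 0xffff);
    return 0;
}
```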
| /** | |||
| @@ -2223,7 +2223,7 @@ int avcodec_find_best_pix_fmt(int pix_fmt_mask, int src_pix_fmt, | |||
| #define FF_ALPHA_TRANSP 0x0001 /* image has some totally transparent pixels */ | |||
| #define FF_ALPHA_SEMI_TRANSP 0x0002 /* image has some transparent pixels */ | |||
| int img_get_alpha_info(const AVPicture *src, | |||
| int pix_fmt, int width, int height); | |||
| int pix_fmt, int width, int height); | |||
| /* convert among pixel formats */ | |||
| int img_convert(AVPicture *dst, int dst_pix_fmt, | |||
| @@ -35,20 +35,20 @@ typedef struct ThreadContext{ | |||
| // it's odd Be never patented that :D | |||
| struct benaphore { | |||
| vint32 atom; | |||
| sem_id sem; | |||
| vint32 atom; | |||
| sem_id sem; | |||
| }; | |||
| static inline int lock_ben(struct benaphore *ben) | |||
| { | |||
| if (atomic_add(&ben->atom, 1) > 0) | |||
| return acquire_sem(ben->sem); | |||
| return B_OK; | |||
| if (atomic_add(&ben->atom, 1) > 0) | |||
| return acquire_sem(ben->sem); | |||
| return B_OK; | |||
| } | |||
| static inline int unlock_ben(struct benaphore *ben) | |||
| { | |||
| if (atomic_add(&ben->atom, -1) > 1) | |||
| return release_sem(ben->sem); | |||
| return B_OK; | |||
| if (atomic_add(&ben->atom, -1) > 1) | |||
| return release_sem(ben->sem); | |||
| return B_OK; | |||
| } | |||
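The benaphore keeps an atomic counter in front of a semaphore so the kernel object is only touched under contention: lock_ben calls acquire_sem only when atomic_add reports another holder, and unlock_ben releases only when a waiter is pending, which is exactly how avcodec_thread_lock_lib/unlock_lib and initialize_after use it below. A hedged usage sketch in the same BeOS style (the protected counter and init function are placeholders; the BeOS kernel kit, OS.h, is assumed for create_sem/atomic_add):

```c
/* Sketch: protecting a shared counter with the benaphore defined above. */
static struct benaphore my_ben;
static int shared_counter;

static void my_module_init(void)
{
    my_ben.atom = 0;
    my_ben.sem  = create_sem(0, "my benaphore");
}

static void bump_counter(void)
{
    lock_ben(&my_ben);       /* acquire_sem only happens if contended */
    shared_counter++;        /* critical section */
    unlock_ben(&my_ben);     /* release_sem only happens if someone waits */
}
```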
| static struct benaphore av_thread_lib_ben; | |||
| @@ -155,25 +155,25 @@ fail: | |||
| int avcodec_thread_lock_lib(void) | |||
| { | |||
| return lock_ben(&av_thread_lib_ben); | |||
| return lock_ben(&av_thread_lib_ben); | |||
| } | |||
| int avcodec_thread_unlock_lib(void) | |||
| { | |||
| return unlock_ben(&av_thread_lib_ben); | |||
| return unlock_ben(&av_thread_lib_ben); | |||
| } | |||
| /* our versions of _init and _fini (which are called by those actually from crt.o) */ | |||
| void initialize_after(void) | |||
| { | |||
| av_thread_lib_ben.atom = 0; | |||
| av_thread_lib_ben.sem = create_sem(0, "libavcodec benaphore"); | |||
| av_thread_lib_ben.atom = 0; | |||
| av_thread_lib_ben.sem = create_sem(0, "libavcodec benaphore"); | |||
| } | |||
| void uninitialize_before(void) | |||
| { | |||
| delete_sem(av_thread_lib_ben.sem); | |||
| delete_sem(av_thread_lib_ben.sem); | |||
| } | |||
| @@ -83,7 +83,7 @@ int check_marker(GetBitContext *s, const char *msg) | |||
| { | |||
| int bit= get_bits1(s); | |||
| if(!bit) | |||
| av_log(NULL, AV_LOG_INFO, "Marker bit missing %s\n", msg); | |||
| av_log(NULL, AV_LOG_INFO, "Marker bit missing %s\n", msg); | |||
| return bit; | |||
| } | |||
| @@ -146,7 +146,7 @@ typedef struct RL_VLC_ELEM { | |||
| # ifdef __GNUC__ | |||
| static inline uint32_t unaligned32(const void *v) { | |||
| struct Unaligned { | |||
| uint32_t i; | |||
| uint32_t i; | |||
| } __attribute__((packed)); | |||
| return ((const struct Unaligned *) v)->i; | |||
| @@ -183,7 +183,7 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value) | |||
| bit_buf = (bit_buf<<n) | value; | |||
| bit_left-=n; | |||
| } else { | |||
| bit_buf<<=bit_left; | |||
| bit_buf<<=bit_left; | |||
| bit_buf |= value >> (n - bit_left); | |||
| #ifdef UNALIGNED_STORES_ARE_BAD | |||
| if (3 & (intptr_t) s->buf_ptr) { | |||
| @@ -196,7 +196,7 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value) | |||
| *(uint32_t *)s->buf_ptr = be2me_32(bit_buf); | |||
| //printf("bitbuf = %08x\n", bit_buf); | |||
| s->buf_ptr+=4; | |||
| bit_left+=32 - n; | |||
| bit_left+=32 - n; | |||
| bit_buf = value; | |||
| } | |||
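The non-ALT put_bits() path above accumulates bits MSB-first in a 32-bit bit_buf and spills a big-endian word whenever the pending value no longer fits in bit_left. A standalone C sketch of the same buffering idea (not the libavcodec API; the byte-wise store stands in for be2me_32 and the UNALIGNED_STORES_ARE_BAD special case is ignored):

```c
#include <stdint.h>
#include <assert.h>

typedef struct {
    uint32_t bit_buf;   /* bits accumulated so far, MSB-first */
    int      bit_left;  /* free bits remaining in bit_buf (starts at 32) */
    uint8_t *buf_ptr;   /* next output position */
} MiniPutBits;

/* Append the low n bits of value (0 < n <= 24 here, for simplicity). */
static void mini_put_bits(MiniPutBits *s, int n, uint32_t value)
{
    assert(n > 0 && n <= 24 && value < (1u << n));
    if (n < s->bit_left) {
        s->bit_buf   = (s->bit_buf << n) | value;
        s->bit_left -= n;
    } else {
        /* flush a full 32-bit word, big-endian, then start a new buffer */
        s->bit_buf <<= s->bit_left;
        s->bit_buf  |= value >> (n - s->bit_left);
        s->buf_ptr[0] = s->bit_buf >> 24;
        s->buf_ptr[1] = s->bit_buf >> 16;
        s->buf_ptr[2] = s->bit_buf >> 8;
        s->buf_ptr[3] = s->bit_buf;
        s->buf_ptr  += 4;
        s->bit_left += 32 - n;
        s->bit_buf   = value;
    }
}
```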
| @@ -212,21 +212,21 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value) | |||
| # ifdef ALIGNED_BITSTREAM_WRITER | |||
| # if defined(ARCH_X86) || defined(ARCH_X86_64) | |||
| asm volatile( | |||
| "movl %0, %%ecx \n\t" | |||
| "xorl %%eax, %%eax \n\t" | |||
| "shrdl %%cl, %1, %%eax \n\t" | |||
| "shrl %%cl, %1 \n\t" | |||
| "movl %0, %%ecx \n\t" | |||
| "shrl $3, %%ecx \n\t" | |||
| "andl $0xFFFFFFFC, %%ecx \n\t" | |||
| "bswapl %1 \n\t" | |||
| "orl %1, (%2, %%ecx) \n\t" | |||
| "bswapl %%eax \n\t" | |||
| "addl %3, %0 \n\t" | |||
| "movl %%eax, 4(%2, %%ecx) \n\t" | |||
| : "=&r" (s->index), "=&r" (value) | |||
| : "r" (s->buf), "r" (n), "0" (s->index), "1" (value<<(-n)) | |||
| : "%eax", "%ecx" | |||
| "movl %0, %%ecx \n\t" | |||
| "xorl %%eax, %%eax \n\t" | |||
| "shrdl %%cl, %1, %%eax \n\t" | |||
| "shrl %%cl, %1 \n\t" | |||
| "movl %0, %%ecx \n\t" | |||
| "shrl $3, %%ecx \n\t" | |||
| "andl $0xFFFFFFFC, %%ecx \n\t" | |||
| "bswapl %1 \n\t" | |||
| "orl %1, (%2, %%ecx) \n\t" | |||
| "bswapl %%eax \n\t" | |||
| "addl %3, %0 \n\t" | |||
| "movl %%eax, 4(%2, %%ecx) \n\t" | |||
| : "=&r" (s->index), "=&r" (value) | |||
| : "r" (s->buf), "r" (n), "0" (s->index), "1" (value<<(-n)) | |||
| : "%eax", "%ecx" | |||
| ); | |||
| # else | |||
| int index= s->index; | |||
| @@ -243,20 +243,20 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value) | |||
| # else //ALIGNED_BITSTREAM_WRITER | |||
| # if defined(ARCH_X86) || defined(ARCH_X86_64) | |||
| asm volatile( | |||
| "movl $7, %%ecx \n\t" | |||
| "andl %0, %%ecx \n\t" | |||
| "addl %3, %%ecx \n\t" | |||
| "negl %%ecx \n\t" | |||
| "shll %%cl, %1 \n\t" | |||
| "bswapl %1 \n\t" | |||
| "movl %0, %%ecx \n\t" | |||
| "shrl $3, %%ecx \n\t" | |||
| "orl %1, (%%ecx, %2) \n\t" | |||
| "addl %3, %0 \n\t" | |||
| "movl $0, 4(%%ecx, %2) \n\t" | |||
| : "=&r" (s->index), "=&r" (value) | |||
| : "r" (s->buf), "r" (n), "0" (s->index), "1" (value) | |||
| : "%ecx" | |||
| "movl $7, %%ecx \n\t" | |||
| "andl %0, %%ecx \n\t" | |||
| "addl %3, %%ecx \n\t" | |||
| "negl %%ecx \n\t" | |||
| "shll %%cl, %1 \n\t" | |||
| "bswapl %1 \n\t" | |||
| "movl %0, %%ecx \n\t" | |||
| "shrl $3, %%ecx \n\t" | |||
| "orl %1, (%%ecx, %2) \n\t" | |||
| "addl %3, %0 \n\t" | |||
| "movl $0, 4(%%ecx, %2) \n\t" | |||
| : "=&r" (s->index), "=&r" (value) | |||
| : "r" (s->buf), "r" (n), "0" (s->index), "1" (value) | |||
| : "%ecx" | |||
| ); | |||
| # else | |||
| int index= s->index; | |||
| @@ -276,9 +276,9 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value) | |||
| static inline uint8_t* pbBufPtr(PutBitContext *s) | |||
| { | |||
| #ifdef ALT_BITSTREAM_WRITER | |||
| return s->buf + (s->index>>3); | |||
| return s->buf + (s->index>>3); | |||
| #else | |||
| return s->buf_ptr; | |||
| return s->buf_ptr; | |||
| #endif | |||
| } | |||
| @@ -290,10 +290,10 @@ static inline void skip_put_bytes(PutBitContext *s, int n){ | |||
| assert((put_bits_count(s)&7)==0); | |||
| #ifdef ALT_BITSTREAM_WRITER | |||
| FIXME may need some cleaning of the buffer | |||
| s->index += n<<3; | |||
| s->index += n<<3; | |||
| #else | |||
| assert(s->bit_left==32); | |||
| s->buf_ptr += n; | |||
| s->buf_ptr += n; | |||
| #endif | |||
| } | |||
| @@ -366,10 +366,10 @@ for examples see get_bits, show_bits, skip_bits, get_vlc | |||
| static inline int unaligned32_be(const void *v) | |||
| { | |||
| #ifdef CONFIG_ALIGN | |||
| const uint8_t *p=v; | |||
| return (((p[0]<<8) | p[1])<<16) | (p[2]<<8) | (p[3]); | |||
| const uint8_t *p=v; | |||
| return (((p[0]<<8) | p[1])<<16) | (p[2]<<8) | (p[3]); | |||
| #else | |||
| return be2me_32( unaligned32(v)); //original | |||
| return be2me_32( unaligned32(v)); //original | |||
| #endif | |||
| } | |||
| @@ -528,8 +528,8 @@ static inline int get_bits_count(GetBitContext *s){ | |||
| #if defined(ARCH_X86) || defined(ARCH_X86_64) | |||
| # define SKIP_CACHE(name, gb, num)\ | |||
| asm(\ | |||
| "shldl %2, %1, %0 \n\t"\ | |||
| "shll %2, %1 \n\t"\ | |||
| "shldl %2, %1, %0 \n\t"\ | |||
| "shll %2, %1 \n\t"\ | |||
| : "+r" (name##_cache0), "+r" (name##_cache1)\ | |||
| : "Ic" ((uint8_t)num)\ | |||
| ); | |||
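SKIP_CACHE discards num already-consumed bits from the bit reader's two cached words: shldl shifts cache0 left while refilling its low bits from the top of cache1, and shll then advances cache1 itself. A plain-C equivalent of the two instructions, valid for 0 < num < 32 (a sketch, not the macro's non-x86 branch):

```c
#include <stdint.h>

/* Skip num bits from a 64-bit window held as two 32-bit halves.
 * cache0 holds the upcoming bits, cache1 the ones after them. */
static inline void skip_cache_c(uint32_t *cache0, uint32_t *cache1, int num)
{
    *cache0 = (*cache0 << num) | (*cache1 >> (32 - num)); /* shldl */
    *cache1 <<= num;                                      /* shll  */
}
```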
| @@ -61,13 +61,13 @@ static int decode_frame(AVCodecContext *avctx, | |||
| uint8_t *cb= &a->picture.data[1][ y*a->picture.linesize[1] ]; | |||
| uint8_t *cr= &a->picture.data[2][ y*a->picture.linesize[2] ]; | |||
| for(x=0; x<avctx->width; x+=4){ | |||
| luma[3] = get_bits(&a->gb, 5) << 3; | |||
| luma[2] = get_bits(&a->gb, 5) << 3; | |||
| luma[1] = get_bits(&a->gb, 5) << 3; | |||
| luma[0] = get_bits(&a->gb, 5) << 3; | |||
| luma+= 4; | |||
| *(cb++) = get_bits(&a->gb, 6) << 2; | |||
| *(cr++) = get_bits(&a->gb, 6) << 2; | |||
| luma[3] = get_bits(&a->gb, 5) << 3; | |||
| luma[2] = get_bits(&a->gb, 5) << 3; | |||
| luma[1] = get_bits(&a->gb, 5) << 3; | |||
| luma[0] = get_bits(&a->gb, 5) << 3; | |||
| luma+= 4; | |||
| *(cb++) = get_bits(&a->gb, 6) << 2; | |||
| *(cr++) = get_bits(&a->gb, 6) << 2; | |||
| } | |||
| } | |||
| @@ -65,14 +65,14 @@ int64_t gettime(void) | |||
| static short idct_mmx_perm[64]; | |||
| static short idct_simple_mmx_perm[64]={ | |||
| 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, | |||
| 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, | |||
| 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, | |||
| 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, | |||
| 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, | |||
| 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, | |||
| 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, | |||
| 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F, | |||
| 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, | |||
| 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, | |||
| 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, | |||
| 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, | |||
| 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, | |||
| 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, | |||
| 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, | |||
| 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F, | |||
| }; | |||
| void idct_mmx_init(void) | |||
| @@ -81,8 +81,8 @@ void idct_mmx_init(void) | |||
| /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */ | |||
| for (i = 0; i < 64; i++) { | |||
| idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2); | |||
| // idct_simple_mmx_perm[i] = simple_block_permute_op(i); | |||
| idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2); | |||
| // idct_simple_mmx_perm[i] = simple_block_permute_op(i); | |||
| } | |||
| } | |||
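The permutation formula in idct_mmx_init keeps the row bits (i & 0x38) in place and rotates the three column bits, producing the reordered input that, per the comment above, the mmx/mmxext idct expects. A tiny standalone check of the expression for the first row:

```c
#include <stdio.h>

int main(void)
{
    for (int i = 0; i < 8; i++) {
        int p = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
        printf("idct_mmx_perm[%d] = %d\n", i, p);  /* prints 0 4 1 5 2 6 3 7 */
    }
    return 0;
}
```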
| @@ -151,7 +151,7 @@ void dct_error(const char *name, int is_idct, | |||
| for(i=0;i<64;i++) | |||
| block[idct_simple_mmx_perm[i]] = block1[i]; | |||
| } else { | |||
| } else { | |||
| for(i=0; i<64; i++) | |||
| block[i]= block1[i]; | |||
| } | |||
| @@ -186,9 +186,9 @@ void dct_error(const char *name, int is_idct, | |||
| if (v > err_inf) | |||
| err_inf = v; | |||
| err2 += v * v; | |||
| sysErr[i] += block[i] - block1[i]; | |||
| blockSumErr += v; | |||
| if( abs(block[i])>maxout) maxout=abs(block[i]); | |||
| sysErr[i] += block[i] - block1[i]; | |||
| blockSumErr += v; | |||
| if( abs(block[i])>maxout) maxout=abs(block[i]); | |||
| } | |||
| if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr; | |||
| #if 0 // print different matrix pairs | |||
| @@ -209,7 +209,7 @@ void dct_error(const char *name, int is_idct, | |||
| #if 1 // dump systematic errors | |||
| for(i=0; i<64; i++){ | |||
| if(i%8==0) printf("\n"); | |||
| if(i%8==0) printf("\n"); | |||
| printf("%5d ", (int)sysErr[i]); | |||
| } | |||
| printf("\n"); | |||
| @@ -503,7 +503,7 @@ int main(int argc, char **argv) | |||
| dct_error("XVID-MMX2", 1, ff_idct_xvid_mmx2, idct, test); | |||
| // dct_error("ODIVX-C", 1, odivx_idct_c, idct); | |||
| //printf(" test against odivx idct\n"); | |||
| // dct_error("REF", 1, idct, odivx_idct_c); | |||
| // dct_error("REF", 1, idct, odivx_idct_c); | |||
| // dct_error("INT", 1, j_rev_dct, odivx_idct_c); | |||
| // dct_error("MMX", 1, ff_mmx_idct, odivx_idct_c); | |||
| // dct_error("MMXEXT", 1, ff_mmxext_idct, odivx_idct_c); | |||
| @@ -124,14 +124,14 @@ const uint32_t inverse[256]={ | |||
| /* Input permutation for the simple_idct_mmx */ | |||
| static const uint8_t simple_mmx_permutation[64]={ | |||
| 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, | |||
| 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, | |||
| 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, | |||
| 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, | |||
| 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, | |||
| 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, | |||
| 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, | |||
| 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F, | |||
| 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, | |||
| 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, | |||
| 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, | |||
| 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, | |||
| 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, | |||
| 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, | |||
| 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, | |||
| 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F, | |||
| }; | |||
| static int pix_sum_c(uint8_t * pix, int line_size) | |||
| @@ -140,18 +140,18 @@ static int pix_sum_c(uint8_t * pix, int line_size) | |||
| s = 0; | |||
| for (i = 0; i < 16; i++) { | |||
| for (j = 0; j < 16; j += 8) { | |||
| s += pix[0]; | |||
| s += pix[1]; | |||
| s += pix[2]; | |||
| s += pix[3]; | |||
| s += pix[4]; | |||
| s += pix[5]; | |||
| s += pix[6]; | |||
| s += pix[7]; | |||
| pix += 8; | |||
| } | |||
| pix += line_size - 16; | |||
| for (j = 0; j < 16; j += 8) { | |||
| s += pix[0]; | |||
| s += pix[1]; | |||
| s += pix[2]; | |||
| s += pix[3]; | |||
| s += pix[4]; | |||
| s += pix[5]; | |||
| s += pix[6]; | |||
| s += pix[7]; | |||
| pix += 8; | |||
| } | |||
| pix += line_size - 16; | |||
| } | |||
| return s; | |||
| } | |||
| @@ -163,33 +163,33 @@ static int pix_norm1_c(uint8_t * pix, int line_size) | |||
| s = 0; | |||
| for (i = 0; i < 16; i++) { | |||
| for (j = 0; j < 16; j += 8) { | |||
| for (j = 0; j < 16; j += 8) { | |||
| #if 0 | |||
| s += sq[pix[0]]; | |||
| s += sq[pix[1]]; | |||
| s += sq[pix[2]]; | |||
| s += sq[pix[3]]; | |||
| s += sq[pix[4]]; | |||
| s += sq[pix[5]]; | |||
| s += sq[pix[6]]; | |||
| s += sq[pix[7]]; | |||
| s += sq[pix[0]]; | |||
| s += sq[pix[1]]; | |||
| s += sq[pix[2]]; | |||
| s += sq[pix[3]]; | |||
| s += sq[pix[4]]; | |||
| s += sq[pix[5]]; | |||
| s += sq[pix[6]]; | |||
| s += sq[pix[7]]; | |||
| #else | |||
| #if LONG_MAX > 2147483647 | |||
| register uint64_t x=*(uint64_t*)pix; | |||
| s += sq[x&0xff]; | |||
| s += sq[(x>>8)&0xff]; | |||
| s += sq[(x>>16)&0xff]; | |||
| s += sq[(x>>24)&0xff]; | |||
| register uint64_t x=*(uint64_t*)pix; | |||
| s += sq[x&0xff]; | |||
| s += sq[(x>>8)&0xff]; | |||
| s += sq[(x>>16)&0xff]; | |||
| s += sq[(x>>24)&0xff]; | |||
| s += sq[(x>>32)&0xff]; | |||
| s += sq[(x>>40)&0xff]; | |||
| s += sq[(x>>48)&0xff]; | |||
| s += sq[(x>>56)&0xff]; | |||
| #else | |||
| register uint32_t x=*(uint32_t*)pix; | |||
| s += sq[x&0xff]; | |||
| s += sq[(x>>8)&0xff]; | |||
| s += sq[(x>>16)&0xff]; | |||
| s += sq[(x>>24)&0xff]; | |||
| register uint32_t x=*(uint32_t*)pix; | |||
| s += sq[x&0xff]; | |||
| s += sq[(x>>8)&0xff]; | |||
| s += sq[(x>>16)&0xff]; | |||
| s += sq[(x>>24)&0xff]; | |||
| x=*(uint32_t*)(pix+4); | |||
| s += sq[x&0xff]; | |||
| s += sq[(x>>8)&0xff]; | |||
| @@ -197,9 +197,9 @@ static int pix_norm1_c(uint8_t * pix, int line_size) | |||
| s += sq[(x>>24)&0xff]; | |||
| #endif | |||
| #endif | |||
| pix += 8; | |||
| } | |||
| pix += line_size - 16; | |||
| pix += 8; | |||
| } | |||
| pix += line_size - 16; | |||
| } | |||
| return s; | |||
| } | |||
| @@ -410,7 +410,7 @@ static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int lin | |||
| } | |||
| static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1, | |||
| const uint8_t *s2, int stride){ | |||
| const uint8_t *s2, int stride){ | |||
| int i; | |||
| /* read the pixels */ | |||
| @@ -431,7 +431,7 @@ static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1, | |||
| static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, | |||
| int line_size) | |||
| int line_size) | |||
| { | |||
| int i; | |||
| uint8_t *cm = cropTbl + MAX_NEG_CROP; | |||
| @@ -453,7 +453,7 @@ static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, | |||
| } | |||
| static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels, | |||
| int line_size) | |||
| int line_size) | |||
| { | |||
| int i; | |||
| uint8_t *cm = cropTbl + MAX_NEG_CROP; | |||
| @@ -471,7 +471,7 @@ static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels | |||
| } | |||
| static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels, | |||
| int line_size) | |||
| int line_size) | |||
| { | |||
| int i; | |||
| uint8_t *cm = cropTbl + MAX_NEG_CROP; | |||
| @@ -1214,7 +1214,7 @@ static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int | |||
| int i,j; | |||
| for (i=0; i < height; i++) { | |||
| for (j=0; j < width; j++) { | |||
| dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11; | |||
| dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11; | |||
| } | |||
| src += stride; | |||
| dst += stride; | |||
| @@ -1225,7 +1225,7 @@ static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int | |||
| int i,j; | |||
| for (i=0; i < height; i++) { | |||
| for (j=0; j < width; j++) { | |||
| dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11; | |||
| dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11; | |||
| } | |||
| src += stride; | |||
| dst += stride; | |||
| @@ -1236,7 +1236,7 @@ static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int | |||
| int i,j; | |||
| for (i=0; i < height; i++) { | |||
| for (j=0; j < width; j++) { | |||
| dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11; | |||
| dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11; | |||
| } | |||
| src += stride; | |||
| dst += stride; | |||
| @@ -1247,7 +1247,7 @@ static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int | |||
| int i,j; | |||
| for (i=0; i < height; i++) { | |||
| for (j=0; j < width; j++) { | |||
| dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15; | |||
| dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15; | |||
| } | |||
| src += stride; | |||
| dst += stride; | |||
| @@ -1258,7 +1258,7 @@ static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int | |||
| int i,j; | |||
| for (i=0; i < height; i++) { | |||
| for (j=0; j < width; j++) { | |||
| dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15; | |||
| dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15; | |||
| } | |||
| src += stride; | |||
| dst += stride; | |||
| @@ -1269,7 +1269,7 @@ static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int | |||
| int i,j; | |||
| for (i=0; i < height; i++) { | |||
| for (j=0; j < width; j++) { | |||
| dst[j] = (683*(src[j] + 2*src[j+stride] + 1)) >> 11; | |||
| dst[j] = (683*(src[j] + 2*src[j+stride] + 1)) >> 11; | |||
| } | |||
| src += stride; | |||
| dst += stride; | |||
| @@ -1280,7 +1280,7 @@ static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int | |||
| int i,j; | |||
| for (i=0; i < height; i++) { | |||
| for (j=0; j < width; j++) { | |||
| dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15; | |||
| dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15; | |||
| } | |||
| src += stride; | |||
| dst += stride; | |||
| @@ -1291,7 +1291,7 @@ static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int | |||
| int i,j; | |||
| for (i=0; i < height; i++) { | |||
| for (j=0; j < width; j++) { | |||
| dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15; | |||
| dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15; | |||
| } | |||
| src += stride; | |||
| dst += stride; | |||
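The 683 and 2731 factors in these thirds-pel interpolators are fixed-point approximations of 1/3 (683 ≈ 2048/3, paired with >> 11) and 1/12 (2731 ≈ 32768/12, paired with >> 15); each tap pattern's weights sum to 3 or 12, plus a small rounding constant. A quick numeric check of the constants and of put_tpel_pixels_mc10 on two sample values (plain arithmetic, nothing codec-specific):

```c
#include <stdio.h>

int main(void)
{
    /* 683 ~= 2048/3 and 2731 ~= 32768/12: the >> 11 and >> 15 shifts above
     * therefore implement (2*a + b)/3 and e.g. (4*a + 3*b + 3*c + 2*d)/12. */
    printf("2048/3   = %.3f -> 683\n",  2048.0 / 3.0);
    printf("32768/12 = %.3f -> 2731\n", 32768.0 / 12.0);

    /* worked example of the mc10 tap on two samples a=100, b=200 */
    int a = 100, b = 200;
    printf("(683*(2*%d + %d + 1)) >> 11 = %d (exact (2a+b)/3 = %.2f)\n",
           a, b, (683 * (2 * a + b + 1)) >> 11, (2.0 * a + b) / 3.0);
    return 0;
}
```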
| @@ -1311,7 +1311,7 @@ static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int | |||
| int i,j; | |||
| for (i=0; i < height; i++) { | |||
| for (j=0; j < width; j++) { | |||
| dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1; | |||
| dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1; | |||
| } | |||
| src += stride; | |||
| dst += stride; | |||
| @@ -1322,7 +1322,7 @@ static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int | |||
| int i,j; | |||
| for (i=0; i < height; i++) { | |||
| for (j=0; j < width; j++) { | |||
| dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1; | |||
| dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1; | |||
| } | |||
| src += stride; | |||
| dst += stride; | |||
| @@ -1333,7 +1333,7 @@ static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int | |||
| int i,j; | |||
| for (i=0; i < height; i++) { | |||
| for (j=0; j < width; j++) { | |||
| dst[j] = (dst[j] + ((683*(2*src[j] + src[j+stride] + 1)) >> 11) + 1) >> 1; | |||
| dst[j] = (dst[j] + ((683*(2*src[j] + src[j+stride] + 1)) >> 11) + 1) >> 1; | |||
| } | |||
| src += stride; | |||
| dst += stride; | |||
| @@ -1344,7 +1344,7 @@ static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int | |||
| int i,j; | |||
| for (i=0; i < height; i++) { | |||
| for (j=0; j < width; j++) { | |||
| dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1; | |||
| dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1; | |||
| } | |||
| src += stride; | |||
| dst += stride; | |||
| @@ -1355,7 +1355,7 @@ static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int | |||
| int i,j; | |||
| for (i=0; i < height; i++) { | |||
| for (j=0; j < width; j++) { | |||
| dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1; | |||
| dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1; | |||
| } | |||
| src += stride; | |||
| dst += stride; | |||
| @@ -1366,7 +1366,7 @@ static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int | |||
| int i,j; | |||
| for (i=0; i < height; i++) { | |||
| for (j=0; j < width; j++) { | |||
| dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+stride] + 1)) >> 11) + 1) >> 1; | |||
| dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+stride] + 1)) >> 11) + 1) >> 1; | |||
| } | |||
| src += stride; | |||
| dst += stride; | |||
| @@ -1377,7 +1377,7 @@ static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int | |||
| int i,j; | |||
| for (i=0; i < height; i++) { | |||
| for (j=0; j < width; j++) { | |||
| dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1; | |||
| dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1; | |||
| } | |||
| src += stride; | |||
| dst += stride; | |||
| @@ -1388,7 +1388,7 @@ static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int | |||
| int i,j; | |||
| for (i=0; i < height; i++) { | |||
| for (j=0; j < width; j++) { | |||
| dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1; | |||
| dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1; | |||
| } | |||
| src += stride; | |||
| dst += stride; | |||
| @@ -3666,15 +3666,15 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx) | |||
| #ifdef CONFIG_ENCODERS | |||
| if(avctx->dct_algo==FF_DCT_FASTINT) { | |||
| c->fdct = fdct_ifast; | |||
| c->fdct248 = fdct_ifast248; | |||
| c->fdct248 = fdct_ifast248; | |||
| } | |||
| else if(avctx->dct_algo==FF_DCT_FAAN) { | |||
| c->fdct = ff_faandct; | |||
| c->fdct248 = ff_faandct248; | |||
| c->fdct248 = ff_faandct248; | |||
| } | |||
| else { | |||
| c->fdct = ff_jpeg_fdct_islow; //slow/accurate/default | |||
| c->fdct248 = ff_fdct248_islow; | |||
| c->fdct248 = ff_fdct248_islow; | |||
| } | |||
| #endif //CONFIG_ENCODERS | |||
| @@ -151,7 +151,7 @@ typedef struct DSPContext { | |||
| * global motion compensation. | |||
| */ | |||
| void (*gmc )(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int ox, int oy, | |||
| int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height); | |||
| int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height); | |||
| void (*clear_blocks)(DCTELEM *blocks/*align 16*/); | |||
| int (*pix_sum)(uint8_t * pix, int line_size); | |||
| int (*pix_norm1)(uint8_t * pix, int line_size); | |||
| @@ -342,7 +342,7 @@ void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scant | |||
| void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type); | |||
| #define BYTE_VEC32(c) ((c)*0x01010101UL) | |||
| #define BYTE_VEC32(c) ((c)*0x01010101UL) | |||
| static inline uint32_t rnd_avg32(uint32_t a, uint32_t b) | |||
| { | |||
| @@ -194,7 +194,7 @@ channels_multi (int flags) | |||
| { | |||
| if (flags & DTS_LFE) | |||
| return 6; | |||
| else if (flags & 1) /* center channel */ | |||
| else if (flags & 1) /* center channel */ | |||
| return 5; | |||
| else if ((flags & DTS_CHANNEL_MASK) == DTS_2F2R) | |||
| return 4; | |||
| @@ -84,7 +84,7 @@ static void dv_build_unquantize_tables(DVVideoContext *s, uint8_t* perm) | |||
| j = perm[i]; | |||
| s->dv_idct_shift[0][0][q][j] = | |||
| dv_quant_shifts[q][dv_88_areas[i]] + 1; | |||
| s->dv_idct_shift[1][0][q][j] = s->dv_idct_shift[0][0][q][j] + 1; | |||
| s->dv_idct_shift[1][0][q][j] = s->dv_idct_shift[0][0][q][j] + 1; | |||
| } | |||
| /* 248DCT */ | |||
| @@ -92,7 +92,7 @@ static void dv_build_unquantize_tables(DVVideoContext *s, uint8_t* perm) | |||
| /* 248 table */ | |||
| s->dv_idct_shift[0][1][q][i] = | |||
| dv_quant_shifts[q][dv_248_areas[i]] + 1; | |||
| s->dv_idct_shift[1][1][q][i] = s->dv_idct_shift[0][1][q][i] + 1; | |||
| s->dv_idct_shift[1][1][q][i] = s->dv_idct_shift[0][1][q][i] + 1; | |||
| } | |||
| } | |||
| } | |||
| @@ -114,35 +114,35 @@ static int dvvideo_init(AVCodecContext *avctx) | |||
| done = 1; | |||
| dv_vlc_map = av_mallocz_static(DV_VLC_MAP_LEV_SIZE*DV_VLC_MAP_RUN_SIZE*sizeof(struct dv_vlc_pair)); | |||
| if (!dv_vlc_map) | |||
| return -ENOMEM; | |||
| /* dv_anchor lets each thread know its Id */ | |||
| dv_anchor = av_malloc(12*27*sizeof(void*)); | |||
| if (!dv_anchor) { | |||
| return -ENOMEM; | |||
| } | |||
| for (i=0; i<12*27; i++) | |||
| dv_anchor[i] = (void*)(size_t)i; | |||
| /* it's faster to include the sign bit in a generic VLC parsing scheme */ | |||
| for (i=0, j=0; i<NB_DV_VLC; i++, j++) { | |||
| new_dv_vlc_bits[j] = dv_vlc_bits[i]; | |||
| new_dv_vlc_len[j] = dv_vlc_len[i]; | |||
| new_dv_vlc_run[j] = dv_vlc_run[i]; | |||
| new_dv_vlc_level[j] = dv_vlc_level[i]; | |||
| if (dv_vlc_level[i]) { | |||
| new_dv_vlc_bits[j] <<= 1; | |||
| new_dv_vlc_len[j]++; | |||
| j++; | |||
| new_dv_vlc_bits[j] = (dv_vlc_bits[i] << 1) | 1; | |||
| new_dv_vlc_len[j] = dv_vlc_len[i] + 1; | |||
| new_dv_vlc_run[j] = dv_vlc_run[i]; | |||
| new_dv_vlc_level[j] = -dv_vlc_level[i]; | |||
| } | |||
| } | |||
| if (!dv_vlc_map) | |||
| return -ENOMEM; | |||
| /* dv_anchor lets each thread know its Id */ | |||
| dv_anchor = av_malloc(12*27*sizeof(void*)); | |||
| if (!dv_anchor) { | |||
| return -ENOMEM; | |||
| } | |||
| for (i=0; i<12*27; i++) | |||
| dv_anchor[i] = (void*)(size_t)i; | |||
| /* it's faster to include the sign bit in a generic VLC parsing scheme */ | |||
| for (i=0, j=0; i<NB_DV_VLC; i++, j++) { | |||
| new_dv_vlc_bits[j] = dv_vlc_bits[i]; | |||
| new_dv_vlc_len[j] = dv_vlc_len[i]; | |||
| new_dv_vlc_run[j] = dv_vlc_run[i]; | |||
| new_dv_vlc_level[j] = dv_vlc_level[i]; | |||
| if (dv_vlc_level[i]) { | |||
| new_dv_vlc_bits[j] <<= 1; | |||
| new_dv_vlc_len[j]++; | |||
| j++; | |||
| new_dv_vlc_bits[j] = (dv_vlc_bits[i] << 1) | 1; | |||
| new_dv_vlc_len[j] = dv_vlc_len[i] + 1; | |||
| new_dv_vlc_run[j] = dv_vlc_run[i]; | |||
| new_dv_vlc_level[j] = -dv_vlc_level[i]; | |||
| } | |||
| } | |||
| /* NOTE: as a trick, we use the fact that no codes are unused | |||
| to accelerate the parsing of partial codes */ | |||
| @@ -150,10 +150,10 @@ static int dvvideo_init(AVCodecContext *avctx) | |||
| new_dv_vlc_len, 1, 1, new_dv_vlc_bits, 2, 2, 0); | |||
| dv_rl_vlc = av_malloc(dv_vlc.table_size * sizeof(RL_VLC_ELEM)); | |||
| if (!dv_rl_vlc) { | |||
| av_free(dv_anchor); | |||
| return -ENOMEM; | |||
| } | |||
| if (!dv_rl_vlc) { | |||
| av_free(dv_anchor); | |||
| return -ENOMEM; | |||
| } | |||
| for(i = 0; i < dv_vlc.table_size; i++){ | |||
| int code= dv_vlc.table[i][0]; | |||
| int len = dv_vlc.table[i][1]; | |||
| @@ -170,49 +170,49 @@ static int dvvideo_init(AVCodecContext *avctx) | |||
| dv_rl_vlc[i].level = level; | |||
| dv_rl_vlc[i].run = run; | |||
| } | |||
| free_vlc(&dv_vlc); | |||
| free_vlc(&dv_vlc); | |||
| for (i = 0; i < NB_DV_VLC - 1; i++) { | |||
| for (i = 0; i < NB_DV_VLC - 1; i++) { | |||
| if (dv_vlc_run[i] >= DV_VLC_MAP_RUN_SIZE) | |||
| continue; | |||
| continue; | |||
| #ifdef DV_CODEC_TINY_TARGET | |||
| if (dv_vlc_level[i] >= DV_VLC_MAP_LEV_SIZE) | |||
| continue; | |||
| continue; | |||
| #endif | |||
| if (dv_vlc_map[dv_vlc_run[i]][dv_vlc_level[i]].size != 0) | |||
| continue; | |||
| if (dv_vlc_map[dv_vlc_run[i]][dv_vlc_level[i]].size != 0) | |||
| continue; | |||
| dv_vlc_map[dv_vlc_run[i]][dv_vlc_level[i]].vlc = dv_vlc_bits[i] << | |||
| (!!dv_vlc_level[i]); | |||
| dv_vlc_map[dv_vlc_run[i]][dv_vlc_level[i]].size = dv_vlc_len[i] + | |||
| (!!dv_vlc_level[i]); | |||
| } | |||
| for (i = 0; i < DV_VLC_MAP_RUN_SIZE; i++) { | |||
| dv_vlc_map[dv_vlc_run[i]][dv_vlc_level[i]].vlc = dv_vlc_bits[i] << | |||
| (!!dv_vlc_level[i]); | |||
| dv_vlc_map[dv_vlc_run[i]][dv_vlc_level[i]].size = dv_vlc_len[i] + | |||
| (!!dv_vlc_level[i]); | |||
| } | |||
| for (i = 0; i < DV_VLC_MAP_RUN_SIZE; i++) { | |||
| #ifdef DV_CODEC_TINY_TARGET | |||
| for (j = 1; j < DV_VLC_MAP_LEV_SIZE; j++) { | |||
| if (dv_vlc_map[i][j].size == 0) { | |||
| dv_vlc_map[i][j].vlc = dv_vlc_map[0][j].vlc | | |||
| (dv_vlc_map[i-1][0].vlc << (dv_vlc_map[0][j].size)); | |||
| dv_vlc_map[i][j].size = dv_vlc_map[i-1][0].size + | |||
| dv_vlc_map[0][j].size; | |||
| } | |||
| } | |||
| for (j = 1; j < DV_VLC_MAP_LEV_SIZE; j++) { | |||
| if (dv_vlc_map[i][j].size == 0) { | |||
| dv_vlc_map[i][j].vlc = dv_vlc_map[0][j].vlc | | |||
| (dv_vlc_map[i-1][0].vlc << (dv_vlc_map[0][j].size)); | |||
| dv_vlc_map[i][j].size = dv_vlc_map[i-1][0].size + | |||
| dv_vlc_map[0][j].size; | |||
| } | |||
| } | |||
| #else | |||
| for (j = 1; j < DV_VLC_MAP_LEV_SIZE/2; j++) { | |||
| if (dv_vlc_map[i][j].size == 0) { | |||
| dv_vlc_map[i][j].vlc = dv_vlc_map[0][j].vlc | | |||
| (dv_vlc_map[i-1][0].vlc << (dv_vlc_map[0][j].size)); | |||
| dv_vlc_map[i][j].size = dv_vlc_map[i-1][0].size + | |||
| dv_vlc_map[0][j].size; | |||
| } | |||
| dv_vlc_map[i][((uint16_t)(-j))&0x1ff].vlc = | |||
| dv_vlc_map[i][j].vlc | 1; | |||
| dv_vlc_map[i][((uint16_t)(-j))&0x1ff].size = | |||
| dv_vlc_map[i][j].size; | |||
| } | |||
| for (j = 1; j < DV_VLC_MAP_LEV_SIZE/2; j++) { | |||
| if (dv_vlc_map[i][j].size == 0) { | |||
| dv_vlc_map[i][j].vlc = dv_vlc_map[0][j].vlc | | |||
| (dv_vlc_map[i-1][0].vlc << (dv_vlc_map[0][j].size)); | |||
| dv_vlc_map[i][j].size = dv_vlc_map[i-1][0].size + | |||
| dv_vlc_map[0][j].size; | |||
| } | |||
| dv_vlc_map[i][((uint16_t)(-j))&0x1ff].vlc = | |||
| dv_vlc_map[i][j].vlc | 1; | |||
| dv_vlc_map[i][((uint16_t)(-j))&0x1ff].size = | |||
| dv_vlc_map[i][j].size; | |||
| } | |||
| #endif | |||
| } | |||
| } | |||
| } | |||
| /* Generic DSP setup */ | |||
| @@ -241,7 +241,7 @@ static int dvvideo_init(AVCodecContext *avctx) | |||
| /* FIXME: I really don't think this should be here */ | |||
| if (dv_codec_profile(avctx)) | |||
| avctx->pix_fmt = dv_codec_profile(avctx)->pix_fmt; | |||
| avctx->pix_fmt = dv_codec_profile(avctx)->pix_fmt; | |||
| avctx->coded_frame = &s->picture; | |||
| s->avctx= avctx; | |||
| @@ -306,9 +306,9 @@ static void dv_decode_ac(GetBitContext *gb, BlockInfo *mb, DCTELEM *block) | |||
| /* if we must parse a partial vlc, we do it here */ | |||
| if (partial_bit_count > 0) { | |||
| re_cache = ((unsigned)re_cache >> partial_bit_count) | | |||
| (mb->partial_bit_buffer << (sizeof(re_cache)*8 - partial_bit_count)); | |||
| re_index -= partial_bit_count; | |||
| mb->partial_bit_count = 0; | |||
| (mb->partial_bit_buffer << (sizeof(re_cache)*8 - partial_bit_count)); | |||
| re_index -= partial_bit_count; | |||
| mb->partial_bit_count = 0; | |||
| } | |||
| /* get the AC coefficients until last_index is reached */ | |||
| @@ -318,30 +318,30 @@ static void dv_decode_ac(GetBitContext *gb, BlockInfo *mb, DCTELEM *block) | |||
| #endif | |||
| /* our own optimized GET_RL_VLC */ | |||
| index = NEG_USR32(re_cache, TEX_VLC_BITS); | |||
| vlc_len = dv_rl_vlc[index].len; | |||
| vlc_len = dv_rl_vlc[index].len; | |||
| if (vlc_len < 0) { | |||
| index = NEG_USR32((unsigned)re_cache << TEX_VLC_BITS, -vlc_len) + dv_rl_vlc[index].level; | |||
| vlc_len = TEX_VLC_BITS - vlc_len; | |||
| } | |||
| level = dv_rl_vlc[index].level; | |||
| run = dv_rl_vlc[index].run; | |||
| /* gotta check if we're still within gb boundaries */ | |||
| if (re_index + vlc_len > last_index) { | |||
| /* should be < 16 bits otherwise a codeword could have been parsed */ | |||
| mb->partial_bit_count = last_index - re_index; | |||
| mb->partial_bit_buffer = NEG_USR32(re_cache, mb->partial_bit_count); | |||
| re_index = last_index; | |||
| break; | |||
| } | |||
| re_index += vlc_len; | |||
| run = dv_rl_vlc[index].run; | |||
| /* gotta check if we're still within gb boundaries */ | |||
| if (re_index + vlc_len > last_index) { | |||
| /* should be < 16 bits otherwise a codeword could have been parsed */ | |||
| mb->partial_bit_count = last_index - re_index; | |||
| mb->partial_bit_buffer = NEG_USR32(re_cache, mb->partial_bit_count); | |||
| re_index = last_index; | |||
| break; | |||
| } | |||
| re_index += vlc_len; | |||
| #ifdef VLC_DEBUG | |||
| printf("run=%d level=%d\n", run, level); | |||
| printf("run=%d level=%d\n", run, level); | |||
| #endif | |||
| pos += run; | |||
| if (pos >= 64) | |||
| break; | |||
| pos += run; | |||
| if (pos >= 64) | |||
| break; | |||
| assert(level); | |||
| pos1 = scan_table[pos]; | |||
| @@ -404,7 +404,7 @@ static inline void dv_decode_video_segment(DVVideoContext *s, | |||
| block = block1; | |||
| for(j = 0;j < 6; j++) { | |||
| last_index = block_sizes[j]; | |||
| init_get_bits(&gb, buf_ptr, last_index); | |||
| init_get_bits(&gb, buf_ptr, last_index); | |||
| /* get the dc */ | |||
| dc = get_sbits(&gb, 9); | |||
| @@ -444,7 +444,7 @@ static inline void dv_decode_video_segment(DVVideoContext *s, | |||
| block = block1; | |||
| mb = mb1; | |||
| init_get_bits(&gb, mb_bit_buffer, put_bits_count(&pb)); | |||
| flush_put_bits(&pb); | |||
| flush_put_bits(&pb); | |||
| for(j = 0;j < 6; j++, block += 64, mb++) { | |||
| if (mb->pos < 64 && get_bits_left(&gb) > 0) { | |||
| dv_decode_ac(&gb, mb, block); | |||
| @@ -456,7 +456,7 @@ static inline void dv_decode_video_segment(DVVideoContext *s, | |||
| /* all blocks are finished, so the extra bytes can be used at | |||
| the video segment level */ | |||
| if (j >= 6) | |||
| bit_copy(&vs_pb, &gb); | |||
| bit_copy(&vs_pb, &gb); | |||
| } | |||
| /* we need a pass over the whole video segment */ | |||
| @@ -475,8 +475,8 @@ static inline void dv_decode_video_segment(DVVideoContext *s, | |||
| #endif | |||
| dv_decode_ac(&gb, mb, block); | |||
| } | |||
| if (mb->pos >= 64 && mb->pos < 127) | |||
| av_log(NULL, AV_LOG_ERROR, "AC EOB marker is absent pos=%d\n", mb->pos); | |||
| if (mb->pos >= 64 && mb->pos < 127) | |||
| av_log(NULL, AV_LOG_ERROR, "AC EOB marker is absent pos=%d\n", mb->pos); | |||
| block += 64; | |||
| mb++; | |||
| } | |||
| @@ -508,7 +508,7 @@ static inline void dv_decode_video_segment(DVVideoContext *s, | |||
| if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8)) { | |||
| uint64_t aligned_pixels[64/8]; | |||
| uint8_t *pixels= (uint8_t*)aligned_pixels; | |||
| uint8_t *c_ptr, *c_ptr1, *ptr, *ptr1; | |||
| uint8_t *c_ptr, *c_ptr1, *ptr, *ptr1; | |||
| int x, y, linesize; | |||
| /* NOTE: at end of line, the macroblock is handled as 420 */ | |||
| idct_put(pixels, 8, block); | |||
| @@ -543,21 +543,21 @@ static always_inline int dv_rl2vlc(int run, int level, int sign, uint32_t* vlc) | |||
| int size; | |||
| if (run < DV_VLC_MAP_RUN_SIZE && level < DV_VLC_MAP_LEV_SIZE) { | |||
| *vlc = dv_vlc_map[run][level].vlc | sign; | |||
| size = dv_vlc_map[run][level].size; | |||
| size = dv_vlc_map[run][level].size; | |||
| } | |||
| else { | |||
| if (level < DV_VLC_MAP_LEV_SIZE) { | |||
| *vlc = dv_vlc_map[0][level].vlc | sign; | |||
| size = dv_vlc_map[0][level].size; | |||
| } else { | |||
| *vlc = dv_vlc_map[0][level].vlc | sign; | |||
| size = dv_vlc_map[0][level].size; | |||
| } else { | |||
| *vlc = 0xfe00 | (level << 1) | sign; | |||
| size = 16; | |||
| } | |||
| if (run) { | |||
| *vlc |= ((run < 16) ? dv_vlc_map[run-1][0].vlc : | |||
| (0x1f80 | (run - 1))) << size; | |||
| size += (run < 16) ? dv_vlc_map[run-1][0].size : 13; | |||
| } | |||
| size = 16; | |||
| } | |||
| if (run) { | |||
| *vlc |= ((run < 16) ? dv_vlc_map[run-1][0].vlc : | |||
| (0x1f80 | (run - 1))) << size; | |||
| size += (run < 16) ? dv_vlc_map[run-1][0].size : 13; | |||
| } | |||
| } | |||
| return size; | |||
| @@ -568,13 +568,13 @@ static always_inline int dv_rl2vlc_size(int run, int level) | |||
| int size; | |||
| if (run < DV_VLC_MAP_RUN_SIZE && level < DV_VLC_MAP_LEV_SIZE) { | |||
| size = dv_vlc_map[run][level].size; | |||
| size = dv_vlc_map[run][level].size; | |||
| } | |||
| else { | |||
| size = (level < DV_VLC_MAP_LEV_SIZE) ? dv_vlc_map[0][level].size : 16; | |||
| if (run) { | |||
| size += (run < 16) ? dv_vlc_map[run-1][0].size : 13; | |||
| } | |||
| size = (level < DV_VLC_MAP_LEV_SIZE) ? dv_vlc_map[0][level].size : 16; | |||
| if (run) { | |||
| size += (run < 16) ? dv_vlc_map[run-1][0].size : 13; | |||
| } | |||
| } | |||
| return size; | |||
| } | |||
| @@ -620,14 +620,14 @@ static always_inline PutBitContext* dv_encode_ac(EncBlockInfo* bi, PutBitContext | |||
| for (; size > (bits_left = put_bits_left(pb)); pb++) { | |||
| if (bits_left) { | |||
| size -= bits_left; | |||
| put_bits(pb, bits_left, vlc >> size); | |||
| vlc = vlc & ((1<<size)-1); | |||
| } | |||
| if (pb + 1 >= pb_end) { | |||
| bi->partial_bit_count = size; | |||
| bi->partial_bit_buffer = vlc; | |||
| return pb; | |||
| } | |||
| put_bits(pb, bits_left, vlc >> size); | |||
| vlc = vlc & ((1<<size)-1); | |||
| } | |||
| if (pb + 1 >= pb_end) { | |||
| bi->partial_bit_count = size; | |||
| bi->partial_bit_buffer = vlc; | |||
| return pb; | |||
| } | |||
| } | |||
| /* Store VLC */ | |||
| @@ -712,14 +712,14 @@ static always_inline int dv_guess_dct_mode(DCTELEM *blk) { | |||
| s = blk; | |||
| for(i=0; i<7; i++) { | |||
| score88 += SC(0, 8) + SC(1, 9) + SC(2, 10) + SC(3, 11) + | |||
| SC(4, 12) + SC(5,13) + SC(6, 14) + SC(7, 15); | |||
| SC(4, 12) + SC(5,13) + SC(6, 14) + SC(7, 15); | |||
| s += 8; | |||
| } | |||
| /* Compute 2-4-8 score (small values give a better chance for 2-4-8 DCT) */ | |||
| s = blk; | |||
| for(i=0; i<6; i++) { | |||
| score248 += SC(0, 16) + SC(1,17) + SC(2, 18) + SC(3, 19) + | |||
| SC(4, 20) + SC(5,21) + SC(6, 22) + SC(7, 23); | |||
| SC(4, 20) + SC(5,21) + SC(6, 22) + SC(7, 23); | |||
| s += 8; | |||
| } | |||
| @@ -736,30 +736,30 @@ static inline void dv_guess_qnos(EncBlockInfo* blks, int* qnos) | |||
| b = blks; | |||
| for (i=0; i<5; i++) { | |||
| if (!qnos[i]) | |||
| continue; | |||
| continue; | |||
| qnos[i]--; | |||
| size[i] = 0; | |||
| qnos[i]--; | |||
| size[i] = 0; | |||
| for (j=0; j<6; j++, b++) { | |||
| for (a=0; a<4; a++) { | |||
| if (b->area_q[a] != dv_quant_shifts[qnos[i] + dv_quant_offset[b->cno]][a]) { | |||
| b->bit_size[a] = 1; // 4 areas 4 bits for EOB :) | |||
| b->area_q[a]++; | |||
| for (a=0; a<4; a++) { | |||
| if (b->area_q[a] != dv_quant_shifts[qnos[i] + dv_quant_offset[b->cno]][a]) { | |||
| b->bit_size[a] = 1; // 4 areas 4 bits for EOB :) | |||
| b->area_q[a]++; | |||
| prev= b->prev[a]; | |||
| for (k= b->next[prev] ; k<mb_area_start[a+1]; k= b->next[k]) { | |||
| b->mb[k] >>= 1; | |||
| if (b->mb[k]) { | |||
| b->mb[k] >>= 1; | |||
| if (b->mb[k]) { | |||
| b->bit_size[a] += dv_rl2vlc_size(k - prev - 1, b->mb[k]); | |||
| prev= k; | |||
| prev= k; | |||
| } else { | |||
| b->next[prev] = b->next[k]; | |||
| } | |||
| } | |||
| } | |||
| b->prev[a+1]= prev; | |||
| } | |||
| size[i] += b->bit_size[a]; | |||
| } | |||
| } | |||
| } | |||
| size[i] += b->bit_size[a]; | |||
| } | |||
| } | |||
| } | |||
| } while ((vs_total_ac_bits < size[0] + size[1] + size[2] + size[3] + size[4]) && | |||
| (qnos[0]|qnos[1]|qnos[2]|qnos[3]|qnos[4])); | |||
| @@ -797,68 +797,68 @@ static inline void dv_encode_video_segment(DVVideoContext *s, | |||
| mb_x = v & 0xff; | |||
| mb_y = v >> 8; | |||
| y_ptr = s->picture.data[0] + (mb_y * s->picture.linesize[0] * 8) + (mb_x * 8); | |||
| c_offset = (s->sys->pix_fmt == PIX_FMT_YUV411P) ? | |||
| ((mb_y * s->picture.linesize[1] * 8) + ((mb_x >> 2) * 8)) : | |||
| (((mb_y >> 1) * s->picture.linesize[1] * 8) + ((mb_x >> 1) * 8)); | |||
| do_edge_wrap = 0; | |||
| qnos[mb_index] = 15; /* No quantization */ | |||
| c_offset = (s->sys->pix_fmt == PIX_FMT_YUV411P) ? | |||
| ((mb_y * s->picture.linesize[1] * 8) + ((mb_x >> 2) * 8)) : | |||
| (((mb_y >> 1) * s->picture.linesize[1] * 8) + ((mb_x >> 1) * 8)); | |||
| do_edge_wrap = 0; | |||
| qnos[mb_index] = 15; /* No quantization */ | |||
| ptr = dif + mb_index*80 + 4; | |||
| for(j = 0;j < 6; j++) { | |||
| if (j < 4) { /* Four Y blocks */ | |||
| /* NOTE: at end of line, the macroblock is handled as 420 */ | |||
| if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x < (704 / 8)) { | |||
| /* NOTE: at end of line, the macroblock is handled as 420 */ | |||
| if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x < (704 / 8)) { | |||
| data = y_ptr + (j * 8); | |||
| } else { | |||
| data = y_ptr + ((j & 1) * 8) + ((j >> 1) * 8 * s->picture.linesize[0]); | |||
| } | |||
| linesize = s->picture.linesize[0]; | |||
| linesize = s->picture.linesize[0]; | |||
| } else { /* Cr and Cb blocks */ | |||
| /* don't ask Fabrice why they inverted Cb and Cr ! */ | |||
| data = s->picture.data[6 - j] + c_offset; | |||
| linesize = s->picture.linesize[6 - j]; | |||
| if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8)) | |||
| do_edge_wrap = 1; | |||
| } | |||
| /* Everything is set up -- now just copy data -> DCT block */ | |||
| if (do_edge_wrap) { /* Edge wrap copy: 4x16 -> 8x8 */ | |||
| uint8_t* d; | |||
| DCTELEM *b = block; | |||
| for (i=0;i<8;i++) { | |||
| d = data + 8 * linesize; | |||
| b[0] = data[0]; b[1] = data[1]; b[2] = data[2]; b[3] = data[3]; | |||
| /* don't ask Fabrice why they inverted Cb and Cr ! */ | |||
| data = s->picture.data[6 - j] + c_offset; | |||
| linesize = s->picture.linesize[6 - j]; | |||
| if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8)) | |||
| do_edge_wrap = 1; | |||
| } | |||
| /* Everything is set up -- now just copy data -> DCT block */ | |||
| if (do_edge_wrap) { /* Edge wrap copy: 4x16 -> 8x8 */ | |||
| uint8_t* d; | |||
| DCTELEM *b = block; | |||
| for (i=0;i<8;i++) { | |||
| d = data + 8 * linesize; | |||
| b[0] = data[0]; b[1] = data[1]; b[2] = data[2]; b[3] = data[3]; | |||
| b[4] = d[0]; b[5] = d[1]; b[6] = d[2]; b[7] = d[3]; | |||
| data += linesize; | |||
| b += 8; | |||
| } | |||
| } else { /* Simple copy: 8x8 -> 8x8 */ | |||
| s->get_pixels(block, data, linesize); | |||
| } | |||
| data += linesize; | |||
| b += 8; | |||
| } | |||
| } else { /* Simple copy: 8x8 -> 8x8 */ | |||
| s->get_pixels(block, data, linesize); | |||
| } | |||
| if(s->avctx->flags & CODEC_FLAG_INTERLACED_DCT) | |||
| enc_blk->dct_mode = dv_guess_dct_mode(block); | |||
| else | |||
| enc_blk->dct_mode = 0; | |||
| enc_blk->area_q[0] = enc_blk->area_q[1] = enc_blk->area_q[2] = enc_blk->area_q[3] = 0; | |||
| enc_blk->partial_bit_count = 0; | |||
| enc_blk->partial_bit_buffer = 0; | |||
| enc_blk->cur_ac = 0; | |||
| enc_blk->area_q[0] = enc_blk->area_q[1] = enc_blk->area_q[2] = enc_blk->area_q[3] = 0; | |||
| enc_blk->partial_bit_count = 0; | |||
| enc_blk->partial_bit_buffer = 0; | |||
| enc_blk->cur_ac = 0; | |||
| s->fdct[enc_blk->dct_mode](block); | |||
| s->fdct[enc_blk->dct_mode](block); | |||
| dv_set_class_number(block, enc_blk, | |||
| enc_blk->dct_mode ? ff_zigzag248_direct : ff_zigzag_direct, j/4); | |||
| dv_set_class_number(block, enc_blk, | |||
| enc_blk->dct_mode ? ff_zigzag248_direct : ff_zigzag_direct, j/4); | |||
| init_put_bits(pb, ptr, block_sizes[j]/8); | |||
| put_bits(pb, 9, (uint16_t)(((enc_blk->mb[0] >> 3) - 1024 + 2) >> 2)); | |||
| put_bits(pb, 1, enc_blk->dct_mode); | |||
| put_bits(pb, 2, enc_blk->cno); | |||
| vs_bit_size += enc_blk->bit_size[0] + enc_blk->bit_size[1] + | |||
| enc_blk->bit_size[2] + enc_blk->bit_size[3]; | |||
| ++enc_blk; | |||
| ++pb; | |||
| ptr += block_sizes[j]/8; | |||
| put_bits(pb, 9, (uint16_t)(((enc_blk->mb[0] >> 3) - 1024 + 2) >> 2)); | |||
| put_bits(pb, 1, enc_blk->dct_mode); | |||
| put_bits(pb, 2, enc_blk->cno); | |||
| vs_bit_size += enc_blk->bit_size[0] + enc_blk->bit_size[1] + | |||
| enc_blk->bit_size[2] + enc_blk->bit_size[3]; | |||
| ++enc_blk; | |||
| ++pb; | |||
| ptr += block_sizes[j]/8; | |||
| } | |||
| } | |||
| @@ -898,7 +898,7 @@ static int dv_decode_mt(AVCodecContext *avctx, void* sl) | |||
| DVVideoContext *s = avctx->priv_data; | |||
| int slice = (size_t)sl; | |||
| dv_decode_video_segment(s, &s->buf[((slice/27)*6+(slice/3)+slice*5+7)*80], | |||
| &s->sys->video_place[slice*5]); | |||
| &s->sys->video_place[slice*5]); | |||
| return 0; | |||
| } | |||
| @@ -907,7 +907,7 @@ static int dv_encode_mt(AVCodecContext *avctx, void* sl) | |||
| DVVideoContext *s = avctx->priv_data; | |||
| int slice = (size_t)sl; | |||
| dv_encode_video_segment(s, &s->buf[((slice/27)*6+(slice/3)+slice*5+7)*80], | |||
| &s->sys->video_place[slice*5]); | |||
| return 0; | |||
| } | |||
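/* The buffer offset used by both worker callbacks above is an index into
 * 80-byte DIF blocks.  A hedged reading: slice/27 selects the DIF
 * sequence, slice*5 counts the video blocks already placed, slice/3 the
 * interleaved audio blocks, and +7 skips the current sequence's
 * header/subcode/VAUX blocks plus its first audio block.  Minimal
 * self-check of the arithmetic: */
static int dif_offset(int slice)
{
    return ((slice / 27) * 6 + (slice / 3) + slice * 5 + 7) * 80;
}
/* e.g. dif_offset(0) == 560 and dif_offset(1) == 960, so consecutive
 * video segments within a sequence sit 5 DIF blocks (400 bytes) apart. */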
| @@ -940,7 +940,7 @@ static int dvvideo_decode_frame(AVCodecContext *avctx, | |||
| s->buf = buf; | |||
| avctx->execute(avctx, dv_decode_mt, (void**)&dv_anchor[0], NULL, | |||
| s->sys->difseg_size * 27); | |||
| emms_c(); | |||
| @@ -958,7 +958,7 @@ static int dvvideo_encode_frame(AVCodecContext *c, uint8_t *buf, int buf_size, | |||
| s->sys = dv_codec_profile(c); | |||
| if (!s->sys) | |||
| return -1; | |||
| if(buf_size < s->sys->frame_size) | |||
| return -1; | |||
| @@ -969,7 +969,7 @@ static int dvvideo_encode_frame(AVCodecContext *c, uint8_t *buf, int buf_size, | |||
| s->buf = buf; | |||
| c->execute(c, dv_encode_mt, (void**)&dv_anchor[0], NULL, | |||
| s->sys->difseg_size * 27); | |||
| emms_c(); | |||
| return s->sys->frame_size; | |||
| @@ -192,7 +192,7 @@ static void dvb_encode_rle4(uint8_t **pq, | |||
| #define SCALEBITS 10 | |||
| #define ONE_HALF (1 << (SCALEBITS - 1)) | |||
| #define FIX(x) ((int) ((x) * (1<<SCALEBITS) + 0.5)) | |||
| #define RGB_TO_Y_CCIR(r, g, b) \ | |||
| ((FIX(0.29900*219.0/255.0) * (r) + FIX(0.58700*219.0/255.0) * (g) + \ | |||
| @@ -108,8 +108,8 @@ static void filter181(int16_t *data, int width, int height, int stride){ | |||
| /** | |||
 * guess the DC of blocks which don't have an undamaged DC
 * @param w     width in 8 pixel blocks
 * @param h     height in 8 pixel blocks
| */ | |||
| static void guess_dc(MpegEncContext *s, int16_t *dc, int w, int h, int stride, int is_luma){ | |||
| int b_x, b_y; | |||
| @@ -192,8 +192,8 @@ static void guess_dc(MpegEncContext *s, int16_t *dc, int w, int h, int stride, i | |||
| /** | |||
| * simple horizontal deblocking filter used for error resilience | |||
| * @param w width in 8 pixel blocks | |||
| * @param h height in 8 pixel blocks | |||
| */ | |||
| static void h_block_filter(MpegEncContext *s, uint8_t *dst, int w, int h, int stride, int is_luma){ | |||
| int b_x, b_y; | |||
| @@ -252,8 +252,8 @@ static void h_block_filter(MpegEncContext *s, uint8_t *dst, int w, int h, int st | |||
| /** | |||
| * simple vertical deblocking filter used for error resilience | |||
| * @param w width in 8 pixel blocks | |||
| * @param h height in 8 pixel blocks | |||
| */ | |||
| static void v_block_filter(MpegEncContext *s, uint8_t *dst, int w, int h, int stride, int is_luma){ | |||
| int b_x, b_y; | |||
| @@ -348,7 +348,7 @@ static void guess_mv(MpegEncContext *s){ | |||
| s->mv_type = MV_TYPE_16X16; | |||
| s->mb_skipped=0; | |||
| s->dsp.clear_blocks(s->block[0]); | |||
| s->mb_x= mb_x; | |||
| s->mb_y= mb_y; | |||
| @@ -476,7 +476,7 @@ int score_sum=0; | |||
| s->mv_type = MV_TYPE_16X16; | |||
| s->mb_skipped=0; | |||
| s->dsp.clear_blocks(s->block[0]); | |||
| s->mb_x= mb_x; | |||
| s->mb_y= mb_y; | |||
| @@ -582,7 +582,7 @@ static int is_intra_more_likely(MpegEncContext *s){ | |||
| uint8_t *mb_ptr = s->current_picture.data[0] + mb_x*16 + mb_y*16*s->linesize; | |||
| uint8_t *last_mb_ptr= s->last_picture.data [0] + mb_x*16 + mb_y*16*s->linesize; | |||
| is_intra_likely += s->dsp.sad[0](NULL, last_mb_ptr, mb_ptr , s->linesize, 16); | |||
| is_intra_likely -= s->dsp.sad[0](NULL, last_mb_ptr, last_mb_ptr+s->linesize*16, s->linesize, 16); | |||
| }else{ | |||
| if(IS_INTRA(s->current_picture.mb_type[mb_xy])) | |||
| @@ -873,7 +873,7 @@ void ff_er_frame_end(MpegEncContext *s){ | |||
| s->mv[0][0][1] = s->current_picture.motion_val[0][ mb_x*2 + mb_y*2*s->b8_stride ][1]; | |||
| } | |||
| s->dsp.clear_blocks(s->block[0]); | |||
| s->mb_x= mb_x; | |||
| s->mb_y= mb_y; | |||
| @@ -46,7 +46,7 @@ static int Faac_encode_init(AVCodecContext *avctx) | |||
| /* check faac version */ | |||
| faac_cfg = faacEncGetCurrentConfiguration(s->faac_handle); | |||
| if (faac_cfg->version != FAAC_CFG_VERSION) { | |||
| av_log(avctx, AV_LOG_ERROR, "wrong libfaac version (compiled for: %d, using %d)\n", FAAC_CFG_VERSION, faac_cfg->version); | |||
| av_log(avctx, AV_LOG_ERROR, "wrong libfaac version (compiled for: %d, using %d)\n", FAAC_CFG_VERSION, faac_cfg->version); | |||
| faacEncClose(s->faac_handle); | |||
| return -1; | |||
| } | |||
| @@ -47,8 +47,8 @@ static const char* libfaadname = "libfaad.so.0"; | |||
| #endif | |||
| typedef struct { | |||
| void* handle; /* dlopen handle */ | |||
| void* faac_handle; /* FAAD library handle */ | |||
| int frame_size; | |||
| int sample_size; | |||
| int flags; | |||
| @@ -57,36 +57,36 @@ typedef struct { | |||
| faacDecHandle FAADAPI (*faacDecOpen)(void); | |||
| faacDecConfigurationPtr FAADAPI (*faacDecGetCurrentConfiguration)(faacDecHandle hDecoder); | |||
| #ifndef FAAD2_VERSION | |||
        int FAADAPI (*faacDecSetConfiguration)(faacDecHandle hDecoder,
                                               faacDecConfigurationPtr config);
        int FAADAPI (*faacDecInit)(faacDecHandle hDecoder,
                                   unsigned char *buffer,
                                   unsigned long *samplerate,
                                   unsigned long *channels);
        int FAADAPI (*faacDecInit2)(faacDecHandle hDecoder, unsigned char *pBuffer,
                                    unsigned long SizeOfDecoderSpecificInfo,
                                    unsigned long *samplerate, unsigned long *channels);
        int FAADAPI (*faacDecDecode)(faacDecHandle hDecoder,
                                     unsigned char *buffer,
                                     unsigned long *bytesconsumed,
                                     short *sample_buffer,
                                     unsigned long *samples);
#else
        unsigned char FAADAPI (*faacDecSetConfiguration)(faacDecHandle hDecoder,
                                                         faacDecConfigurationPtr config);
        long FAADAPI (*faacDecInit)(faacDecHandle hDecoder,
                                    unsigned char *buffer,
                                    unsigned long buffer_size,
                                    unsigned long *samplerate,
                                    unsigned char *channels);
        char FAADAPI (*faacDecInit2)(faacDecHandle hDecoder, unsigned char *pBuffer,
                                     unsigned long SizeOfDecoderSpecificInfo,
                                     unsigned long *samplerate, unsigned char *channels);
        void *FAADAPI (*faacDecDecode)(faacDecHandle hDecoder,
                                       faacDecFrameInfo *hInfo,
                                       unsigned char *buffer,
                                       unsigned long buffer_size);
        char* FAADAPI (*faacDecGetErrorMessage)(unsigned char errcode);
| #endif | |||
| void FAADAPI (*faacDecClose)(faacDecHandle hDecoder); | |||
| @@ -112,14 +112,14 @@ static int faac_init_mp4(AVCodecContext *avctx) | |||
| int r = 0; | |||
| if (avctx->extradata) | |||
| r = s->faacDecInit2(s->faac_handle, (uint8_t*) avctx->extradata, | |||
| avctx->extradata_size, | |||
| &samplerate, &channels); | |||
| // else r = s->faacDecInit(s->faac_handle ... ); | |||
| if (r < 0) | |||
| av_log(avctx, AV_LOG_ERROR, "faacDecInit2 failed r:%d sr:%ld ch:%ld s:%d\n", | |||
| r, samplerate, (long)channels, avctx->extradata_size); | |||
| av_log(avctx, AV_LOG_ERROR, "faacDecInit2 failed r:%d sr:%ld ch:%ld s:%d\n", | |||
| r, samplerate, (long)channels, avctx->extradata_size); | |||
| avctx->sample_rate = samplerate; | |||
| avctx->channels = channels; | |||
| @@ -141,7 +141,7 @@ static int faac_decode_frame(AVCodecContext *avctx, | |||
| void *out; | |||
| #endif | |||
| if(buf_size == 0) | |||
| return 0; | |||
| #ifndef FAAD2_VERSION | |||
| out = s->faacDecDecode(s->faac_handle, | |||
| (unsigned char*)buf, | |||
| @@ -150,16 +150,16 @@ static int faac_decode_frame(AVCodecContext *avctx, | |||
| &samples); | |||
| samples *= s->sample_size; | |||
| if (data_size) | |||
| *data_size = samples; | |||
| return (buf_size < (int)bytesconsumed) | |||
| ? buf_size : (int)bytesconsumed; | |||
| #else | |||
| out = s->faacDecDecode(s->faac_handle, &frame_info, (unsigned char*)buf, (unsigned long)buf_size); | |||
| if (frame_info.error > 0) { | |||
| av_log(avctx, AV_LOG_ERROR, "faac: frame decoding failed: %s\n", | |||
| s->faacDecGetErrorMessage(frame_info.error)); | |||
| av_log(avctx, AV_LOG_ERROR, "faac: frame decoding failed: %s\n", | |||
| s->faacDecGetErrorMessage(frame_info.error)); | |||
| return 0; | |||
| } | |||
| @@ -167,10 +167,10 @@ static int faac_decode_frame(AVCodecContext *avctx, | |||
| memcpy(data, out, frame_info.samples); // CHECKME - can we cheat this one | |||
| if (data_size) | |||
| *data_size = frame_info.samples; | |||
| return (buf_size < (int)frame_info.bytesconsumed) | |||
| ? buf_size : (int)frame_info.bytesconsumed; | |||
| #endif | |||
| } | |||
| @@ -196,8 +196,8 @@ static int faac_decode_init(AVCodecContext *avctx) | |||
| s->handle = dlopen(libfaadname, RTLD_LAZY); | |||
| if (!s->handle) | |||
| { | |||
| av_log(avctx, AV_LOG_ERROR, "FAAD library: %s could not be opened! \n%s\n", | |||
| libfaadname, dlerror()); | |||
| av_log(avctx, AV_LOG_ERROR, "FAAD library: %s could not be opened! \n%s\n", | |||
| libfaadname, dlerror()); | |||
| return -1; | |||
| } | |||
| #define dfaac(a, b) \ | |||
| @@ -209,32 +209,32 @@ static int faac_decode_init(AVCodecContext *avctx) | |||
| #endif /* CONFIG_FAADBIN */ | |||
| // resolve all needed function calls | |||
        dfaac(Open, (faacDecHandle FAADAPI (*)(void)));
        dfaac(GetCurrentConfiguration, (faacDecConfigurationPtr
                                        FAADAPI (*)(faacDecHandle)));
#ifndef FAAD2_VERSION
        dfaac(SetConfiguration, (int FAADAPI (*)(faacDecHandle,
                                                 faacDecConfigurationPtr)));
        dfaac(Init, (int FAADAPI (*)(faacDecHandle, unsigned char*,
                                     unsigned long*, unsigned long*)));
        dfaac(Init2, (int FAADAPI (*)(faacDecHandle, unsigned char*,
                                      unsigned long, unsigned long*,
                                      unsigned long*)));
        dfaac(Close, (void FAADAPI (*)(faacDecHandle hDecoder)));
        dfaac(Decode, (int FAADAPI (*)(faacDecHandle, unsigned char*,
                                       unsigned long*, short*, unsigned long*)));
#else
        dfaac(SetConfiguration, (unsigned char FAADAPI (*)(faacDecHandle,
                                                           faacDecConfigurationPtr)));
        dfaac(Init, (long FAADAPI (*)(faacDecHandle, unsigned char*,
                                      unsigned long, unsigned long*, unsigned char*)));
        dfaac(Init2, (char FAADAPI (*)(faacDecHandle, unsigned char*,
                                       unsigned long, unsigned long*,
                                       unsigned char*)));
        dfaac(Decode, (void *FAADAPI (*)(faacDecHandle, faacDecFrameInfo*,
                                         unsigned char*, unsigned long)));
        dfaac(GetErrorMessage, (char* FAADAPI (*)(unsigned char)));
| #endif | |||
| #undef dfacc | |||
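/* The dfaac() macro above follows the usual dlopen/dlsym pattern: look a
 * symbol up by name, cast it to the expected function-pointer type and
 * remember the first name that fails.  A generic sketch of that pattern
 * (handle and symbol names here are illustrative, not the faad ones): */
#include <dlfcn.h>
static void *resolve(void *handle, const char *name, const char **err)
{
    void *sym = dlsym(handle, name);
    if (!sym && !*err)
        *err = name;            /* record the first unresolved symbol */
    return sym;
}
/* usage sketch:
 *     const char *err = NULL;
 *     int (*foo_init)(void) = (int (*)(void))resolve(handle, "foo_init", &err);
 *     if (err) { report err and dlclose(handle); }
 */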
| @@ -243,8 +243,8 @@ static int faac_decode_init(AVCodecContext *avctx) | |||
| } | |||
| if (err) { | |||
| dlclose(s->handle); | |||
| av_log(avctx, AV_LOG_ERROR, "FAAD library: cannot resolve %s in %s!\n", | |||
| err, libfaadname); | |||
| av_log(avctx, AV_LOG_ERROR, "FAAD library: cannot resolve %s in %s!\n", | |||
| err, libfaadname); | |||
| return -1; | |||
| } | |||
| #endif | |||
| @@ -260,31 +260,31 @@ static int faac_decode_init(AVCodecContext *avctx) | |||
| faac_cfg = s->faacDecGetCurrentConfiguration(s->faac_handle); | |||
| if (faac_cfg) { | |||
        switch (avctx->bits_per_sample) {
        case 8: av_log(avctx, AV_LOG_ERROR, "FAADlib unsupported bps %d\n", avctx->bits_per_sample); break;
        default:
        case 16:
#ifdef FAAD2_VERSION
            faac_cfg->outputFormat = FAAD_FMT_16BIT;
#endif
            s->sample_size = 2;
            break;
        case 24:
#ifdef FAAD2_VERSION
            faac_cfg->outputFormat = FAAD_FMT_24BIT;
#endif
            s->sample_size = 3;
            break;
        case 32:
#ifdef FAAD2_VERSION
            faac_cfg->outputFormat = FAAD_FMT_32BIT;
#endif
            s->sample_size = 4;
            break;
        }

        faac_cfg->defSampleRate = (!avctx->sample_rate) ? 44100 : avctx->sample_rate;
        faac_cfg->defObjectType = LC;
| } | |||
| s->faacDecSetConfiguration(s->faac_handle, faac_cfg); | |||
| @@ -204,15 +204,15 @@ void ff_faandct248(DCTELEM * data) | |||
| data[8*6 + i] = lrintf(SCALE(8*6 + i) * (tmp13 - z1)); | |||
| tmp10 = tmp4 + tmp7; | |||
        tmp11 = tmp5 + tmp6;
        tmp12 = tmp5 - tmp6;
        tmp13 = tmp4 - tmp7;

        data[8*1 + i] = lrintf(SCALE(8*0 + i) * (tmp10 + tmp11));
        data[8*5 + i] = lrintf(SCALE(8*4 + i) * (tmp10 - tmp11));

        z1 = (tmp12 + tmp13)* A1;
        data[8*3 + i] = lrintf(SCALE(8*2 + i) * (tmp13 + z1));
        data[8*7 + i] = lrintf(SCALE(8*6 + i) * (tmp13 - z1));
| } | |||
| } | |||
| @@ -64,51 +64,51 @@ void init_fdct() | |||
| void fdct(block) | |||
| short *block; | |||
| { | |||
    register int i, j;
    double s;
    double tmp[64];

    for(i = 0; i < 8; i++)
        for(j = 0; j < 8; j++)
        {
            s = 0.0;
/*
 *           for(k = 0; k < 8; k++)
 *               s += c[j][k] * block[8 * i + k];
 */
            s += c[j][0] * block[8 * i + 0];
            s += c[j][1] * block[8 * i + 1];
            s += c[j][2] * block[8 * i + 2];
            s += c[j][3] * block[8 * i + 3];
            s += c[j][4] * block[8 * i + 4];
            s += c[j][5] * block[8 * i + 5];
            s += c[j][6] * block[8 * i + 6];
            s += c[j][7] * block[8 * i + 7];
            tmp[8 * i + j] = s;
        }

    for(j = 0; j < 8; j++)
        for(i = 0; i < 8; i++)
        {
            s = 0.0;
/*
 *           for(k = 0; k < 8; k++)
 *               s += c[i][k] * tmp[8 * k + j];
 */
            s += c[i][0] * tmp[8 * 0 + j];
            s += c[i][1] * tmp[8 * 1 + j];
            s += c[i][2] * tmp[8 * 2 + j];
            s += c[i][3] * tmp[8 * 3 + j];
            s += c[i][4] * tmp[8 * 4 + j];
            s += c[i][5] * tmp[8 * 5 + j];
            s += c[i][6] * tmp[8 * 6 + j];
            s += c[i][7] * tmp[8 * 7 + j];
            s *= 8.0;
            block[8 * i + j] = (short)floor(s + 0.499999);
| /* | |||
| * reason for adding 0.499999 instead of 0.5: | |||
| * s is quite often x.5 (at least for i and/or j = 0 or 4) | |||
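/* A tiny numeric illustration of that rounding choice (the comment above,
 * cut short by the hunk boundary, gives the actual rationale): with +0.5
 * an exact half always rounds up, while +0.499999 leaves it down, so the
 * many results that land exactly on x.5 are not all biased upwards. */
#include <math.h>
#include <stdio.h>
int main(void)
{
    double s = 12.5;
    printf("%d %d\n", (int)floor(s + 0.5), (int)floor(s + 0.499999)); /* prints "13 12" */
    return 0;
}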
| @@ -149,8 +149,8 @@ int ff_fft_init(FFTContext *s, int nbits, int inverse) | |||
| void ff_fft_calc_c(FFTContext *s, FFTComplex *z) | |||
| { | |||
| int ln = s->nbits; | |||
| int j, np, np2; | |||
| int nblocks, nloops; | |||
| register FFTComplex *p, *q; | |||
| FFTComplex *exptab = s->exptab; | |||
| int l; | |||
| @@ -31,30 +31,30 @@ | |||
| * instead of simply using 32bit integer arithmetic. | |||
| */ | |||
| typedef struct Float11 { | |||
| int sign; /**< 1bit sign */ | |||
| int exp; /**< 4bit exponent */ | |||
| int mant; /**< 6bit mantissa */ | |||
| } Float11; | |||
| static inline Float11* i2f(int16_t i, Float11* f) | |||
| { | |||
| f->sign = (i < 0); | |||
| if (f->sign) | |||
| i = -i; | |||
| f->exp = av_log2_16bit(i) + !!i; | |||
| f->mant = i? (i<<6) >> f->exp : | |||
| 1<<5; | |||
| return f; | |||
| } | |||
| static inline int16_t mult(Float11* f1, Float11* f2) | |||
| { | |||
| int res, exp; | |||
| exp = f1->exp + f2->exp; | |||
| res = (((f1->mant * f2->mant) + 0x30) >> 4) << 7; | |||
| res = exp > 26 ? res << (exp - 26) : res >> (26 - exp); | |||
| return (f1->sign ^ f2->sign) ? -res : res; | |||
| } | |||
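/* A standalone sketch of the Float11 arithmetic above: convert two 16-bit
 * samples to the sign/exponent/mantissa form and multiply them.
 * av_log2_16bit() is replaced by a plain loop and the types below are
 * re-declared for illustration -- this is not a drop-in for g726.c. */
#include <stdint.h>
typedef struct { int sign, exp, mant; } F11;
static F11 to_f11(int16_t i)
{
    F11 f;
    int v = i < 0 ? -i : i, e = 0;
    while (v >> e)                          /* e = index of highest set bit + 1 */
        e++;
    f.sign = i < 0;
    f.exp  = e;
    f.mant = v ? (v << 6) >> e : 1 << 5;    /* 6-bit mantissa, 1<<5 for zero */
    return f;
}
static int16_t f11_mult(F11 a, F11 b)
{
    int exp = a.exp + b.exp;
    int res = (((a.mant * b.mant) + 0x30) >> 4) << 7;
    res = exp > 26 ? res << (exp - 26) : res >> (26 - exp);
    return (a.sign ^ b.sign) ? -res : res;
}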
| static inline int sgn(int value) | |||
| @@ -63,32 +63,32 @@ static inline int sgn(int value) | |||
| } | |||
| typedef struct G726Tables { | |||
| int bits; /**< bits per sample */ | |||
| int* quant; /**< quantization table */ | |||
| int* iquant; /**< inverse quantization table */ | |||
| int* W; /**< special table #1 ;-) */ | |||
| int* F; /**< special table #2 */ | |||
| } G726Tables; | |||
| typedef struct G726Context { | |||
| G726Tables* tbls; /**< static tables needed for computation */ | |||
| Float11 sr[2]; /**< prev. reconstructed samples */ | |||
| Float11 dq[6]; /**< prev. difference */ | |||
| int a[2]; /**< second order predictor coeffs */ | |||
| int b[6]; /**< sixth order predictor coeffs */ | |||
| int pk[2]; /**< signs of prev. 2 sez + dq */ | |||
| int ap; /**< scale factor control */ | |||
| int yu; /**< fast scale factor */ | |||
| int yl; /**< slow scale factor */ | |||
| int dms; /**< short average magnitude of F[i] */ | |||
| int dml; /**< long average magnitude of F[i] */ | |||
| int td; /**< tone detect */ | |||
| int se; /**< estimated signal for the next iteration */ | |||
| int sez; /**< estimated second order prediction */ | |||
| int y; /**< quantizer scaling factor for the next iteration */ | |||
| } G726Context; | |||
| static int quant_tbl16[] = /**< 16kbit/s 2bits per sample */ | |||
| @@ -113,34 +113,34 @@ static int quant_tbl32[] = /**< 32kbit/s 4bits per sample | |||
| { -125, 79, 177, 245, 299, 348, 399, INT_MAX }; | |||
| static int iquant_tbl32[] = | |||
| { INT_MIN, 4, 135, 213, 273, 323, 373, 425, | |||
| 425, 373, 323, 273, 213, 135, 4, INT_MIN }; | |||
| static int W_tbl32[] = | |||
| { -12, 18, 41, 64, 112, 198, 355, 1122, | |||
| 1122, 355, 198, 112, 64, 41, 18, -12}; | |||
| static int F_tbl32[] = | |||
| { 0, 0, 0, 1, 1, 1, 3, 7, 7, 3, 1, 1, 1, 0, 0, 0 }; | |||
| static int quant_tbl40[] = /**< 40kbit/s 5bits per sample */ | |||
| { -122, -16, 67, 138, 197, 249, 297, 338, | |||
| 377, 412, 444, 474, 501, 527, 552, INT_MAX }; | |||
| static int iquant_tbl40[] = | |||
| { INT_MIN, -66, 28, 104, 169, 224, 274, 318, | |||
| 358, 395, 429, 459, 488, 514, 539, 566, | |||
| 566, 539, 514, 488, 459, 429, 395, 358, | |||
| 318, 274, 224, 169, 104, 28, -66, INT_MIN }; | |||
| static int W_tbl40[] = | |||
| { 14, 14, 24, 39, 40, 41, 58, 100, | |||
| 141, 179, 219, 280, 358, 440, 529, 696, | |||
| 696, 529, 440, 358, 280, 219, 179, 141, | |||
| 100, 58, 41, 40, 39, 24, 14, 14 }; | |||
| static int F_tbl40[] = | |||
| { 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 3, 4, 5, 6, 6, | |||
| 6, 6, 5, 4, 3, 2, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 }; | |||
| static G726Tables G726Tables_pool[] = | |||
| {{ 2, quant_tbl16, iquant_tbl16, W_tbl16, F_tbl16 }, | |||
| { 3, quant_tbl24, iquant_tbl24, W_tbl24, F_tbl24 }, | |||
| { 4, quant_tbl32, iquant_tbl32, W_tbl32, F_tbl32 }, | |||
| { 5, quant_tbl40, iquant_tbl40, W_tbl40, F_tbl40 }}; | |||
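/* The pool above is indexed by code size (bits per sample).  A hedged
 * sketch of how a bit rate maps onto it, assuming 8 kHz sampling as the
 * init code further down requires (16/24/32/40 kbit/s -> 2..5 bits): */
static const G726Tables *pick_tables(int bit_rate)
{
    int code_size = bit_rate / 8000;        /* 2, 3, 4 or 5 */
    if (code_size < 2 || code_size > 5)
        return NULL;
    return &G726Tables_pool[code_size - 2];
}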
| @@ -207,20 +207,20 @@ static inline int16_t g726_iterate(G726Context* c, int16_t I) | |||
| dq0 = dq ? sgn(dq) : 0; | |||
| if (tr) { | |||
| c->a[0] = 0; | |||
| c->a[1] = 0; | |||
| for (i=0; i<6; i++) | |||
| c->b[i] = 0; | |||
| } else { | |||
| /* This is a bit crazy, but it really is +255 not +256 */ | |||
| fa1 = clip((-c->a[0]*c->pk[0]*pk0)>>5, -256, 255); | |||
| c->a[1] += 128*pk0*c->pk[1] + fa1 - (c->a[1]>>7); | |||
| c->a[1] = clip(c->a[1], -12288, 12288); | |||
| c->a[0] += 64*3*pk0*c->pk[0] - (c->a[0] >> 8); | |||
| c->a[0] = clip(c->a[0], -(15360 - c->a[1]), 15360 - c->a[1]); | |||
| for (i=0; i<6; i++) | |||
| c->b[i] += 128*dq0*sgn(-c->dq[i].sign) - (c->b[i]>>8); | |||
| } | |||
| /* Update Dq and Sr and Pk */ | |||
| @@ -323,13 +323,13 @@ static int g726_init(AVCodecContext * avctx) | |||
| if (avctx->channels != 1 || | |||
| (avctx->bit_rate != 16000 && avctx->bit_rate != 24000 && | |||
| avctx->bit_rate != 32000 && avctx->bit_rate != 40000)) { | |||
| av_log(avctx, AV_LOG_ERROR, "G726: unsupported audio format\n"); | |||
| return -1; | |||
| } | |||
| if (avctx->sample_rate != 8000 && avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL) { | |||
| av_log(avctx, AV_LOG_ERROR, "G726: unsupported audio format\n"); | |||
| return -1; | |||
| } | |||
| g726_reset(&c->c, avctx->bit_rate); | |||
| c->code_size = c->c.tbls->bits; | |||
| @@ -384,12 +384,12 @@ static int g726_decode_frame(AVCodecContext *avctx, | |||
| init_get_bits(&gb, buf, buf_size * 8); | |||
| if (c->bits_left) { | |||
int s = c->code_size - c->bits_left;
| code = (c->bit_buffer << s) | get_bits(&gb, s); | |||
| *samples++ = g726_decode(&c->c, code & mask); | |||
| } | |||
| while (get_bits_count(&gb) + c->code_size <= buf_size*8) | |||
| *samples++ = g726_decode(&c->c, get_bits(&gb, c->code_size) & mask); | |||
| c->bits_left = buf_size*8 - get_bits_count(&gb); | |||
| c->bit_buffer = get_bits(&gb, c->bits_left); | |||
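/* The tail handling above carries a partial code across packet
 * boundaries: whatever is left after the last whole code is stashed in
 * bit_buffer/bits_left and prepended to the next buffer.  A buffer-free
 * sketch of the same idea with a plain accumulator (names illustrative): */
typedef struct { unsigned acc; int nbits; } BitCarry;
static int carry_feed(BitCarry *c, unsigned byte, int code_size,
                      void (*emit)(unsigned code))
{
    int n = 0;
    c->acc   = (c->acc << 8) | (byte & 0xFF);
    c->nbits += 8;
    while (c->nbits >= code_size) {
        c->nbits -= code_size;
        emit((c->acc >> c->nbits) & ((1u << code_size) - 1));
        n++;
    }
    return n;           /* codes emitted for this input byte */
}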
| @@ -288,7 +288,7 @@ static inline int get_sr_golomb_flac(GetBitContext *gb, int k, int limit, int es | |||
| * read unsigned golomb rice code (shorten). | |||
| */ | |||
| static inline unsigned int get_ur_golomb_shorten(GetBitContext *gb, int k){ | |||
| return get_ur_golomb_jpegls(gb, k, INT_MAX, 0); | |||
| } | |||
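/* For orientation: a Rice/Golomb code with parameter k is a unary
 * quotient followed by k raw bits.  A self-contained decoding sketch over
 * a plain bit cursor, using one common convention (quotient = number of
 * leading 0 bits before a 1); it deliberately avoids GetBitContext and
 * ignores the limit/escape handling of the jpegls variant above. */
#include <stdint.h>
static unsigned rice_decode(const uint8_t *buf, unsigned *bitpos, int k)
{
    unsigned q = 0, r = 0;
    int i;
    while (!((buf[*bitpos >> 3] >> (7 - (*bitpos & 7))) & 1)) {  /* unary part */
        q++;
        (*bitpos)++;
    }
    (*bitpos)++;                                                 /* stop bit */
    for (i = 0; i < k; i++, (*bitpos)++)                         /* k raw bits */
        r = (r << 1) | ((buf[*bitpos >> 3] >> (7 - (*bitpos & 7))) & 1);
    return (q << k) | r;
}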
| /** | |||
| @@ -395,7 +395,7 @@ static inline void set_te_golomb(PutBitContext *pb, int i, int range){ | |||
| */ | |||
| static inline void set_se_golomb(PutBitContext *pb, int i){ | |||
| // if (i>32767 || i<-32767) | |||
| // av_log(NULL,AV_LOG_ERROR,"value out of range %d\n", i); | |||
| #if 0 | |||
| if(i<=0) i= -2*i; | |||
| else i= 2*i-1; | |||
| @@ -231,11 +231,11 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number) | |||
| (coded_frame_rate_base * (int64_t)s->avctx->time_base.den); | |||
| put_bits(&s->pb, 8, temp_ref & 0xff); /* TemporalReference */ | |||
| put_bits(&s->pb, 1, 1); /* marker */ | |||
| put_bits(&s->pb, 1, 0); /* h263 id */ | |||
| put_bits(&s->pb, 1, 0); /* split screen off */ | |||
| put_bits(&s->pb, 1, 0); /* camera off */ | |||
| put_bits(&s->pb, 1, 0); /* freeze picture release off */ | |||
| format = h263_get_picture_format(s->width, s->height); | |||
| if (!s->h263_plus) { | |||
| @@ -245,12 +245,12 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number) | |||
| /* By now UMV IS DISABLED ON H.263v1, since the restrictions | |||
| of H.263v1 UMV implies to check the predicted MV after | |||
| calculation of the current MB to see if we're on the limits */ | |||
| put_bits(&s->pb, 1, 0); /* Unrestricted Motion Vector: off */ | |||
| put_bits(&s->pb, 1, 0); /* SAC: off */ | |||
| put_bits(&s->pb, 1, s->obmc); /* Advanced Prediction */ | |||
| put_bits(&s->pb, 1, 0); /* only I/P frames, no PB frame */ | |||
| put_bits(&s->pb, 5, s->qscale); | |||
| put_bits(&s->pb, 1, 0); /* Continuous Presence Multipoint mode: off */ | |||
| } else { | |||
| int ufep=1; | |||
| /* H.263v2 */ | |||
| @@ -286,9 +286,9 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number) | |||
| put_bits(&s->pb,1,1); /* "1" to prevent start code emulation */ | |||
| /* This should be here if PLUSPTYPE */ | |||
| put_bits(&s->pb, 1, 0); /* Continuous Presence Multipoint mode: off */ | |||
| if (format == 7) { | |||
| /* Custom Picture Format (CPFMT) */ | |||
| aspect_to_info(s, s->avctx->sample_aspect_ratio); | |||
| @@ -299,7 +299,7 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number) | |||
| if (s->aspect_ratio_info == FF_ASPECT_EXTENDED){ | |||
| put_bits(&s->pb, 8, s->avctx->sample_aspect_ratio.num); | |||
| put_bits(&s->pb, 8, s->avctx->sample_aspect_ratio.den); | |||
| } | |||
| } | |||
| if(s->custom_pcf){ | |||
| if(ufep){ | |||
| @@ -320,7 +320,7 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number) | |||
| put_bits(&s->pb, 5, s->qscale); | |||
| } | |||
| put_bits(&s->pb, 1, 0); /* no PEI */ | |||
| if(s->h263_slice_structured){ | |||
| put_bits(&s->pb, 1, 1); | |||
| @@ -823,8 +823,8 @@ static inline void mpeg4_encode_blocks(MpegEncContext * s, DCTELEM block[6][64], | |||
| } | |||
| void mpeg4_encode_mb(MpegEncContext * s, | |||
| DCTELEM block[6][64], | |||
| int motion_x, int motion_y) | |||
| { | |||
| int cbpc, cbpy, pred_x, pred_y; | |||
| PutBitContext * const pb2 = s->data_partitioning ? &s->pb2 : &s->pb; | |||
| @@ -884,7 +884,7 @@ void mpeg4_encode_mb(MpegEncContext * s, | |||
| return; | |||
| } | |||
| put_bits(&s->pb, 1, 0); /* mb coded modb1=0 */ | |||
| put_bits(&s->pb, 1, cbp ? 0 : 1); /* modb2 */ //FIXME merge | |||
| put_bits(&s->pb, mb_type+1, 1); // this table is so simple that we don't need it :) | |||
| if(cbp) put_bits(&s->pb, 6, cbp); | |||
| @@ -998,7 +998,7 @@ void mpeg4_encode_mb(MpegEncContext * s, | |||
| if(pic==NULL || pic->pict_type!=B_TYPE) break; | |||
| b_pic= pic->data[0] + offset + 16; //FIXME +16 | |||
| diff= s->dsp.sad[0](NULL, p_pic, b_pic, s->linesize, 16); | |||
| if(diff>s->qscale*70){ //FIXME check that 70 is optimal | |||
| s->mb_skipped=0; | |||
| break; | |||
| @@ -1021,7 +1021,7 @@ void mpeg4_encode_mb(MpegEncContext * s, | |||
| } | |||
| } | |||
| put_bits(&s->pb, 1, 0); /* mb coded */ | |||
| cbpc = cbp & 3; | |||
| cbpy = cbp >> 2; | |||
| cbpy ^= 0xf; | |||
| @@ -1121,7 +1121,7 @@ void mpeg4_encode_mb(MpegEncContext * s, | |||
| int dc_diff[6]; //dc values with the dc prediction subtracted | |||
| int dir[6]; //prediction direction | |||
| int zigzag_last_index[6]; | |||
| uint8_t *scan_table[6]; | |||
| int i; | |||
| for(i=0; i<6; i++){ | |||
| @@ -1152,7 +1152,7 @@ void mpeg4_encode_mb(MpegEncContext * s, | |||
| intra_MCBPC_code[cbpc]); | |||
| } else { | |||
| if(s->dquant) cbpc+=8; | |||
| put_bits(&s->pb, 1, 0); /* mb coded */ | |||
| put_bits(&s->pb, | |||
| inter_MCBPC_bits[cbpc + 4], | |||
| inter_MCBPC_code[cbpc + 4]); | |||
| @@ -1185,8 +1185,8 @@ void mpeg4_encode_mb(MpegEncContext * s, | |||
| } | |||
| void h263_encode_mb(MpegEncContext * s, | |||
| DCTELEM block[6][64], | |||
| int motion_x, int motion_y) | |||
| { | |||
| int cbpc, cbpy, i, cbp, pred_x, pred_y; | |||
| int16_t pred_dc; | |||
| @@ -1211,7 +1211,7 @@ void h263_encode_mb(MpegEncContext * s, | |||
| return; | |||
| } | |||
| put_bits(&s->pb, 1, 0); /* mb coded */ | |||
| cbpc = cbp & 3; | |||
| cbpy = cbp >> 2; | |||
| @@ -1346,14 +1346,14 @@ void h263_encode_mb(MpegEncContext * s, | |||
| intra_MCBPC_code[cbpc]); | |||
| } else { | |||
| if(s->dquant) cbpc+=8; | |||
| put_bits(&s->pb, 1, 0); /* mb coded */ | |||
| put_bits(&s->pb, | |||
| inter_MCBPC_bits[cbpc + 4], | |||
| inter_MCBPC_code[cbpc + 4]); | |||
| } | |||
| if (s->h263_aic) { | |||
| /* XXX: currently, we do not try to use ac prediction */ | |||
| put_bits(&s->pb, 1, 0); /* no AC prediction */ | |||
| } | |||
| cbpy = cbp >> 2; | |||
| put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]); | |||
| @@ -1796,7 +1796,7 @@ static void init_uni_dc_tab(void) | |||
| v = abs(level); | |||
| while (v) { | |||
| v >>= 1; | |||
| size++; | |||
| } | |||
| if (level < 0) | |||
| @@ -2318,14 +2318,14 @@ static void mpeg4_encode_vol_header(MpegEncContext * s, int vo_number, int vol_n | |||
| put_bits(&s->pb, 16, 0); | |||
| put_bits(&s->pb, 16, 0x120 + vol_number); /* video obj layer */ | |||
| put_bits(&s->pb, 1, 0); /* random access vol */ | |||
| put_bits(&s->pb, 8, s->vo_type); /* video obj type indication */ | |||
| if(s->workaround_bugs & FF_BUG_MS) { | |||
| put_bits(&s->pb, 1, 0); /* is obj layer id= no */ | |||
| } else { | |||
| put_bits(&s->pb, 1, 1); /* is obj layer id= yes */ | |||
| put_bits(&s->pb, 4, vo_ver_id); /* is obj layer ver id */ | |||
| put_bits(&s->pb, 3, 1); /* is obj layer priority */ | |||
| } | |||
| aspect_to_info(s, s->avctx->sample_aspect_ratio); | |||
| @@ -2337,37 +2337,37 @@ static void mpeg4_encode_vol_header(MpegEncContext * s, int vo_number, int vol_n | |||
| } | |||
| if(s->workaround_bugs & FF_BUG_MS) { // | |||
| put_bits(&s->pb, 1, 0); /* vol control parameters= no @@@ */ | |||
| } else { | |||
| put_bits(&s->pb, 1, 1); /* vol control parameters= yes */ | |||
| put_bits(&s->pb, 2, 1); /* chroma format YUV 420/YV12 */ | |||
| put_bits(&s->pb, 1, s->low_delay); | |||
| put_bits(&s->pb, 1, 0); /* vbv parameters= no */ | |||
| } | |||
| put_bits(&s->pb, 2, RECT_SHAPE); /* vol shape= rectangle */ | |||
| put_bits(&s->pb, 1, 1); /* marker bit */ | |||
| put_bits(&s->pb, 16, s->avctx->time_base.den); | |||
| if (s->time_increment_bits < 1) | |||
| s->time_increment_bits = 1; | |||
| put_bits(&s->pb, 1, 1); /* marker bit */ | |||
| put_bits(&s->pb, 1, 0); /* fixed vop rate=no */ | |||
| put_bits(&s->pb, 1, 1); /* marker bit */ | |||
| put_bits(&s->pb, 13, s->width); /* vol width */ | |||
| put_bits(&s->pb, 1, 1); /* marker bit */ | |||
| put_bits(&s->pb, 13, s->height); /* vol height */ | |||
| put_bits(&s->pb, 1, 1); /* marker bit */ | |||
| put_bits(&s->pb, 1, s->progressive_sequence ? 0 : 1); | |||
| put_bits(&s->pb, 1, 1); /* obmc disable */ | |||
| if (vo_ver_id == 1) { | |||
| put_bits(&s->pb, 1, s->vol_sprite_usage); /* sprite enable */ | |||
| }else{ | |||
| put_bits(&s->pb, 2, s->vol_sprite_usage); /* sprite enable */ | |||
| } | |||
| put_bits(&s->pb, 1, 0); /* not 8 bit == false */ | |||
| put_bits(&s->pb, 1, s->mpeg_quant); /* quant type= (0=h263 style)*/ | |||
| if(s->mpeg_quant){ | |||
| ff_write_quant_matrix(&s->pb, s->avctx->intra_matrix); | |||
| @@ -2376,27 +2376,27 @@ static void mpeg4_encode_vol_header(MpegEncContext * s, int vo_number, int vol_n | |||
| if (vo_ver_id != 1) | |||
| put_bits(&s->pb, 1, s->quarter_sample); | |||
| put_bits(&s->pb, 1, 1); /* complexity estimation disable */ | |||
| s->resync_marker= s->rtp_mode; | |||
| put_bits(&s->pb, 1, s->resync_marker ? 0 : 1);/* resync marker disable */ | |||
| put_bits(&s->pb, 1, s->data_partitioning ? 1 : 0); | |||
| if(s->data_partitioning){ | |||
| put_bits(&s->pb, 1, 0); /* no rvlc */ | |||
| } | |||
| if (vo_ver_id != 1){ | |||
| put_bits(&s->pb, 1, 0); /* newpred */ | |||
| put_bits(&s->pb, 1, 0); /* reduced res vop */ | |||
| } | |||
| put_bits(&s->pb, 1, 0); /* scalability */ | |||
| ff_mpeg4_stuffing(&s->pb); | |||
| /* user data */ | |||
| if(!(s->flags & CODEC_FLAG_BITEXACT)){ | |||
| put_bits(&s->pb, 16, 0); | |||
| put_bits(&s->pb, 16, 0x1B2); /* user_data */ | |||
| ff_put_string(&s->pb, LIBAVCODEC_IDENT, 0); | |||
| } | |||
| } | |||
| @@ -2421,9 +2421,9 @@ void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number) | |||
| //printf("num:%d rate:%d base:%d\n", s->picture_number, s->time_base.den, FRAME_RATE_BASE); | |||
| put_bits(&s->pb, 16, 0); /* vop header */ | |||
| put_bits(&s->pb, 16, VOP_STARTCODE); /* vop header */ | |||
| put_bits(&s->pb, 2, s->pict_type - 1); /* pict type: I = 0 , P = 1 */ | |||
| assert(s->time>=0); | |||
| time_div= s->time/s->avctx->time_base.den; | |||
| @@ -2435,15 +2435,15 @@ void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number) | |||
| put_bits(&s->pb, 1, 0); | |||
| put_bits(&s->pb, 1, 1); /* marker */ | |||
| put_bits(&s->pb, s->time_increment_bits, time_mod); /* time increment */ | |||
| put_bits(&s->pb, 1, 1); /* marker */ | |||
| put_bits(&s->pb, 1, 1); /* vop coded */ | |||
| if ( s->pict_type == P_TYPE | |||
| || (s->pict_type == S_TYPE && s->vol_sprite_usage==GMC_SPRITE)) { | |||
| put_bits(&s->pb, 1, s->no_rounding); /* rounding type */ | |||
| } | |||
| put_bits(&s->pb, 3, 0); /* intra dc VLC threshold */ | |||
| if(!s->progressive_sequence){ | |||
| put_bits(&s->pb, 1, s->current_picture_ptr->top_field_first); | |||
| put_bits(&s->pb, 1, s->alternate_scan); | |||
| @@ -2453,9 +2453,9 @@ void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number) | |||
| put_bits(&s->pb, 5, s->qscale); | |||
| if (s->pict_type != I_TYPE) | |||
| put_bits(&s->pb, 3, s->f_code); /* fcode_for */ | |||
| if (s->pict_type == B_TYPE) | |||
| put_bits(&s->pb, 3, s->b_code); /* fcode_back */ | |||
| // printf("****frame %d\n", picture_number); | |||
| } | |||
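/* Worked example of the VOP timestamp split written above, under the
 * usual MPEG-4 convention: the timestamp (in time_base.den ticks) is cut
 * into whole seconds and a remainder, and the remainder is written with
 * time_increment_bits bits, which must satisfy den <= 1 << bits.
 * Values below are illustrative. */
#include <stdio.h>
int main(void)
{
    int  den  = 30000;              /* time_base.den */
    int  bits = 15;                 /* 1 << 15 = 32768 >= 30000 */
    long t    = 95000;              /* timestamp in ticks */
    printf("seconds=%ld increment=%ld bits=%d\n", t / den, t % den, bits);
    return 0;   /* prints: seconds=3 increment=5000 bits=15 */
}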
| @@ -2492,9 +2492,9 @@ static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, int level, int *di | |||
| /* find prediction */ | |||
| if (n < 4) { | |||
| scale = s->y_dc_scale; | |||
| } else { | |||
| scale = s->c_dc_scale; | |||
| } | |||
| if(IS_3IV1) | |||
| scale= 8; | |||
| @@ -2520,10 +2520,10 @@ static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, int level, int *di | |||
| } | |||
| if (abs(a - b) < abs(b - c)) { | |||
| pred = c; | |||
| *dir_ptr = 1; /* top */ | |||
| } else { | |||
| pred = a; | |||
| *dir_ptr = 0; /* left */ | |||
| } | |||
| /* we assume pred is positive */ | |||
| @@ -2629,11 +2629,11 @@ static inline void mpeg4_encode_dc(PutBitContext * s, int level, int n) | |||
| // if(level<-255 || level>255) printf("dc overflow\n"); | |||
| level+=256; | |||
| if (n < 4) { | |||
| /* luminance */ | |||
| put_bits(s, uni_DCtab_lum_len[level], uni_DCtab_lum_bits[level]); | |||
| } else { | |||
| /* chrominance */ | |||
| put_bits(s, uni_DCtab_chrom_len[level], uni_DCtab_chrom_bits[level]); | |||
| } | |||
| #else | |||
| int size, v; | |||
| @@ -2641,25 +2641,25 @@ static inline void mpeg4_encode_dc(PutBitContext * s, int level, int n) | |||
| size = 0; | |||
| v = abs(level); | |||
| while (v) { | |||
| v >>= 1; | |||
| size++; | |||
| } | |||
| if (n < 4) { | |||
| /* luminance */ | |||
| put_bits(&s->pb, DCtab_lum[size][1], DCtab_lum[size][0]); | |||
| } else { | |||
| /* chrominance */ | |||
| put_bits(&s->pb, DCtab_chrom[size][1], DCtab_chrom[size][0]); | |||
| } | |||
| /* encode remaining bits */ | |||
| if (size > 0) { | |||
| if (level < 0) | |||
| level = (-level) ^ ((1 << size) - 1); | |||
| put_bits(&s->pb, size, level); | |||
| if (size > 8) | |||
| put_bits(&s->pb, 1, 1); | |||
| } | |||
| #endif | |||
| } | |||
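/* Sketch of the size/level split used in the #else branch above: size is
 * the bit length of |level| and a negative level is stored as its
 * magnitude with all size bits inverted.  Table lookups and put_bits are
 * left out; only the arithmetic is shown. */
static void dc_size_and_bits(int level, int *size, unsigned *bits)
{
    int v = level < 0 ? -level : level;
    *size = 0;
    while (v) {
        v >>= 1;
        (*size)++;
    }
    *bits = level < 0 ? ((unsigned)(-level) ^ ((1u << *size) - 1))
                      : (unsigned)level;
    /* e.g. level = -5 -> size = 3, bits = 0b010 */
}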
| @@ -2689,16 +2689,16 @@ static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n | |||
| const int last_index = s->block_last_index[n]; | |||
| if (s->mb_intra) { //Note gcc (3.2.1 at least) will optimize this away | |||
| /* mpeg4 based DC predictor */ | |||
| mpeg4_encode_dc(dc_pb, intra_dc, n); | |||
| if(last_index<1) return; | |||
| i = 1; | |||
| rl = &rl_intra; | |||
| bits_tab= uni_mpeg4_intra_rl_bits; | |||
| len_tab = uni_mpeg4_intra_rl_len; | |||
| } else { | |||
| if(last_index<0) return; | |||
| i = 0; | |||
| rl = &rl_inter; | |||
| bits_tab= uni_mpeg4_inter_rl_bits; | |||
| len_tab = uni_mpeg4_inter_rl_len; | |||
| @@ -2708,9 +2708,9 @@ static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n | |||
| last_non_zero = i - 1; | |||
| #if 1 | |||
| for (; i < last_index; i++) { | |||
| int level = block[ scan_table[i] ]; | |||
| if (level) { | |||
| int run = i - last_non_zero - 1; | |||
| level+=64; | |||
| if((level&(~127)) == 0){ | |||
| const int index= UNI_MPEG4_ENC_INDEX(0, run, level); | |||
| @@ -2718,11 +2718,11 @@ static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n | |||
| }else{ //ESC3 | |||
| put_bits(ac_pb, 7+2+1+6+1+12+1, (3<<23)+(3<<21)+(0<<20)+(run<<14)+(1<<13)+(((level-64)&0xfff)<<1)+1); | |||
| } | |||
| last_non_zero = i; | |||
| } | |||
| } | |||
| /*if(i<=last_index)*/{ | |||
| int level = block[ scan_table[i] ]; | |||
| int run = i - last_non_zero - 1; | |||
| level+=64; | |||
| if((level&(~127)) == 0){ | |||
| @@ -2734,17 +2734,17 @@ static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n | |||
| } | |||
| #else | |||
| for (; i <= last_index; i++) { | |||
| const int slevel = block[ scan_table[i] ]; | |||
| if (slevel) { | |||
| int level; | |||
| int run = i - last_non_zero - 1; | |||
| last = (i == last_index); | |||
| sign = 0; | |||
| level = slevel; | |||
| if (level < 0) { | |||
| sign = 1; | |||
| level = -level; | |||
| } | |||
| code = get_rl_index(rl, last, run, level); | |||
| put_bits(ac_pb, rl->table_vlc[code][1], rl->table_vlc[code][0]); | |||
| if (code == rl->n) { | |||
| @@ -2786,8 +2786,8 @@ static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n | |||
| } else { | |||
| put_bits(ac_pb, 1, sign); | |||
| } | |||
| last_non_zero = i; | |||
| } | |||
| } | |||
| #endif | |||
| } | |||
| @@ -2802,15 +2802,15 @@ static int mpeg4_get_block_length(MpegEncContext * s, DCTELEM * block, int n, in | |||
| int len=0; | |||
| if (s->mb_intra) { //Note gcc (3.2.1 at least) will optimize this away | |||
| /* mpeg4 based DC predictor */ | |||
| len += mpeg4_get_dc_length(intra_dc, n); | |||
| if(last_index<1) return len; | |||
| i = 1; | |||
| rl = &rl_intra; | |||
| len_tab = uni_mpeg4_intra_rl_len; | |||
| } else { | |||
| if(last_index<0) return 0; | |||
| i = 0; | |||
| rl = &rl_inter; | |||
| len_tab = uni_mpeg4_inter_rl_len; | |||
| } | |||
| @@ -2818,9 +2818,9 @@ static int mpeg4_get_block_length(MpegEncContext * s, DCTELEM * block, int n, in | |||
| /* AC coefs */ | |||
| last_non_zero = i - 1; | |||
| for (; i < last_index; i++) { | |||
| int level = block[ scan_table[i] ]; | |||
| if (level) { | |||
| int run = i - last_non_zero - 1; | |||
| level+=64; | |||
| if((level&(~127)) == 0){ | |||
| const int index= UNI_MPEG4_ENC_INDEX(0, run, level); | |||
| @@ -2828,11 +2828,11 @@ static int mpeg4_get_block_length(MpegEncContext * s, DCTELEM * block, int n, in | |||
| }else{ //ESC3 | |||
| len += 7+2+1+6+1+12+1; | |||
| } | |||
| last_non_zero = i; | |||
| } | |||
| } | |||
| /*if(i<=last_index)*/{ | |||
| int level = block[ scan_table[i] ]; | |||
| int run = i - last_non_zero - 1; | |||
| level+=64; | |||
| if((level&(~127)) == 0){ | |||
| @@ -3251,7 +3251,7 @@ static int mpeg4_decode_video_packet_header(MpegEncContext *s) | |||
| //FIXME reduced res stuff here | |||
| if (s->pict_type != I_TYPE) { | |||
| int f_code = get_bits(&s->gb, 3); /* fcode_for */ | |||
| if(f_code==0){ | |||
| av_log(s->avctx, AV_LOG_ERROR, "Error, video packet header damaged (f_code=0)\n"); | |||
| } | |||
| @@ -4741,7 +4741,7 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block, | |||
| if(intra) { | |||
| if(s->qscale < s->intra_dc_threshold){ | |||
| /* DC coef */ | |||
| if(s->partitioned_frame){ | |||
| level = s->dc_val[0][ s->block_index[n] ]; | |||
| if(n<4) level= FASTDIV((level + (s->y_dc_scale>>1)), s->y_dc_scale); | |||
| @@ -4898,7 +4898,7 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block, | |||
| } | |||
| } | |||
| #endif | |||
| if (level>0) level= level * qmul + qadd; | |||
| else level= level * qmul - qadd; | |||
| if((unsigned)(level + 2048) > 4095){ | |||
| @@ -5014,18 +5014,18 @@ int h263_decode_picture_header(MpegEncContext *s) | |||
| } | |||
| if (get_bits1(&s->gb) != 0) { | |||
| av_log(s->avctx, AV_LOG_ERROR, "Bad H263 id\n"); | |||
| return -1; /* h263 id */ | |||
| } | |||
| skip_bits1(&s->gb); /* split screen off */ | |||
| skip_bits1(&s->gb); /* camera off */ | |||
| skip_bits1(&s->gb); /* freeze picture release off */ | |||
| format = get_bits(&s->gb, 3); | |||
| /* | |||
| 0 forbidden | |||
| 1 sub-QCIF | |||
| 10 QCIF | |||
| 7 extended PTYPE (PLUSPTYPE) | |||
| */ | |||
| if (format != 7 && format != 6) { | |||
| @@ -5042,17 +5042,17 @@ int h263_decode_picture_header(MpegEncContext *s) | |||
| if (get_bits1(&s->gb) != 0) { | |||
| av_log(s->avctx, AV_LOG_ERROR, "H263 SAC not supported\n"); | |||
| return -1; /* SAC: off */ | |||
| } | |||
| s->obmc= get_bits1(&s->gb); /* Advanced prediction mode */ | |||
| s->unrestricted_mv = s->h263_long_vectors || s->obmc; | |||
| if (get_bits1(&s->gb) != 0) { | |||
| av_log(s->avctx, AV_LOG_ERROR, "H263 PB frame not supported\n"); | |||
| return -1; /* not PB frame */ | |||
| } | |||
| s->chroma_qscale= s->qscale = get_bits(&s->gb, 5); | |||
| skip_bits1(&s->gb); /* Continuous Presence Multipoint mode: off */ | |||
| s->width = width; | |||
| s->height = height; | |||
| @@ -5511,17 +5511,17 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){ | |||
| } | |||
| s->low_delay= get_bits1(gb); | |||
| if(get_bits1(gb)){ /* vbv parameters */ | |||
| get_bits(gb, 15); /* first_half_bitrate */ | |||
| skip_bits1(gb); /* marker */ | |||
| get_bits(gb, 15); /* latter_half_bitrate */ | |||
| skip_bits1(gb); /* marker */ | |||
| get_bits(gb, 15); /* first_half_vbv_buffer_size */ | |||
| skip_bits1(gb); /* marker */ | |||
| get_bits(gb, 3); /* latter_half_vbv_buffer_size */ | |||
| get_bits(gb, 11); /* first_half_vbv_occupancy */ | |||
| skip_bits1(gb); /* marker */ | |||
| get_bits(gb, 15); /* latter_half_vbv_occupancy */ | |||
| skip_bits1(gb); /* marker */ | |||
| } | |||
| }else{ | |||
// set the low_delay flag only once, so the smarter low_delay detection won't be overridden
| @@ -5628,7 +5628,7 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){ | |||
| /* load custom intra matrix */ | |||
| if(get_bits1(gb)){ | |||
| int last=0; | |||
| for(i=0; i<64; i++){ | |||
| int j; | |||
| v= get_bits(gb, 8); | |||
| if(v==0) break; | |||
| @@ -5641,7 +5641,7 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){ | |||
| /* replicate last value */ | |||
| for(; i<64; i++){ | |||
| int j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ]; | |||
| s->intra_matrix[j]= last; | |||
| s->chroma_intra_matrix[j]= last; | |||
| } | |||
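/* Sketch of the custom-matrix loading rule used above, minus the IDCT
 * permutation: values arrive in zigzag order, a zero byte ends the list
 * early, and the last value seen is replicated over the remaining
 * positions (names here are illustrative). */
#include <stdint.h>
static void load_matrix(uint16_t matrix[64], const uint8_t *vals, int n,
                        const uint8_t zigzag[64])
{
    int i, last = 0;
    for (i = 0; i < 64 && i < n; i++) {
        if (vals[i] == 0)
            break;
        last = vals[i];
        matrix[zigzag[i]] = last;
    }
    for (; i < 64; i++)                     /* replicate last value */
        matrix[zigzag[i]] = last;
}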
| @@ -5650,7 +5650,7 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){ | |||
| /* load custom non intra matrix */ | |||
| if(get_bits1(gb)){ | |||
| int last=0; | |||
| for(i=0; i<64; i++){ | |||
| int j; | |||
| v= get_bits(gb, 8); | |||
| if(v==0) break; | |||
| @@ -5663,7 +5663,7 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){ | |||
| /* replicate last value */ | |||
| for(; i<64; i++){ | |||
| int j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ]; | |||
| s->inter_matrix[j]= last; | |||
| s->chroma_inter_matrix[j]= last; | |||
| } | |||
| @@ -5794,7 +5794,7 @@ static int decode_user_data(MpegEncContext *s, GetBitContext *gb){ | |||
| static int decode_vop_header(MpegEncContext *s, GetBitContext *gb){ | |||
| int time_incr, time_increment; | |||
| s->pict_type = get_bits(gb, 2) + I_TYPE; /* pict type: I = 0 , P = 1 */ | |||
| if(s->pict_type==B_TYPE && s->low_delay && s->vol_control_parameters==0 && !(s->flags & CODEC_FLAG_LOW_DELAY)){ | |||
| av_log(s->avctx, AV_LOG_ERROR, "low_delay flag incorrectly, clearing it\n"); | |||
| s->low_delay=0; | |||
| @@ -5877,9 +5877,9 @@ static int decode_vop_header(MpegEncContext *s, GetBitContext *gb){ | |||
| if (s->shape != BIN_ONLY_SHAPE && ( s->pict_type == P_TYPE | |||
| || (s->pict_type == S_TYPE && s->vol_sprite_usage==GMC_SPRITE))) { | |||
| /* rounding type for motion estimation */ | |||
| s->no_rounding = get_bits1(gb); | |||
| } else { | |||
| s->no_rounding = 0; | |||
| } | |||
| //FIXME reduced res stuff | |||
| @@ -5938,7 +5938,7 @@ static int decode_vop_header(MpegEncContext *s, GetBitContext *gb){ | |||
| } | |||
| if (s->pict_type != I_TYPE) { | |||
| s->f_code = get_bits(gb, 3); /* fcode_for */ | |||
| if(s->f_code==0){ | |||
| av_log(s->avctx, AV_LOG_ERROR, "Error, header damaged or not MPEG4 header (f_code=0)\n"); | |||
| return -1; // makes no sense to continue, as the MV decoding will break very quickly | |||
| @@ -6094,15 +6094,15 @@ int intel_h263_decode_picture_header(MpegEncContext *s) | |||
| if (get_bits1(&s->gb) != 1) { | |||
| av_log(s->avctx, AV_LOG_ERROR, "Bad marker\n"); | |||
| return -1; /* marker */ | |||
| return -1; /* marker */ | |||
| } | |||
| if (get_bits1(&s->gb) != 0) { | |||
| av_log(s->avctx, AV_LOG_ERROR, "Bad H263 id\n"); | |||
| return -1; /* h263 id */ | |||
| return -1; /* h263 id */ | |||
| } | |||
| skip_bits1(&s->gb); /* split screen off */ | |||
| skip_bits1(&s->gb); /* camera off */ | |||
| skip_bits1(&s->gb); /* freeze picture release off */ | |||
| skip_bits1(&s->gb); /* split screen off */ | |||
| skip_bits1(&s->gb); /* camera off */ | |||
| skip_bits1(&s->gb); /* freeze picture release off */ | |||
| format = get_bits(&s->gb, 3); | |||
| if (format != 7) { | |||
| @@ -6118,23 +6118,23 @@ int intel_h263_decode_picture_header(MpegEncContext *s) | |||
| if (get_bits1(&s->gb) != 0) { | |||
| av_log(s->avctx, AV_LOG_ERROR, "SAC not supported\n"); | |||
| return -1; /* SAC: off */ | |||
| return -1; /* SAC: off */ | |||
| } | |||
| if (get_bits1(&s->gb) != 0) { | |||
| s->obmc= 1; | |||
| av_log(s->avctx, AV_LOG_ERROR, "Advanced Prediction Mode not supported\n"); | |||
| // return -1; /* advanced prediction mode: off */ | |||
| // return -1; /* advanced prediction mode: off */ | |||
| } | |||
| if (get_bits1(&s->gb) != 0) { | |||
|        av_log(s->avctx, AV_LOG_ERROR, "PB frame mode not supported\n"); | |||
| return -1; /* PB frame mode */ | |||
| return -1; /* PB frame mode */ | |||
| } | |||
| /* skip unknown header garbage */ | |||
| skip_bits(&s->gb, 41); | |||
| s->chroma_qscale= s->qscale = get_bits(&s->gb, 5); | |||
| skip_bits1(&s->gb); /* Continuous Presence Multipoint mode: off */ | |||
| skip_bits1(&s->gb); /* Continuous Presence Multipoint mode: off */ | |||
| /* PEI */ | |||
| while (get_bits1(&s->gb) != 0) { | |||
| @@ -6208,7 +6208,7 @@ int flv_h263_decode_picture_header(MpegEncContext *s) | |||
| if (s->dropable) | |||
| s->pict_type = P_TYPE; | |||
| skip_bits1(&s->gb); /* deblocking flag */ | |||
| skip_bits1(&s->gb); /* deblocking flag */ | |||
| s->chroma_qscale= s->qscale = get_bits(&s->gb, 5); | |||
| s->h263_plus = 0; | |||
| @@ -147,15 +147,15 @@ typedef struct H264Context{ | |||
| MpegEncContext s; | |||
| int nal_ref_idc; | |||
| int nal_unit_type; | |||
| #define NAL_SLICE 1 | |||
| #define NAL_DPA 2 | |||
| #define NAL_DPB 3 | |||
| #define NAL_DPC 4 | |||
| #define NAL_IDR_SLICE 5 | |||
| #define NAL_SEI 6 | |||
| #define NAL_SPS 7 | |||
| #define NAL_PPS 8 | |||
| #define NAL_AUD 9 | |||
| #define NAL_SLICE 1 | |||
| #define NAL_DPA 2 | |||
| #define NAL_DPB 3 | |||
| #define NAL_DPC 4 | |||
| #define NAL_IDR_SLICE 5 | |||
| #define NAL_SEI 6 | |||
| #define NAL_SPS 7 | |||
| #define NAL_PPS 8 | |||
| #define NAL_AUD 9 | |||
| #define NAL_END_SEQUENCE 10 | |||
| #define NAL_END_STREAM 11 | |||
| #define NAL_FILLER_DATA 12 | |||
| @@ -1461,7 +1461,7 @@ static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *c | |||
| int i, si, di; | |||
| uint8_t *dst; | |||
| // src[0]&0x80; //forbidden bit | |||
| // src[0]&0x80; //forbidden bit | |||
| h->nal_ref_idc= src[0]>>5; | |||
| h->nal_unit_type= src[0]&0x1F; | |||
| @@ -7545,8 +7545,8 @@ static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){ | |||
| case NAL_SPS_EXT: | |||
| case NAL_AUXILIARY_SLICE: | |||
| break; | |||
| default: | |||
| av_log(avctx, AV_LOG_ERROR, "Unknown NAL code: %d\n", h->nal_unit_type); | |||
| default: | |||
| av_log(avctx, AV_LOG_ERROR, "Unknown NAL code: %d\n", h->nal_unit_type); | |||
| } | |||
| } | |||
| @@ -15,7 +15,7 @@ | |||
| /* ebx saving is necessary for PIC. gcc seems unable to see it alone */ | |||
| #define cpuid(index,eax,ebx,ecx,edx)\ | |||
| __asm __volatile\ | |||
| ("mov %%"REG_b", %%"REG_S"\n\t"\ | |||
| ("mov %%"REG_b", %%"REG_S"\n\t"\ | |||
| "cpuid\n\t"\ | |||
| "xchg %%"REG_b", %%"REG_S\ | |||
| : "=a" (eax), "=S" (ebx),\ | |||
| @@ -89,8 +89,8 @@ int mm_support(void) | |||
| edx == 0x48727561 && | |||
| ecx == 0x736c7561) { /* "CentaurHauls" */ | |||
| /* VIA C3 */ | |||
| if(ext_caps & (1<<24)) | |||
| rval |= MM_MMXEXT; | |||
| if(ext_caps & (1<<24)) | |||
| rval |= MM_MMXEXT; | |||
| } else if (ebx == 0x69727943 && | |||
| edx == 0x736e4978 && | |||
| ecx == 0x64616574) { | |||
| @@ -27,206 +27,206 @@ static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line | |||
| { | |||
| MOVQ_BFE(mm6); | |||
| __asm __volatile( | |||
| "lea (%3, %3), %%"REG_a" \n\t" | |||
| ".balign 8 \n\t" | |||
| "1: \n\t" | |||
| "movq (%1), %%mm0 \n\t" | |||
| "movq 1(%1), %%mm1 \n\t" | |||
| "movq (%1, %3), %%mm2 \n\t" | |||
| "movq 1(%1, %3), %%mm3 \n\t" | |||
| PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |||
| "movq %%mm4, (%2) \n\t" | |||
| "movq %%mm5, (%2, %3) \n\t" | |||
| "add %%"REG_a", %1 \n\t" | |||
| "add %%"REG_a", %2 \n\t" | |||
| "movq (%1), %%mm0 \n\t" | |||
| "movq 1(%1), %%mm1 \n\t" | |||
| "movq (%1, %3), %%mm2 \n\t" | |||
| "movq 1(%1, %3), %%mm3 \n\t" | |||
| PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |||
| "movq %%mm4, (%2) \n\t" | |||
| "movq %%mm5, (%2, %3) \n\t" | |||
| "add %%"REG_a", %1 \n\t" | |||
| "add %%"REG_a", %2 \n\t" | |||
| "subl $4, %0 \n\t" | |||
| "jnz 1b \n\t" | |||
| :"+g"(h), "+S"(pixels), "+D"(block) | |||
| :"r"((long)line_size) | |||
| :REG_a, "memory"); | |||
| "lea (%3, %3), %%"REG_a" \n\t" | |||
| ".balign 8 \n\t" | |||
| "1: \n\t" | |||
| "movq (%1), %%mm0 \n\t" | |||
| "movq 1(%1), %%mm1 \n\t" | |||
| "movq (%1, %3), %%mm2 \n\t" | |||
| "movq 1(%1, %3), %%mm3 \n\t" | |||
| PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |||
| "movq %%mm4, (%2) \n\t" | |||
| "movq %%mm5, (%2, %3) \n\t" | |||
| "add %%"REG_a", %1 \n\t" | |||
| "add %%"REG_a", %2 \n\t" | |||
| "movq (%1), %%mm0 \n\t" | |||
| "movq 1(%1), %%mm1 \n\t" | |||
| "movq (%1, %3), %%mm2 \n\t" | |||
| "movq 1(%1, %3), %%mm3 \n\t" | |||
| PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |||
| "movq %%mm4, (%2) \n\t" | |||
| "movq %%mm5, (%2, %3) \n\t" | |||
| "add %%"REG_a", %1 \n\t" | |||
| "add %%"REG_a", %2 \n\t" | |||
| "subl $4, %0 \n\t" | |||
| "jnz 1b \n\t" | |||
| :"+g"(h), "+S"(pixels), "+D"(block) | |||
| :"r"((long)line_size) | |||
| :REG_a, "memory"); | |||
| } | |||
| static void attribute_unused DEF(put, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) | |||
| { | |||
| MOVQ_BFE(mm6); | |||
| __asm __volatile( | |||
| "testl $1, %0 \n\t" | |||
| " jz 1f \n\t" | |||
| "movq (%1), %%mm0 \n\t" | |||
| "movq (%2), %%mm1 \n\t" | |||
| "add %4, %1 \n\t" | |||
| "add $8, %2 \n\t" | |||
| PAVGB(%%mm0, %%mm1, %%mm4, %%mm6) | |||
| "movq %%mm4, (%3) \n\t" | |||
| "add %5, %3 \n\t" | |||
| "decl %0 \n\t" | |||
| ".balign 8 \n\t" | |||
| "1: \n\t" | |||
| "movq (%1), %%mm0 \n\t" | |||
| "movq (%2), %%mm1 \n\t" | |||
| "add %4, %1 \n\t" | |||
| "movq (%1), %%mm2 \n\t" | |||
| "movq 8(%2), %%mm3 \n\t" | |||
| "add %4, %1 \n\t" | |||
| PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |||
| "movq %%mm4, (%3) \n\t" | |||
| "add %5, %3 \n\t" | |||
| "movq %%mm5, (%3) \n\t" | |||
| "add %5, %3 \n\t" | |||
| "movq (%1), %%mm0 \n\t" | |||
| "movq 16(%2), %%mm1 \n\t" | |||
| "add %4, %1 \n\t" | |||
| "movq (%1), %%mm2 \n\t" | |||
| "movq 24(%2), %%mm3 \n\t" | |||
| "add %4, %1 \n\t" | |||
| "add $32, %2 \n\t" | |||
| PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |||
| "movq %%mm4, (%3) \n\t" | |||
| "add %5, %3 \n\t" | |||
| "movq %%mm5, (%3) \n\t" | |||
| "add %5, %3 \n\t" | |||
| "subl $4, %0 \n\t" | |||
| "jnz 1b \n\t" | |||
| "testl $1, %0 \n\t" | |||
| " jz 1f \n\t" | |||
| "movq (%1), %%mm0 \n\t" | |||
| "movq (%2), %%mm1 \n\t" | |||
| "add %4, %1 \n\t" | |||
| "add $8, %2 \n\t" | |||
| PAVGB(%%mm0, %%mm1, %%mm4, %%mm6) | |||
| "movq %%mm4, (%3) \n\t" | |||
| "add %5, %3 \n\t" | |||
| "decl %0 \n\t" | |||
| ".balign 8 \n\t" | |||
| "1: \n\t" | |||
| "movq (%1), %%mm0 \n\t" | |||
| "movq (%2), %%mm1 \n\t" | |||
| "add %4, %1 \n\t" | |||
| "movq (%1), %%mm2 \n\t" | |||
| "movq 8(%2), %%mm3 \n\t" | |||
| "add %4, %1 \n\t" | |||
| PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |||
| "movq %%mm4, (%3) \n\t" | |||
| "add %5, %3 \n\t" | |||
| "movq %%mm5, (%3) \n\t" | |||
| "add %5, %3 \n\t" | |||
| "movq (%1), %%mm0 \n\t" | |||
| "movq 16(%2), %%mm1 \n\t" | |||
| "add %4, %1 \n\t" | |||
| "movq (%1), %%mm2 \n\t" | |||
| "movq 24(%2), %%mm3 \n\t" | |||
| "add %4, %1 \n\t" | |||
| "add $32, %2 \n\t" | |||
| PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |||
| "movq %%mm4, (%3) \n\t" | |||
| "add %5, %3 \n\t" | |||
| "movq %%mm5, (%3) \n\t" | |||
| "add %5, %3 \n\t" | |||
| "subl $4, %0 \n\t" | |||
| "jnz 1b \n\t" | |||
| #ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and can't be used | |||
| :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst) | |||
| #else | |||
| :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst) | |||
| #endif | |||
| :"S"((long)src1Stride), "D"((long)dstStride) | |||
| :"memory"); | |||
| :"S"((long)src1Stride), "D"((long)dstStride) | |||
| :"memory"); | |||
| } | |||
| static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||
| { | |||
| MOVQ_BFE(mm6); | |||
| __asm __volatile( | |||
| "lea (%3, %3), %%"REG_a" \n\t" | |||
| ".balign 8 \n\t" | |||
| "1: \n\t" | |||
| "movq (%1), %%mm0 \n\t" | |||
| "movq 1(%1), %%mm1 \n\t" | |||
| "movq (%1, %3), %%mm2 \n\t" | |||
| "movq 1(%1, %3), %%mm3 \n\t" | |||
| PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |||
| "movq %%mm4, (%2) \n\t" | |||
| "movq %%mm5, (%2, %3) \n\t" | |||
| "movq 8(%1), %%mm0 \n\t" | |||
| "movq 9(%1), %%mm1 \n\t" | |||
| "movq 8(%1, %3), %%mm2 \n\t" | |||
| "movq 9(%1, %3), %%mm3 \n\t" | |||
| PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |||
| "movq %%mm4, 8(%2) \n\t" | |||
| "movq %%mm5, 8(%2, %3) \n\t" | |||
| "add %%"REG_a", %1 \n\t" | |||
| "add %%"REG_a", %2 \n\t" | |||
| "movq (%1), %%mm0 \n\t" | |||
| "movq 1(%1), %%mm1 \n\t" | |||
| "movq (%1, %3), %%mm2 \n\t" | |||
| "movq 1(%1, %3), %%mm3 \n\t" | |||
| PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |||
| "movq %%mm4, (%2) \n\t" | |||
| "movq %%mm5, (%2, %3) \n\t" | |||
| "movq 8(%1), %%mm0 \n\t" | |||
| "movq 9(%1), %%mm1 \n\t" | |||
| "movq 8(%1, %3), %%mm2 \n\t" | |||
| "movq 9(%1, %3), %%mm3 \n\t" | |||
| PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |||
| "movq %%mm4, 8(%2) \n\t" | |||
| "movq %%mm5, 8(%2, %3) \n\t" | |||
| "add %%"REG_a", %1 \n\t" | |||
| "add %%"REG_a", %2 \n\t" | |||
| "subl $4, %0 \n\t" | |||
| "jnz 1b \n\t" | |||
| :"+g"(h), "+S"(pixels), "+D"(block) | |||
| :"r"((long)line_size) | |||
| :REG_a, "memory"); | |||
| "lea (%3, %3), %%"REG_a" \n\t" | |||
| ".balign 8 \n\t" | |||
| "1: \n\t" | |||
| "movq (%1), %%mm0 \n\t" | |||
| "movq 1(%1), %%mm1 \n\t" | |||
| "movq (%1, %3), %%mm2 \n\t" | |||
| "movq 1(%1, %3), %%mm3 \n\t" | |||
| PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |||
| "movq %%mm4, (%2) \n\t" | |||
| "movq %%mm5, (%2, %3) \n\t" | |||
| "movq 8(%1), %%mm0 \n\t" | |||
| "movq 9(%1), %%mm1 \n\t" | |||
| "movq 8(%1, %3), %%mm2 \n\t" | |||
| "movq 9(%1, %3), %%mm3 \n\t" | |||
| PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |||
| "movq %%mm4, 8(%2) \n\t" | |||
| "movq %%mm5, 8(%2, %3) \n\t" | |||
| "add %%"REG_a", %1 \n\t" | |||
| "add %%"REG_a", %2 \n\t" | |||
| "movq (%1), %%mm0 \n\t" | |||
| "movq 1(%1), %%mm1 \n\t" | |||
| "movq (%1, %3), %%mm2 \n\t" | |||
| "movq 1(%1, %3), %%mm3 \n\t" | |||
| PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |||
| "movq %%mm4, (%2) \n\t" | |||
| "movq %%mm5, (%2, %3) \n\t" | |||
| "movq 8(%1), %%mm0 \n\t" | |||
| "movq 9(%1), %%mm1 \n\t" | |||
| "movq 8(%1, %3), %%mm2 \n\t" | |||
| "movq 9(%1, %3), %%mm3 \n\t" | |||
| PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |||
| "movq %%mm4, 8(%2) \n\t" | |||
| "movq %%mm5, 8(%2, %3) \n\t" | |||
| "add %%"REG_a", %1 \n\t" | |||
| "add %%"REG_a", %2 \n\t" | |||
| "subl $4, %0 \n\t" | |||
| "jnz 1b \n\t" | |||
| :"+g"(h), "+S"(pixels), "+D"(block) | |||
| :"r"((long)line_size) | |||
| :REG_a, "memory"); | |||
| } | |||
| static void attribute_unused DEF(put, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) | |||
| { | |||
| MOVQ_BFE(mm6); | |||
| __asm __volatile( | |||
| "testl $1, %0 \n\t" | |||
| " jz 1f \n\t" | |||
| "movq (%1), %%mm0 \n\t" | |||
| "movq (%2), %%mm1 \n\t" | |||
| "movq 8(%1), %%mm2 \n\t" | |||
| "movq 8(%2), %%mm3 \n\t" | |||
| "add %4, %1 \n\t" | |||
| "add $16, %2 \n\t" | |||
| PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |||
| "movq %%mm4, (%3) \n\t" | |||
| "movq %%mm5, 8(%3) \n\t" | |||
| "add %5, %3 \n\t" | |||
| "decl %0 \n\t" | |||
| ".balign 8 \n\t" | |||
| "1: \n\t" | |||
| "movq (%1), %%mm0 \n\t" | |||
| "movq (%2), %%mm1 \n\t" | |||
| "movq 8(%1), %%mm2 \n\t" | |||
| "movq 8(%2), %%mm3 \n\t" | |||
| "add %4, %1 \n\t" | |||
| PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |||
| "movq %%mm4, (%3) \n\t" | |||
| "movq %%mm5, 8(%3) \n\t" | |||
| "add %5, %3 \n\t" | |||
| "movq (%1), %%mm0 \n\t" | |||
| "movq 16(%2), %%mm1 \n\t" | |||
| "movq 8(%1), %%mm2 \n\t" | |||
| "movq 24(%2), %%mm3 \n\t" | |||
| "add %4, %1 \n\t" | |||
| PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |||
| "movq %%mm4, (%3) \n\t" | |||
| "movq %%mm5, 8(%3) \n\t" | |||
| "add %5, %3 \n\t" | |||
| "add $32, %2 \n\t" | |||
| "subl $2, %0 \n\t" | |||
| "jnz 1b \n\t" | |||
| "testl $1, %0 \n\t" | |||
| " jz 1f \n\t" | |||
| "movq (%1), %%mm0 \n\t" | |||
| "movq (%2), %%mm1 \n\t" | |||
| "movq 8(%1), %%mm2 \n\t" | |||
| "movq 8(%2), %%mm3 \n\t" | |||
| "add %4, %1 \n\t" | |||
| "add $16, %2 \n\t" | |||
| PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |||
| "movq %%mm4, (%3) \n\t" | |||
| "movq %%mm5, 8(%3) \n\t" | |||
| "add %5, %3 \n\t" | |||
| "decl %0 \n\t" | |||
| ".balign 8 \n\t" | |||
| "1: \n\t" | |||
| "movq (%1), %%mm0 \n\t" | |||
| "movq (%2), %%mm1 \n\t" | |||
| "movq 8(%1), %%mm2 \n\t" | |||
| "movq 8(%2), %%mm3 \n\t" | |||
| "add %4, %1 \n\t" | |||
| PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |||
| "movq %%mm4, (%3) \n\t" | |||
| "movq %%mm5, 8(%3) \n\t" | |||
| "add %5, %3 \n\t" | |||
| "movq (%1), %%mm0 \n\t" | |||
| "movq 16(%2), %%mm1 \n\t" | |||
| "movq 8(%1), %%mm2 \n\t" | |||
| "movq 24(%2), %%mm3 \n\t" | |||
| "add %4, %1 \n\t" | |||
| PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |||
| "movq %%mm4, (%3) \n\t" | |||
| "movq %%mm5, 8(%3) \n\t" | |||
| "add %5, %3 \n\t" | |||
| "add $32, %2 \n\t" | |||
| "subl $2, %0 \n\t" | |||
| "jnz 1b \n\t" | |||
| #ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and can't be used | |||
| :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst) | |||
| :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst) | |||
| #else | |||
| :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst) | |||
| :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst) | |||
| #endif | |||
| :"S"((long)src1Stride), "D"((long)dstStride) | |||
| :"memory"); | |||
| :"S"((long)src1Stride), "D"((long)dstStride) | |||
| :"memory"); | |||
| } | |||
| static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||
| { | |||
| MOVQ_BFE(mm6); | |||
| __asm __volatile( | |||
| "lea (%3, %3), %%"REG_a" \n\t" | |||
| "movq (%1), %%mm0 \n\t" | |||
| ".balign 8 \n\t" | |||
| "1: \n\t" | |||
| "movq (%1, %3), %%mm1 \n\t" | |||
| "movq (%1, %%"REG_a"),%%mm2 \n\t" | |||
| PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5) | |||
| "movq %%mm4, (%2) \n\t" | |||
| "movq %%mm5, (%2, %3) \n\t" | |||
| "add %%"REG_a", %1 \n\t" | |||
| "add %%"REG_a", %2 \n\t" | |||
| "movq (%1, %3), %%mm1 \n\t" | |||
| "movq (%1, %%"REG_a"),%%mm0 \n\t" | |||
| PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5) | |||
| "movq %%mm4, (%2) \n\t" | |||
| "movq %%mm5, (%2, %3) \n\t" | |||
| "add %%"REG_a", %1 \n\t" | |||
| "add %%"REG_a", %2 \n\t" | |||
| "subl $4, %0 \n\t" | |||
| "jnz 1b \n\t" | |||
| :"+g"(h), "+S"(pixels), "+D"(block) | |||
| :"r"((long)line_size) | |||
| :REG_a, "memory"); | |||
| "lea (%3, %3), %%"REG_a" \n\t" | |||
| "movq (%1), %%mm0 \n\t" | |||
| ".balign 8 \n\t" | |||
| "1: \n\t" | |||
| "movq (%1, %3), %%mm1 \n\t" | |||
| "movq (%1, %%"REG_a"),%%mm2 \n\t" | |||
| PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5) | |||
| "movq %%mm4, (%2) \n\t" | |||
| "movq %%mm5, (%2, %3) \n\t" | |||
| "add %%"REG_a", %1 \n\t" | |||
| "add %%"REG_a", %2 \n\t" | |||
| "movq (%1, %3), %%mm1 \n\t" | |||
| "movq (%1, %%"REG_a"),%%mm0 \n\t" | |||
| PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5) | |||
| "movq %%mm4, (%2) \n\t" | |||
| "movq %%mm5, (%2, %3) \n\t" | |||
| "add %%"REG_a", %1 \n\t" | |||
| "add %%"REG_a", %2 \n\t" | |||
| "subl $4, %0 \n\t" | |||
| "jnz 1b \n\t" | |||
| :"+g"(h), "+S"(pixels), "+D"(block) | |||
| :"r"((long)line_size) | |||
| :REG_a, "memory"); | |||
| } | |||
| static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||
| @@ -234,65 +234,65 @@ static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin | |||
| MOVQ_ZERO(mm7); | |||
| SET_RND(mm6); // =2 for rnd and =1 for no_rnd version | |||
| __asm __volatile( | |||
| "movq (%1), %%mm0 \n\t" | |||
| "movq 1(%1), %%mm4 \n\t" | |||
| "movq %%mm0, %%mm1 \n\t" | |||
| "movq %%mm4, %%mm5 \n\t" | |||
| "punpcklbw %%mm7, %%mm0 \n\t" | |||
| "punpcklbw %%mm7, %%mm4 \n\t" | |||
| "punpckhbw %%mm7, %%mm1 \n\t" | |||
| "punpckhbw %%mm7, %%mm5 \n\t" | |||
| "paddusw %%mm0, %%mm4 \n\t" | |||
| "paddusw %%mm1, %%mm5 \n\t" | |||
| "xor %%"REG_a", %%"REG_a" \n\t" | |||
| "add %3, %1 \n\t" | |||
| ".balign 8 \n\t" | |||
| "1: \n\t" | |||
| "movq (%1, %%"REG_a"), %%mm0 \n\t" | |||
| "movq 1(%1, %%"REG_a"), %%mm2 \n\t" | |||
| "movq %%mm0, %%mm1 \n\t" | |||
| "movq %%mm2, %%mm3 \n\t" | |||
| "punpcklbw %%mm7, %%mm0 \n\t" | |||
| "punpcklbw %%mm7, %%mm2 \n\t" | |||
| "punpckhbw %%mm7, %%mm1 \n\t" | |||
| "punpckhbw %%mm7, %%mm3 \n\t" | |||
| "paddusw %%mm2, %%mm0 \n\t" | |||
| "paddusw %%mm3, %%mm1 \n\t" | |||
| "paddusw %%mm6, %%mm4 \n\t" | |||
| "paddusw %%mm6, %%mm5 \n\t" | |||
| "paddusw %%mm0, %%mm4 \n\t" | |||
| "paddusw %%mm1, %%mm5 \n\t" | |||
| "psrlw $2, %%mm4 \n\t" | |||
| "psrlw $2, %%mm5 \n\t" | |||
| "packuswb %%mm5, %%mm4 \n\t" | |||
| "movq %%mm4, (%2, %%"REG_a") \n\t" | |||
| "add %3, %%"REG_a" \n\t" | |||
| "movq (%1), %%mm0 \n\t" | |||
| "movq 1(%1), %%mm4 \n\t" | |||
| "movq %%mm0, %%mm1 \n\t" | |||
| "movq %%mm4, %%mm5 \n\t" | |||
| "punpcklbw %%mm7, %%mm0 \n\t" | |||
| "punpcklbw %%mm7, %%mm4 \n\t" | |||
| "punpckhbw %%mm7, %%mm1 \n\t" | |||
| "punpckhbw %%mm7, %%mm5 \n\t" | |||
| "paddusw %%mm0, %%mm4 \n\t" | |||
| "paddusw %%mm1, %%mm5 \n\t" | |||
| "xor %%"REG_a", %%"REG_a" \n\t" | |||
| "add %3, %1 \n\t" | |||
| ".balign 8 \n\t" | |||
| "1: \n\t" | |||
| "movq (%1, %%"REG_a"), %%mm0 \n\t" | |||
| "movq 1(%1, %%"REG_a"), %%mm2 \n\t" | |||
| "movq %%mm0, %%mm1 \n\t" | |||
| "movq %%mm2, %%mm3 \n\t" | |||
| "punpcklbw %%mm7, %%mm0 \n\t" | |||
| "punpcklbw %%mm7, %%mm2 \n\t" | |||
| "punpckhbw %%mm7, %%mm1 \n\t" | |||
| "punpckhbw %%mm7, %%mm3 \n\t" | |||
| "paddusw %%mm2, %%mm0 \n\t" | |||
| "paddusw %%mm3, %%mm1 \n\t" | |||
| "paddusw %%mm6, %%mm4 \n\t" | |||
| "paddusw %%mm6, %%mm5 \n\t" | |||
| "paddusw %%mm0, %%mm4 \n\t" | |||
| "paddusw %%mm1, %%mm5 \n\t" | |||
| "psrlw $2, %%mm4 \n\t" | |||
| "psrlw $2, %%mm5 \n\t" | |||
| "packuswb %%mm5, %%mm4 \n\t" | |||
| "movq %%mm4, (%2, %%"REG_a") \n\t" | |||
| "add %3, %%"REG_a" \n\t" | |||
| "movq (%1, %%"REG_a"), %%mm2 \n\t" // 0 <-> 2 1 <-> 3 | |||
| "movq 1(%1, %%"REG_a"), %%mm4 \n\t" | |||
| "movq %%mm2, %%mm3 \n\t" | |||
| "movq %%mm4, %%mm5 \n\t" | |||
| "punpcklbw %%mm7, %%mm2 \n\t" | |||
| "punpcklbw %%mm7, %%mm4 \n\t" | |||
| "punpckhbw %%mm7, %%mm3 \n\t" | |||
| "punpckhbw %%mm7, %%mm5 \n\t" | |||
| "paddusw %%mm2, %%mm4 \n\t" | |||
| "paddusw %%mm3, %%mm5 \n\t" | |||
| "paddusw %%mm6, %%mm0 \n\t" | |||
| "paddusw %%mm6, %%mm1 \n\t" | |||
| "paddusw %%mm4, %%mm0 \n\t" | |||
| "paddusw %%mm5, %%mm1 \n\t" | |||
| "psrlw $2, %%mm0 \n\t" | |||
| "psrlw $2, %%mm1 \n\t" | |||
| "packuswb %%mm1, %%mm0 \n\t" | |||
| "movq %%mm0, (%2, %%"REG_a") \n\t" | |||
| "add %3, %%"REG_a" \n\t" | |||
| "movq (%1, %%"REG_a"), %%mm2 \n\t" // 0 <-> 2 1 <-> 3 | |||
| "movq 1(%1, %%"REG_a"), %%mm4 \n\t" | |||
| "movq %%mm2, %%mm3 \n\t" | |||
| "movq %%mm4, %%mm5 \n\t" | |||
| "punpcklbw %%mm7, %%mm2 \n\t" | |||
| "punpcklbw %%mm7, %%mm4 \n\t" | |||
| "punpckhbw %%mm7, %%mm3 \n\t" | |||
| "punpckhbw %%mm7, %%mm5 \n\t" | |||
| "paddusw %%mm2, %%mm4 \n\t" | |||
| "paddusw %%mm3, %%mm5 \n\t" | |||
| "paddusw %%mm6, %%mm0 \n\t" | |||
| "paddusw %%mm6, %%mm1 \n\t" | |||
| "paddusw %%mm4, %%mm0 \n\t" | |||
| "paddusw %%mm5, %%mm1 \n\t" | |||
| "psrlw $2, %%mm0 \n\t" | |||
| "psrlw $2, %%mm1 \n\t" | |||
| "packuswb %%mm1, %%mm0 \n\t" | |||
| "movq %%mm0, (%2, %%"REG_a") \n\t" | |||
| "add %3, %%"REG_a" \n\t" | |||
| "subl $2, %0 \n\t" | |||
| "jnz 1b \n\t" | |||
| :"+g"(h), "+S"(pixels) | |||
| :"D"(block), "r"((long)line_size) | |||
| :REG_a, "memory"); | |||
| "subl $2, %0 \n\t" | |||
| "jnz 1b \n\t" | |||
| :"+g"(h), "+S"(pixels) | |||
| :"D"(block), "r"((long)line_size) | |||
| :REG_a, "memory"); | |||
| } | |||
| // avg_pixels | |||
| @@ -301,16 +301,16 @@ static void attribute_unused DEF(avg, pixels4)(uint8_t *block, const uint8_t *pi | |||
| MOVQ_BFE(mm6); | |||
| JUMPALIGN(); | |||
| do { | |||
| __asm __volatile( | |||
| "movd %0, %%mm0 \n\t" | |||
| "movd %1, %%mm1 \n\t" | |||
| PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |||
| "movd %%mm2, %0 \n\t" | |||
| :"+m"(*block) | |||
| :"m"(*pixels) | |||
| :"memory"); | |||
| pixels += line_size; | |||
| block += line_size; | |||
| __asm __volatile( | |||
| "movd %0, %%mm0 \n\t" | |||
| "movd %1, %%mm1 \n\t" | |||
| PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |||
| "movd %%mm2, %0 \n\t" | |||
| :"+m"(*block) | |||
| :"m"(*pixels) | |||
| :"memory"); | |||
| pixels += line_size; | |||
| block += line_size; | |||
| } | |||
| while (--h); | |||
| } | |||
| @@ -321,16 +321,16 @@ static void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, int line_si | |||
| MOVQ_BFE(mm6); | |||
| JUMPALIGN(); | |||
| do { | |||
| __asm __volatile( | |||
| "movq %0, %%mm0 \n\t" | |||
| "movq %1, %%mm1 \n\t" | |||
| PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |||
| "movq %%mm2, %0 \n\t" | |||
| :"+m"(*block) | |||
| :"m"(*pixels) | |||
| :"memory"); | |||
| pixels += line_size; | |||
| block += line_size; | |||
| __asm __volatile( | |||
| "movq %0, %%mm0 \n\t" | |||
| "movq %1, %%mm1 \n\t" | |||
| PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |||
| "movq %%mm2, %0 \n\t" | |||
| :"+m"(*block) | |||
| :"m"(*pixels) | |||
| :"memory"); | |||
| pixels += line_size; | |||
| block += line_size; | |||
| } | |||
| while (--h); | |||
| } | |||
| @@ -340,20 +340,20 @@ static void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, int line_s | |||
| MOVQ_BFE(mm6); | |||
| JUMPALIGN(); | |||
| do { | |||
| __asm __volatile( | |||
| "movq %0, %%mm0 \n\t" | |||
| "movq %1, %%mm1 \n\t" | |||
| PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |||
| "movq %%mm2, %0 \n\t" | |||
| "movq 8%0, %%mm0 \n\t" | |||
| "movq 8%1, %%mm1 \n\t" | |||
| PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |||
| "movq %%mm2, 8%0 \n\t" | |||
| :"+m"(*block) | |||
| :"m"(*pixels) | |||
| :"memory"); | |||
| pixels += line_size; | |||
| block += line_size; | |||
| __asm __volatile( | |||
| "movq %0, %%mm0 \n\t" | |||
| "movq %1, %%mm1 \n\t" | |||
| PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |||
| "movq %%mm2, %0 \n\t" | |||
| "movq 8%0, %%mm0 \n\t" | |||
| "movq 8%1, %%mm1 \n\t" | |||
| PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |||
| "movq %%mm2, 8%0 \n\t" | |||
| :"+m"(*block) | |||
| :"m"(*pixels) | |||
| :"memory"); | |||
| pixels += line_size; | |||
| block += line_size; | |||
| } | |||
| while (--h); | |||
| } | |||
| @@ -363,18 +363,18 @@ static void DEF(avg, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line | |||
| MOVQ_BFE(mm6); | |||
| JUMPALIGN(); | |||
| do { | |||
| __asm __volatile( | |||
| "movq %1, %%mm0 \n\t" | |||
| "movq 1%1, %%mm1 \n\t" | |||
| "movq %0, %%mm3 \n\t" | |||
| PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |||
| PAVGB(%%mm3, %%mm2, %%mm0, %%mm6) | |||
| "movq %%mm0, %0 \n\t" | |||
| :"+m"(*block) | |||
| :"m"(*pixels) | |||
| :"memory"); | |||
| pixels += line_size; | |||
| block += line_size; | |||
| __asm __volatile( | |||
| "movq %1, %%mm0 \n\t" | |||
| "movq 1%1, %%mm1 \n\t" | |||
| "movq %0, %%mm3 \n\t" | |||
| PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |||
| PAVGB(%%mm3, %%mm2, %%mm0, %%mm6) | |||
| "movq %%mm0, %0 \n\t" | |||
| :"+m"(*block) | |||
| :"m"(*pixels) | |||
| :"memory"); | |||
| pixels += line_size; | |||
| block += line_size; | |||
| } while (--h); | |||
| } | |||
| @@ -383,17 +383,17 @@ static __attribute__((unused)) void DEF(avg, pixels8_l2)(uint8_t *dst, uint8_t * | |||
| MOVQ_BFE(mm6); | |||
| JUMPALIGN(); | |||
| do { | |||
| __asm __volatile( | |||
| "movq %1, %%mm0 \n\t" | |||
| "movq %2, %%mm1 \n\t" | |||
| "movq %0, %%mm3 \n\t" | |||
| PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |||
| PAVGB(%%mm3, %%mm2, %%mm0, %%mm6) | |||
| "movq %%mm0, %0 \n\t" | |||
| :"+m"(*dst) | |||
| :"m"(*src1), "m"(*src2) | |||
| :"memory"); | |||
| dst += dstStride; | |||
| __asm __volatile( | |||
| "movq %1, %%mm0 \n\t" | |||
| "movq %2, %%mm1 \n\t" | |||
| "movq %0, %%mm3 \n\t" | |||
| PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |||
| PAVGB(%%mm3, %%mm2, %%mm0, %%mm6) | |||
| "movq %%mm0, %0 \n\t" | |||
| :"+m"(*dst) | |||
| :"m"(*src1), "m"(*src2) | |||
| :"memory"); | |||
| dst += dstStride; | |||
| src1 += src1Stride; | |||
| src2 += 8; | |||
| } while (--h); | |||
| @@ -404,24 +404,24 @@ static void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int lin | |||
| MOVQ_BFE(mm6); | |||
| JUMPALIGN(); | |||
| do { | |||
| __asm __volatile( | |||
| "movq %1, %%mm0 \n\t" | |||
| "movq 1%1, %%mm1 \n\t" | |||
| "movq %0, %%mm3 \n\t" | |||
| PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |||
| PAVGB(%%mm3, %%mm2, %%mm0, %%mm6) | |||
| "movq %%mm0, %0 \n\t" | |||
| "movq 8%1, %%mm0 \n\t" | |||
| "movq 9%1, %%mm1 \n\t" | |||
| "movq 8%0, %%mm3 \n\t" | |||
| PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |||
| PAVGB(%%mm3, %%mm2, %%mm0, %%mm6) | |||
| "movq %%mm0, 8%0 \n\t" | |||
| :"+m"(*block) | |||
| :"m"(*pixels) | |||
| :"memory"); | |||
| pixels += line_size; | |||
| block += line_size; | |||
| __asm __volatile( | |||
| "movq %1, %%mm0 \n\t" | |||
| "movq 1%1, %%mm1 \n\t" | |||
| "movq %0, %%mm3 \n\t" | |||
| PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |||
| PAVGB(%%mm3, %%mm2, %%mm0, %%mm6) | |||
| "movq %%mm0, %0 \n\t" | |||
| "movq 8%1, %%mm0 \n\t" | |||
| "movq 9%1, %%mm1 \n\t" | |||
| "movq 8%0, %%mm3 \n\t" | |||
| PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |||
| PAVGB(%%mm3, %%mm2, %%mm0, %%mm6) | |||
| "movq %%mm0, 8%0 \n\t" | |||
| :"+m"(*block) | |||
| :"m"(*pixels) | |||
| :"memory"); | |||
| pixels += line_size; | |||
| block += line_size; | |||
| } while (--h); | |||
| } | |||
| @@ -430,23 +430,23 @@ static __attribute__((unused)) void DEF(avg, pixels16_l2)(uint8_t *dst, uint8_t | |||
| MOVQ_BFE(mm6); | |||
| JUMPALIGN(); | |||
| do { | |||
| __asm __volatile( | |||
| "movq %1, %%mm0 \n\t" | |||
| "movq %2, %%mm1 \n\t" | |||
| "movq %0, %%mm3 \n\t" | |||
| PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |||
| PAVGB(%%mm3, %%mm2, %%mm0, %%mm6) | |||
| "movq %%mm0, %0 \n\t" | |||
| "movq 8%1, %%mm0 \n\t" | |||
| "movq 8%2, %%mm1 \n\t" | |||
| "movq 8%0, %%mm3 \n\t" | |||
| PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |||
| PAVGB(%%mm3, %%mm2, %%mm0, %%mm6) | |||
| "movq %%mm0, 8%0 \n\t" | |||
| :"+m"(*dst) | |||
| :"m"(*src1), "m"(*src2) | |||
| :"memory"); | |||
| dst += dstStride; | |||
| __asm __volatile( | |||
| "movq %1, %%mm0 \n\t" | |||
| "movq %2, %%mm1 \n\t" | |||
| "movq %0, %%mm3 \n\t" | |||
| PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |||
| PAVGB(%%mm3, %%mm2, %%mm0, %%mm6) | |||
| "movq %%mm0, %0 \n\t" | |||
| "movq 8%1, %%mm0 \n\t" | |||
| "movq 8%2, %%mm1 \n\t" | |||
| "movq 8%0, %%mm3 \n\t" | |||
| PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |||
| PAVGB(%%mm3, %%mm2, %%mm0, %%mm6) | |||
| "movq %%mm0, 8%0 \n\t" | |||
| :"+m"(*dst) | |||
| :"m"(*src1), "m"(*src2) | |||
| :"memory"); | |||
| dst += dstStride; | |||
| src1 += src1Stride; | |||
| src2 += 16; | |||
| } while (--h); | |||
| @@ -456,39 +456,39 @@ static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line | |||
| { | |||
| MOVQ_BFE(mm6); | |||
| __asm __volatile( | |||
| "lea (%3, %3), %%"REG_a" \n\t" | |||
| "movq (%1), %%mm0 \n\t" | |||
| ".balign 8 \n\t" | |||
| "1: \n\t" | |||
| "movq (%1, %3), %%mm1 \n\t" | |||
| "movq (%1, %%"REG_a"), %%mm2 \n\t" | |||
| PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5) | |||
| "movq (%2), %%mm3 \n\t" | |||
| PAVGB(%%mm3, %%mm4, %%mm0, %%mm6) | |||
| "movq (%2, %3), %%mm3 \n\t" | |||
| PAVGB(%%mm3, %%mm5, %%mm1, %%mm6) | |||
| "movq %%mm0, (%2) \n\t" | |||
| "movq %%mm1, (%2, %3) \n\t" | |||
| "add %%"REG_a", %1 \n\t" | |||
| "add %%"REG_a", %2 \n\t" | |||
| "lea (%3, %3), %%"REG_a" \n\t" | |||
| "movq (%1), %%mm0 \n\t" | |||
| ".balign 8 \n\t" | |||
| "1: \n\t" | |||
| "movq (%1, %3), %%mm1 \n\t" | |||
| "movq (%1, %%"REG_a"), %%mm2 \n\t" | |||
| PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5) | |||
| "movq (%2), %%mm3 \n\t" | |||
| PAVGB(%%mm3, %%mm4, %%mm0, %%mm6) | |||
| "movq (%2, %3), %%mm3 \n\t" | |||
| PAVGB(%%mm3, %%mm5, %%mm1, %%mm6) | |||
| "movq %%mm0, (%2) \n\t" | |||
| "movq %%mm1, (%2, %3) \n\t" | |||
| "add %%"REG_a", %1 \n\t" | |||
| "add %%"REG_a", %2 \n\t" | |||
| "movq (%1, %3), %%mm1 \n\t" | |||
| "movq (%1, %%"REG_a"), %%mm0 \n\t" | |||
| PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5) | |||
| "movq (%2), %%mm3 \n\t" | |||
| PAVGB(%%mm3, %%mm4, %%mm2, %%mm6) | |||
| "movq (%2, %3), %%mm3 \n\t" | |||
| PAVGB(%%mm3, %%mm5, %%mm1, %%mm6) | |||
| "movq %%mm2, (%2) \n\t" | |||
| "movq %%mm1, (%2, %3) \n\t" | |||
| "add %%"REG_a", %1 \n\t" | |||
| "add %%"REG_a", %2 \n\t" | |||
| "movq (%1, %3), %%mm1 \n\t" | |||
| "movq (%1, %%"REG_a"), %%mm0 \n\t" | |||
| PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5) | |||
| "movq (%2), %%mm3 \n\t" | |||
| PAVGB(%%mm3, %%mm4, %%mm2, %%mm6) | |||
| "movq (%2, %3), %%mm3 \n\t" | |||
| PAVGB(%%mm3, %%mm5, %%mm1, %%mm6) | |||
| "movq %%mm2, (%2) \n\t" | |||
| "movq %%mm1, (%2, %3) \n\t" | |||
| "add %%"REG_a", %1 \n\t" | |||
| "add %%"REG_a", %2 \n\t" | |||
| "subl $4, %0 \n\t" | |||
| "jnz 1b \n\t" | |||
| :"+g"(h), "+S"(pixels), "+D"(block) | |||
| :"r"((long)line_size) | |||
| :REG_a, "memory"); | |||
| "subl $4, %0 \n\t" | |||
| "jnz 1b \n\t" | |||
| :"+g"(h), "+S"(pixels), "+D"(block) | |||
| :"r"((long)line_size) | |||
| :REG_a, "memory"); | |||
| } | |||
| // this routine is 'slightly' suboptimal but mostly unused | |||
| @@ -497,73 +497,73 @@ static void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin | |||
| MOVQ_ZERO(mm7); | |||
| SET_RND(mm6); // =2 for rnd and =1 for no_rnd version | |||
| __asm __volatile( | |||
| "movq (%1), %%mm0 \n\t" | |||
| "movq 1(%1), %%mm4 \n\t" | |||
| "movq %%mm0, %%mm1 \n\t" | |||
| "movq %%mm4, %%mm5 \n\t" | |||
| "punpcklbw %%mm7, %%mm0 \n\t" | |||
| "punpcklbw %%mm7, %%mm4 \n\t" | |||
| "punpckhbw %%mm7, %%mm1 \n\t" | |||
| "punpckhbw %%mm7, %%mm5 \n\t" | |||
| "paddusw %%mm0, %%mm4 \n\t" | |||
| "paddusw %%mm1, %%mm5 \n\t" | |||
| "xor %%"REG_a", %%"REG_a" \n\t" | |||
| "add %3, %1 \n\t" | |||
| ".balign 8 \n\t" | |||
| "1: \n\t" | |||
| "movq (%1, %%"REG_a"), %%mm0 \n\t" | |||
| "movq 1(%1, %%"REG_a"), %%mm2 \n\t" | |||
| "movq %%mm0, %%mm1 \n\t" | |||
| "movq %%mm2, %%mm3 \n\t" | |||
| "punpcklbw %%mm7, %%mm0 \n\t" | |||
| "punpcklbw %%mm7, %%mm2 \n\t" | |||
| "punpckhbw %%mm7, %%mm1 \n\t" | |||
| "punpckhbw %%mm7, %%mm3 \n\t" | |||
| "paddusw %%mm2, %%mm0 \n\t" | |||
| "paddusw %%mm3, %%mm1 \n\t" | |||
| "paddusw %%mm6, %%mm4 \n\t" | |||
| "paddusw %%mm6, %%mm5 \n\t" | |||
| "paddusw %%mm0, %%mm4 \n\t" | |||
| "paddusw %%mm1, %%mm5 \n\t" | |||
| "psrlw $2, %%mm4 \n\t" | |||
| "psrlw $2, %%mm5 \n\t" | |||
| "movq (%2, %%"REG_a"), %%mm3 \n\t" | |||
| "packuswb %%mm5, %%mm4 \n\t" | |||
| "pcmpeqd %%mm2, %%mm2 \n\t" | |||
| "paddb %%mm2, %%mm2 \n\t" | |||
| PAVGB(%%mm3, %%mm4, %%mm5, %%mm2) | |||
| "movq %%mm5, (%2, %%"REG_a") \n\t" | |||
| "add %3, %%"REG_a" \n\t" | |||
| "movq (%1), %%mm0 \n\t" | |||
| "movq 1(%1), %%mm4 \n\t" | |||
| "movq %%mm0, %%mm1 \n\t" | |||
| "movq %%mm4, %%mm5 \n\t" | |||
| "punpcklbw %%mm7, %%mm0 \n\t" | |||
| "punpcklbw %%mm7, %%mm4 \n\t" | |||
| "punpckhbw %%mm7, %%mm1 \n\t" | |||
| "punpckhbw %%mm7, %%mm5 \n\t" | |||
| "paddusw %%mm0, %%mm4 \n\t" | |||
| "paddusw %%mm1, %%mm5 \n\t" | |||
| "xor %%"REG_a", %%"REG_a" \n\t" | |||
| "add %3, %1 \n\t" | |||
| ".balign 8 \n\t" | |||
| "1: \n\t" | |||
| "movq (%1, %%"REG_a"), %%mm0 \n\t" | |||
| "movq 1(%1, %%"REG_a"), %%mm2 \n\t" | |||
| "movq %%mm0, %%mm1 \n\t" | |||
| "movq %%mm2, %%mm3 \n\t" | |||
| "punpcklbw %%mm7, %%mm0 \n\t" | |||
| "punpcklbw %%mm7, %%mm2 \n\t" | |||
| "punpckhbw %%mm7, %%mm1 \n\t" | |||
| "punpckhbw %%mm7, %%mm3 \n\t" | |||
| "paddusw %%mm2, %%mm0 \n\t" | |||
| "paddusw %%mm3, %%mm1 \n\t" | |||
| "paddusw %%mm6, %%mm4 \n\t" | |||
| "paddusw %%mm6, %%mm5 \n\t" | |||
| "paddusw %%mm0, %%mm4 \n\t" | |||
| "paddusw %%mm1, %%mm5 \n\t" | |||
| "psrlw $2, %%mm4 \n\t" | |||
| "psrlw $2, %%mm5 \n\t" | |||
| "movq (%2, %%"REG_a"), %%mm3 \n\t" | |||
| "packuswb %%mm5, %%mm4 \n\t" | |||
| "pcmpeqd %%mm2, %%mm2 \n\t" | |||
| "paddb %%mm2, %%mm2 \n\t" | |||
| PAVGB(%%mm3, %%mm4, %%mm5, %%mm2) | |||
| "movq %%mm5, (%2, %%"REG_a") \n\t" | |||
| "add %3, %%"REG_a" \n\t" | |||
| "movq (%1, %%"REG_a"), %%mm2 \n\t" // 0 <-> 2 1 <-> 3 | |||
| "movq 1(%1, %%"REG_a"), %%mm4 \n\t" | |||
| "movq %%mm2, %%mm3 \n\t" | |||
| "movq %%mm4, %%mm5 \n\t" | |||
| "punpcklbw %%mm7, %%mm2 \n\t" | |||
| "punpcklbw %%mm7, %%mm4 \n\t" | |||
| "punpckhbw %%mm7, %%mm3 \n\t" | |||
| "punpckhbw %%mm7, %%mm5 \n\t" | |||
| "paddusw %%mm2, %%mm4 \n\t" | |||
| "paddusw %%mm3, %%mm5 \n\t" | |||
| "paddusw %%mm6, %%mm0 \n\t" | |||
| "paddusw %%mm6, %%mm1 \n\t" | |||
| "paddusw %%mm4, %%mm0 \n\t" | |||
| "paddusw %%mm5, %%mm1 \n\t" | |||
| "psrlw $2, %%mm0 \n\t" | |||
| "psrlw $2, %%mm1 \n\t" | |||
| "movq (%2, %%"REG_a"), %%mm3 \n\t" | |||
| "packuswb %%mm1, %%mm0 \n\t" | |||
| "pcmpeqd %%mm2, %%mm2 \n\t" | |||
| "paddb %%mm2, %%mm2 \n\t" | |||
| PAVGB(%%mm3, %%mm0, %%mm1, %%mm2) | |||
| "movq %%mm1, (%2, %%"REG_a") \n\t" | |||
| "add %3, %%"REG_a" \n\t" | |||
| "movq (%1, %%"REG_a"), %%mm2 \n\t" // 0 <-> 2 1 <-> 3 | |||
| "movq 1(%1, %%"REG_a"), %%mm4 \n\t" | |||
| "movq %%mm2, %%mm3 \n\t" | |||
| "movq %%mm4, %%mm5 \n\t" | |||
| "punpcklbw %%mm7, %%mm2 \n\t" | |||
| "punpcklbw %%mm7, %%mm4 \n\t" | |||
| "punpckhbw %%mm7, %%mm3 \n\t" | |||
| "punpckhbw %%mm7, %%mm5 \n\t" | |||
| "paddusw %%mm2, %%mm4 \n\t" | |||
| "paddusw %%mm3, %%mm5 \n\t" | |||
| "paddusw %%mm6, %%mm0 \n\t" | |||
| "paddusw %%mm6, %%mm1 \n\t" | |||
| "paddusw %%mm4, %%mm0 \n\t" | |||
| "paddusw %%mm5, %%mm1 \n\t" | |||
| "psrlw $2, %%mm0 \n\t" | |||
| "psrlw $2, %%mm1 \n\t" | |||
| "movq (%2, %%"REG_a"), %%mm3 \n\t" | |||
| "packuswb %%mm1, %%mm0 \n\t" | |||
| "pcmpeqd %%mm2, %%mm2 \n\t" | |||
| "paddb %%mm2, %%mm2 \n\t" | |||
| PAVGB(%%mm3, %%mm0, %%mm1, %%mm2) | |||
| "movq %%mm1, (%2, %%"REG_a") \n\t" | |||
| "add %3, %%"REG_a" \n\t" | |||
| "subl $2, %0 \n\t" | |||
| "jnz 1b \n\t" | |||
| :"+g"(h), "+S"(pixels) | |||
| :"D"(block), "r"((long)line_size) | |||
| :REG_a, "memory"); | |||
| "subl $2, %0 \n\t" | |||
| "jnz 1b \n\t" | |||
| :"+g"(h), "+S"(pixels) | |||
| :"D"(block), "r"((long)line_size) | |||
| :REG_a, "memory"); | |||
| } | |||
| //FIXME optimize | |||
| @@ -30,21 +30,21 @@ | |||
| // | |||
| ////////////////////////////////////////////////////////////////////// | |||
| #define BITS_FRW_ACC 3 //; 2 or 3 for accuracy | |||
| #define SHIFT_FRW_COL BITS_FRW_ACC | |||
| #define SHIFT_FRW_ROW (BITS_FRW_ACC + 17 - 3) | |||
| #define RND_FRW_ROW (1 << (SHIFT_FRW_ROW-1)) | |||
| //#define RND_FRW_COL (1 << (SHIFT_FRW_COL-1)) | |||
| #define BITS_FRW_ACC 3 //; 2 or 3 for accuracy | |||
| #define SHIFT_FRW_COL BITS_FRW_ACC | |||
| #define SHIFT_FRW_ROW (BITS_FRW_ACC + 17 - 3) | |||
| #define RND_FRW_ROW (1 << (SHIFT_FRW_ROW-1)) | |||
| //#define RND_FRW_COL (1 << (SHIFT_FRW_COL-1)) | |||
| //concatenated table, for forward DCT transformation | |||
| static const int16_t fdct_tg_all_16[] ATTR_ALIGN(8) = { | |||
| 13036, 13036, 13036, 13036, // tg * (2<<16) + 0.5 | |||
| 27146, 27146, 27146, 27146, // tg * (2<<16) + 0.5 | |||
| -21746, -21746, -21746, -21746, // tg * (2<<16) + 0.5 | |||
| 13036, 13036, 13036, 13036, // tg * (2<<16) + 0.5 | |||
| 27146, 27146, 27146, 27146, // tg * (2<<16) + 0.5 | |||
| -21746, -21746, -21746, -21746, // tg * (2<<16) + 0.5 | |||
| }; | |||
| static const int16_t ocos_4_16[4] ATTR_ALIGN(8) = { | |||
| 23170, 23170, 23170, 23170, //cos * (2<<15) + 0.5 | |||
| 23170, 23170, 23170, 23170, //cos * (2<<15) + 0.5 | |||
| }; | |||
| static const int64_t fdct_one_corr ATTR_ALIGN(8) = 0x0001000100010001LL; | |||
| @@ -351,62 +351,62 @@ static always_inline void fdct_col(const int16_t *in, int16_t *out, int offset) | |||
| static always_inline void fdct_row_sse2(const int16_t *in, int16_t *out) | |||
| { | |||
| asm volatile( | |||
| ".macro FDCT_ROW_SSE2_H1 i t \n\t" | |||
| "movq \\i(%0), %%xmm2 \n\t" | |||
| "movq \\i+8(%0), %%xmm0 \n\t" | |||
| "movdqa \\t+32(%1), %%xmm3 \n\t" | |||
| "movdqa \\t+48(%1), %%xmm7 \n\t" | |||
| "movdqa \\t(%1), %%xmm4 \n\t" | |||
| "movdqa \\t+16(%1), %%xmm5 \n\t" | |||
| ".endm \n\t" | |||
| ".macro FDCT_ROW_SSE2_H2 i t \n\t" | |||
| "movq \\i(%0), %%xmm2 \n\t" | |||
| "movq \\i+8(%0), %%xmm0 \n\t" | |||
| "movdqa \\t+32(%1), %%xmm3 \n\t" | |||
| "movdqa \\t+48(%1), %%xmm7 \n\t" | |||
| ".endm \n\t" | |||
| ".macro FDCT_ROW_SSE2 i \n\t" | |||
| "movq %%xmm2, %%xmm1 \n\t" | |||
| "pshuflw $27, %%xmm0, %%xmm0 \n\t" | |||
| "paddsw %%xmm0, %%xmm1 \n\t" | |||
| "psubsw %%xmm0, %%xmm2 \n\t" | |||
| "punpckldq %%xmm2, %%xmm1 \n\t" | |||
| "pshufd $78, %%xmm1, %%xmm2 \n\t" | |||
| "pmaddwd %%xmm2, %%xmm3 \n\t" | |||
| "pmaddwd %%xmm1, %%xmm7 \n\t" | |||
| "pmaddwd %%xmm5, %%xmm2 \n\t" | |||
| "pmaddwd %%xmm4, %%xmm1 \n\t" | |||
| "paddd %%xmm7, %%xmm3 \n\t" | |||
| "paddd %%xmm2, %%xmm1 \n\t" | |||
| "paddd %%xmm6, %%xmm3 \n\t" | |||
| "paddd %%xmm6, %%xmm1 \n\t" | |||
| "psrad %3, %%xmm3 \n\t" | |||
| "psrad %3, %%xmm1 \n\t" | |||
| "packssdw %%xmm3, %%xmm1 \n\t" | |||
| "movdqa %%xmm1, \\i(%4) \n\t" | |||
| ".endm \n\t" | |||
| "movdqa (%2), %%xmm6 \n\t" | |||
| "FDCT_ROW_SSE2_H1 0 0 \n\t" | |||
| "FDCT_ROW_SSE2 0 \n\t" | |||
| "FDCT_ROW_SSE2_H2 64 0 \n\t" | |||
| "FDCT_ROW_SSE2 64 \n\t" | |||
| "FDCT_ROW_SSE2_H1 16 64 \n\t" | |||
| "FDCT_ROW_SSE2 16 \n\t" | |||
| "FDCT_ROW_SSE2_H2 112 64 \n\t" | |||
| "FDCT_ROW_SSE2 112 \n\t" | |||
| "FDCT_ROW_SSE2_H1 32 128 \n\t" | |||
| "FDCT_ROW_SSE2 32 \n\t" | |||
| "FDCT_ROW_SSE2_H2 96 128 \n\t" | |||
| "FDCT_ROW_SSE2 96 \n\t" | |||
| "FDCT_ROW_SSE2_H1 48 192 \n\t" | |||
| "FDCT_ROW_SSE2 48 \n\t" | |||
| "FDCT_ROW_SSE2_H2 80 192 \n\t" | |||
| "FDCT_ROW_SSE2 80 \n\t" | |||
| : | |||
| : "r" (in), "r" (tab_frw_01234567_sse2.tab_frw_01234567_sse2), "r" (fdct_r_row_sse2.fdct_r_row_sse2), "i" (SHIFT_FRW_ROW), "r" (out) | |||
| ".macro FDCT_ROW_SSE2_H1 i t \n\t" | |||
| "movq \\i(%0), %%xmm2 \n\t" | |||
| "movq \\i+8(%0), %%xmm0 \n\t" | |||
| "movdqa \\t+32(%1), %%xmm3 \n\t" | |||
| "movdqa \\t+48(%1), %%xmm7 \n\t" | |||
| "movdqa \\t(%1), %%xmm4 \n\t" | |||
| "movdqa \\t+16(%1), %%xmm5 \n\t" | |||
| ".endm \n\t" | |||
| ".macro FDCT_ROW_SSE2_H2 i t \n\t" | |||
| "movq \\i(%0), %%xmm2 \n\t" | |||
| "movq \\i+8(%0), %%xmm0 \n\t" | |||
| "movdqa \\t+32(%1), %%xmm3 \n\t" | |||
| "movdqa \\t+48(%1), %%xmm7 \n\t" | |||
| ".endm \n\t" | |||
| ".macro FDCT_ROW_SSE2 i \n\t" | |||
| "movq %%xmm2, %%xmm1 \n\t" | |||
| "pshuflw $27, %%xmm0, %%xmm0 \n\t" | |||
| "paddsw %%xmm0, %%xmm1 \n\t" | |||
| "psubsw %%xmm0, %%xmm2 \n\t" | |||
| "punpckldq %%xmm2, %%xmm1 \n\t" | |||
| "pshufd $78, %%xmm1, %%xmm2 \n\t" | |||
| "pmaddwd %%xmm2, %%xmm3 \n\t" | |||
| "pmaddwd %%xmm1, %%xmm7 \n\t" | |||
| "pmaddwd %%xmm5, %%xmm2 \n\t" | |||
| "pmaddwd %%xmm4, %%xmm1 \n\t" | |||
| "paddd %%xmm7, %%xmm3 \n\t" | |||
| "paddd %%xmm2, %%xmm1 \n\t" | |||
| "paddd %%xmm6, %%xmm3 \n\t" | |||
| "paddd %%xmm6, %%xmm1 \n\t" | |||
| "psrad %3, %%xmm3 \n\t" | |||
| "psrad %3, %%xmm1 \n\t" | |||
| "packssdw %%xmm3, %%xmm1 \n\t" | |||
| "movdqa %%xmm1, \\i(%4) \n\t" | |||
| ".endm \n\t" | |||
| "movdqa (%2), %%xmm6 \n\t" | |||
| "FDCT_ROW_SSE2_H1 0 0 \n\t" | |||
| "FDCT_ROW_SSE2 0 \n\t" | |||
| "FDCT_ROW_SSE2_H2 64 0 \n\t" | |||
| "FDCT_ROW_SSE2 64 \n\t" | |||
| "FDCT_ROW_SSE2_H1 16 64 \n\t" | |||
| "FDCT_ROW_SSE2 16 \n\t" | |||
| "FDCT_ROW_SSE2_H2 112 64 \n\t" | |||
| "FDCT_ROW_SSE2 112 \n\t" | |||
| "FDCT_ROW_SSE2_H1 32 128 \n\t" | |||
| "FDCT_ROW_SSE2 32 \n\t" | |||
| "FDCT_ROW_SSE2_H2 96 128 \n\t" | |||
| "FDCT_ROW_SSE2 96 \n\t" | |||
| "FDCT_ROW_SSE2_H1 48 192 \n\t" | |||
| "FDCT_ROW_SSE2 48 \n\t" | |||
| "FDCT_ROW_SSE2_H2 80 192 \n\t" | |||
| "FDCT_ROW_SSE2 80 \n\t" | |||
| : | |||
| : "r" (in), "r" (tab_frw_01234567_sse2.tab_frw_01234567_sse2), "r" (fdct_r_row_sse2.fdct_r_row_sse2), "i" (SHIFT_FRW_ROW), "r" (out) | |||
| ); | |||
| } | |||
| @@ -45,8 +45,8 @@ static void print_v4sf(const char *str, __m128 a) | |||
| void ff_fft_calc_sse(FFTContext *s, FFTComplex *z) | |||
| { | |||
| int ln = s->nbits; | |||
| int j, np, np2; | |||
| int nblocks, nloops; | |||
| int j, np, np2; | |||
| int nblocks, nloops; | |||
| register FFTComplex *p, *q; | |||
| FFTComplex *cptr, *cptr1; | |||
| int k; | |||
| @@ -47,9 +47,9 @@ | |||
| SUMSUB_BADC( d13, s02, s13, d02 ) | |||
| #define SBUTTERFLY(a,b,t,n)\ | |||
| "movq " #a ", " #t " \n\t" /* abcd */\ | |||
| "punpckl" #n " " #b ", " #a " \n\t" /* aebf */\ | |||
| "punpckh" #n " " #b ", " #t " \n\t" /* cgdh */\ | |||
| "movq " #a ", " #t " \n\t" /* abcd */\ | |||
| "punpckl" #n " " #b ", " #a " \n\t" /* aebf */\ | |||
| "punpckh" #n " " #b ", " #t " \n\t" /* cgdh */\ | |||
| #define TRANSPOSE4(a,b,c,d,t)\ | |||
| SBUTTERFLY(a,b,t,wd) /* a=aebf t=cgdh */\ | |||
| @@ -369,73 +369,73 @@ static void h264_h_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int a | |||
| /* motion compensation */ | |||
| #define QPEL_H264V(A,B,C,D,E,F,OP)\ | |||
| "movd (%0), "#F" \n\t"\ | |||
| "movq "#C", %%mm6 \n\t"\ | |||
| "paddw "#D", %%mm6 \n\t"\ | |||
| "psllw $2, %%mm6 \n\t"\ | |||
| "psubw "#B", %%mm6 \n\t"\ | |||
| "psubw "#E", %%mm6 \n\t"\ | |||
| "pmullw %4, %%mm6 \n\t"\ | |||
| "add %2, %0 \n\t"\ | |||
| "punpcklbw %%mm7, "#F" \n\t"\ | |||
| "paddw %5, "#A" \n\t"\ | |||
| "paddw "#F", "#A" \n\t"\ | |||
| "paddw "#A", %%mm6 \n\t"\ | |||
| "psraw $5, %%mm6 \n\t"\ | |||
| "packuswb %%mm6, %%mm6 \n\t"\ | |||
| "movd (%0), "#F" \n\t"\ | |||
| "movq "#C", %%mm6 \n\t"\ | |||
| "paddw "#D", %%mm6 \n\t"\ | |||
| "psllw $2, %%mm6 \n\t"\ | |||
| "psubw "#B", %%mm6 \n\t"\ | |||
| "psubw "#E", %%mm6 \n\t"\ | |||
| "pmullw %4, %%mm6 \n\t"\ | |||
| "add %2, %0 \n\t"\ | |||
| "punpcklbw %%mm7, "#F" \n\t"\ | |||
| "paddw %5, "#A" \n\t"\ | |||
| "paddw "#F", "#A" \n\t"\ | |||
| "paddw "#A", %%mm6 \n\t"\ | |||
| "psraw $5, %%mm6 \n\t"\ | |||
| "packuswb %%mm6, %%mm6 \n\t"\ | |||
| OP(%%mm6, (%1), A, d)\ | |||
| "add %3, %1 \n\t" | |||
| "add %3, %1 \n\t" | |||
| #define QPEL_H264HV(A,B,C,D,E,F,OF)\ | |||
| "movd (%0), "#F" \n\t"\ | |||
| "movq "#C", %%mm6 \n\t"\ | |||
| "paddw "#D", %%mm6 \n\t"\ | |||
| "psllw $2, %%mm6 \n\t"\ | |||
| "psubw "#B", %%mm6 \n\t"\ | |||
| "psubw "#E", %%mm6 \n\t"\ | |||
| "pmullw %3, %%mm6 \n\t"\ | |||
| "add %2, %0 \n\t"\ | |||
| "punpcklbw %%mm7, "#F" \n\t"\ | |||
| "paddw "#F", "#A" \n\t"\ | |||
| "paddw "#A", %%mm6 \n\t"\ | |||
| "movq %%mm6, "#OF"(%1) \n\t" | |||
| "movd (%0), "#F" \n\t"\ | |||
| "movq "#C", %%mm6 \n\t"\ | |||
| "paddw "#D", %%mm6 \n\t"\ | |||
| "psllw $2, %%mm6 \n\t"\ | |||
| "psubw "#B", %%mm6 \n\t"\ | |||
| "psubw "#E", %%mm6 \n\t"\ | |||
| "pmullw %3, %%mm6 \n\t"\ | |||
| "add %2, %0 \n\t"\ | |||
| "punpcklbw %%mm7, "#F" \n\t"\ | |||
| "paddw "#F", "#A" \n\t"\ | |||
| "paddw "#A", %%mm6 \n\t"\ | |||
| "movq %%mm6, "#OF"(%1) \n\t" | |||
| #define QPEL_H264(OPNAME, OP, MMX)\ | |||
| static void OPNAME ## h264_qpel4_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | |||
| int h=4;\ | |||
| \ | |||
| asm volatile(\ | |||
| "pxor %%mm7, %%mm7 \n\t"\ | |||
| "movq %5, %%mm4 \n\t"\ | |||
| "movq %6, %%mm5 \n\t"\ | |||
| "1: \n\t"\ | |||
| "movd -1(%0), %%mm1 \n\t"\ | |||
| "movd (%0), %%mm2 \n\t"\ | |||
| "movd 1(%0), %%mm3 \n\t"\ | |||
| "movd 2(%0), %%mm0 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm1 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm2 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm3 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm0 \n\t"\ | |||
| "paddw %%mm0, %%mm1 \n\t"\ | |||
| "paddw %%mm3, %%mm2 \n\t"\ | |||
| "movd -2(%0), %%mm0 \n\t"\ | |||
| "movd 3(%0), %%mm3 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm0 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm3 \n\t"\ | |||
| "paddw %%mm3, %%mm0 \n\t"\ | |||
| "psllw $2, %%mm2 \n\t"\ | |||
| "psubw %%mm1, %%mm2 \n\t"\ | |||
| "pmullw %%mm4, %%mm2 \n\t"\ | |||
| "paddw %%mm5, %%mm0 \n\t"\ | |||
| "paddw %%mm2, %%mm0 \n\t"\ | |||
| "psraw $5, %%mm0 \n\t"\ | |||
| "packuswb %%mm0, %%mm0 \n\t"\ | |||
| "pxor %%mm7, %%mm7 \n\t"\ | |||
| "movq %5, %%mm4 \n\t"\ | |||
| "movq %6, %%mm5 \n\t"\ | |||
| "1: \n\t"\ | |||
| "movd -1(%0), %%mm1 \n\t"\ | |||
| "movd (%0), %%mm2 \n\t"\ | |||
| "movd 1(%0), %%mm3 \n\t"\ | |||
| "movd 2(%0), %%mm0 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm1 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm2 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm3 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm0 \n\t"\ | |||
| "paddw %%mm0, %%mm1 \n\t"\ | |||
| "paddw %%mm3, %%mm2 \n\t"\ | |||
| "movd -2(%0), %%mm0 \n\t"\ | |||
| "movd 3(%0), %%mm3 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm0 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm3 \n\t"\ | |||
| "paddw %%mm3, %%mm0 \n\t"\ | |||
| "psllw $2, %%mm2 \n\t"\ | |||
| "psubw %%mm1, %%mm2 \n\t"\ | |||
| "pmullw %%mm4, %%mm2 \n\t"\ | |||
| "paddw %%mm5, %%mm0 \n\t"\ | |||
| "paddw %%mm2, %%mm0 \n\t"\ | |||
| "psraw $5, %%mm0 \n\t"\ | |||
| "packuswb %%mm0, %%mm0 \n\t"\ | |||
| OP(%%mm0, (%1),%%mm6, d)\ | |||
| "add %3, %0 \n\t"\ | |||
| "add %4, %1 \n\t"\ | |||
| "decl %2 \n\t"\ | |||
| " jnz 1b \n\t"\ | |||
| "add %3, %0 \n\t"\ | |||
| "add %4, %1 \n\t"\ | |||
| "decl %2 \n\t"\ | |||
| " jnz 1b \n\t"\ | |||
| : "+a"(src), "+c"(dst), "+m"(h)\ | |||
| : "d"((long)srcStride), "S"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\ | |||
| : "memory"\ | |||
| @@ -444,22 +444,22 @@ static void OPNAME ## h264_qpel4_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, i | |||
| static void OPNAME ## h264_qpel4_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | |||
| src -= 2*srcStride;\ | |||
| asm volatile(\ | |||
| "pxor %%mm7, %%mm7 \n\t"\ | |||
| "movd (%0), %%mm0 \n\t"\ | |||
| "add %2, %0 \n\t"\ | |||
| "movd (%0), %%mm1 \n\t"\ | |||
| "add %2, %0 \n\t"\ | |||
| "movd (%0), %%mm2 \n\t"\ | |||
| "add %2, %0 \n\t"\ | |||
| "movd (%0), %%mm3 \n\t"\ | |||
| "add %2, %0 \n\t"\ | |||
| "movd (%0), %%mm4 \n\t"\ | |||
| "add %2, %0 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm0 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm1 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm2 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm3 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm4 \n\t"\ | |||
| "pxor %%mm7, %%mm7 \n\t"\ | |||
| "movd (%0), %%mm0 \n\t"\ | |||
| "add %2, %0 \n\t"\ | |||
| "movd (%0), %%mm1 \n\t"\ | |||
| "add %2, %0 \n\t"\ | |||
| "movd (%0), %%mm2 \n\t"\ | |||
| "add %2, %0 \n\t"\ | |||
| "movd (%0), %%mm3 \n\t"\ | |||
| "add %2, %0 \n\t"\ | |||
| "movd (%0), %%mm4 \n\t"\ | |||
| "add %2, %0 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm0 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm1 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm2 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm3 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm4 \n\t"\ | |||
| QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\ | |||
| QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\ | |||
| QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\ | |||
| @@ -476,22 +476,22 @@ static void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, | |||
| src -= 2*srcStride+2;\ | |||
| while(w--){\ | |||
| asm volatile(\ | |||
| "pxor %%mm7, %%mm7 \n\t"\ | |||
| "movd (%0), %%mm0 \n\t"\ | |||
| "add %2, %0 \n\t"\ | |||
| "movd (%0), %%mm1 \n\t"\ | |||
| "add %2, %0 \n\t"\ | |||
| "movd (%0), %%mm2 \n\t"\ | |||
| "add %2, %0 \n\t"\ | |||
| "movd (%0), %%mm3 \n\t"\ | |||
| "add %2, %0 \n\t"\ | |||
| "movd (%0), %%mm4 \n\t"\ | |||
| "add %2, %0 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm0 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm1 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm2 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm3 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm4 \n\t"\ | |||
| "pxor %%mm7, %%mm7 \n\t"\ | |||
| "movd (%0), %%mm0 \n\t"\ | |||
| "add %2, %0 \n\t"\ | |||
| "movd (%0), %%mm1 \n\t"\ | |||
| "add %2, %0 \n\t"\ | |||
| "movd (%0), %%mm2 \n\t"\ | |||
| "add %2, %0 \n\t"\ | |||
| "movd (%0), %%mm3 \n\t"\ | |||
| "add %2, %0 \n\t"\ | |||
| "movd (%0), %%mm4 \n\t"\ | |||
| "add %2, %0 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm0 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm1 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm2 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm3 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm4 \n\t"\ | |||
| QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 0*8*3)\ | |||
| QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 1*8*3)\ | |||
| QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 2*8*3)\ | |||
| @@ -506,28 +506,28 @@ static void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, | |||
| }\ | |||
| tmp -= 3*4;\ | |||
| asm volatile(\ | |||
| "movq %4, %%mm6 \n\t"\ | |||
| "1: \n\t"\ | |||
| "movq (%0), %%mm0 \n\t"\ | |||
| "paddw 10(%0), %%mm0 \n\t"\ | |||
| "movq 2(%0), %%mm1 \n\t"\ | |||
| "paddw 8(%0), %%mm1 \n\t"\ | |||
| "movq 4(%0), %%mm2 \n\t"\ | |||
| "paddw 6(%0), %%mm2 \n\t"\ | |||
| "psubw %%mm1, %%mm0 \n\t"/*a-b (abccba)*/\ | |||
| "psraw $2, %%mm0 \n\t"/*(a-b)/4 */\ | |||
| "psubw %%mm1, %%mm0 \n\t"/*(a-b)/4-b */\ | |||
| "paddsw %%mm2, %%mm0 \n\t"\ | |||
| "psraw $2, %%mm0 \n\t"/*((a-b)/4-b)/4 */\ | |||
| "paddw %%mm6, %%mm2 \n\t"\ | |||
| "paddw %%mm2, %%mm0 \n\t"\ | |||
| "psraw $6, %%mm0 \n\t"\ | |||
| "packuswb %%mm0, %%mm0 \n\t"\ | |||
| "movq %4, %%mm6 \n\t"\ | |||
| "1: \n\t"\ | |||
| "movq (%0), %%mm0 \n\t"\ | |||
| "paddw 10(%0), %%mm0 \n\t"\ | |||
| "movq 2(%0), %%mm1 \n\t"\ | |||
| "paddw 8(%0), %%mm1 \n\t"\ | |||
| "movq 4(%0), %%mm2 \n\t"\ | |||
| "paddw 6(%0), %%mm2 \n\t"\ | |||
| "psubw %%mm1, %%mm0 \n\t"/*a-b (abccba)*/\ | |||
| "psraw $2, %%mm0 \n\t"/*(a-b)/4 */\ | |||
| "psubw %%mm1, %%mm0 \n\t"/*(a-b)/4-b */\ | |||
| "paddsw %%mm2, %%mm0 \n\t"\ | |||
| "psraw $2, %%mm0 \n\t"/*((a-b)/4-b)/4 */\ | |||
| "paddw %%mm6, %%mm2 \n\t"\ | |||
| "paddw %%mm2, %%mm0 \n\t"\ | |||
| "psraw $6, %%mm0 \n\t"\ | |||
| "packuswb %%mm0, %%mm0 \n\t"\ | |||
| OP(%%mm0, (%1),%%mm7, d)\ | |||
| "add $24, %0 \n\t"\ | |||
| "add %3, %1 \n\t"\ | |||
| "decl %2 \n\t"\ | |||
| " jnz 1b \n\t"\ | |||
| "add $24, %0 \n\t"\ | |||
| "add %3, %1 \n\t"\ | |||
| "decl %2 \n\t"\ | |||
| " jnz 1b \n\t"\ | |||
| : "+a"(tmp), "+c"(dst), "+m"(h)\ | |||
| : "S"((long)dstStride), "m"(ff_pw_32)\ | |||
| : "memory"\ | |||
| @@ -537,54 +537,54 @@ static void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, | |||
| static void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | |||
| int h=8;\ | |||
| asm volatile(\ | |||
| "pxor %%mm7, %%mm7 \n\t"\ | |||
| "movq %5, %%mm6 \n\t"\ | |||
| "1: \n\t"\ | |||
| "movq (%0), %%mm0 \n\t"\ | |||
| "movq 1(%0), %%mm2 \n\t"\ | |||
| "movq %%mm0, %%mm1 \n\t"\ | |||
| "movq %%mm2, %%mm3 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm0 \n\t"\ | |||
| "punpckhbw %%mm7, %%mm1 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm2 \n\t"\ | |||
| "punpckhbw %%mm7, %%mm3 \n\t"\ | |||
| "paddw %%mm2, %%mm0 \n\t"\ | |||
| "paddw %%mm3, %%mm1 \n\t"\ | |||
| "psllw $2, %%mm0 \n\t"\ | |||
| "psllw $2, %%mm1 \n\t"\ | |||
| "movq -1(%0), %%mm2 \n\t"\ | |||
| "movq 2(%0), %%mm4 \n\t"\ | |||
| "movq %%mm2, %%mm3 \n\t"\ | |||
| "movq %%mm4, %%mm5 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm2 \n\t"\ | |||
| "punpckhbw %%mm7, %%mm3 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm4 \n\t"\ | |||
| "punpckhbw %%mm7, %%mm5 \n\t"\ | |||
| "paddw %%mm4, %%mm2 \n\t"\ | |||
| "paddw %%mm3, %%mm5 \n\t"\ | |||
| "psubw %%mm2, %%mm0 \n\t"\ | |||
| "psubw %%mm5, %%mm1 \n\t"\ | |||
| "pmullw %%mm6, %%mm0 \n\t"\ | |||
| "pmullw %%mm6, %%mm1 \n\t"\ | |||
| "movd -2(%0), %%mm2 \n\t"\ | |||
| "movd 7(%0), %%mm5 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm2 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm5 \n\t"\ | |||
| "paddw %%mm3, %%mm2 \n\t"\ | |||
| "paddw %%mm5, %%mm4 \n\t"\ | |||
| "movq %6, %%mm5 \n\t"\ | |||
| "paddw %%mm5, %%mm2 \n\t"\ | |||
| "paddw %%mm5, %%mm4 \n\t"\ | |||
| "paddw %%mm2, %%mm0 \n\t"\ | |||
| "paddw %%mm4, %%mm1 \n\t"\ | |||
| "psraw $5, %%mm0 \n\t"\ | |||
| "psraw $5, %%mm1 \n\t"\ | |||
| "packuswb %%mm1, %%mm0 \n\t"\ | |||
| "pxor %%mm7, %%mm7 \n\t"\ | |||
| "movq %5, %%mm6 \n\t"\ | |||
| "1: \n\t"\ | |||
| "movq (%0), %%mm0 \n\t"\ | |||
| "movq 1(%0), %%mm2 \n\t"\ | |||
| "movq %%mm0, %%mm1 \n\t"\ | |||
| "movq %%mm2, %%mm3 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm0 \n\t"\ | |||
| "punpckhbw %%mm7, %%mm1 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm2 \n\t"\ | |||
| "punpckhbw %%mm7, %%mm3 \n\t"\ | |||
| "paddw %%mm2, %%mm0 \n\t"\ | |||
| "paddw %%mm3, %%mm1 \n\t"\ | |||
| "psllw $2, %%mm0 \n\t"\ | |||
| "psllw $2, %%mm1 \n\t"\ | |||
| "movq -1(%0), %%mm2 \n\t"\ | |||
| "movq 2(%0), %%mm4 \n\t"\ | |||
| "movq %%mm2, %%mm3 \n\t"\ | |||
| "movq %%mm4, %%mm5 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm2 \n\t"\ | |||
| "punpckhbw %%mm7, %%mm3 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm4 \n\t"\ | |||
| "punpckhbw %%mm7, %%mm5 \n\t"\ | |||
| "paddw %%mm4, %%mm2 \n\t"\ | |||
| "paddw %%mm3, %%mm5 \n\t"\ | |||
| "psubw %%mm2, %%mm0 \n\t"\ | |||
| "psubw %%mm5, %%mm1 \n\t"\ | |||
| "pmullw %%mm6, %%mm0 \n\t"\ | |||
| "pmullw %%mm6, %%mm1 \n\t"\ | |||
| "movd -2(%0), %%mm2 \n\t"\ | |||
| "movd 7(%0), %%mm5 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm2 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm5 \n\t"\ | |||
| "paddw %%mm3, %%mm2 \n\t"\ | |||
| "paddw %%mm5, %%mm4 \n\t"\ | |||
| "movq %6, %%mm5 \n\t"\ | |||
| "paddw %%mm5, %%mm2 \n\t"\ | |||
| "paddw %%mm5, %%mm4 \n\t"\ | |||
| "paddw %%mm2, %%mm0 \n\t"\ | |||
| "paddw %%mm4, %%mm1 \n\t"\ | |||
| "psraw $5, %%mm0 \n\t"\ | |||
| "psraw $5, %%mm1 \n\t"\ | |||
| "packuswb %%mm1, %%mm0 \n\t"\ | |||
| OP(%%mm0, (%1),%%mm5, q)\ | |||
| "add %3, %0 \n\t"\ | |||
| "add %4, %1 \n\t"\ | |||
| "decl %2 \n\t"\ | |||
| " jnz 1b \n\t"\ | |||
| "add %3, %0 \n\t"\ | |||
| "add %4, %1 \n\t"\ | |||
| "decl %2 \n\t"\ | |||
| " jnz 1b \n\t"\ | |||
| : "+a"(src), "+c"(dst), "+m"(h)\ | |||
| : "d"((long)srcStride), "S"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\ | |||
| : "memory"\ | |||
| @@ -597,22 +597,22 @@ static void OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, i | |||
| \ | |||
| while(h--){\ | |||
| asm volatile(\ | |||
| "pxor %%mm7, %%mm7 \n\t"\ | |||
| "movd (%0), %%mm0 \n\t"\ | |||
| "add %2, %0 \n\t"\ | |||
| "movd (%0), %%mm1 \n\t"\ | |||
| "add %2, %0 \n\t"\ | |||
| "movd (%0), %%mm2 \n\t"\ | |||
| "add %2, %0 \n\t"\ | |||
| "movd (%0), %%mm3 \n\t"\ | |||
| "add %2, %0 \n\t"\ | |||
| "movd (%0), %%mm4 \n\t"\ | |||
| "add %2, %0 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm0 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm1 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm2 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm3 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm4 \n\t"\ | |||
| "pxor %%mm7, %%mm7 \n\t"\ | |||
| "movd (%0), %%mm0 \n\t"\ | |||
| "add %2, %0 \n\t"\ | |||
| "movd (%0), %%mm1 \n\t"\ | |||
| "add %2, %0 \n\t"\ | |||
| "movd (%0), %%mm2 \n\t"\ | |||
| "add %2, %0 \n\t"\ | |||
| "movd (%0), %%mm3 \n\t"\ | |||
| "add %2, %0 \n\t"\ | |||
| "movd (%0), %%mm4 \n\t"\ | |||
| "add %2, %0 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm0 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm1 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm2 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm3 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm4 \n\t"\ | |||
| QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\ | |||
| QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\ | |||
| QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\ | |||
| @@ -636,22 +636,22 @@ static void OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, | |||
| src -= 2*srcStride+2;\ | |||
| while(w--){\ | |||
| asm volatile(\ | |||
| "pxor %%mm7, %%mm7 \n\t"\ | |||
| "movd (%0), %%mm0 \n\t"\ | |||
| "add %2, %0 \n\t"\ | |||
| "movd (%0), %%mm1 \n\t"\ | |||
| "add %2, %0 \n\t"\ | |||
| "movd (%0), %%mm2 \n\t"\ | |||
| "add %2, %0 \n\t"\ | |||
| "movd (%0), %%mm3 \n\t"\ | |||
| "add %2, %0 \n\t"\ | |||
| "movd (%0), %%mm4 \n\t"\ | |||
| "add %2, %0 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm0 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm1 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm2 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm3 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm4 \n\t"\ | |||
| "pxor %%mm7, %%mm7 \n\t"\ | |||
| "movd (%0), %%mm0 \n\t"\ | |||
| "add %2, %0 \n\t"\ | |||
| "movd (%0), %%mm1 \n\t"\ | |||
| "add %2, %0 \n\t"\ | |||
| "movd (%0), %%mm2 \n\t"\ | |||
| "add %2, %0 \n\t"\ | |||
| "movd (%0), %%mm3 \n\t"\ | |||
| "add %2, %0 \n\t"\ | |||
| "movd (%0), %%mm4 \n\t"\ | |||
| "add %2, %0 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm0 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm1 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm2 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm3 \n\t"\ | |||
| "punpcklbw %%mm7, %%mm4 \n\t"\ | |||
| QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 0*8*4)\ | |||
| QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 1*8*4)\ | |||
| QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 2*8*4)\ | |||
| @@ -670,42 +670,42 @@ static void OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, | |||
| }\ | |||
| tmp -= 4*4;\ | |||
| asm volatile(\ | |||
| "movq %4, %%mm6 \n\t"\ | |||
| "1: \n\t"\ | |||
| "movq (%0), %%mm0 \n\t"\ | |||
| "movq 8(%0), %%mm3 \n\t"\ | |||
| "movq 2(%0), %%mm1 \n\t"\ | |||
| "movq 10(%0), %%mm4 \n\t"\ | |||
| "paddw %%mm4, %%mm0 \n\t"\ | |||
| "paddw %%mm3, %%mm1 \n\t"\ | |||
| "paddw 18(%0), %%mm3 \n\t"\ | |||
| "paddw 16(%0), %%mm4 \n\t"\ | |||
| "movq 4(%0), %%mm2 \n\t"\ | |||
| "movq 12(%0), %%mm5 \n\t"\ | |||
| "paddw 6(%0), %%mm2 \n\t"\ | |||
| "paddw 14(%0), %%mm5 \n\t"\ | |||
| "psubw %%mm1, %%mm0 \n\t"\ | |||
| "psubw %%mm4, %%mm3 \n\t"\ | |||
| "psraw $2, %%mm0 \n\t"\ | |||
| "psraw $2, %%mm3 \n\t"\ | |||
| "psubw %%mm1, %%mm0 \n\t"\ | |||
| "psubw %%mm4, %%mm3 \n\t"\ | |||
| "paddsw %%mm2, %%mm0 \n\t"\ | |||
| "paddsw %%mm5, %%mm3 \n\t"\ | |||
| "psraw $2, %%mm0 \n\t"\ | |||
| "psraw $2, %%mm3 \n\t"\ | |||
| "paddw %%mm6, %%mm2 \n\t"\ | |||
| "paddw %%mm6, %%mm5 \n\t"\ | |||
| "paddw %%mm2, %%mm0 \n\t"\ | |||
| "paddw %%mm5, %%mm3 \n\t"\ | |||
| "psraw $6, %%mm0 \n\t"\ | |||
| "psraw $6, %%mm3 \n\t"\ | |||
| "packuswb %%mm3, %%mm0 \n\t"\ | |||
| "movq %4, %%mm6 \n\t"\ | |||
| "1: \n\t"\ | |||
| "movq (%0), %%mm0 \n\t"\ | |||
| "movq 8(%0), %%mm3 \n\t"\ | |||
| "movq 2(%0), %%mm1 \n\t"\ | |||
| "movq 10(%0), %%mm4 \n\t"\ | |||
| "paddw %%mm4, %%mm0 \n\t"\ | |||
| "paddw %%mm3, %%mm1 \n\t"\ | |||
| "paddw 18(%0), %%mm3 \n\t"\ | |||
| "paddw 16(%0), %%mm4 \n\t"\ | |||
| "movq 4(%0), %%mm2 \n\t"\ | |||
| "movq 12(%0), %%mm5 \n\t"\ | |||
| "paddw 6(%0), %%mm2 \n\t"\ | |||
| "paddw 14(%0), %%mm5 \n\t"\ | |||
| "psubw %%mm1, %%mm0 \n\t"\ | |||
| "psubw %%mm4, %%mm3 \n\t"\ | |||
| "psraw $2, %%mm0 \n\t"\ | |||
| "psraw $2, %%mm3 \n\t"\ | |||
| "psubw %%mm1, %%mm0 \n\t"\ | |||
| "psubw %%mm4, %%mm3 \n\t"\ | |||
| "paddsw %%mm2, %%mm0 \n\t"\ | |||
| "paddsw %%mm5, %%mm3 \n\t"\ | |||
| "psraw $2, %%mm0 \n\t"\ | |||
| "psraw $2, %%mm3 \n\t"\ | |||
| "paddw %%mm6, %%mm2 \n\t"\ | |||
| "paddw %%mm6, %%mm5 \n\t"\ | |||
| "paddw %%mm2, %%mm0 \n\t"\ | |||
| "paddw %%mm5, %%mm3 \n\t"\ | |||
| "psraw $6, %%mm0 \n\t"\ | |||
| "psraw $6, %%mm3 \n\t"\ | |||
| "packuswb %%mm3, %%mm0 \n\t"\ | |||
| OP(%%mm0, (%1),%%mm7, q)\ | |||
| "add $32, %0 \n\t"\ | |||
| "add %3, %1 \n\t"\ | |||
| "decl %2 \n\t"\ | |||
| " jnz 1b \n\t"\ | |||
| "add $32, %0 \n\t"\ | |||
| "add %3, %1 \n\t"\ | |||
| "decl %2 \n\t"\ | |||
| " jnz 1b \n\t"\ | |||
| : "+a"(tmp), "+c"(dst), "+m"(h)\ | |||
| : "S"((long)dstStride), "m"(ff_pw_32)\ | |||
| : "memory"\ | |||
| @@ -862,15 +862,15 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, uint8_t * | |||
| }\ | |||
| #define PUT_OP(a,b,temp, size) "mov" #size " " #a ", " #b " \n\t" | |||
| #define PUT_OP(a,b,temp, size) "mov" #size " " #a ", " #b " \n\t" | |||
| #define AVG_3DNOW_OP(a,b,temp, size) \ | |||
| "mov" #size " " #b ", " #temp " \n\t"\ | |||
| "pavgusb " #temp ", " #a " \n\t"\ | |||
| "mov" #size " " #a ", " #b " \n\t" | |||
| "mov" #size " " #b ", " #temp " \n\t"\ | |||
| "pavgusb " #temp ", " #a " \n\t"\ | |||
| "mov" #size " " #a ", " #b " \n\t" | |||
| #define AVG_MMX2_OP(a,b,temp, size) \ | |||
| "mov" #size " " #b ", " #temp " \n\t"\ | |||
| "pavgb " #temp ", " #a " \n\t"\ | |||
| "mov" #size " " #a ", " #b " \n\t" | |||
| "mov" #size " " #b ", " #temp " \n\t"\ | |||
| "pavgb " #temp ", " #a " \n\t"\ | |||
| "mov" #size " " #a ", " #b " \n\t" | |||
| QPEL_H264(put_, PUT_OP, 3dnow) | |||
| QPEL_H264(avg_, AVG_3DNOW_OP, 3dnow) | |||
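QPEL_H264 is instantiated once per write mode: PUT_OP stores the filtered bytes directly, while the AVG variants average them with what is already in dst, using pavgusb on 3DNow! and pavgb on MMX2 (both round upward). In scalar terms the two OPs amount to roughly:

    /* Illustrative scalar equivalents of the OP macros (not from the patch). */
    static inline void put_op(unsigned char *dst, int v) { *dst = v; }
    static inline void avg_op(unsigned char *dst, int v) { *dst = (*dst + v + 1) >> 1; }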
| @@ -38,7 +38,7 @@ | |||
| #if 0 | |||
| /* C row IDCT - it's just here to document the MMXEXT and MMX versions */ | |||
| static inline void idct_row (int16_t * row, int offset, | |||
| int16_t * table, int32_t * rounder) | |||
| int16_t * table, int32_t * rounder) | |||
| { | |||
| int C1, C2, C3, C4, C5, C6, C7; | |||
| int a0, a1, a2, a3, b0, b1, b2, b3; | |||
| @@ -77,241 +77,241 @@ static inline void idct_row (int16_t * row, int offset, | |||
| /* MMXEXT row IDCT */ | |||
| #define mmxext_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, -c4, -c2, \ | |||
| c4, c6, c4, c6, \ | |||
| c1, c3, -c1, -c5, \ | |||
| c5, c7, c3, -c7, \ | |||
| c4, -c6, c4, -c6, \ | |||
| -c4, c2, c4, -c2, \ | |||
| c5, -c1, c3, -c1, \ | |||
| c7, c3, c7, -c5 } | |||
| #define mmxext_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, -c4, -c2, \ | |||
| c4, c6, c4, c6, \ | |||
| c1, c3, -c1, -c5, \ | |||
| c5, c7, c3, -c7, \ | |||
| c4, -c6, c4, -c6, \ | |||
| -c4, c2, c4, -c2, \ | |||
| c5, -c1, c3, -c1, \ | |||
| c7, c3, c7, -c5 } | |||
| static inline void mmxext_row_head (int16_t * row, int offset, const int16_t * table) | |||
| { | |||
| movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0 | |||
| movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0 | |||
| movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1 | |||
| movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0 | |||
| movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1 | |||
| movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0 | |||
| movq_m2r (*table, mm3); // mm3 = -C2 -C4 C2 C4 | |||
| movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1 | |||
| movq_m2r (*table, mm3); // mm3 = -C2 -C4 C2 C4 | |||
| movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1 | |||
| movq_m2r (*(table+4), mm4); // mm4 = C6 C4 C6 C4 | |||
| pmaddwd_r2r (mm0, mm3); // mm3 = -C4*x4-C2*x6 C4*x0+C2*x2 | |||
| movq_m2r (*(table+4), mm4); // mm4 = C6 C4 C6 C4 | |||
| pmaddwd_r2r (mm0, mm3); // mm3 = -C4*x4-C2*x6 C4*x0+C2*x2 | |||
| pshufw_r2r (mm2, mm2, 0x4e); // mm2 = x2 x0 x6 x4 | |||
| pshufw_r2r (mm2, mm2, 0x4e); // mm2 = x2 x0 x6 x4 | |||
| } | |||
| static inline void mmxext_row (const int16_t * table, const int32_t * rounder) | |||
| { | |||
| movq_m2r (*(table+8), mm1); // mm1 = -C5 -C1 C3 C1 | |||
| pmaddwd_r2r (mm2, mm4); // mm4 = C4*x0+C6*x2 C4*x4+C6*x6 | |||
| movq_m2r (*(table+8), mm1); // mm1 = -C5 -C1 C3 C1 | |||
| pmaddwd_r2r (mm2, mm4); // mm4 = C4*x0+C6*x2 C4*x4+C6*x6 | |||
| pmaddwd_m2r (*(table+16), mm0); // mm0 = C4*x4-C6*x6 C4*x0-C6*x2 | |||
| pshufw_r2r (mm6, mm6, 0x4e); // mm6 = x3 x1 x7 x5 | |||
| pmaddwd_m2r (*(table+16), mm0); // mm0 = C4*x4-C6*x6 C4*x0-C6*x2 | |||
| pshufw_r2r (mm6, mm6, 0x4e); // mm6 = x3 x1 x7 x5 | |||
| movq_m2r (*(table+12), mm7); // mm7 = -C7 C3 C7 C5 | |||
| pmaddwd_r2r (mm5, mm1); // mm1 = -C1*x5-C5*x7 C1*x1+C3*x3 | |||
| movq_m2r (*(table+12), mm7); // mm7 = -C7 C3 C7 C5 | |||
| pmaddwd_r2r (mm5, mm1); // mm1 = -C1*x5-C5*x7 C1*x1+C3*x3 | |||
| paddd_m2r (*rounder, mm3); // mm3 += rounder | |||
| pmaddwd_r2r (mm6, mm7); // mm7 = C3*x1-C7*x3 C5*x5+C7*x7 | |||
| paddd_m2r (*rounder, mm3); // mm3 += rounder | |||
| pmaddwd_r2r (mm6, mm7); // mm7 = C3*x1-C7*x3 C5*x5+C7*x7 | |||
| pmaddwd_m2r (*(table+20), mm2); // mm2 = C4*x0-C2*x2 -C4*x4+C2*x6 | |||
| paddd_r2r (mm4, mm3); // mm3 = a1 a0 + rounder | |||
| pmaddwd_m2r (*(table+20), mm2); // mm2 = C4*x0-C2*x2 -C4*x4+C2*x6 | |||
| paddd_r2r (mm4, mm3); // mm3 = a1 a0 + rounder | |||
| pmaddwd_m2r (*(table+24), mm5); // mm5 = C3*x5-C1*x7 C5*x1-C1*x3 | |||
| movq_r2r (mm3, mm4); // mm4 = a1 a0 + rounder | |||
| pmaddwd_m2r (*(table+24), mm5); // mm5 = C3*x5-C1*x7 C5*x1-C1*x3 | |||
| movq_r2r (mm3, mm4); // mm4 = a1 a0 + rounder | |||
| pmaddwd_m2r (*(table+28), mm6); // mm6 = C7*x1-C5*x3 C7*x5+C3*x7 | |||
| paddd_r2r (mm7, mm1); // mm1 = b1 b0 | |||
| pmaddwd_m2r (*(table+28), mm6); // mm6 = C7*x1-C5*x3 C7*x5+C3*x7 | |||
| paddd_r2r (mm7, mm1); // mm1 = b1 b0 | |||
| paddd_m2r (*rounder, mm0); // mm0 += rounder | |||
| psubd_r2r (mm1, mm3); // mm3 = a1-b1 a0-b0 + rounder | |||
| paddd_m2r (*rounder, mm0); // mm0 += rounder | |||
| psubd_r2r (mm1, mm3); // mm3 = a1-b1 a0-b0 + rounder | |||
| psrad_i2r (ROW_SHIFT, mm3); // mm3 = y6 y7 | |||
| paddd_r2r (mm4, mm1); // mm1 = a1+b1 a0+b0 + rounder | |||
| psrad_i2r (ROW_SHIFT, mm3); // mm3 = y6 y7 | |||
| paddd_r2r (mm4, mm1); // mm1 = a1+b1 a0+b0 + rounder | |||
| paddd_r2r (mm2, mm0); // mm0 = a3 a2 + rounder | |||
| psrad_i2r (ROW_SHIFT, mm1); // mm1 = y1 y0 | |||
| paddd_r2r (mm2, mm0); // mm0 = a3 a2 + rounder | |||
| psrad_i2r (ROW_SHIFT, mm1); // mm1 = y1 y0 | |||
| paddd_r2r (mm6, mm5); // mm5 = b3 b2 | |||
| movq_r2r (mm0, mm4); // mm4 = a3 a2 + rounder | |||
| paddd_r2r (mm6, mm5); // mm5 = b3 b2 | |||
| movq_r2r (mm0, mm4); // mm4 = a3 a2 + rounder | |||
| paddd_r2r (mm5, mm0); // mm0 = a3+b3 a2+b2 + rounder | |||
| psubd_r2r (mm5, mm4); // mm4 = a3-b3 a2-b2 + rounder | |||
| paddd_r2r (mm5, mm0); // mm0 = a3+b3 a2+b2 + rounder | |||
| psubd_r2r (mm5, mm4); // mm4 = a3-b3 a2-b2 + rounder | |||
| } | |||
| static inline void mmxext_row_tail (int16_t * row, int store) | |||
| { | |||
| psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2 | |||
| psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2 | |||
| psrad_i2r (ROW_SHIFT, mm4); // mm4 = y4 y5 | |||
| psrad_i2r (ROW_SHIFT, mm4); // mm4 = y4 y5 | |||
| packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0 | |||
| packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0 | |||
| packssdw_r2r (mm3, mm4); // mm4 = y6 y7 y4 y5 | |||
| packssdw_r2r (mm3, mm4); // mm4 = y6 y7 y4 y5 | |||
| movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0 | |||
| pshufw_r2r (mm4, mm4, 0xb1); // mm4 = y7 y6 y5 y4 | |||
| movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0 | |||
| pshufw_r2r (mm4, mm4, 0xb1); // mm4 = y7 y6 y5 y4 | |||
| /* slot */ | |||
| movq_r2m (mm4, *(row+store+4)); // save y7 y6 y5 y4 | |||
| movq_r2m (mm4, *(row+store+4)); // save y7 y6 y5 y4 | |||
| } | |||
| static inline void mmxext_row_mid (int16_t * row, int store, | |||
| int offset, const int16_t * table) | |||
| int offset, const int16_t * table) | |||
| { | |||
| movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0 | |||
| psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2 | |||
| movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0 | |||
| psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2 | |||
| movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1 | |||
| psrad_i2r (ROW_SHIFT, mm4); // mm4 = y4 y5 | |||
| movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1 | |||
| psrad_i2r (ROW_SHIFT, mm4); // mm4 = y4 y5 | |||
| packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0 | |||
| movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1 | |||
| packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0 | |||
| movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1 | |||
| packssdw_r2r (mm3, mm4); // mm4 = y6 y7 y4 y5 | |||
| movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0 | |||
| packssdw_r2r (mm3, mm4); // mm4 = y6 y7 y4 y5 | |||
| movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0 | |||
| movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0 | |||
| pshufw_r2r (mm4, mm4, 0xb1); // mm4 = y7 y6 y5 y4 | |||
| movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0 | |||
| pshufw_r2r (mm4, mm4, 0xb1); // mm4 = y7 y6 y5 y4 | |||
| movq_m2r (*table, mm3); // mm3 = -C2 -C4 C2 C4 | |||
| movq_r2m (mm4, *(row+store+4)); // save y7 y6 y5 y4 | |||
| movq_m2r (*table, mm3); // mm3 = -C2 -C4 C2 C4 | |||
| movq_r2m (mm4, *(row+store+4)); // save y7 y6 y5 y4 | |||
| pmaddwd_r2r (mm0, mm3); // mm3 = -C4*x4-C2*x6 C4*x0+C2*x2 | |||
| pmaddwd_r2r (mm0, mm3); // mm3 = -C4*x4-C2*x6 C4*x0+C2*x2 | |||
| movq_m2r (*(table+4), mm4); // mm4 = C6 C4 C6 C4 | |||
| pshufw_r2r (mm2, mm2, 0x4e); // mm2 = x2 x0 x6 x4 | |||
| movq_m2r (*(table+4), mm4); // mm4 = C6 C4 C6 C4 | |||
| pshufw_r2r (mm2, mm2, 0x4e); // mm2 = x2 x0 x6 x4 | |||
| } | |||
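The MMXEXT row helpers above evaluate one 8-point row transform almost entirely with pmaddwd, which multiplies four 16-bit pairs and adds adjacent products into two 32-bit sums; that is why mmxext_table interleaves the cosine constants the way it does, and why pshufw with immediate 0x4e reorders the even and odd inputs into the layout each dot product expects. As a scalar sketch, written after the well-known libmpeg2-style C reference rather than taken from this patch, the row transform being computed is:

    #include <stdint.h>

    /* Scalar sketch of the row transform the pmaddwd code above computes.
     * C[1]..C[7] are cos(i*pi/16) in 16-bit fixed point, shift is ROW_SHIFT.
     * Illustrative only. */
    static void idct_row_ref(int16_t *row, const int C[8], int rounder, int shift)
    {
        int a0 = C[4]*row[0] + C[2]*row[2] + C[4]*row[4] + C[6]*row[6] + rounder;
        int a1 = C[4]*row[0] + C[6]*row[2] - C[4]*row[4] - C[2]*row[6] + rounder;
        int a2 = C[4]*row[0] - C[6]*row[2] - C[4]*row[4] + C[2]*row[6] + rounder;
        int a3 = C[4]*row[0] - C[2]*row[2] + C[4]*row[4] - C[6]*row[6] + rounder;

        int b0 = C[1]*row[1] + C[3]*row[3] + C[5]*row[5] + C[7]*row[7];
        int b1 = C[3]*row[1] - C[7]*row[3] - C[1]*row[5] - C[5]*row[7];
        int b2 = C[5]*row[1] - C[1]*row[3] + C[7]*row[5] + C[3]*row[7];
        int b3 = C[7]*row[1] - C[5]*row[3] + C[3]*row[5] - C[1]*row[7];

        row[0] = (a0 + b0) >> shift;  row[7] = (a0 - b0) >> shift;
        row[1] = (a1 + b1) >> shift;  row[6] = (a1 - b1) >> shift;
        row[2] = (a2 + b2) >> shift;  row[5] = (a2 - b2) >> shift;
        row[3] = (a3 + b3) >> shift;  row[4] = (a3 - b3) >> shift;
    }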
| /* MMX row IDCT */ | |||
| #define mmx_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, c4, c6, \ | |||
| c4, c6, -c4, -c2, \ | |||
| c1, c3, c3, -c7, \ | |||
| c5, c7, -c1, -c5, \ | |||
| c4, -c6, c4, -c2, \ | |||
| -c4, c2, c4, -c6, \ | |||
| c5, -c1, c7, -c5, \ | |||
| c7, c3, c3, -c1 } | |||
| #define mmx_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, c4, c6, \ | |||
| c4, c6, -c4, -c2, \ | |||
| c1, c3, c3, -c7, \ | |||
| c5, c7, -c1, -c5, \ | |||
| c4, -c6, c4, -c2, \ | |||
| -c4, c2, c4, -c6, \ | |||
| c5, -c1, c7, -c5, \ | |||
| c7, c3, c3, -c1 } | |||
| static inline void mmx_row_head (int16_t * row, int offset, const int16_t * table) | |||
| { | |||
| movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0 | |||
| movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0 | |||
| movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1 | |||
| movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0 | |||
| movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1 | |||
| movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0 | |||
| movq_m2r (*table, mm3); // mm3 = C6 C4 C2 C4 | |||
| movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1 | |||
| movq_m2r (*table, mm3); // mm3 = C6 C4 C2 C4 | |||
| movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1 | |||
| punpckldq_r2r (mm0, mm0); // mm0 = x2 x0 x2 x0 | |||
| punpckldq_r2r (mm0, mm0); // mm0 = x2 x0 x2 x0 | |||
| movq_m2r (*(table+4), mm4); // mm4 = -C2 -C4 C6 C4 | |||
| pmaddwd_r2r (mm0, mm3); // mm3 = C4*x0+C6*x2 C4*x0+C2*x2 | |||
| movq_m2r (*(table+4), mm4); // mm4 = -C2 -C4 C6 C4 | |||
| pmaddwd_r2r (mm0, mm3); // mm3 = C4*x0+C6*x2 C4*x0+C2*x2 | |||
| movq_m2r (*(table+8), mm1); // mm1 = -C7 C3 C3 C1 | |||
| punpckhdq_r2r (mm2, mm2); // mm2 = x6 x4 x6 x4 | |||
| movq_m2r (*(table+8), mm1); // mm1 = -C7 C3 C3 C1 | |||
| punpckhdq_r2r (mm2, mm2); // mm2 = x6 x4 x6 x4 | |||
| } | |||
| static inline void mmx_row (const int16_t * table, const int32_t * rounder) | |||
| { | |||
| pmaddwd_r2r (mm2, mm4); // mm4 = -C4*x4-C2*x6 C4*x4+C6*x6 | |||
| punpckldq_r2r (mm5, mm5); // mm5 = x3 x1 x3 x1 | |||
| pmaddwd_r2r (mm2, mm4); // mm4 = -C4*x4-C2*x6 C4*x4+C6*x6 | |||
| punpckldq_r2r (mm5, mm5); // mm5 = x3 x1 x3 x1 | |||
| pmaddwd_m2r (*(table+16), mm0); // mm0 = C4*x0-C2*x2 C4*x0-C6*x2 | |||
| punpckhdq_r2r (mm6, mm6); // mm6 = x7 x5 x7 x5 | |||
| pmaddwd_m2r (*(table+16), mm0); // mm0 = C4*x0-C2*x2 C4*x0-C6*x2 | |||
| punpckhdq_r2r (mm6, mm6); // mm6 = x7 x5 x7 x5 | |||
| movq_m2r (*(table+12), mm7); // mm7 = -C5 -C1 C7 C5 | |||
| pmaddwd_r2r (mm5, mm1); // mm1 = C3*x1-C7*x3 C1*x1+C3*x3 | |||
| movq_m2r (*(table+12), mm7); // mm7 = -C5 -C1 C7 C5 | |||
| pmaddwd_r2r (mm5, mm1); // mm1 = C3*x1-C7*x3 C1*x1+C3*x3 | |||
| paddd_m2r (*rounder, mm3); // mm3 += rounder | |||
| pmaddwd_r2r (mm6, mm7); // mm7 = -C1*x5-C5*x7 C5*x5+C7*x7 | |||
| paddd_m2r (*rounder, mm3); // mm3 += rounder | |||
| pmaddwd_r2r (mm6, mm7); // mm7 = -C1*x5-C5*x7 C5*x5+C7*x7 | |||
| pmaddwd_m2r (*(table+20), mm2); // mm2 = C4*x4-C6*x6 -C4*x4+C2*x6 | |||
| paddd_r2r (mm4, mm3); // mm3 = a1 a0 + rounder | |||
| pmaddwd_m2r (*(table+20), mm2); // mm2 = C4*x4-C6*x6 -C4*x4+C2*x6 | |||
| paddd_r2r (mm4, mm3); // mm3 = a1 a0 + rounder | |||
| pmaddwd_m2r (*(table+24), mm5); // mm5 = C7*x1-C5*x3 C5*x1-C1*x3 | |||
| movq_r2r (mm3, mm4); // mm4 = a1 a0 + rounder | |||
| pmaddwd_m2r (*(table+24), mm5); // mm5 = C7*x1-C5*x3 C5*x1-C1*x3 | |||
| movq_r2r (mm3, mm4); // mm4 = a1 a0 + rounder | |||
| pmaddwd_m2r (*(table+28), mm6); // mm6 = C3*x5-C1*x7 C7*x5+C3*x7 | |||
| paddd_r2r (mm7, mm1); // mm1 = b1 b0 | |||
| pmaddwd_m2r (*(table+28), mm6); // mm6 = C3*x5-C1*x7 C7*x5+C3*x7 | |||
| paddd_r2r (mm7, mm1); // mm1 = b1 b0 | |||
| paddd_m2r (*rounder, mm0); // mm0 += rounder | |||
| psubd_r2r (mm1, mm3); // mm3 = a1-b1 a0-b0 + rounder | |||
| paddd_m2r (*rounder, mm0); // mm0 += rounder | |||
| psubd_r2r (mm1, mm3); // mm3 = a1-b1 a0-b0 + rounder | |||
| psrad_i2r (ROW_SHIFT, mm3); // mm3 = y6 y7 | |||
| paddd_r2r (mm4, mm1); // mm1 = a1+b1 a0+b0 + rounder | |||
| psrad_i2r (ROW_SHIFT, mm3); // mm3 = y6 y7 | |||
| paddd_r2r (mm4, mm1); // mm1 = a1+b1 a0+b0 + rounder | |||
| paddd_r2r (mm2, mm0); // mm0 = a3 a2 + rounder | |||
| psrad_i2r (ROW_SHIFT, mm1); // mm1 = y1 y0 | |||
| paddd_r2r (mm2, mm0); // mm0 = a3 a2 + rounder | |||
| psrad_i2r (ROW_SHIFT, mm1); // mm1 = y1 y0 | |||
| paddd_r2r (mm6, mm5); // mm5 = b3 b2 | |||
| movq_r2r (mm0, mm7); // mm7 = a3 a2 + rounder | |||
| paddd_r2r (mm6, mm5); // mm5 = b3 b2 | |||
| movq_r2r (mm0, mm7); // mm7 = a3 a2 + rounder | |||
| paddd_r2r (mm5, mm0); // mm0 = a3+b3 a2+b2 + rounder | |||
| psubd_r2r (mm5, mm7); // mm7 = a3-b3 a2-b2 + rounder | |||
| paddd_r2r (mm5, mm0); // mm0 = a3+b3 a2+b2 + rounder | |||
| psubd_r2r (mm5, mm7); // mm7 = a3-b3 a2-b2 + rounder | |||
| } | |||
| static inline void mmx_row_tail (int16_t * row, int store) | |||
| { | |||
| psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2 | |||
| psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2 | |||
| psrad_i2r (ROW_SHIFT, mm7); // mm7 = y4 y5 | |||
| psrad_i2r (ROW_SHIFT, mm7); // mm7 = y4 y5 | |||
| packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0 | |||
| packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0 | |||
| packssdw_r2r (mm3, mm7); // mm7 = y6 y7 y4 y5 | |||
| packssdw_r2r (mm3, mm7); // mm7 = y6 y7 y4 y5 | |||
| movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0 | |||
| movq_r2r (mm7, mm4); // mm4 = y6 y7 y4 y5 | |||
| movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0 | |||
| movq_r2r (mm7, mm4); // mm4 = y6 y7 y4 y5 | |||
| pslld_i2r (16, mm7); // mm7 = y7 0 y5 0 | |||
| pslld_i2r (16, mm7); // mm7 = y7 0 y5 0 | |||
| psrld_i2r (16, mm4); // mm4 = 0 y6 0 y4 | |||
| psrld_i2r (16, mm4); // mm4 = 0 y6 0 y4 | |||
| por_r2r (mm4, mm7); // mm7 = y7 y6 y5 y4 | |||
| por_r2r (mm4, mm7); // mm7 = y7 y6 y5 y4 | |||
| /* slot */ | |||
| movq_r2m (mm7, *(row+store+4)); // save y7 y6 y5 y4 | |||
| movq_r2m (mm7, *(row+store+4)); // save y7 y6 y5 y4 | |||
| } | |||
| static inline void mmx_row_mid (int16_t * row, int store, | |||
| int offset, const int16_t * table) | |||
| int offset, const int16_t * table) | |||
| { | |||
| movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0 | |||
| psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2 | |||
| movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0 | |||
| psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2 | |||
| movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1 | |||
| psrad_i2r (ROW_SHIFT, mm7); // mm7 = y4 y5 | |||
| movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1 | |||
| psrad_i2r (ROW_SHIFT, mm7); // mm7 = y4 y5 | |||
| packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0 | |||
| movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1 | |||
| packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0 | |||
| movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1 | |||
| packssdw_r2r (mm3, mm7); // mm7 = y6 y7 y4 y5 | |||
| movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0 | |||
| packssdw_r2r (mm3, mm7); // mm7 = y6 y7 y4 y5 | |||
| movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0 | |||
| movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0 | |||
| movq_r2r (mm7, mm1); // mm1 = y6 y7 y4 y5 | |||
| movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0 | |||
| movq_r2r (mm7, mm1); // mm1 = y6 y7 y4 y5 | |||
| punpckldq_r2r (mm0, mm0); // mm0 = x2 x0 x2 x0 | |||
| psrld_i2r (16, mm7); // mm7 = 0 y6 0 y4 | |||
| punpckldq_r2r (mm0, mm0); // mm0 = x2 x0 x2 x0 | |||
| psrld_i2r (16, mm7); // mm7 = 0 y6 0 y4 | |||
| movq_m2r (*table, mm3); // mm3 = C6 C4 C2 C4 | |||
| pslld_i2r (16, mm1); // mm1 = y7 0 y5 0 | |||
| movq_m2r (*table, mm3); // mm3 = C6 C4 C2 C4 | |||
| pslld_i2r (16, mm1); // mm1 = y7 0 y5 0 | |||
| movq_m2r (*(table+4), mm4); // mm4 = -C2 -C4 C6 C4 | |||
| por_r2r (mm1, mm7); // mm7 = y7 y6 y5 y4 | |||
| movq_m2r (*(table+4), mm4); // mm4 = -C2 -C4 C6 C4 | |||
| por_r2r (mm1, mm7); // mm7 = y7 y6 y5 y4 | |||
| movq_m2r (*(table+8), mm1); // mm1 = -C7 C3 C3 C1 | |||
| punpckhdq_r2r (mm2, mm2); // mm2 = x6 x4 x6 x4 | |||
| movq_m2r (*(table+8), mm1); // mm1 = -C7 C3 C3 C1 | |||
| punpckhdq_r2r (mm2, mm2); // mm2 = x6 x4 x6 x4 | |||
| movq_r2m (mm7, *(row+store+4)); // save y7 y6 y5 y4 | |||
| pmaddwd_r2r (mm0, mm3); // mm3 = C4*x0+C6*x2 C4*x0+C2*x2 | |||
| movq_r2m (mm7, *(row+store+4)); // save y7 y6 y5 y4 | |||
| pmaddwd_r2r (mm0, mm3); // mm3 = C4*x0+C6*x2 C4*x0+C2*x2 | |||
| } | |||
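The plain-MMX row code is structurally identical to the MMXEXT version; the one instruction it must avoid is pshufw, so the word shuffles are rebuilt from punpckldq/punpckhdq and, in mmx_row_tail and mmx_row_mid, from the pslld/psrld/por sequence above. That three-instruction sequence performs the same swap as pshufw with immediate 0xb1, i.e. roughly:

    #include <stdint.h>

    /* Illustrative scalar model of the pslld/psrld/por word swap above
     * (equivalent to pshufw 0xb1 on one 64-bit register). */
    static uint64_t swap_words_in_dwords(uint64_t v)
    {
        return ((v << 16) & 0xffff0000ffff0000ULL) |
               ((v >> 16) & 0x0000ffff0000ffffULL);
    }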
| @@ -403,132 +403,132 @@ static inline void idct_col (int16_t * col, int offset) | |||
| /* column code adapted from Peter Gubanov */ | |||
| /* http://www.elecard.com/peter/idct.shtml */ | |||
| movq_m2r (*_T1, mm0); // mm0 = T1 | |||
| movq_m2r (*_T1, mm0); // mm0 = T1 | |||
| movq_m2r (*(col+offset+1*8), mm1); // mm1 = x1 | |||
| movq_r2r (mm0, mm2); // mm2 = T1 | |||
| movq_m2r (*(col+offset+1*8), mm1); // mm1 = x1 | |||
| movq_r2r (mm0, mm2); // mm2 = T1 | |||
| movq_m2r (*(col+offset+7*8), mm4); // mm4 = x7 | |||
| pmulhw_r2r (mm1, mm0); // mm0 = T1*x1 | |||
| movq_m2r (*(col+offset+7*8), mm4); // mm4 = x7 | |||
| pmulhw_r2r (mm1, mm0); // mm0 = T1*x1 | |||
| movq_m2r (*_T3, mm5); // mm5 = T3 | |||
| pmulhw_r2r (mm4, mm2); // mm2 = T1*x7 | |||
| movq_m2r (*_T3, mm5); // mm5 = T3 | |||
| pmulhw_r2r (mm4, mm2); // mm2 = T1*x7 | |||
| movq_m2r (*(col+offset+5*8), mm6); // mm6 = x5 | |||
| movq_r2r (mm5, mm7); // mm7 = T3-1 | |||
| movq_m2r (*(col+offset+5*8), mm6); // mm6 = x5 | |||
| movq_r2r (mm5, mm7); // mm7 = T3-1 | |||
| movq_m2r (*(col+offset+3*8), mm3); // mm3 = x3 | |||
| psubsw_r2r (mm4, mm0); // mm0 = v17 | |||
| movq_m2r (*(col+offset+3*8), mm3); // mm3 = x3 | |||
| psubsw_r2r (mm4, mm0); // mm0 = v17 | |||
| movq_m2r (*_T2, mm4); // mm4 = T2 | |||
| pmulhw_r2r (mm3, mm5); // mm5 = (T3-1)*x3 | |||
| movq_m2r (*_T2, mm4); // mm4 = T2 | |||
| pmulhw_r2r (mm3, mm5); // mm5 = (T3-1)*x3 | |||
| paddsw_r2r (mm2, mm1); // mm1 = u17 | |||
| pmulhw_r2r (mm6, mm7); // mm7 = (T3-1)*x5 | |||
| paddsw_r2r (mm2, mm1); // mm1 = u17 | |||
| pmulhw_r2r (mm6, mm7); // mm7 = (T3-1)*x5 | |||
| /* slot */ | |||
| movq_r2r (mm4, mm2); // mm2 = T2 | |||
| paddsw_r2r (mm3, mm5); // mm5 = T3*x3 | |||
| movq_r2r (mm4, mm2); // mm2 = T2 | |||
| paddsw_r2r (mm3, mm5); // mm5 = T3*x3 | |||
| pmulhw_m2r (*(col+offset+2*8), mm4);// mm4 = T2*x2 | |||
| paddsw_r2r (mm6, mm7); // mm7 = T3*x5 | |||
| paddsw_r2r (mm6, mm7); // mm7 = T3*x5 | |||
| psubsw_r2r (mm6, mm5); // mm5 = v35 | |||
| paddsw_r2r (mm3, mm7); // mm7 = u35 | |||
| psubsw_r2r (mm6, mm5); // mm5 = v35 | |||
| paddsw_r2r (mm3, mm7); // mm7 = u35 | |||
| movq_m2r (*(col+offset+6*8), mm3); // mm3 = x6 | |||
| movq_r2r (mm0, mm6); // mm6 = v17 | |||
| movq_m2r (*(col+offset+6*8), mm3); // mm3 = x6 | |||
| movq_r2r (mm0, mm6); // mm6 = v17 | |||
| pmulhw_r2r (mm3, mm2); // mm2 = T2*x6 | |||
| psubsw_r2r (mm5, mm0); // mm0 = b3 | |||
| pmulhw_r2r (mm3, mm2); // mm2 = T2*x6 | |||
| psubsw_r2r (mm5, mm0); // mm0 = b3 | |||
| psubsw_r2r (mm3, mm4); // mm4 = v26 | |||
| paddsw_r2r (mm6, mm5); // mm5 = v12 | |||
| psubsw_r2r (mm3, mm4); // mm4 = v26 | |||
| paddsw_r2r (mm6, mm5); // mm5 = v12 | |||
| movq_r2m (mm0, *(col+offset+3*8)); // save b3 in scratch0 | |||
| movq_r2r (mm1, mm6); // mm6 = u17 | |||
| movq_r2m (mm0, *(col+offset+3*8)); // save b3 in scratch0 | |||
| movq_r2r (mm1, mm6); // mm6 = u17 | |||
| paddsw_m2r (*(col+offset+2*8), mm2);// mm2 = u26 | |||
| paddsw_r2r (mm7, mm6); // mm6 = b0 | |||
| paddsw_r2r (mm7, mm6); // mm6 = b0 | |||
| psubsw_r2r (mm7, mm1); // mm1 = u12 | |||
| movq_r2r (mm1, mm7); // mm7 = u12 | |||
| psubsw_r2r (mm7, mm1); // mm1 = u12 | |||
| movq_r2r (mm1, mm7); // mm7 = u12 | |||
| movq_m2r (*(col+offset+0*8), mm3); // mm3 = x0 | |||
| paddsw_r2r (mm5, mm1); // mm1 = u12+v12 | |||
| movq_m2r (*(col+offset+0*8), mm3); // mm3 = x0 | |||
| paddsw_r2r (mm5, mm1); // mm1 = u12+v12 | |||
| movq_m2r (*_C4, mm0); // mm0 = C4/2 | |||
| psubsw_r2r (mm5, mm7); // mm7 = u12-v12 | |||
| movq_m2r (*_C4, mm0); // mm0 = C4/2 | |||
| psubsw_r2r (mm5, mm7); // mm7 = u12-v12 | |||
| movq_r2m (mm6, *(col+offset+5*8)); // save b0 in scratch1 | |||
| pmulhw_r2r (mm0, mm1); // mm1 = b1/2 | |||
| movq_r2m (mm6, *(col+offset+5*8)); // save b0 in scratch1 | |||
| pmulhw_r2r (mm0, mm1); // mm1 = b1/2 | |||
| movq_r2r (mm4, mm6); // mm6 = v26 | |||
| pmulhw_r2r (mm0, mm7); // mm7 = b2/2 | |||
| movq_r2r (mm4, mm6); // mm6 = v26 | |||
| pmulhw_r2r (mm0, mm7); // mm7 = b2/2 | |||
| movq_m2r (*(col+offset+4*8), mm5); // mm5 = x4 | |||
| movq_r2r (mm3, mm0); // mm0 = x0 | |||
| movq_m2r (*(col+offset+4*8), mm5); // mm5 = x4 | |||
| movq_r2r (mm3, mm0); // mm0 = x0 | |||
| psubsw_r2r (mm5, mm3); // mm3 = v04 | |||
| paddsw_r2r (mm5, mm0); // mm0 = u04 | |||
| psubsw_r2r (mm5, mm3); // mm3 = v04 | |||
| paddsw_r2r (mm5, mm0); // mm0 = u04 | |||
| paddsw_r2r (mm3, mm4); // mm4 = a1 | |||
| movq_r2r (mm0, mm5); // mm5 = u04 | |||
| paddsw_r2r (mm3, mm4); // mm4 = a1 | |||
| movq_r2r (mm0, mm5); // mm5 = u04 | |||
| psubsw_r2r (mm6, mm3); // mm3 = a2 | |||
| paddsw_r2r (mm2, mm5); // mm5 = a0 | |||
| psubsw_r2r (mm6, mm3); // mm3 = a2 | |||
| paddsw_r2r (mm2, mm5); // mm5 = a0 | |||
| paddsw_r2r (mm1, mm1); // mm1 = b1 | |||
| psubsw_r2r (mm2, mm0); // mm0 = a3 | |||
| paddsw_r2r (mm1, mm1); // mm1 = b1 | |||
| psubsw_r2r (mm2, mm0); // mm0 = a3 | |||
| paddsw_r2r (mm7, mm7); // mm7 = b2 | |||
| movq_r2r (mm3, mm2); // mm2 = a2 | |||
| paddsw_r2r (mm7, mm7); // mm7 = b2 | |||
| movq_r2r (mm3, mm2); // mm2 = a2 | |||
| movq_r2r (mm4, mm6); // mm6 = a1 | |||
| paddsw_r2r (mm7, mm3); // mm3 = a2+b2 | |||
| movq_r2r (mm4, mm6); // mm6 = a1 | |||
| paddsw_r2r (mm7, mm3); // mm3 = a2+b2 | |||
| psraw_i2r (COL_SHIFT, mm3); // mm3 = y2 | |||
| paddsw_r2r (mm1, mm4); // mm4 = a1+b1 | |||
| psraw_i2r (COL_SHIFT, mm3); // mm3 = y2 | |||
| paddsw_r2r (mm1, mm4); // mm4 = a1+b1 | |||
| psraw_i2r (COL_SHIFT, mm4); // mm4 = y1 | |||
| psubsw_r2r (mm1, mm6); // mm6 = a1-b1 | |||
| psraw_i2r (COL_SHIFT, mm4); // mm4 = y1 | |||
| psubsw_r2r (mm1, mm6); // mm6 = a1-b1 | |||
| movq_m2r (*(col+offset+5*8), mm1); // mm1 = b0 | |||
| psubsw_r2r (mm7, mm2); // mm2 = a2-b2 | |||
| movq_m2r (*(col+offset+5*8), mm1); // mm1 = b0 | |||
| psubsw_r2r (mm7, mm2); // mm2 = a2-b2 | |||
| psraw_i2r (COL_SHIFT, mm6); // mm6 = y6 | |||
| movq_r2r (mm5, mm7); // mm7 = a0 | |||
| psraw_i2r (COL_SHIFT, mm6); // mm6 = y6 | |||
| movq_r2r (mm5, mm7); // mm7 = a0 | |||
| movq_r2m (mm4, *(col+offset+1*8)); // save y1 | |||
| psraw_i2r (COL_SHIFT, mm2); // mm2 = y5 | |||
| movq_r2m (mm4, *(col+offset+1*8)); // save y1 | |||
| psraw_i2r (COL_SHIFT, mm2); // mm2 = y5 | |||
| movq_r2m (mm3, *(col+offset+2*8)); // save y2 | |||
| paddsw_r2r (mm1, mm5); // mm5 = a0+b0 | |||
| movq_r2m (mm3, *(col+offset+2*8)); // save y2 | |||
| paddsw_r2r (mm1, mm5); // mm5 = a0+b0 | |||
| movq_m2r (*(col+offset+3*8), mm4); // mm4 = b3 | |||
| psubsw_r2r (mm1, mm7); // mm7 = a0-b0 | |||
| movq_m2r (*(col+offset+3*8), mm4); // mm4 = b3 | |||
| psubsw_r2r (mm1, mm7); // mm7 = a0-b0 | |||
| psraw_i2r (COL_SHIFT, mm5); // mm5 = y0 | |||
| movq_r2r (mm0, mm3); // mm3 = a3 | |||
| psraw_i2r (COL_SHIFT, mm5); // mm5 = y0 | |||
| movq_r2r (mm0, mm3); // mm3 = a3 | |||
| movq_r2m (mm2, *(col+offset+5*8)); // save y5 | |||
| psubsw_r2r (mm4, mm3); // mm3 = a3-b3 | |||
| movq_r2m (mm2, *(col+offset+5*8)); // save y5 | |||
| psubsw_r2r (mm4, mm3); // mm3 = a3-b3 | |||
| psraw_i2r (COL_SHIFT, mm7); // mm7 = y7 | |||
| paddsw_r2r (mm0, mm4); // mm4 = a3+b3 | |||
| psraw_i2r (COL_SHIFT, mm7); // mm7 = y7 | |||
| paddsw_r2r (mm0, mm4); // mm4 = a3+b3 | |||
| movq_r2m (mm5, *(col+offset+0*8)); // save y0 | |||
| psraw_i2r (COL_SHIFT, mm3); // mm3 = y4 | |||
| movq_r2m (mm5, *(col+offset+0*8)); // save y0 | |||
| psraw_i2r (COL_SHIFT, mm3); // mm3 = y4 | |||
| movq_r2m (mm6, *(col+offset+6*8)); // save y6 | |||
| psraw_i2r (COL_SHIFT, mm4); // mm4 = y3 | |||
| movq_r2m (mm6, *(col+offset+6*8)); // save y6 | |||
| psraw_i2r (COL_SHIFT, mm4); // mm4 = y3 | |||
| movq_r2m (mm7, *(col+offset+7*8)); // save y7 | |||
| movq_r2m (mm7, *(col+offset+7*8)); // save y7 | |||
| movq_r2m (mm3, *(col+offset+4*8)); // save y4 | |||
| movq_r2m (mm3, *(col+offset+4*8)); // save y4 | |||
| movq_r2m (mm4, *(col+offset+3*8)); // save y3 | |||
| movq_r2m (mm4, *(col+offset+3*8)); // save y3 | |||
| #undef T1 | |||
| #undef T2 | |||
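The column pass stays entirely in 16-bit registers and relies on pmulhw, which keeps only the high 16 bits of each signed 16x16 product, i.e. (a*b) >> 16; the T1/T2/T3/C4 constants are therefore tangent and cosine factors pre-scaled to that fixed-point format, with T3 applied as "T3-1" and C4 halved (and b1/b2 later doubled with paddsw) so intermediate values stay in range, exactly as the register comments above indicate. A scalar model of the primitive, for reference only:

    #include <stdint.h>

    /* Scalar model of pmulhw as used by the column pass: multiply two signed
     * 16-bit values and keep the high half, i.e. scale by K/65536. */
    static int16_t pmulhw_ref(int16_t a, int16_t b)
    {
        return (int16_t)(((int32_t)a * b) >> 16);
    }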
| @@ -540,61 +540,61 @@ static const int32_t rounder0[] ATTR_ALIGN(8) = | |||
| rounder ((1 << (COL_SHIFT - 1)) - 0.5); | |||
| static const int32_t rounder4[] ATTR_ALIGN(8) = rounder (0); | |||
| static const int32_t rounder1[] ATTR_ALIGN(8) = | |||
| rounder (1.25683487303); /* C1*(C1/C4+C1+C7)/2 */ | |||
| rounder (1.25683487303); /* C1*(C1/C4+C1+C7)/2 */ | |||
| static const int32_t rounder7[] ATTR_ALIGN(8) = | |||
| rounder (-0.25); /* C1*(C7/C4+C7-C1)/2 */ | |||
| rounder (-0.25); /* C1*(C7/C4+C7-C1)/2 */ | |||
| static const int32_t rounder2[] ATTR_ALIGN(8) = | |||
| rounder (0.60355339059); /* C2 * (C6+C2)/2 */ | |||
| rounder (0.60355339059); /* C2 * (C6+C2)/2 */ | |||
| static const int32_t rounder6[] ATTR_ALIGN(8) = | |||
| rounder (-0.25); /* C2 * (C6-C2)/2 */ | |||
| rounder (-0.25); /* C2 * (C6-C2)/2 */ | |||
| static const int32_t rounder3[] ATTR_ALIGN(8) = | |||
| rounder (0.087788325588); /* C3*(-C3/C4+C3+C5)/2 */ | |||
| rounder (0.087788325588); /* C3*(-C3/C4+C3+C5)/2 */ | |||
| static const int32_t rounder5[] ATTR_ALIGN(8) = | |||
| rounder (-0.441341716183); /* C3*(-C5/C4+C5-C3)/2 */ | |||
| rounder (-0.441341716183); /* C3*(-C5/C4+C5-C3)/2 */ | |||
| #undef COL_SHIFT | |||
| #undef ROW_SHIFT | |||
| #define declare_idct(idct,table,idct_row_head,idct_row,idct_row_tail,idct_row_mid) \ | |||
| void idct (int16_t * block) \ | |||
| { \ | |||
| static const int16_t table04[] ATTR_ALIGN(16) = \ | |||
| table (22725, 21407, 19266, 16384, 12873, 8867, 4520); \ | |||
| static const int16_t table17[] ATTR_ALIGN(16) = \ | |||
| table (31521, 29692, 26722, 22725, 17855, 12299, 6270); \ | |||
| static const int16_t table26[] ATTR_ALIGN(16) = \ | |||
| table (29692, 27969, 25172, 21407, 16819, 11585, 5906); \ | |||
| static const int16_t table35[] ATTR_ALIGN(16) = \ | |||
| table (26722, 25172, 22654, 19266, 15137, 10426, 5315); \ | |||
| \ | |||
| idct_row_head (block, 0*8, table04); \ | |||
| idct_row (table04, rounder0); \ | |||
| idct_row_mid (block, 0*8, 4*8, table04); \ | |||
| idct_row (table04, rounder4); \ | |||
| idct_row_mid (block, 4*8, 1*8, table17); \ | |||
| idct_row (table17, rounder1); \ | |||
| idct_row_mid (block, 1*8, 7*8, table17); \ | |||
| idct_row (table17, rounder7); \ | |||
| idct_row_mid (block, 7*8, 2*8, table26); \ | |||
| idct_row (table26, rounder2); \ | |||
| idct_row_mid (block, 2*8, 6*8, table26); \ | |||
| idct_row (table26, rounder6); \ | |||
| idct_row_mid (block, 6*8, 3*8, table35); \ | |||
| idct_row (table35, rounder3); \ | |||
| idct_row_mid (block, 3*8, 5*8, table35); \ | |||
| idct_row (table35, rounder5); \ | |||
| idct_row_tail (block, 5*8); \ | |||
| \ | |||
| idct_col (block, 0); \ | |||
| idct_col (block, 4); \ | |||
| #define declare_idct(idct,table,idct_row_head,idct_row,idct_row_tail,idct_row_mid) \ | |||
| void idct (int16_t * block) \ | |||
| { \ | |||
| static const int16_t table04[] ATTR_ALIGN(16) = \ | |||
| table (22725, 21407, 19266, 16384, 12873, 8867, 4520); \ | |||
| static const int16_t table17[] ATTR_ALIGN(16) = \ | |||
| table (31521, 29692, 26722, 22725, 17855, 12299, 6270); \ | |||
| static const int16_t table26[] ATTR_ALIGN(16) = \ | |||
| table (29692, 27969, 25172, 21407, 16819, 11585, 5906); \ | |||
| static const int16_t table35[] ATTR_ALIGN(16) = \ | |||
| table (26722, 25172, 22654, 19266, 15137, 10426, 5315); \ | |||
| \ | |||
| idct_row_head (block, 0*8, table04); \ | |||
| idct_row (table04, rounder0); \ | |||
| idct_row_mid (block, 0*8, 4*8, table04); \ | |||
| idct_row (table04, rounder4); \ | |||
| idct_row_mid (block, 4*8, 1*8, table17); \ | |||
| idct_row (table17, rounder1); \ | |||
| idct_row_mid (block, 1*8, 7*8, table17); \ | |||
| idct_row (table17, rounder7); \ | |||
| idct_row_mid (block, 7*8, 2*8, table26); \ | |||
| idct_row (table26, rounder2); \ | |||
| idct_row_mid (block, 2*8, 6*8, table26); \ | |||
| idct_row (table26, rounder6); \ | |||
| idct_row_mid (block, 6*8, 3*8, table35); \ | |||
| idct_row (table35, rounder3); \ | |||
| idct_row_mid (block, 3*8, 5*8, table35); \ | |||
| idct_row (table35, rounder5); \ | |||
| idct_row_tail (block, 5*8); \ | |||
| \ | |||
| idct_col (block, 0); \ | |||
| idct_col (block, 4); \ | |||
| } | |||
| void ff_mmx_idct(DCTELEM *block); | |||
| void ff_mmxext_idct(DCTELEM *block); | |||
| declare_idct (ff_mmxext_idct, mmxext_table, | |||
| mmxext_row_head, mmxext_row, mmxext_row_tail, mmxext_row_mid) | |||
| mmxext_row_head, mmxext_row, mmxext_row_tail, mmxext_row_mid) | |||
| declare_idct (ff_mmx_idct, mmx_table, | |||
| mmx_row_head, mmx_row, mmx_row_tail, mmx_row_mid) | |||
| mmx_row_head, mmx_row, mmx_row_tail, mmx_row_mid) | |||
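declare_idct thus expands into a complete in-place 8x8 inverse DCT: eight row passes, interleaved in the 0,4,1,7,2,6,3,5 order the per-row rounder tables are built for, followed by two four-column passes. A hypothetical caller sketch (the alignment attribute and the emms() call are the usual MMX conventions, not something introduced by this patch):

    /* Hypothetical usage sketch, assuming dsputil.h and mmx.h are included. */
    static void idct_example(void)
    {
        DCTELEM block[64] __attribute__((aligned(16)));
        /* ... fill block with dequantized coefficients ... */
        ff_mmxext_idct(block);   /* or ff_mmx_idct() on plain-MMX CPUs */
        emms();                  /* leave MMX state before any FPU code runs */
    }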
| @@ -27,257 +27,257 @@ | |||
| * values by ULL, lest they be truncated by the compiler) | |||
| */ | |||
| typedef union { | |||
| long long q; /* Quadword (64-bit) value */ | |||
| unsigned long long uq; /* Unsigned Quadword */ | |||
| int d[2]; /* 2 Doubleword (32-bit) values */ | |||
| unsigned int ud[2]; /* 2 Unsigned Doubleword */ | |||
| short w[4]; /* 4 Word (16-bit) values */ | |||
| unsigned short uw[4]; /* 4 Unsigned Word */ | |||
| char b[8]; /* 8 Byte (8-bit) values */ | |||
| unsigned char ub[8]; /* 8 Unsigned Byte */ | |||
| float s[2]; /* Single-precision (32-bit) value */ | |||
| } mmx_t; /* On an 8-byte (64-bit) boundary */ | |||
| #define mmx_i2r(op,imm,reg) \ | |||
| __asm__ __volatile__ (#op " %0, %%" #reg \ | |||
| : /* nothing */ \ | |||
| : "i" (imm) ) | |||
| #define mmx_m2r(op,mem,reg) \ | |||
| __asm__ __volatile__ (#op " %0, %%" #reg \ | |||
| : /* nothing */ \ | |||
| : "m" (mem)) | |||
| #define mmx_r2m(op,reg,mem) \ | |||
| __asm__ __volatile__ (#op " %%" #reg ", %0" \ | |||
| : "=m" (mem) \ | |||
| : /* nothing */ ) | |||
| #define mmx_r2r(op,regs,regd) \ | |||
| __asm__ __volatile__ (#op " %" #regs ", %" #regd) | |||
| #define emms() __asm__ __volatile__ ("emms") | |||
| #define movd_m2r(var,reg) mmx_m2r (movd, var, reg) | |||
| #define movd_r2m(reg,var) mmx_r2m (movd, reg, var) | |||
| #define movd_r2r(regs,regd) mmx_r2r (movd, regs, regd) | |||
| #define movq_m2r(var,reg) mmx_m2r (movq, var, reg) | |||
| #define movq_r2m(reg,var) mmx_r2m (movq, reg, var) | |||
| #define movq_r2r(regs,regd) mmx_r2r (movq, regs, regd) | |||
| #define packssdw_m2r(var,reg) mmx_m2r (packssdw, var, reg) | |||
| #define packssdw_r2r(regs,regd) mmx_r2r (packssdw, regs, regd) | |||
| #define packsswb_m2r(var,reg) mmx_m2r (packsswb, var, reg) | |||
| #define packsswb_r2r(regs,regd) mmx_r2r (packsswb, regs, regd) | |||
| #define packuswb_m2r(var,reg) mmx_m2r (packuswb, var, reg) | |||
| #define packuswb_r2r(regs,regd) mmx_r2r (packuswb, regs, regd) | |||
| #define paddb_m2r(var,reg) mmx_m2r (paddb, var, reg) | |||
| #define paddb_r2r(regs,regd) mmx_r2r (paddb, regs, regd) | |||
| #define paddd_m2r(var,reg) mmx_m2r (paddd, var, reg) | |||
| #define paddd_r2r(regs,regd) mmx_r2r (paddd, regs, regd) | |||
| #define paddw_m2r(var,reg) mmx_m2r (paddw, var, reg) | |||
| #define paddw_r2r(regs,regd) mmx_r2r (paddw, regs, regd) | |||
| #define paddsb_m2r(var,reg) mmx_m2r (paddsb, var, reg) | |||
| #define paddsb_r2r(regs,regd) mmx_r2r (paddsb, regs, regd) | |||
| #define paddsw_m2r(var,reg) mmx_m2r (paddsw, var, reg) | |||
| #define paddsw_r2r(regs,regd) mmx_r2r (paddsw, regs, regd) | |||
| #define paddusb_m2r(var,reg) mmx_m2r (paddusb, var, reg) | |||
| #define paddusb_r2r(regs,regd) mmx_r2r (paddusb, regs, regd) | |||
| #define paddusw_m2r(var,reg) mmx_m2r (paddusw, var, reg) | |||
| #define paddusw_r2r(regs,regd) mmx_r2r (paddusw, regs, regd) | |||
| #define pand_m2r(var,reg) mmx_m2r (pand, var, reg) | |||
| #define pand_r2r(regs,regd) mmx_r2r (pand, regs, regd) | |||
| #define pandn_m2r(var,reg) mmx_m2r (pandn, var, reg) | |||
| #define pandn_r2r(regs,regd) mmx_r2r (pandn, regs, regd) | |||
| #define pcmpeqb_m2r(var,reg) mmx_m2r (pcmpeqb, var, reg) | |||
| #define pcmpeqb_r2r(regs,regd) mmx_r2r (pcmpeqb, regs, regd) | |||
| #define pcmpeqd_m2r(var,reg) mmx_m2r (pcmpeqd, var, reg) | |||
| #define pcmpeqd_r2r(regs,regd) mmx_r2r (pcmpeqd, regs, regd) | |||
| #define pcmpeqw_m2r(var,reg) mmx_m2r (pcmpeqw, var, reg) | |||
| #define pcmpeqw_r2r(regs,regd) mmx_r2r (pcmpeqw, regs, regd) | |||
| #define pcmpgtb_m2r(var,reg) mmx_m2r (pcmpgtb, var, reg) | |||
| #define pcmpgtb_r2r(regs,regd) mmx_r2r (pcmpgtb, regs, regd) | |||
| #define pcmpgtd_m2r(var,reg) mmx_m2r (pcmpgtd, var, reg) | |||
| #define pcmpgtd_r2r(regs,regd) mmx_r2r (pcmpgtd, regs, regd) | |||
| #define pcmpgtw_m2r(var,reg) mmx_m2r (pcmpgtw, var, reg) | |||
| #define pcmpgtw_r2r(regs,regd) mmx_r2r (pcmpgtw, regs, regd) | |||
| #define pmaddwd_m2r(var,reg) mmx_m2r (pmaddwd, var, reg) | |||
| #define pmaddwd_r2r(regs,regd) mmx_r2r (pmaddwd, regs, regd) | |||
| #define pmulhw_m2r(var,reg) mmx_m2r (pmulhw, var, reg) | |||
| #define pmulhw_r2r(regs,regd) mmx_r2r (pmulhw, regs, regd) | |||
| #define pmullw_m2r(var,reg) mmx_m2r (pmullw, var, reg) | |||
| #define pmullw_r2r(regs,regd) mmx_r2r (pmullw, regs, regd) | |||
| #define por_m2r(var,reg) mmx_m2r (por, var, reg) | |||
| #define por_r2r(regs,regd) mmx_r2r (por, regs, regd) | |||
| #define pslld_i2r(imm,reg) mmx_i2r (pslld, imm, reg) | |||
| #define pslld_m2r(var,reg) mmx_m2r (pslld, var, reg) | |||
| #define pslld_r2r(regs,regd) mmx_r2r (pslld, regs, regd) | |||
| #define psllq_i2r(imm,reg) mmx_i2r (psllq, imm, reg) | |||
| #define psllq_m2r(var,reg) mmx_m2r (psllq, var, reg) | |||
| #define psllq_r2r(regs,regd) mmx_r2r (psllq, regs, regd) | |||
| #define psllw_i2r(imm,reg) mmx_i2r (psllw, imm, reg) | |||
| #define psllw_m2r(var,reg) mmx_m2r (psllw, var, reg) | |||
| #define psllw_r2r(regs,regd) mmx_r2r (psllw, regs, regd) | |||
| #define psrad_i2r(imm,reg) mmx_i2r (psrad, imm, reg) | |||
| #define psrad_m2r(var,reg) mmx_m2r (psrad, var, reg) | |||
| #define psrad_r2r(regs,regd) mmx_r2r (psrad, regs, regd) | |||
| #define psraw_i2r(imm,reg) mmx_i2r (psraw, imm, reg) | |||
| #define psraw_m2r(var,reg) mmx_m2r (psraw, var, reg) | |||
| #define psraw_r2r(regs,regd) mmx_r2r (psraw, regs, regd) | |||
| #define psrld_i2r(imm,reg) mmx_i2r (psrld, imm, reg) | |||
| #define psrld_m2r(var,reg) mmx_m2r (psrld, var, reg) | |||
| #define psrld_r2r(regs,regd) mmx_r2r (psrld, regs, regd) | |||
| #define psrlq_i2r(imm,reg) mmx_i2r (psrlq, imm, reg) | |||
| #define psrlq_m2r(var,reg) mmx_m2r (psrlq, var, reg) | |||
| #define psrlq_r2r(regs,regd) mmx_r2r (psrlq, regs, regd) | |||
| #define psrlw_i2r(imm,reg) mmx_i2r (psrlw, imm, reg) | |||
| #define psrlw_m2r(var,reg) mmx_m2r (psrlw, var, reg) | |||
| #define psrlw_r2r(regs,regd) mmx_r2r (psrlw, regs, regd) | |||
| #define psubb_m2r(var,reg) mmx_m2r (psubb, var, reg) | |||
| #define psubb_r2r(regs,regd) mmx_r2r (psubb, regs, regd) | |||
| #define psubd_m2r(var,reg) mmx_m2r (psubd, var, reg) | |||
| #define psubd_r2r(regs,regd) mmx_r2r (psubd, regs, regd) | |||
| #define psubw_m2r(var,reg) mmx_m2r (psubw, var, reg) | |||
| #define psubw_r2r(regs,regd) mmx_r2r (psubw, regs, regd) | |||
| #define psubsb_m2r(var,reg) mmx_m2r (psubsb, var, reg) | |||
| #define psubsb_r2r(regs,regd) mmx_r2r (psubsb, regs, regd) | |||
| #define psubsw_m2r(var,reg) mmx_m2r (psubsw, var, reg) | |||
| #define psubsw_r2r(regs,regd) mmx_r2r (psubsw, regs, regd) | |||
| #define psubusb_m2r(var,reg) mmx_m2r (psubusb, var, reg) | |||
| #define psubusb_r2r(regs,regd) mmx_r2r (psubusb, regs, regd) | |||
| #define psubusw_m2r(var,reg) mmx_m2r (psubusw, var, reg) | |||
| #define psubusw_r2r(regs,regd) mmx_r2r (psubusw, regs, regd) | |||
| #define punpckhbw_m2r(var,reg) mmx_m2r (punpckhbw, var, reg) | |||
| #define punpckhbw_r2r(regs,regd) mmx_r2r (punpckhbw, regs, regd) | |||
| #define punpckhdq_m2r(var,reg) mmx_m2r (punpckhdq, var, reg) | |||
| #define punpckhdq_r2r(regs,regd) mmx_r2r (punpckhdq, regs, regd) | |||
| #define punpckhwd_m2r(var,reg) mmx_m2r (punpckhwd, var, reg) | |||
| #define punpckhwd_r2r(regs,regd) mmx_r2r (punpckhwd, regs, regd) | |||
| #define punpcklbw_m2r(var,reg) mmx_m2r (punpcklbw, var, reg) | |||
| #define punpcklbw_r2r(regs,regd) mmx_r2r (punpcklbw, regs, regd) | |||
| #define punpckldq_m2r(var,reg) mmx_m2r (punpckldq, var, reg) | |||
| #define punpckldq_r2r(regs,regd) mmx_r2r (punpckldq, regs, regd) | |||
| #define punpcklwd_m2r(var,reg) mmx_m2r (punpcklwd, var, reg) | |||
| #define punpcklwd_r2r(regs,regd) mmx_r2r (punpcklwd, regs, regd) | |||
| #define pxor_m2r(var,reg) mmx_m2r (pxor, var, reg) | |||
| #define pxor_r2r(regs,regd) mmx_r2r (pxor, regs, regd) | |||
| typedef union { | |||
| long long q; /* Quadword (64-bit) value */ | |||
| unsigned long long uq; /* Unsigned Quadword */ | |||
| int d[2]; /* 2 Doubleword (32-bit) values */ | |||
| unsigned int ud[2]; /* 2 Unsigned Doubleword */ | |||
| short w[4]; /* 4 Word (16-bit) values */ | |||
| unsigned short uw[4]; /* 4 Unsigned Word */ | |||
| char b[8]; /* 8 Byte (8-bit) values */ | |||
| unsigned char ub[8]; /* 8 Unsigned Byte */ | |||
| float s[2]; /* Single-precision (32-bit) value */ | |||
| } mmx_t; /* On an 8-byte (64-bit) boundary */ | |||
| #define mmx_i2r(op,imm,reg) \ | |||
| __asm__ __volatile__ (#op " %0, %%" #reg \ | |||
| : /* nothing */ \ | |||
| : "i" (imm) ) | |||
| #define mmx_m2r(op,mem,reg) \ | |||
| __asm__ __volatile__ (#op " %0, %%" #reg \ | |||
| : /* nothing */ \ | |||
| : "m" (mem)) | |||
| #define mmx_r2m(op,reg,mem) \ | |||
| __asm__ __volatile__ (#op " %%" #reg ", %0" \ | |||
| : "=m" (mem) \ | |||
| : /* nothing */ ) | |||
| #define mmx_r2r(op,regs,regd) \ | |||
| __asm__ __volatile__ (#op " %" #regs ", %" #regd) | |||
| #define emms() __asm__ __volatile__ ("emms") | |||
| #define movd_m2r(var,reg) mmx_m2r (movd, var, reg) | |||
| #define movd_r2m(reg,var) mmx_r2m (movd, reg, var) | |||
| #define movd_r2r(regs,regd) mmx_r2r (movd, regs, regd) | |||
| #define movq_m2r(var,reg) mmx_m2r (movq, var, reg) | |||
| #define movq_r2m(reg,var) mmx_r2m (movq, reg, var) | |||
| #define movq_r2r(regs,regd) mmx_r2r (movq, regs, regd) | |||
| #define packssdw_m2r(var,reg) mmx_m2r (packssdw, var, reg) | |||
| #define packssdw_r2r(regs,regd) mmx_r2r (packssdw, regs, regd) | |||
| #define packsswb_m2r(var,reg) mmx_m2r (packsswb, var, reg) | |||
| #define packsswb_r2r(regs,regd) mmx_r2r (packsswb, regs, regd) | |||
| #define packuswb_m2r(var,reg) mmx_m2r (packuswb, var, reg) | |||
| #define packuswb_r2r(regs,regd) mmx_r2r (packuswb, regs, regd) | |||
| #define paddb_m2r(var,reg) mmx_m2r (paddb, var, reg) | |||
| #define paddb_r2r(regs,regd) mmx_r2r (paddb, regs, regd) | |||
| #define paddd_m2r(var,reg) mmx_m2r (paddd, var, reg) | |||
| #define paddd_r2r(regs,regd) mmx_r2r (paddd, regs, regd) | |||
| #define paddw_m2r(var,reg) mmx_m2r (paddw, var, reg) | |||
| #define paddw_r2r(regs,regd) mmx_r2r (paddw, regs, regd) | |||
| #define paddsb_m2r(var,reg) mmx_m2r (paddsb, var, reg) | |||
| #define paddsb_r2r(regs,regd) mmx_r2r (paddsb, regs, regd) | |||
| #define paddsw_m2r(var,reg) mmx_m2r (paddsw, var, reg) | |||
| #define paddsw_r2r(regs,regd) mmx_r2r (paddsw, regs, regd) | |||
| #define paddusb_m2r(var,reg) mmx_m2r (paddusb, var, reg) | |||
| #define paddusb_r2r(regs,regd) mmx_r2r (paddusb, regs, regd) | |||
| #define paddusw_m2r(var,reg) mmx_m2r (paddusw, var, reg) | |||
| #define paddusw_r2r(regs,regd) mmx_r2r (paddusw, regs, regd) | |||
| #define pand_m2r(var,reg) mmx_m2r (pand, var, reg) | |||
| #define pand_r2r(regs,regd) mmx_r2r (pand, regs, regd) | |||
| #define pandn_m2r(var,reg) mmx_m2r (pandn, var, reg) | |||
| #define pandn_r2r(regs,regd) mmx_r2r (pandn, regs, regd) | |||
| #define pcmpeqb_m2r(var,reg) mmx_m2r (pcmpeqb, var, reg) | |||
| #define pcmpeqb_r2r(regs,regd) mmx_r2r (pcmpeqb, regs, regd) | |||
| #define pcmpeqd_m2r(var,reg) mmx_m2r (pcmpeqd, var, reg) | |||
| #define pcmpeqd_r2r(regs,regd) mmx_r2r (pcmpeqd, regs, regd) | |||
| #define pcmpeqw_m2r(var,reg) mmx_m2r (pcmpeqw, var, reg) | |||
| #define pcmpeqw_r2r(regs,regd) mmx_r2r (pcmpeqw, regs, regd) | |||
| #define pcmpgtb_m2r(var,reg) mmx_m2r (pcmpgtb, var, reg) | |||
| #define pcmpgtb_r2r(regs,regd) mmx_r2r (pcmpgtb, regs, regd) | |||
| #define pcmpgtd_m2r(var,reg) mmx_m2r (pcmpgtd, var, reg) | |||
| #define pcmpgtd_r2r(regs,regd) mmx_r2r (pcmpgtd, regs, regd) | |||
| #define pcmpgtw_m2r(var,reg) mmx_m2r (pcmpgtw, var, reg) | |||
| #define pcmpgtw_r2r(regs,regd) mmx_r2r (pcmpgtw, regs, regd) | |||
| #define pmaddwd_m2r(var,reg) mmx_m2r (pmaddwd, var, reg) | |||
| #define pmaddwd_r2r(regs,regd) mmx_r2r (pmaddwd, regs, regd) | |||
| #define pmulhw_m2r(var,reg) mmx_m2r (pmulhw, var, reg) | |||
| #define pmulhw_r2r(regs,regd) mmx_r2r (pmulhw, regs, regd) | |||
| #define pmullw_m2r(var,reg) mmx_m2r (pmullw, var, reg) | |||
| #define pmullw_r2r(regs,regd) mmx_r2r (pmullw, regs, regd) | |||
| #define por_m2r(var,reg) mmx_m2r (por, var, reg) | |||
| #define por_r2r(regs,regd) mmx_r2r (por, regs, regd) | |||
| #define pslld_i2r(imm,reg) mmx_i2r (pslld, imm, reg) | |||
| #define pslld_m2r(var,reg) mmx_m2r (pslld, var, reg) | |||
| #define pslld_r2r(regs,regd) mmx_r2r (pslld, regs, regd) | |||
| #define psllq_i2r(imm,reg) mmx_i2r (psllq, imm, reg) | |||
| #define psllq_m2r(var,reg) mmx_m2r (psllq, var, reg) | |||
| #define psllq_r2r(regs,regd) mmx_r2r (psllq, regs, regd) | |||
| #define psllw_i2r(imm,reg) mmx_i2r (psllw, imm, reg) | |||
| #define psllw_m2r(var,reg) mmx_m2r (psllw, var, reg) | |||
| #define psllw_r2r(regs,regd) mmx_r2r (psllw, regs, regd) | |||
| #define psrad_i2r(imm,reg) mmx_i2r (psrad, imm, reg) | |||
| #define psrad_m2r(var,reg) mmx_m2r (psrad, var, reg) | |||
| #define psrad_r2r(regs,regd) mmx_r2r (psrad, regs, regd) | |||
| #define psraw_i2r(imm,reg) mmx_i2r (psraw, imm, reg) | |||
| #define psraw_m2r(var,reg) mmx_m2r (psraw, var, reg) | |||
| #define psraw_r2r(regs,regd) mmx_r2r (psraw, regs, regd) | |||
| #define psrld_i2r(imm,reg) mmx_i2r (psrld, imm, reg) | |||
| #define psrld_m2r(var,reg) mmx_m2r (psrld, var, reg) | |||
| #define psrld_r2r(regs,regd) mmx_r2r (psrld, regs, regd) | |||
| #define psrlq_i2r(imm,reg) mmx_i2r (psrlq, imm, reg) | |||
| #define psrlq_m2r(var,reg) mmx_m2r (psrlq, var, reg) | |||
| #define psrlq_r2r(regs,regd) mmx_r2r (psrlq, regs, regd) | |||
| #define psrlw_i2r(imm,reg) mmx_i2r (psrlw, imm, reg) | |||
| #define psrlw_m2r(var,reg) mmx_m2r (psrlw, var, reg) | |||
| #define psrlw_r2r(regs,regd) mmx_r2r (psrlw, regs, regd) | |||
| #define psubb_m2r(var,reg) mmx_m2r (psubb, var, reg) | |||
| #define psubb_r2r(regs,regd) mmx_r2r (psubb, regs, regd) | |||
| #define psubd_m2r(var,reg) mmx_m2r (psubd, var, reg) | |||
| #define psubd_r2r(regs,regd) mmx_r2r (psubd, regs, regd) | |||
| #define psubw_m2r(var,reg) mmx_m2r (psubw, var, reg) | |||
| #define psubw_r2r(regs,regd) mmx_r2r (psubw, regs, regd) | |||
| #define psubsb_m2r(var,reg) mmx_m2r (psubsb, var, reg) | |||
| #define psubsb_r2r(regs,regd) mmx_r2r (psubsb, regs, regd) | |||
| #define psubsw_m2r(var,reg) mmx_m2r (psubsw, var, reg) | |||
| #define psubsw_r2r(regs,regd) mmx_r2r (psubsw, regs, regd) | |||
| #define psubusb_m2r(var,reg) mmx_m2r (psubusb, var, reg) | |||
| #define psubusb_r2r(regs,regd) mmx_r2r (psubusb, regs, regd) | |||
| #define psubusw_m2r(var,reg) mmx_m2r (psubusw, var, reg) | |||
| #define psubusw_r2r(regs,regd) mmx_r2r (psubusw, regs, regd) | |||
| #define punpckhbw_m2r(var,reg) mmx_m2r (punpckhbw, var, reg) | |||
| #define punpckhbw_r2r(regs,regd) mmx_r2r (punpckhbw, regs, regd) | |||
| #define punpckhdq_m2r(var,reg) mmx_m2r (punpckhdq, var, reg) | |||
| #define punpckhdq_r2r(regs,regd) mmx_r2r (punpckhdq, regs, regd) | |||
| #define punpckhwd_m2r(var,reg) mmx_m2r (punpckhwd, var, reg) | |||
| #define punpckhwd_r2r(regs,regd) mmx_r2r (punpckhwd, regs, regd) | |||
| #define punpcklbw_m2r(var,reg) mmx_m2r (punpcklbw, var, reg) | |||
| #define punpcklbw_r2r(regs,regd) mmx_r2r (punpcklbw, regs, regd) | |||
| #define punpckldq_m2r(var,reg) mmx_m2r (punpckldq, var, reg) | |||
| #define punpckldq_r2r(regs,regd) mmx_r2r (punpckldq, regs, regd) | |||
| #define punpcklwd_m2r(var,reg) mmx_m2r (punpcklwd, var, reg) | |||
| #define punpcklwd_r2r(regs,regd) mmx_r2r (punpcklwd, regs, regd) | |||
| #define pxor_m2r(var,reg) mmx_m2r (pxor, var, reg) | |||
| #define pxor_r2r(regs,regd) mmx_r2r (pxor, regs, regd) | |||
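Each of these wrappers simply stringizes the mnemonic and its operands into an inline-asm statement, so for example movq_m2r(*src, mm0) expands to __asm__ __volatile__("movq %0, %%mm0" : : "m"(*src)); that lets MMX code read like ordinary C calls. A small hedged example of using them (assumes this header is included and an MMX-capable CPU; not code from the patch):

    /* Add four int16_t lanes with one paddw, using the wrappers above. */
    static void add4_words(int16_t dst[4], const int16_t a[4], const int16_t b[4])
    {
        movq_m2r(*(const mmx_t *)a, mm0);    /* mm0 = a[3..0]            */
        paddw_m2r(*(const mmx_t *)b, mm0);   /* mm0 += b[3..0], wrapping */
        movq_r2m(mm0, *(mmx_t *)dst);        /* store the four sums      */
        emms();                              /* restore FPU state        */
    }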
| /* 3DNOW extensions */ | |||
| #define pavgusb_m2r(var,reg) mmx_m2r (pavgusb, var, reg) | |||
| #define pavgusb_r2r(regs,regd) mmx_r2r (pavgusb, regs, regd) | |||
| #define pavgusb_m2r(var,reg) mmx_m2r (pavgusb, var, reg) | |||
| #define pavgusb_r2r(regs,regd) mmx_r2r (pavgusb, regs, regd) | |||
| /* AMD MMX extensions - also available in intel SSE */ | |||
| #define mmx_m2ri(op,mem,reg,imm) \ | |||
| #define mmx_m2ri(op,mem,reg,imm) \ | |||
| __asm__ __volatile__ (#op " %1, %0, %%" #reg \ | |||
| : /* nothing */ \ | |||
| : "X" (mem), "X" (imm)) | |||
| #define mmx_r2ri(op,regs,regd,imm) \ | |||
| #define mmx_r2ri(op,regs,regd,imm) \ | |||
| __asm__ __volatile__ (#op " %0, %%" #regs ", %%" #regd \ | |||
| : /* nothing */ \ | |||
| : "X" (imm) ) | |||
| #define mmx_fetch(mem,hint) \ | |||
| __asm__ __volatile__ ("prefetch" #hint " %0" \ | |||
| : /* nothing */ \ | |||
| : "X" (mem)) | |||
| #define mmx_fetch(mem,hint) \ | |||
| __asm__ __volatile__ ("prefetch" #hint " %0" \ | |||
| : /* nothing */ \ | |||
| : "X" (mem)) | |||
| #define maskmovq(regs,maskreg) mmx_r2ri (maskmovq, regs, maskreg) | |||
| #define maskmovq(regs,maskreg) mmx_r2ri (maskmovq, regs, maskreg) | |||
| #define movntq_r2m(mmreg,var) mmx_r2m (movntq, mmreg, var) | |||
| #define movntq_r2m(mmreg,var) mmx_r2m (movntq, mmreg, var) | |||
| #define pavgb_m2r(var,reg) mmx_m2r (pavgb, var, reg) | |||
| #define pavgb_r2r(regs,regd) mmx_r2r (pavgb, regs, regd) | |||
| #define pavgw_m2r(var,reg) mmx_m2r (pavgw, var, reg) | |||
| #define pavgw_r2r(regs,regd) mmx_r2r (pavgw, regs, regd) | |||
| #define pavgb_m2r(var,reg) mmx_m2r (pavgb, var, reg) | |||
| #define pavgb_r2r(regs,regd) mmx_r2r (pavgb, regs, regd) | |||
| #define pavgw_m2r(var,reg) mmx_m2r (pavgw, var, reg) | |||
| #define pavgw_r2r(regs,regd) mmx_r2r (pavgw, regs, regd) | |||
| #define pextrw_r2r(mmreg,reg,imm) mmx_r2ri (pextrw, mmreg, reg, imm) | |||
| #define pextrw_r2r(mmreg,reg,imm) mmx_r2ri (pextrw, mmreg, reg, imm) | |||
| #define pinsrw_r2r(reg,mmreg,imm) mmx_r2ri (pinsrw, reg, mmreg, imm) | |||
| #define pinsrw_r2r(reg,mmreg,imm) mmx_r2ri (pinsrw, reg, mmreg, imm) | |||
| #define pmaxsw_m2r(var,reg) mmx_m2r (pmaxsw, var, reg) | |||
| #define pmaxsw_r2r(regs,regd) mmx_r2r (pmaxsw, regs, regd) | |||
| #define pmaxsw_m2r(var,reg) mmx_m2r (pmaxsw, var, reg) | |||
| #define pmaxsw_r2r(regs,regd) mmx_r2r (pmaxsw, regs, regd) | |||
| #define pmaxub_m2r(var,reg) mmx_m2r (pmaxub, var, reg) | |||
| #define pmaxub_r2r(regs,regd) mmx_r2r (pmaxub, regs, regd) | |||
| #define pmaxub_m2r(var,reg) mmx_m2r (pmaxub, var, reg) | |||
| #define pmaxub_r2r(regs,regd) mmx_r2r (pmaxub, regs, regd) | |||
| #define pminsw_m2r(var,reg) mmx_m2r (pminsw, var, reg) | |||
| #define pminsw_r2r(regs,regd) mmx_r2r (pminsw, regs, regd) | |||
| #define pminsw_m2r(var,reg) mmx_m2r (pminsw, var, reg) | |||
| #define pminsw_r2r(regs,regd) mmx_r2r (pminsw, regs, regd) | |||
| #define pminub_m2r(var,reg) mmx_m2r (pminub, var, reg) | |||
| #define pminub_r2r(regs,regd) mmx_r2r (pminub, regs, regd) | |||
| #define pminub_m2r(var,reg) mmx_m2r (pminub, var, reg) | |||
| #define pminub_r2r(regs,regd) mmx_r2r (pminub, regs, regd) | |||
| #define pmovmskb(mmreg,reg) \ | |||
| __asm__ __volatile__ ("movmskps %" #mmreg ", %" #reg) | |||
| #define pmovmskb(mmreg,reg) \ | |||
| __asm__ __volatile__ ("movmskps %" #mmreg ", %" #reg) | |||
| #define pmulhuw_m2r(var,reg) mmx_m2r (pmulhuw, var, reg) | |||
| #define pmulhuw_r2r(regs,regd) mmx_r2r (pmulhuw, regs, regd) | |||
| #define pmulhuw_m2r(var,reg) mmx_m2r (pmulhuw, var, reg) | |||
| #define pmulhuw_r2r(regs,regd) mmx_r2r (pmulhuw, regs, regd) | |||
| #define prefetcht0(mem) mmx_fetch (mem, t0) | |||
| #define prefetcht1(mem) mmx_fetch (mem, t1) | |||
| #define prefetcht2(mem) mmx_fetch (mem, t2) | |||
| #define prefetchnta(mem) mmx_fetch (mem, nta) | |||
| #define prefetcht0(mem) mmx_fetch (mem, t0) | |||
| #define prefetcht1(mem) mmx_fetch (mem, t1) | |||
| #define prefetcht2(mem) mmx_fetch (mem, t2) | |||
| #define prefetchnta(mem) mmx_fetch (mem, nta) | |||
| #define psadbw_m2r(var,reg) mmx_m2r (psadbw, var, reg) | |||
| #define psadbw_r2r(regs,regd) mmx_r2r (psadbw, regs, regd) | |||
| #define psadbw_m2r(var,reg) mmx_m2r (psadbw, var, reg) | |||
| #define psadbw_r2r(regs,regd) mmx_r2r (psadbw, regs, regd) | |||
| #define pshufw_m2r(var,reg,imm) mmx_m2ri(pshufw, var, reg, imm) | |||
| #define pshufw_r2r(regs,regd,imm) mmx_r2ri(pshufw, regs, regd, imm) | |||
| #define pshufw_m2r(var,reg,imm) mmx_m2ri(pshufw, var, reg, imm) | |||
| #define pshufw_r2r(regs,regd,imm) mmx_r2ri(pshufw, regs, regd, imm) | |||
| #define sfence() __asm__ __volatile__ ("sfence\n\t") | |||
| #define sfence() __asm__ __volatile__ ("sfence\n\t") | |||
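The _r2ri and _m2ri forms thread an immediate selector straight through to the instruction; pshufw, used heavily in the MMXEXT IDCT above with immediates 0x4e (swap the two dwords) and 0xb1 (swap the words inside each dword), picks destination word i from source word (imm >> 2*i) & 3. A scalar model of that selection, purely for reference:

    #include <stdint.h>

    /* Illustrative scalar model of pshufw's word selection. */
    static void pshufw_ref(uint16_t dst[4], const uint16_t src[4], int imm)
    {
        uint16_t tmp[4];
        for (int i = 0; i < 4; i++)
            tmp[i] = src[(imm >> (2 * i)) & 3];
        for (int i = 0; i < 4; i++)
            dst[i] = tmp[i];
    }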
| /* SSE2 */ | |||
| #define pshufhw_m2r(var,reg,imm) mmx_m2ri(pshufhw, var, reg, imm) | |||
| #define pshufhw_r2r(regs,regd,imm) mmx_r2ri(pshufhw, regs, regd, imm) | |||
| #define pshuflw_m2r(var,reg,imm) mmx_m2ri(pshuflw, var, reg, imm) | |||
| #define pshuflw_r2r(regs,regd,imm) mmx_r2ri(pshuflw, regs, regd, imm) | |||
| #define pshufhw_m2r(var,reg,imm) mmx_m2ri(pshufhw, var, reg, imm) | |||
| #define pshufhw_r2r(regs,regd,imm) mmx_r2ri(pshufhw, regs, regd, imm) | |||
| #define pshuflw_m2r(var,reg,imm) mmx_m2ri(pshuflw, var, reg, imm) | |||
| #define pshuflw_r2r(regs,regd,imm) mmx_r2ri(pshuflw, regs, regd, imm) | |||
| #define pshufd_r2r(regs,regd,imm) mmx_r2ri(pshufd, regs, regd, imm) | |||
| #define pshufd_r2r(regs,regd,imm) mmx_r2ri(pshufd, regs, regd, imm) | |||
| #define movdqa_m2r(var,reg) mmx_m2r (movdqa, var, reg) | |||
| #define movdqa_r2m(reg,var) mmx_r2m (movdqa, reg, var) | |||
| #define movdqa_r2r(regs,regd) mmx_r2r (movdqa, regs, regd) | |||
| #define movdqu_m2r(var,reg) mmx_m2r (movdqu, var, reg) | |||
| #define movdqu_r2m(reg,var) mmx_r2m (movdqu, reg, var) | |||
| #define movdqu_r2r(regs,regd) mmx_r2r (movdqu, regs, regd) | |||
| #define movdqa_m2r(var,reg) mmx_m2r (movdqa, var, reg) | |||
| #define movdqa_r2m(reg,var) mmx_r2m (movdqa, reg, var) | |||
| #define movdqa_r2r(regs,regd) mmx_r2r (movdqa, regs, regd) | |||
| #define movdqu_m2r(var,reg) mmx_m2r (movdqu, var, reg) | |||
| #define movdqu_r2m(reg,var) mmx_r2m (movdqu, reg, var) | |||
| #define movdqu_r2r(regs,regd) mmx_r2r (movdqu, regs, regd) | |||
| #define pmullw_r2m(reg,var) mmx_r2m (pmullw, reg, var) | |||
| #define pmullw_r2m(reg,var) mmx_r2m (pmullw, reg, var) | |||
| #define pslldq_i2r(imm,reg) mmx_i2r (pslldq, imm, reg) | |||
| #define psrldq_i2r(imm,reg) mmx_i2r (psrldq, imm, reg) | |||
| #define pslldq_i2r(imm,reg) mmx_i2r (pslldq, imm, reg) | |||
| #define psrldq_i2r(imm,reg) mmx_i2r (psrldq, imm, reg) | |||
| #define punpcklqdq_r2r(regs,regd) mmx_r2r (punpcklqdq, regs, regd) | |||
| #define punpckhqdq_r2r(regs,regd) mmx_r2r (punpckhqdq, regs, regd) | |||
| #define punpcklqdq_r2r(regs,regd) mmx_r2r (punpcklqdq, regs, regd) | |||
| #define punpckhqdq_r2r(regs,regd) mmx_r2r (punpckhqdq, regs, regd) | |||
| #endif /* AVCODEC_I386MMX_H */ | |||
| @@ -34,33 +34,33 @@ static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) | |||
| { | |||
| long len= -(stride*h); | |||
| asm volatile( | |||
| ".balign 16 \n\t" | |||
| "1: \n\t" | |||
| "movq (%1, %%"REG_a"), %%mm0 \n\t" | |||
| "movq (%2, %%"REG_a"), %%mm2 \n\t" | |||
| "movq (%2, %%"REG_a"), %%mm4 \n\t" | |||
| "add %3, %%"REG_a" \n\t" | |||
| "psubusb %%mm0, %%mm2 \n\t" | |||
| "psubusb %%mm4, %%mm0 \n\t" | |||
| "movq (%1, %%"REG_a"), %%mm1 \n\t" | |||
| "movq (%2, %%"REG_a"), %%mm3 \n\t" | |||
| "movq (%2, %%"REG_a"), %%mm5 \n\t" | |||
| "psubusb %%mm1, %%mm3 \n\t" | |||
| "psubusb %%mm5, %%mm1 \n\t" | |||
| "por %%mm2, %%mm0 \n\t" | |||
| "por %%mm1, %%mm3 \n\t" | |||
| "movq %%mm0, %%mm1 \n\t" | |||
| "movq %%mm3, %%mm2 \n\t" | |||
| "punpcklbw %%mm7, %%mm0 \n\t" | |||
| "punpckhbw %%mm7, %%mm1 \n\t" | |||
| "punpcklbw %%mm7, %%mm3 \n\t" | |||
| "punpckhbw %%mm7, %%mm2 \n\t" | |||
| "paddw %%mm1, %%mm0 \n\t" | |||
| "paddw %%mm3, %%mm2 \n\t" | |||
| "paddw %%mm2, %%mm0 \n\t" | |||
| "paddw %%mm0, %%mm6 \n\t" | |||
| "add %3, %%"REG_a" \n\t" | |||
| " js 1b \n\t" | |||
| ".balign 16 \n\t" | |||
| "1: \n\t" | |||
| "movq (%1, %%"REG_a"), %%mm0 \n\t" | |||
| "movq (%2, %%"REG_a"), %%mm2 \n\t" | |||
| "movq (%2, %%"REG_a"), %%mm4 \n\t" | |||
| "add %3, %%"REG_a" \n\t" | |||
| "psubusb %%mm0, %%mm2 \n\t" | |||
| "psubusb %%mm4, %%mm0 \n\t" | |||
| "movq (%1, %%"REG_a"), %%mm1 \n\t" | |||
| "movq (%2, %%"REG_a"), %%mm3 \n\t" | |||
| "movq (%2, %%"REG_a"), %%mm5 \n\t" | |||
| "psubusb %%mm1, %%mm3 \n\t" | |||
| "psubusb %%mm5, %%mm1 \n\t" | |||
| "por %%mm2, %%mm0 \n\t" | |||
| "por %%mm1, %%mm3 \n\t" | |||
| "movq %%mm0, %%mm1 \n\t" | |||
| "movq %%mm3, %%mm2 \n\t" | |||
| "punpcklbw %%mm7, %%mm0 \n\t" | |||
| "punpckhbw %%mm7, %%mm1 \n\t" | |||
| "punpcklbw %%mm7, %%mm3 \n\t" | |||
| "punpckhbw %%mm7, %%mm2 \n\t" | |||
| "paddw %%mm1, %%mm0 \n\t" | |||
| "paddw %%mm3, %%mm2 \n\t" | |||
| "paddw %%mm2, %%mm0 \n\t" | |||
| "paddw %%mm0, %%mm6 \n\t" | |||
| "add %3, %%"REG_a" \n\t" | |||
| " js 1b \n\t" | |||
| : "+a" (len) | |||
| : "r" (blk1 - len), "r" (blk2 - len), "r" ((long)stride) | |||
| ); | |||
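This hunk and the ones that follow re-indent the inline-assembly SAD (sum of absolute differences) primitives, apparently from libavcodec/i386/motion_est_mmx.c; nothing but whitespace changes. For readability: the loop above computes |blk1 - blk2| per byte with two saturating subtractions (psubusb) OR-ed together, widens the bytes against the zeroed %mm7 and accumulates 16-bit partial sums in %mm6, which sum_mmx() reads out later. A plain-C model of the arithmetic (illustrative, not part of the patch; sad8_ref is a made-up name):

#include <stdint.h>
#include <stdlib.h>

/* Scalar model of sad8_1_mmx(): 8-pixel-wide sum of absolute differences
 * over h rows.  The MMX loop handles two rows per iteration and keeps the
 * running total in %mm6 instead of returning it. */
static int sad8_ref(const uint8_t *blk1, const uint8_t *blk2,
                    int stride, int h)
{
    int sum = 0;
    for (int y = 0; y < h; y++) {
        for (int x = 0; x < 8; x++)
            sum += abs(blk1[x] - blk2[x]);
        blk1 += stride;
        blk2 += stride;
    }
    return sum;
}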
| @@ -70,19 +70,19 @@ static inline void sad8_1_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) | |||
| { | |||
| long len= -(stride*h); | |||
| asm volatile( | |||
| ".balign 16 \n\t" | |||
| "1: \n\t" | |||
| "movq (%1, %%"REG_a"), %%mm0 \n\t" | |||
| "movq (%2, %%"REG_a"), %%mm2 \n\t" | |||
| "psadbw %%mm2, %%mm0 \n\t" | |||
| "add %3, %%"REG_a" \n\t" | |||
| "movq (%1, %%"REG_a"), %%mm1 \n\t" | |||
| "movq (%2, %%"REG_a"), %%mm3 \n\t" | |||
| "psadbw %%mm1, %%mm3 \n\t" | |||
| "paddw %%mm3, %%mm0 \n\t" | |||
| "paddw %%mm0, %%mm6 \n\t" | |||
| "add %3, %%"REG_a" \n\t" | |||
| " js 1b \n\t" | |||
| ".balign 16 \n\t" | |||
| "1: \n\t" | |||
| "movq (%1, %%"REG_a"), %%mm0 \n\t" | |||
| "movq (%2, %%"REG_a"), %%mm2 \n\t" | |||
| "psadbw %%mm2, %%mm0 \n\t" | |||
| "add %3, %%"REG_a" \n\t" | |||
| "movq (%1, %%"REG_a"), %%mm1 \n\t" | |||
| "movq (%2, %%"REG_a"), %%mm3 \n\t" | |||
| "psadbw %%mm1, %%mm3 \n\t" | |||
| "paddw %%mm3, %%mm0 \n\t" | |||
| "paddw %%mm0, %%mm6 \n\t" | |||
| "add %3, %%"REG_a" \n\t" | |||
| " js 1b \n\t" | |||
| : "+a" (len) | |||
| : "r" (blk1 - len), "r" (blk2 - len), "r" ((long)stride) | |||
| ); | |||
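The MMX2 version is the same loop collapsed around psadbw, which produces the eight byte-wise absolute differences and their sum in a single instruction, so the whole unpack/widen sequence disappears. With intrinsics, one row of that idea looks roughly like this (illustrative sketch assuming an SSE-capable toolchain; the function name is made up):

#include <stdint.h>
#include <xmmintrin.h>           /* _mm_sad_pu8 needs the MMX2/SSE extensions */

/* One row of the MMX2 SAD: PSADBW returns |a0-b0| + ... + |a7-b7| directly. */
static int sad8_row_psadbw(const uint8_t *a, const uint8_t *b)
{
    __m64 va = *(const __m64 *)a;
    __m64 vb = *(const __m64 *)b;
    int sad  = _mm_cvtsi64_si32(_mm_sad_pu8(va, vb));
    _mm_empty();                 /* leave the FPU/MMX state clean */
    return sad;
}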
| @@ -92,23 +92,23 @@ static inline void sad8_2_mmx2(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, in | |||
| { | |||
| long len= -(stride*h); | |||
| asm volatile( | |||
| ".balign 16 \n\t" | |||
| "1: \n\t" | |||
| "movq (%1, %%"REG_a"), %%mm0 \n\t" | |||
| "movq (%2, %%"REG_a"), %%mm2 \n\t" | |||
| "pavgb %%mm2, %%mm0 \n\t" | |||
| "movq (%3, %%"REG_a"), %%mm2 \n\t" | |||
| "psadbw %%mm2, %%mm0 \n\t" | |||
| "add %4, %%"REG_a" \n\t" | |||
| "movq (%1, %%"REG_a"), %%mm1 \n\t" | |||
| "movq (%2, %%"REG_a"), %%mm3 \n\t" | |||
| "pavgb %%mm1, %%mm3 \n\t" | |||
| "movq (%3, %%"REG_a"), %%mm1 \n\t" | |||
| "psadbw %%mm1, %%mm3 \n\t" | |||
| "paddw %%mm3, %%mm0 \n\t" | |||
| "paddw %%mm0, %%mm6 \n\t" | |||
| "add %4, %%"REG_a" \n\t" | |||
| " js 1b \n\t" | |||
| ".balign 16 \n\t" | |||
| "1: \n\t" | |||
| "movq (%1, %%"REG_a"), %%mm0 \n\t" | |||
| "movq (%2, %%"REG_a"), %%mm2 \n\t" | |||
| "pavgb %%mm2, %%mm0 \n\t" | |||
| "movq (%3, %%"REG_a"), %%mm2 \n\t" | |||
| "psadbw %%mm2, %%mm0 \n\t" | |||
| "add %4, %%"REG_a" \n\t" | |||
| "movq (%1, %%"REG_a"), %%mm1 \n\t" | |||
| "movq (%2, %%"REG_a"), %%mm3 \n\t" | |||
| "pavgb %%mm1, %%mm3 \n\t" | |||
| "movq (%3, %%"REG_a"), %%mm1 \n\t" | |||
| "psadbw %%mm1, %%mm3 \n\t" | |||
| "paddw %%mm3, %%mm0 \n\t" | |||
| "paddw %%mm0, %%mm6 \n\t" | |||
| "add %4, %%"REG_a" \n\t" | |||
| " js 1b \n\t" | |||
| : "+a" (len) | |||
| : "r" (blk1a - len), "r" (blk1b -len), "r" (blk2 - len), "r" ((long)stride) | |||
| ); | |||
| @@ -118,34 +118,34 @@ static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) | |||
| { //FIXME reuse src | |||
| long len= -(stride*h); | |||
| asm volatile( | |||
| ".balign 16 \n\t" | |||
| "movq "MANGLE(bone)", %%mm5 \n\t" | |||
| "1: \n\t" | |||
| "movq (%1, %%"REG_a"), %%mm0 \n\t" | |||
| "movq (%2, %%"REG_a"), %%mm2 \n\t" | |||
| "movq 1(%1, %%"REG_a"), %%mm1 \n\t" | |||
| "movq 1(%2, %%"REG_a"), %%mm3 \n\t" | |||
| "pavgb %%mm2, %%mm0 \n\t" | |||
| "pavgb %%mm1, %%mm3 \n\t" | |||
| "psubusb %%mm5, %%mm3 \n\t" | |||
| "pavgb %%mm3, %%mm0 \n\t" | |||
| "movq (%3, %%"REG_a"), %%mm2 \n\t" | |||
| "psadbw %%mm2, %%mm0 \n\t" | |||
| "add %4, %%"REG_a" \n\t" | |||
| "movq (%1, %%"REG_a"), %%mm1 \n\t" | |||
| "movq (%2, %%"REG_a"), %%mm3 \n\t" | |||
| "movq 1(%1, %%"REG_a"), %%mm2 \n\t" | |||
| "movq 1(%2, %%"REG_a"), %%mm4 \n\t" | |||
| "pavgb %%mm3, %%mm1 \n\t" | |||
| "pavgb %%mm4, %%mm2 \n\t" | |||
| "psubusb %%mm5, %%mm2 \n\t" | |||
| "pavgb %%mm1, %%mm2 \n\t" | |||
| "movq (%3, %%"REG_a"), %%mm1 \n\t" | |||
| "psadbw %%mm1, %%mm2 \n\t" | |||
| "paddw %%mm2, %%mm0 \n\t" | |||
| "paddw %%mm0, %%mm6 \n\t" | |||
| "add %4, %%"REG_a" \n\t" | |||
| " js 1b \n\t" | |||
| ".balign 16 \n\t" | |||
| "movq "MANGLE(bone)", %%mm5 \n\t" | |||
| "1: \n\t" | |||
| "movq (%1, %%"REG_a"), %%mm0 \n\t" | |||
| "movq (%2, %%"REG_a"), %%mm2 \n\t" | |||
| "movq 1(%1, %%"REG_a"), %%mm1 \n\t" | |||
| "movq 1(%2, %%"REG_a"), %%mm3 \n\t" | |||
| "pavgb %%mm2, %%mm0 \n\t" | |||
| "pavgb %%mm1, %%mm3 \n\t" | |||
| "psubusb %%mm5, %%mm3 \n\t" | |||
| "pavgb %%mm3, %%mm0 \n\t" | |||
| "movq (%3, %%"REG_a"), %%mm2 \n\t" | |||
| "psadbw %%mm2, %%mm0 \n\t" | |||
| "add %4, %%"REG_a" \n\t" | |||
| "movq (%1, %%"REG_a"), %%mm1 \n\t" | |||
| "movq (%2, %%"REG_a"), %%mm3 \n\t" | |||
| "movq 1(%1, %%"REG_a"), %%mm2 \n\t" | |||
| "movq 1(%2, %%"REG_a"), %%mm4 \n\t" | |||
| "pavgb %%mm3, %%mm1 \n\t" | |||
| "pavgb %%mm4, %%mm2 \n\t" | |||
| "psubusb %%mm5, %%mm2 \n\t" | |||
| "pavgb %%mm1, %%mm2 \n\t" | |||
| "movq (%3, %%"REG_a"), %%mm1 \n\t" | |||
| "psadbw %%mm1, %%mm2 \n\t" | |||
| "paddw %%mm2, %%mm0 \n\t" | |||
| "paddw %%mm0, %%mm6 \n\t" | |||
| "add %4, %%"REG_a" \n\t" | |||
| " js 1b \n\t" | |||
| : "+a" (len) | |||
| : "r" (blk1 - len), "r" (blk1 - len + stride), "r" (blk2 - len), "r" ((long)stride) | |||
| ); | |||
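sad8_4_mmx2 scores the (x+1/2, y+1/2) half-pel position, where each reference sample is the average of a 2x2 neighbourhood. PAVGB rounds upwards, so nesting two averages would over-round; subtracting the constant bone (one in every byte, defined elsewhere in dsputil) from the intermediate average pulls the result back towards the exact value below (illustrative reference, not part of the patch):

/* Exact half-pel interpolation target that the nested PAVGB plus the
 * "bone" correction above approximate. */
static inline int avg2x2(int a, int b, int c, int d)
{
    return (a + b + c + d + 2) >> 2;
}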
| @@ -155,35 +155,35 @@ static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int | |||
| { | |||
| long len= -(stride*h); | |||
| asm volatile( | |||
| ".balign 16 \n\t" | |||
| "1: \n\t" | |||
| "movq (%1, %%"REG_a"), %%mm0 \n\t" | |||
| "movq (%2, %%"REG_a"), %%mm1 \n\t" | |||
| "movq (%1, %%"REG_a"), %%mm2 \n\t" | |||
| "movq (%2, %%"REG_a"), %%mm3 \n\t" | |||
| "punpcklbw %%mm7, %%mm0 \n\t" | |||
| "punpcklbw %%mm7, %%mm1 \n\t" | |||
| "punpckhbw %%mm7, %%mm2 \n\t" | |||
| "punpckhbw %%mm7, %%mm3 \n\t" | |||
| "paddw %%mm0, %%mm1 \n\t" | |||
| "paddw %%mm2, %%mm3 \n\t" | |||
| "movq (%3, %%"REG_a"), %%mm4 \n\t" | |||
| "movq (%3, %%"REG_a"), %%mm2 \n\t" | |||
| "paddw %%mm5, %%mm1 \n\t" | |||
| "paddw %%mm5, %%mm3 \n\t" | |||
| "psrlw $1, %%mm1 \n\t" | |||
| "psrlw $1, %%mm3 \n\t" | |||
| "packuswb %%mm3, %%mm1 \n\t" | |||
| "psubusb %%mm1, %%mm4 \n\t" | |||
| "psubusb %%mm2, %%mm1 \n\t" | |||
| "por %%mm4, %%mm1 \n\t" | |||
| "movq %%mm1, %%mm0 \n\t" | |||
| "punpcklbw %%mm7, %%mm0 \n\t" | |||
| "punpckhbw %%mm7, %%mm1 \n\t" | |||
| "paddw %%mm1, %%mm0 \n\t" | |||
| "paddw %%mm0, %%mm6 \n\t" | |||
| "add %4, %%"REG_a" \n\t" | |||
| " js 1b \n\t" | |||
| ".balign 16 \n\t" | |||
| "1: \n\t" | |||
| "movq (%1, %%"REG_a"), %%mm0 \n\t" | |||
| "movq (%2, %%"REG_a"), %%mm1 \n\t" | |||
| "movq (%1, %%"REG_a"), %%mm2 \n\t" | |||
| "movq (%2, %%"REG_a"), %%mm3 \n\t" | |||
| "punpcklbw %%mm7, %%mm0 \n\t" | |||
| "punpcklbw %%mm7, %%mm1 \n\t" | |||
| "punpckhbw %%mm7, %%mm2 \n\t" | |||
| "punpckhbw %%mm7, %%mm3 \n\t" | |||
| "paddw %%mm0, %%mm1 \n\t" | |||
| "paddw %%mm2, %%mm3 \n\t" | |||
| "movq (%3, %%"REG_a"), %%mm4 \n\t" | |||
| "movq (%3, %%"REG_a"), %%mm2 \n\t" | |||
| "paddw %%mm5, %%mm1 \n\t" | |||
| "paddw %%mm5, %%mm3 \n\t" | |||
| "psrlw $1, %%mm1 \n\t" | |||
| "psrlw $1, %%mm3 \n\t" | |||
| "packuswb %%mm3, %%mm1 \n\t" | |||
| "psubusb %%mm1, %%mm4 \n\t" | |||
| "psubusb %%mm2, %%mm1 \n\t" | |||
| "por %%mm4, %%mm1 \n\t" | |||
| "movq %%mm1, %%mm0 \n\t" | |||
| "punpcklbw %%mm7, %%mm0 \n\t" | |||
| "punpckhbw %%mm7, %%mm1 \n\t" | |||
| "paddw %%mm1, %%mm0 \n\t" | |||
| "paddw %%mm0, %%mm6 \n\t" | |||
| "add %4, %%"REG_a" \n\t" | |||
| " js 1b \n\t" | |||
| : "+a" (len) | |||
| : "r" (blk1a - len), "r" (blk1b -len), "r" (blk2 - len), "r" ((long)stride) | |||
| ); | |||
| @@ -193,47 +193,47 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) | |||
| { | |||
| long len= -(stride*h); | |||
| asm volatile( | |||
| ".balign 16 \n\t" | |||
| "1: \n\t" | |||
| "movq (%1, %%"REG_a"), %%mm0 \n\t" | |||
| "movq (%2, %%"REG_a"), %%mm1 \n\t" | |||
| "movq %%mm0, %%mm4 \n\t" | |||
| "movq %%mm1, %%mm2 \n\t" | |||
| "punpcklbw %%mm7, %%mm0 \n\t" | |||
| "punpcklbw %%mm7, %%mm1 \n\t" | |||
| "punpckhbw %%mm7, %%mm4 \n\t" | |||
| "punpckhbw %%mm7, %%mm2 \n\t" | |||
| "paddw %%mm1, %%mm0 \n\t" | |||
| "paddw %%mm2, %%mm4 \n\t" | |||
| "movq 1(%1, %%"REG_a"), %%mm2 \n\t" | |||
| "movq 1(%2, %%"REG_a"), %%mm3 \n\t" | |||
| "movq %%mm2, %%mm1 \n\t" | |||
| "punpcklbw %%mm7, %%mm2 \n\t" | |||
| "punpckhbw %%mm7, %%mm1 \n\t" | |||
| "paddw %%mm0, %%mm2 \n\t" | |||
| "paddw %%mm4, %%mm1 \n\t" | |||
| "movq %%mm3, %%mm4 \n\t" | |||
| "punpcklbw %%mm7, %%mm3 \n\t" | |||
| "punpckhbw %%mm7, %%mm4 \n\t" | |||
| "paddw %%mm3, %%mm2 \n\t" | |||
| "paddw %%mm4, %%mm1 \n\t" | |||
| "movq (%3, %%"REG_a"), %%mm3 \n\t" | |||
| "movq (%3, %%"REG_a"), %%mm4 \n\t" | |||
| "paddw %%mm5, %%mm2 \n\t" | |||
| "paddw %%mm5, %%mm1 \n\t" | |||
| "psrlw $2, %%mm2 \n\t" | |||
| "psrlw $2, %%mm1 \n\t" | |||
| "packuswb %%mm1, %%mm2 \n\t" | |||
| "psubusb %%mm2, %%mm3 \n\t" | |||
| "psubusb %%mm4, %%mm2 \n\t" | |||
| "por %%mm3, %%mm2 \n\t" | |||
| "movq %%mm2, %%mm0 \n\t" | |||
| "punpcklbw %%mm7, %%mm0 \n\t" | |||
| "punpckhbw %%mm7, %%mm2 \n\t" | |||
| "paddw %%mm2, %%mm0 \n\t" | |||
| "paddw %%mm0, %%mm6 \n\t" | |||
| "add %4, %%"REG_a" \n\t" | |||
| " js 1b \n\t" | |||
| ".balign 16 \n\t" | |||
| "1: \n\t" | |||
| "movq (%1, %%"REG_a"), %%mm0 \n\t" | |||
| "movq (%2, %%"REG_a"), %%mm1 \n\t" | |||
| "movq %%mm0, %%mm4 \n\t" | |||
| "movq %%mm1, %%mm2 \n\t" | |||
| "punpcklbw %%mm7, %%mm0 \n\t" | |||
| "punpcklbw %%mm7, %%mm1 \n\t" | |||
| "punpckhbw %%mm7, %%mm4 \n\t" | |||
| "punpckhbw %%mm7, %%mm2 \n\t" | |||
| "paddw %%mm1, %%mm0 \n\t" | |||
| "paddw %%mm2, %%mm4 \n\t" | |||
| "movq 1(%1, %%"REG_a"), %%mm2 \n\t" | |||
| "movq 1(%2, %%"REG_a"), %%mm3 \n\t" | |||
| "movq %%mm2, %%mm1 \n\t" | |||
| "punpcklbw %%mm7, %%mm2 \n\t" | |||
| "punpckhbw %%mm7, %%mm1 \n\t" | |||
| "paddw %%mm0, %%mm2 \n\t" | |||
| "paddw %%mm4, %%mm1 \n\t" | |||
| "movq %%mm3, %%mm4 \n\t" | |||
| "punpcklbw %%mm7, %%mm3 \n\t" | |||
| "punpckhbw %%mm7, %%mm4 \n\t" | |||
| "paddw %%mm3, %%mm2 \n\t" | |||
| "paddw %%mm4, %%mm1 \n\t" | |||
| "movq (%3, %%"REG_a"), %%mm3 \n\t" | |||
| "movq (%3, %%"REG_a"), %%mm4 \n\t" | |||
| "paddw %%mm5, %%mm2 \n\t" | |||
| "paddw %%mm5, %%mm1 \n\t" | |||
| "psrlw $2, %%mm2 \n\t" | |||
| "psrlw $2, %%mm1 \n\t" | |||
| "packuswb %%mm1, %%mm2 \n\t" | |||
| "psubusb %%mm2, %%mm3 \n\t" | |||
| "psubusb %%mm4, %%mm2 \n\t" | |||
| "por %%mm3, %%mm2 \n\t" | |||
| "movq %%mm2, %%mm0 \n\t" | |||
| "punpcklbw %%mm7, %%mm0 \n\t" | |||
| "punpckhbw %%mm7, %%mm2 \n\t" | |||
| "paddw %%mm2, %%mm0 \n\t" | |||
| "paddw %%mm0, %%mm6 \n\t" | |||
| "add %4, %%"REG_a" \n\t" | |||
| " js 1b \n\t" | |||
| : "+a" (len) | |||
| : "r" (blk1 - len), "r" (blk1 -len + stride), "r" (blk2 - len), "r" ((long)stride) | |||
| ); | |||
| @@ -243,13 +243,13 @@ static inline int sum_mmx(void) | |||
| { | |||
| int ret; | |||
| asm volatile( | |||
| "movq %%mm6, %%mm0 \n\t" | |||
| "psrlq $32, %%mm6 \n\t" | |||
| "paddw %%mm0, %%mm6 \n\t" | |||
| "movq %%mm6, %%mm0 \n\t" | |||
| "psrlq $16, %%mm6 \n\t" | |||
| "paddw %%mm0, %%mm6 \n\t" | |||
| "movd %%mm6, %0 \n\t" | |||
| "movq %%mm6, %%mm0 \n\t" | |||
| "psrlq $32, %%mm6 \n\t" | |||
| "paddw %%mm0, %%mm6 \n\t" | |||
| "movq %%mm6, %%mm0 \n\t" | |||
| "psrlq $16, %%mm6 \n\t" | |||
| "paddw %%mm0, %%mm6 \n\t" | |||
| "movd %%mm6, %0 \n\t" | |||
| : "=r" (ret) | |||
| ); | |||
| return ret&0xFFFF; | |||
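sum_mmx() folds the four 16-bit partial sums left in %mm6 by the plain-MMX SAD loops: shift right by 32 bits and add, shift right by 16 bits and add, then movd the register out and keep the low 16 bits. In scalar form (illustrative, not part of the patch):

#include <stdint.h>

/* Scalar model of the horizontal fold in sum_mmx(); only the low 16 bits
 * of the result are meaningful. */
static int sum_fold_ref(const uint16_t lane[4])
{
    return (lane[0] + lane[1] + lane[2] + lane[3]) & 0xFFFF;
}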
| @@ -259,7 +259,7 @@ static inline int sum_mmx2(void) | |||
| { | |||
| int ret; | |||
| asm volatile( | |||
| "movd %%mm6, %0 \n\t" | |||
| "movd %%mm6, %0 \n\t" | |||
| : "=r" (ret) | |||
| ); | |||
| return ret; | |||
| @@ -270,8 +270,8 @@ static inline int sum_mmx2(void) | |||
| static int sad8_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ | |||
| {\ | |||
| assert(h==8);\ | |||
| asm volatile("pxor %%mm7, %%mm7 \n\t"\ | |||
| "pxor %%mm6, %%mm6 \n\t":);\ | |||
| asm volatile("pxor %%mm7, %%mm7 \n\t"\ | |||
| "pxor %%mm6, %%mm6 \n\t":);\ | |||
| \ | |||
| sad8_1_ ## suf(blk1, blk2, stride, 8);\ | |||
| \ | |||
| @@ -280,9 +280,9 @@ static int sad8_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h | |||
| static int sad8_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ | |||
| {\ | |||
| assert(h==8);\ | |||
| asm volatile("pxor %%mm7, %%mm7 \n\t"\ | |||
| "pxor %%mm6, %%mm6 \n\t"\ | |||
| "movq %0, %%mm5 \n\t"\ | |||
| asm volatile("pxor %%mm7, %%mm7 \n\t"\ | |||
| "pxor %%mm6, %%mm6 \n\t"\ | |||
| "movq %0, %%mm5 \n\t"\ | |||
| :: "m"(round_tab[1]) \ | |||
| );\ | |||
| \ | |||
| @@ -294,9 +294,9 @@ static int sad8_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, in | |||
| static int sad8_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ | |||
| {\ | |||
| assert(h==8);\ | |||
| asm volatile("pxor %%mm7, %%mm7 \n\t"\ | |||
| "pxor %%mm6, %%mm6 \n\t"\ | |||
| "movq %0, %%mm5 \n\t"\ | |||
| asm volatile("pxor %%mm7, %%mm7 \n\t"\ | |||
| "pxor %%mm6, %%mm6 \n\t"\ | |||
| "movq %0, %%mm5 \n\t"\ | |||
| :: "m"(round_tab[1]) \ | |||
| );\ | |||
| \ | |||
| @@ -308,9 +308,9 @@ static int sad8_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, in | |||
| static int sad8_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ | |||
| {\ | |||
| assert(h==8);\ | |||
| asm volatile("pxor %%mm7, %%mm7 \n\t"\ | |||
| "pxor %%mm6, %%mm6 \n\t"\ | |||
| "movq %0, %%mm5 \n\t"\ | |||
| asm volatile("pxor %%mm7, %%mm7 \n\t"\ | |||
| "pxor %%mm6, %%mm6 \n\t"\ | |||
| "movq %0, %%mm5 \n\t"\ | |||
| :: "m"(round_tab[2]) \ | |||
| );\ | |||
| \ | |||
| @@ -321,8 +321,8 @@ static int sad8_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, i | |||
| \ | |||
| static int sad16_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ | |||
| {\ | |||
| asm volatile("pxor %%mm7, %%mm7 \n\t"\ | |||
| "pxor %%mm6, %%mm6 \n\t":);\ | |||
| asm volatile("pxor %%mm7, %%mm7 \n\t"\ | |||
| "pxor %%mm6, %%mm6 \n\t":);\ | |||
| \ | |||
| sad8_1_ ## suf(blk1 , blk2 , stride, h);\ | |||
| sad8_1_ ## suf(blk1+8, blk2+8, stride, h);\ | |||
| @@ -331,9 +331,9 @@ static int sad16_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int | |||
| }\ | |||
| static int sad16_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ | |||
| {\ | |||
| asm volatile("pxor %%mm7, %%mm7 \n\t"\ | |||
| "pxor %%mm6, %%mm6 \n\t"\ | |||
| "movq %0, %%mm5 \n\t"\ | |||
| asm volatile("pxor %%mm7, %%mm7 \n\t"\ | |||
| "pxor %%mm6, %%mm6 \n\t"\ | |||
| "movq %0, %%mm5 \n\t"\ | |||
| :: "m"(round_tab[1]) \ | |||
| );\ | |||
| \ | |||
| @@ -344,9 +344,9 @@ static int sad16_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, i | |||
| }\ | |||
| static int sad16_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ | |||
| {\ | |||
| asm volatile("pxor %%mm7, %%mm7 \n\t"\ | |||
| "pxor %%mm6, %%mm6 \n\t"\ | |||
| "movq %0, %%mm5 \n\t"\ | |||
| asm volatile("pxor %%mm7, %%mm7 \n\t"\ | |||
| "pxor %%mm6, %%mm6 \n\t"\ | |||
| "movq %0, %%mm5 \n\t"\ | |||
| :: "m"(round_tab[1]) \ | |||
| );\ | |||
| \ | |||
| @@ -357,9 +357,9 @@ static int sad16_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, i | |||
| }\ | |||
| static int sad16_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ | |||
| {\ | |||
| asm volatile("pxor %%mm7, %%mm7 \n\t"\ | |||
| "pxor %%mm6, %%mm6 \n\t"\ | |||
| "movq %0, %%mm5 \n\t"\ | |||
| asm volatile("pxor %%mm7, %%mm7 \n\t"\ | |||
| "pxor %%mm6, %%mm6 \n\t"\ | |||
| "movq %0, %%mm5 \n\t"\ | |||
| :: "m"(round_tab[2]) \ | |||
| );\ | |||
| \ | |||
| @@ -384,15 +384,15 @@ void dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx) | |||
| c->pix_abs[1][2] = sad8_y2_mmx; | |||
| c->pix_abs[1][3] = sad8_xy2_mmx; | |||
| c->sad[0]= sad16_mmx; | |||
| c->sad[0]= sad16_mmx; | |||
| c->sad[1]= sad8_mmx; | |||
| } | |||
| if (mm_flags & MM_MMXEXT) { | |||
| c->pix_abs[0][0] = sad16_mmx2; | |||
| c->pix_abs[1][0] = sad8_mmx2; | |||
| c->pix_abs[0][0] = sad16_mmx2; | |||
| c->pix_abs[1][0] = sad8_mmx2; | |||
| c->sad[0]= sad16_mmx2; | |||
| c->sad[1]= sad8_mmx2; | |||
| c->sad[0]= sad16_mmx2; | |||
| c->sad[1]= sad8_mmx2; | |||
| if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ | |||
| c->pix_abs[0][1] = sad16_x2_mmx2; | |||
| @@ -57,52 +57,52 @@ static void dct_unquantize_h263_intra_mmx(MpegEncContext *s, | |||
| nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ]; | |||
| //printf("%d %d ", qmul, qadd); | |||
| asm volatile( | |||
| "movd %1, %%mm6 \n\t" //qmul | |||
| "packssdw %%mm6, %%mm6 \n\t" | |||
| "packssdw %%mm6, %%mm6 \n\t" | |||
| "movd %2, %%mm5 \n\t" //qadd | |||
| "pxor %%mm7, %%mm7 \n\t" | |||
| "packssdw %%mm5, %%mm5 \n\t" | |||
| "packssdw %%mm5, %%mm5 \n\t" | |||
| "psubw %%mm5, %%mm7 \n\t" | |||
| "pxor %%mm4, %%mm4 \n\t" | |||
| ".balign 16\n\t" | |||
| "1: \n\t" | |||
| "movq (%0, %3), %%mm0 \n\t" | |||
| "movq 8(%0, %3), %%mm1 \n\t" | |||
| "pmullw %%mm6, %%mm0 \n\t" | |||
| "pmullw %%mm6, %%mm1 \n\t" | |||
| "movq (%0, %3), %%mm2 \n\t" | |||
| "movq 8(%0, %3), %%mm3 \n\t" | |||
| "pcmpgtw %%mm4, %%mm2 \n\t" // block[i] < 0 ? -1 : 0 | |||
| "pcmpgtw %%mm4, %%mm3 \n\t" // block[i] < 0 ? -1 : 0 | |||
| "pxor %%mm2, %%mm0 \n\t" | |||
| "pxor %%mm3, %%mm1 \n\t" | |||
| "paddw %%mm7, %%mm0 \n\t" | |||
| "paddw %%mm7, %%mm1 \n\t" | |||
| "pxor %%mm0, %%mm2 \n\t" | |||
| "pxor %%mm1, %%mm3 \n\t" | |||
| "pcmpeqw %%mm7, %%mm0 \n\t" // block[i] == 0 ? -1 : 0 | |||
| "pcmpeqw %%mm7, %%mm1 \n\t" // block[i] == 0 ? -1 : 0 | |||
| "pandn %%mm2, %%mm0 \n\t" | |||
| "pandn %%mm3, %%mm1 \n\t" | |||
| "movq %%mm0, (%0, %3) \n\t" | |||
| "movq %%mm1, 8(%0, %3) \n\t" | |||
| "add $16, %3 \n\t" | |||
| "jng 1b \n\t" | |||
| ::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs)) | |||
| : "memory" | |||
| ); | |||
| "movd %1, %%mm6 \n\t" //qmul | |||
| "packssdw %%mm6, %%mm6 \n\t" | |||
| "packssdw %%mm6, %%mm6 \n\t" | |||
| "movd %2, %%mm5 \n\t" //qadd | |||
| "pxor %%mm7, %%mm7 \n\t" | |||
| "packssdw %%mm5, %%mm5 \n\t" | |||
| "packssdw %%mm5, %%mm5 \n\t" | |||
| "psubw %%mm5, %%mm7 \n\t" | |||
| "pxor %%mm4, %%mm4 \n\t" | |||
| ".balign 16 \n\t" | |||
| "1: \n\t" | |||
| "movq (%0, %3), %%mm0 \n\t" | |||
| "movq 8(%0, %3), %%mm1 \n\t" | |||
| "pmullw %%mm6, %%mm0 \n\t" | |||
| "pmullw %%mm6, %%mm1 \n\t" | |||
| "movq (%0, %3), %%mm2 \n\t" | |||
| "movq 8(%0, %3), %%mm3 \n\t" | |||
| "pcmpgtw %%mm4, %%mm2 \n\t" // block[i] < 0 ? -1 : 0 | |||
| "pcmpgtw %%mm4, %%mm3 \n\t" // block[i] < 0 ? -1 : 0 | |||
| "pxor %%mm2, %%mm0 \n\t" | |||
| "pxor %%mm3, %%mm1 \n\t" | |||
| "paddw %%mm7, %%mm0 \n\t" | |||
| "paddw %%mm7, %%mm1 \n\t" | |||
| "pxor %%mm0, %%mm2 \n\t" | |||
| "pxor %%mm1, %%mm3 \n\t" | |||
| "pcmpeqw %%mm7, %%mm0 \n\t" // block[i] == 0 ? -1 : 0 | |||
| "pcmpeqw %%mm7, %%mm1 \n\t" // block[i] == 0 ? -1 : 0 | |||
| "pandn %%mm2, %%mm0 \n\t" | |||
| "pandn %%mm3, %%mm1 \n\t" | |||
| "movq %%mm0, (%0, %3) \n\t" | |||
| "movq %%mm1, 8(%0, %3) \n\t" | |||
| "add $16, %3 \n\t" | |||
| "jng 1b \n\t" | |||
| ::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs)) | |||
| : "memory" | |||
| ); | |||
| block[0]= level; | |||
| } | |||
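This hunk and the next one re-indent the H.263 inverse-quantisation loops, apparently in libavcodec/i386/mpegvideo_mmx.c. Per coefficient the arithmetic is level*qmul + qadd for positive values and level*qmul - qadd for negative ones, with zero coefficients left untouched; the MMX code gets the sign handling branch-free out of PCMPGTW masks plus PXOR/PADDW, and the intra path then overwrites block[0] with the separately scaled DC term (the final block[0]= level above). A scalar model (illustrative, not part of the patch):

#include <stdint.h>

typedef int16_t DCTELEM;         /* coefficient type used by the codec */

/* Scalar model of the MMX loop: H.263 inverse quantisation; zero
 * coefficients stay zero.  For intra blocks the caller afterwards replaces
 * block[0] with the separately computed DC value. */
static void h263_dequant_ref(DCTELEM *block, int nCoeffs, int qmul, int qadd)
{
    for (int i = 0; i <= nCoeffs; i++) {
        int level = block[i];
        if (level == 0)
            continue;
        block[i] = level < 0 ? level * qmul - qadd
                             : level * qmul + qadd;
    }
}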
| @@ -120,52 +120,52 @@ static void dct_unquantize_h263_inter_mmx(MpegEncContext *s, | |||
| nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ]; | |||
| //printf("%d %d ", qmul, qadd); | |||
| asm volatile( | |||
| "movd %1, %%mm6 \n\t" //qmul | |||
| "packssdw %%mm6, %%mm6 \n\t" | |||
| "packssdw %%mm6, %%mm6 \n\t" | |||
| "movd %2, %%mm5 \n\t" //qadd | |||
| "pxor %%mm7, %%mm7 \n\t" | |||
| "packssdw %%mm5, %%mm5 \n\t" | |||
| "packssdw %%mm5, %%mm5 \n\t" | |||
| "psubw %%mm5, %%mm7 \n\t" | |||
| "pxor %%mm4, %%mm4 \n\t" | |||
| ".balign 16\n\t" | |||
| "1: \n\t" | |||
| "movq (%0, %3), %%mm0 \n\t" | |||
| "movq 8(%0, %3), %%mm1 \n\t" | |||
| "pmullw %%mm6, %%mm0 \n\t" | |||
| "pmullw %%mm6, %%mm1 \n\t" | |||
| "movq (%0, %3), %%mm2 \n\t" | |||
| "movq 8(%0, %3), %%mm3 \n\t" | |||
| "pcmpgtw %%mm4, %%mm2 \n\t" // block[i] < 0 ? -1 : 0 | |||
| "pcmpgtw %%mm4, %%mm3 \n\t" // block[i] < 0 ? -1 : 0 | |||
| "pxor %%mm2, %%mm0 \n\t" | |||
| "pxor %%mm3, %%mm1 \n\t" | |||
| "paddw %%mm7, %%mm0 \n\t" | |||
| "paddw %%mm7, %%mm1 \n\t" | |||
| "pxor %%mm0, %%mm2 \n\t" | |||
| "pxor %%mm1, %%mm3 \n\t" | |||
| "pcmpeqw %%mm7, %%mm0 \n\t" // block[i] == 0 ? -1 : 0 | |||
| "pcmpeqw %%mm7, %%mm1 \n\t" // block[i] == 0 ? -1 : 0 | |||
| "pandn %%mm2, %%mm0 \n\t" | |||
| "pandn %%mm3, %%mm1 \n\t" | |||
| "movq %%mm0, (%0, %3) \n\t" | |||
| "movq %%mm1, 8(%0, %3) \n\t" | |||
| "add $16, %3 \n\t" | |||
| "jng 1b \n\t" | |||
| ::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs)) | |||
| : "memory" | |||
| ); | |||
| "movd %1, %%mm6 \n\t" //qmul | |||
| "packssdw %%mm6, %%mm6 \n\t" | |||
| "packssdw %%mm6, %%mm6 \n\t" | |||
| "movd %2, %%mm5 \n\t" //qadd | |||
| "pxor %%mm7, %%mm7 \n\t" | |||
| "packssdw %%mm5, %%mm5 \n\t" | |||
| "packssdw %%mm5, %%mm5 \n\t" | |||
| "psubw %%mm5, %%mm7 \n\t" | |||
| "pxor %%mm4, %%mm4 \n\t" | |||
| ".balign 16 \n\t" | |||
| "1: \n\t" | |||
| "movq (%0, %3), %%mm0 \n\t" | |||
| "movq 8(%0, %3), %%mm1 \n\t" | |||
| "pmullw %%mm6, %%mm0 \n\t" | |||
| "pmullw %%mm6, %%mm1 \n\t" | |||
| "movq (%0, %3), %%mm2 \n\t" | |||
| "movq 8(%0, %3), %%mm3 \n\t" | |||
| "pcmpgtw %%mm4, %%mm2 \n\t" // block[i] < 0 ? -1 : 0 | |||
| "pcmpgtw %%mm4, %%mm3 \n\t" // block[i] < 0 ? -1 : 0 | |||
| "pxor %%mm2, %%mm0 \n\t" | |||
| "pxor %%mm3, %%mm1 \n\t" | |||
| "paddw %%mm7, %%mm0 \n\t" | |||
| "paddw %%mm7, %%mm1 \n\t" | |||
| "pxor %%mm0, %%mm2 \n\t" | |||
| "pxor %%mm1, %%mm3 \n\t" | |||
| "pcmpeqw %%mm7, %%mm0 \n\t" // block[i] == 0 ? -1 : 0 | |||
| "pcmpeqw %%mm7, %%mm1 \n\t" // block[i] == 0 ? -1 : 0 | |||
| "pandn %%mm2, %%mm0 \n\t" | |||
| "pandn %%mm3, %%mm1 \n\t" | |||
| "movq %%mm0, (%0, %3) \n\t" | |||
| "movq %%mm1, 8(%0, %3) \n\t" | |||
| "add $16, %3 \n\t" | |||
| "jng 1b \n\t" | |||
| ::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs)) | |||
| : "memory" | |||
| ); | |||
| } | |||
| @@ -216,54 +216,54 @@ static void dct_unquantize_mpeg1_intra_mmx(MpegEncContext *s, | |||
| /* XXX: only mpeg1 */ | |||
| quant_matrix = s->intra_matrix; | |||
| asm volatile( | |||
| "pcmpeqw %%mm7, %%mm7 \n\t" | |||
| "psrlw $15, %%mm7 \n\t" | |||
| "movd %2, %%mm6 \n\t" | |||
| "packssdw %%mm6, %%mm6 \n\t" | |||
| "packssdw %%mm6, %%mm6 \n\t" | |||
| "mov %3, %%"REG_a" \n\t" | |||
| ".balign 16\n\t" | |||
| "1: \n\t" | |||
| "movq (%0, %%"REG_a"), %%mm0 \n\t" | |||
| "movq 8(%0, %%"REG_a"), %%mm1 \n\t" | |||
| "movq (%1, %%"REG_a"), %%mm4 \n\t" | |||
| "movq 8(%1, %%"REG_a"), %%mm5 \n\t" | |||
| "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i] | |||
| "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i] | |||
| "pxor %%mm2, %%mm2 \n\t" | |||
| "pxor %%mm3, %%mm3 \n\t" | |||
| "pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0 | |||
| "pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0 | |||
| "pxor %%mm2, %%mm0 \n\t" | |||
| "pxor %%mm3, %%mm1 \n\t" | |||
| "psubw %%mm2, %%mm0 \n\t" // abs(block[i]) | |||
| "psubw %%mm3, %%mm1 \n\t" // abs(block[i]) | |||
| "pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*q | |||
| "pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*q | |||
| "pxor %%mm4, %%mm4 \n\t" | |||
| "pxor %%mm5, %%mm5 \n\t" // FIXME slow | |||
| "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0 | |||
| "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0 | |||
| "psraw $3, %%mm0 \n\t" | |||
| "psraw $3, %%mm1 \n\t" | |||
| "psubw %%mm7, %%mm0 \n\t" | |||
| "psubw %%mm7, %%mm1 \n\t" | |||
| "por %%mm7, %%mm0 \n\t" | |||
| "por %%mm7, %%mm1 \n\t" | |||
| "pxor %%mm2, %%mm0 \n\t" | |||
| "pxor %%mm3, %%mm1 \n\t" | |||
| "psubw %%mm2, %%mm0 \n\t" | |||
| "psubw %%mm3, %%mm1 \n\t" | |||
| "pandn %%mm0, %%mm4 \n\t" | |||
| "pandn %%mm1, %%mm5 \n\t" | |||
| "movq %%mm4, (%0, %%"REG_a") \n\t" | |||
| "movq %%mm5, 8(%0, %%"REG_a") \n\t" | |||
| "add $16, %%"REG_a" \n\t" | |||
| "js 1b \n\t" | |||
| ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs) | |||
| : "%"REG_a, "memory" | |||
| ); | |||
| "pcmpeqw %%mm7, %%mm7 \n\t" | |||
| "psrlw $15, %%mm7 \n\t" | |||
| "movd %2, %%mm6 \n\t" | |||
| "packssdw %%mm6, %%mm6 \n\t" | |||
| "packssdw %%mm6, %%mm6 \n\t" | |||
| "mov %3, %%"REG_a" \n\t" | |||
| ".balign 16 \n\t" | |||
| "1: \n\t" | |||
| "movq (%0, %%"REG_a"), %%mm0 \n\t" | |||
| "movq 8(%0, %%"REG_a"), %%mm1 \n\t" | |||
| "movq (%1, %%"REG_a"), %%mm4 \n\t" | |||
| "movq 8(%1, %%"REG_a"), %%mm5 \n\t" | |||
| "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i] | |||
| "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i] | |||
| "pxor %%mm2, %%mm2 \n\t" | |||
| "pxor %%mm3, %%mm3 \n\t" | |||
| "pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0 | |||
| "pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0 | |||
| "pxor %%mm2, %%mm0 \n\t" | |||
| "pxor %%mm3, %%mm1 \n\t" | |||
| "psubw %%mm2, %%mm0 \n\t" // abs(block[i]) | |||
| "psubw %%mm3, %%mm1 \n\t" // abs(block[i]) | |||
| "pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*q | |||
| "pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*q | |||
| "pxor %%mm4, %%mm4 \n\t" | |||
| "pxor %%mm5, %%mm5 \n\t" // FIXME slow | |||
| "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0 | |||
| "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0 | |||
| "psraw $3, %%mm0 \n\t" | |||
| "psraw $3, %%mm1 \n\t" | |||
| "psubw %%mm7, %%mm0 \n\t" | |||
| "psubw %%mm7, %%mm1 \n\t" | |||
| "por %%mm7, %%mm0 \n\t" | |||
| "por %%mm7, %%mm1 \n\t" | |||
| "pxor %%mm2, %%mm0 \n\t" | |||
| "pxor %%mm3, %%mm1 \n\t" | |||
| "psubw %%mm2, %%mm0 \n\t" | |||
| "psubw %%mm3, %%mm1 \n\t" | |||
| "pandn %%mm0, %%mm4 \n\t" | |||
| "pandn %%mm1, %%mm5 \n\t" | |||
| "movq %%mm4, (%0, %%"REG_a") \n\t" | |||
| "movq %%mm5, 8(%0, %%"REG_a") \n\t" | |||
| "add $16, %%"REG_a" \n\t" | |||
| "js 1b \n\t" | |||
| ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs) | |||
| : "%"REG_a, "memory" | |||
| ); | |||
| block[0]= block0; | |||
| } | |||
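For the MPEG-1 intra path the per-coefficient formula is (|level| * qscale * quant_matrix[i]) >> 3, then made odd with the (x - 1) | 1 trick (the PSUBW/POR against %mm7, which the prologue sets to 1 in every word), then the sign is restored; zero coefficients are preserved and block[0] is restored to the separately scaled DC afterwards. A scalar model, ignoring the raster/zig-zag addressing details (illustrative, not part of the patch):

#include <stdint.h>
#include <stdlib.h>

/* Scalar model of the dct_unquantize_mpeg1_intra_mmx() inner loop. */
static void mpeg1_intra_dequant_ref(int16_t *block, const uint16_t *quant_matrix,
                                    int qscale, int nCoeffs)
{
    for (int i = 1; i <= nCoeffs; i++) {      /* DC is handled separately */
        int level = block[i];
        if (level == 0)
            continue;
        int sign = level < 0 ? -1 : 1;
        level = (abs(level) * qscale * quant_matrix[i]) >> 3;
        level = (level - 1) | 1;              /* MPEG-1 "oddification"    */
        block[i] = sign * level;
    }
}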
| @@ -279,58 +279,58 @@ static void dct_unquantize_mpeg1_inter_mmx(MpegEncContext *s, | |||
| quant_matrix = s->inter_matrix; | |||
| asm volatile( | |||
| "pcmpeqw %%mm7, %%mm7 \n\t" | |||
| "psrlw $15, %%mm7 \n\t" | |||
| "movd %2, %%mm6 \n\t" | |||
| "packssdw %%mm6, %%mm6 \n\t" | |||
| "packssdw %%mm6, %%mm6 \n\t" | |||
| "mov %3, %%"REG_a" \n\t" | |||
| ".balign 16\n\t" | |||
| "1: \n\t" | |||
| "movq (%0, %%"REG_a"), %%mm0 \n\t" | |||
| "movq 8(%0, %%"REG_a"), %%mm1 \n\t" | |||
| "movq (%1, %%"REG_a"), %%mm4 \n\t" | |||
| "movq 8(%1, %%"REG_a"), %%mm5 \n\t" | |||
| "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i] | |||
| "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i] | |||
| "pxor %%mm2, %%mm2 \n\t" | |||
| "pxor %%mm3, %%mm3 \n\t" | |||
| "pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0 | |||
| "pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0 | |||
| "pxor %%mm2, %%mm0 \n\t" | |||
| "pxor %%mm3, %%mm1 \n\t" | |||
| "psubw %%mm2, %%mm0 \n\t" // abs(block[i]) | |||
| "psubw %%mm3, %%mm1 \n\t" // abs(block[i]) | |||
| "paddw %%mm0, %%mm0 \n\t" // abs(block[i])*2 | |||
| "paddw %%mm1, %%mm1 \n\t" // abs(block[i])*2 | |||
| "paddw %%mm7, %%mm0 \n\t" // abs(block[i])*2 + 1 | |||
| "paddw %%mm7, %%mm1 \n\t" // abs(block[i])*2 + 1 | |||
| "pmullw %%mm4, %%mm0 \n\t" // (abs(block[i])*2 + 1)*q | |||
| "pmullw %%mm5, %%mm1 \n\t" // (abs(block[i])*2 + 1)*q | |||
| "pxor %%mm4, %%mm4 \n\t" | |||
| "pxor %%mm5, %%mm5 \n\t" // FIXME slow | |||
| "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0 | |||
| "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0 | |||
| "psraw $4, %%mm0 \n\t" | |||
| "psraw $4, %%mm1 \n\t" | |||
| "psubw %%mm7, %%mm0 \n\t" | |||
| "psubw %%mm7, %%mm1 \n\t" | |||
| "por %%mm7, %%mm0 \n\t" | |||
| "por %%mm7, %%mm1 \n\t" | |||
| "pxor %%mm2, %%mm0 \n\t" | |||
| "pxor %%mm3, %%mm1 \n\t" | |||
| "psubw %%mm2, %%mm0 \n\t" | |||
| "psubw %%mm3, %%mm1 \n\t" | |||
| "pandn %%mm0, %%mm4 \n\t" | |||
| "pandn %%mm1, %%mm5 \n\t" | |||
| "movq %%mm4, (%0, %%"REG_a") \n\t" | |||
| "movq %%mm5, 8(%0, %%"REG_a") \n\t" | |||
| "add $16, %%"REG_a" \n\t" | |||
| "js 1b \n\t" | |||
| ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs) | |||
| : "%"REG_a, "memory" | |||
| ); | |||
| "pcmpeqw %%mm7, %%mm7 \n\t" | |||
| "psrlw $15, %%mm7 \n\t" | |||
| "movd %2, %%mm6 \n\t" | |||
| "packssdw %%mm6, %%mm6 \n\t" | |||
| "packssdw %%mm6, %%mm6 \n\t" | |||
| "mov %3, %%"REG_a" \n\t" | |||
| ".balign 16 \n\t" | |||
| "1: \n\t" | |||
| "movq (%0, %%"REG_a"), %%mm0 \n\t" | |||
| "movq 8(%0, %%"REG_a"), %%mm1 \n\t" | |||
| "movq (%1, %%"REG_a"), %%mm4 \n\t" | |||
| "movq 8(%1, %%"REG_a"), %%mm5 \n\t" | |||
| "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i] | |||
| "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i] | |||
| "pxor %%mm2, %%mm2 \n\t" | |||
| "pxor %%mm3, %%mm3 \n\t" | |||
| "pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0 | |||
| "pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0 | |||
| "pxor %%mm2, %%mm0 \n\t" | |||
| "pxor %%mm3, %%mm1 \n\t" | |||
| "psubw %%mm2, %%mm0 \n\t" // abs(block[i]) | |||
| "psubw %%mm3, %%mm1 \n\t" // abs(block[i]) | |||
| "paddw %%mm0, %%mm0 \n\t" // abs(block[i])*2 | |||
| "paddw %%mm1, %%mm1 \n\t" // abs(block[i])*2 | |||
| "paddw %%mm7, %%mm0 \n\t" // abs(block[i])*2 + 1 | |||
| "paddw %%mm7, %%mm1 \n\t" // abs(block[i])*2 + 1 | |||
| "pmullw %%mm4, %%mm0 \n\t" // (abs(block[i])*2 + 1)*q | |||
| "pmullw %%mm5, %%mm1 \n\t" // (abs(block[i])*2 + 1)*q | |||
| "pxor %%mm4, %%mm4 \n\t" | |||
| "pxor %%mm5, %%mm5 \n\t" // FIXME slow | |||
| "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0 | |||
| "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0 | |||
| "psraw $4, %%mm0 \n\t" | |||
| "psraw $4, %%mm1 \n\t" | |||
| "psubw %%mm7, %%mm0 \n\t" | |||
| "psubw %%mm7, %%mm1 \n\t" | |||
| "por %%mm7, %%mm0 \n\t" | |||
| "por %%mm7, %%mm1 \n\t" | |||
| "pxor %%mm2, %%mm0 \n\t" | |||
| "pxor %%mm3, %%mm1 \n\t" | |||
| "psubw %%mm2, %%mm0 \n\t" | |||
| "psubw %%mm3, %%mm1 \n\t" | |||
| "pandn %%mm0, %%mm4 \n\t" | |||
| "pandn %%mm1, %%mm5 \n\t" | |||
| "movq %%mm4, (%0, %%"REG_a") \n\t" | |||
| "movq %%mm5, 8(%0, %%"REG_a") \n\t" | |||
| "add $16, %%"REG_a" \n\t" | |||
| "js 1b \n\t" | |||
| ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs) | |||
| : "%"REG_a, "memory" | |||
| ); | |||
| } | |||
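The inter variant differs from the intra model above only in its formula: the absolute value is doubled and incremented before the multiply, the shift is 4 instead of 3, and there is no DC special case. The per-coefficient step in scalar form (illustrative, not part of the patch):

#include <stdlib.h>

/* One coefficient of the MPEG-1 inter dequant loop above. */
static inline int mpeg1_inter_dequant_one(int level, int qscale, int qm)
{
    if (level == 0)
        return 0;
    int sign = level < 0 ? -1 : 1;
    int v = ((abs(level) * 2 + 1) * qscale * qm) >> 4;
    return sign * ((v - 1) | 1);              /* force the result odd */
}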
| static void dct_unquantize_mpeg2_intra_mmx(MpegEncContext *s, | |||
| @@ -351,50 +351,50 @@ static void dct_unquantize_mpeg2_intra_mmx(MpegEncContext *s, | |||
| block0 = block[0] * s->c_dc_scale; | |||
| quant_matrix = s->intra_matrix; | |||
| asm volatile( | |||
| "pcmpeqw %%mm7, %%mm7 \n\t" | |||
| "psrlw $15, %%mm7 \n\t" | |||
| "movd %2, %%mm6 \n\t" | |||
| "packssdw %%mm6, %%mm6 \n\t" | |||
| "packssdw %%mm6, %%mm6 \n\t" | |||
| "mov %3, %%"REG_a" \n\t" | |||
| ".balign 16\n\t" | |||
| "1: \n\t" | |||
| "movq (%0, %%"REG_a"), %%mm0 \n\t" | |||
| "movq 8(%0, %%"REG_a"), %%mm1 \n\t" | |||
| "movq (%1, %%"REG_a"), %%mm4 \n\t" | |||
| "movq 8(%1, %%"REG_a"), %%mm5 \n\t" | |||
| "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i] | |||
| "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i] | |||
| "pxor %%mm2, %%mm2 \n\t" | |||
| "pxor %%mm3, %%mm3 \n\t" | |||
| "pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0 | |||
| "pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0 | |||
| "pxor %%mm2, %%mm0 \n\t" | |||
| "pxor %%mm3, %%mm1 \n\t" | |||
| "psubw %%mm2, %%mm0 \n\t" // abs(block[i]) | |||
| "psubw %%mm3, %%mm1 \n\t" // abs(block[i]) | |||
| "pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*q | |||
| "pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*q | |||
| "pxor %%mm4, %%mm4 \n\t" | |||
| "pxor %%mm5, %%mm5 \n\t" // FIXME slow | |||
| "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0 | |||
| "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0 | |||
| "psraw $3, %%mm0 \n\t" | |||
| "psraw $3, %%mm1 \n\t" | |||
| "pxor %%mm2, %%mm0 \n\t" | |||
| "pxor %%mm3, %%mm1 \n\t" | |||
| "psubw %%mm2, %%mm0 \n\t" | |||
| "psubw %%mm3, %%mm1 \n\t" | |||
| "pandn %%mm0, %%mm4 \n\t" | |||
| "pandn %%mm1, %%mm5 \n\t" | |||
| "movq %%mm4, (%0, %%"REG_a") \n\t" | |||
| "movq %%mm5, 8(%0, %%"REG_a") \n\t" | |||
| "add $16, %%"REG_a" \n\t" | |||
| "jng 1b \n\t" | |||
| ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs) | |||
| : "%"REG_a, "memory" | |||
| ); | |||
| "pcmpeqw %%mm7, %%mm7 \n\t" | |||
| "psrlw $15, %%mm7 \n\t" | |||
| "movd %2, %%mm6 \n\t" | |||
| "packssdw %%mm6, %%mm6 \n\t" | |||
| "packssdw %%mm6, %%mm6 \n\t" | |||
| "mov %3, %%"REG_a" \n\t" | |||
| ".balign 16 \n\t" | |||
| "1: \n\t" | |||
| "movq (%0, %%"REG_a"), %%mm0 \n\t" | |||
| "movq 8(%0, %%"REG_a"), %%mm1 \n\t" | |||
| "movq (%1, %%"REG_a"), %%mm4 \n\t" | |||
| "movq 8(%1, %%"REG_a"), %%mm5 \n\t" | |||
| "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i] | |||
| "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i] | |||
| "pxor %%mm2, %%mm2 \n\t" | |||
| "pxor %%mm3, %%mm3 \n\t" | |||
| "pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0 | |||
| "pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0 | |||
| "pxor %%mm2, %%mm0 \n\t" | |||
| "pxor %%mm3, %%mm1 \n\t" | |||
| "psubw %%mm2, %%mm0 \n\t" // abs(block[i]) | |||
| "psubw %%mm3, %%mm1 \n\t" // abs(block[i]) | |||
| "pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*q | |||
| "pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*q | |||
| "pxor %%mm4, %%mm4 \n\t" | |||
| "pxor %%mm5, %%mm5 \n\t" // FIXME slow | |||
| "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0 | |||
| "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0 | |||
| "psraw $3, %%mm0 \n\t" | |||
| "psraw $3, %%mm1 \n\t" | |||
| "pxor %%mm2, %%mm0 \n\t" | |||
| "pxor %%mm3, %%mm1 \n\t" | |||
| "psubw %%mm2, %%mm0 \n\t" | |||
| "psubw %%mm3, %%mm1 \n\t" | |||
| "pandn %%mm0, %%mm4 \n\t" | |||
| "pandn %%mm1, %%mm5 \n\t" | |||
| "movq %%mm4, (%0, %%"REG_a") \n\t" | |||
| "movq %%mm5, 8(%0, %%"REG_a") \n\t" | |||
| "add $16, %%"REG_a" \n\t" | |||
| "jng 1b \n\t" | |||
| ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs) | |||
| : "%"REG_a, "memory" | |||
| ); | |||
| block[0]= block0; | |||
| //Note, we dont do mismatch control for intra as errors cannot accumulate | |||
| } | |||
| @@ -412,68 +412,68 @@ static void dct_unquantize_mpeg2_inter_mmx(MpegEncContext *s, | |||
| quant_matrix = s->inter_matrix; | |||
| asm volatile( | |||
| "pcmpeqw %%mm7, %%mm7 \n\t" | |||
| "psrlq $48, %%mm7 \n\t" | |||
| "movd %2, %%mm6 \n\t" | |||
| "packssdw %%mm6, %%mm6 \n\t" | |||
| "packssdw %%mm6, %%mm6 \n\t" | |||
| "mov %3, %%"REG_a" \n\t" | |||
| ".balign 16\n\t" | |||
| "1: \n\t" | |||
| "movq (%0, %%"REG_a"), %%mm0 \n\t" | |||
| "movq 8(%0, %%"REG_a"), %%mm1 \n\t" | |||
| "movq (%1, %%"REG_a"), %%mm4 \n\t" | |||
| "movq 8(%1, %%"REG_a"), %%mm5 \n\t" | |||
| "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i] | |||
| "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i] | |||
| "pxor %%mm2, %%mm2 \n\t" | |||
| "pxor %%mm3, %%mm3 \n\t" | |||
| "pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0 | |||
| "pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0 | |||
| "pxor %%mm2, %%mm0 \n\t" | |||
| "pxor %%mm3, %%mm1 \n\t" | |||
| "psubw %%mm2, %%mm0 \n\t" // abs(block[i]) | |||
| "psubw %%mm3, %%mm1 \n\t" // abs(block[i]) | |||
| "paddw %%mm0, %%mm0 \n\t" // abs(block[i])*2 | |||
| "paddw %%mm1, %%mm1 \n\t" // abs(block[i])*2 | |||
| "pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*2*q | |||
| "pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*2*q | |||
| "paddw %%mm4, %%mm0 \n\t" // (abs(block[i])*2 + 1)*q | |||
| "paddw %%mm5, %%mm1 \n\t" // (abs(block[i])*2 + 1)*q | |||
| "pxor %%mm4, %%mm4 \n\t" | |||
| "pxor %%mm5, %%mm5 \n\t" // FIXME slow | |||
| "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0 | |||
| "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0 | |||
| "psrlw $4, %%mm0 \n\t" | |||
| "psrlw $4, %%mm1 \n\t" | |||
| "pxor %%mm2, %%mm0 \n\t" | |||
| "pxor %%mm3, %%mm1 \n\t" | |||
| "psubw %%mm2, %%mm0 \n\t" | |||
| "psubw %%mm3, %%mm1 \n\t" | |||
| "pandn %%mm0, %%mm4 \n\t" | |||
| "pandn %%mm1, %%mm5 \n\t" | |||
| "pxor %%mm4, %%mm7 \n\t" | |||
| "pxor %%mm5, %%mm7 \n\t" | |||
| "movq %%mm4, (%0, %%"REG_a") \n\t" | |||
| "movq %%mm5, 8(%0, %%"REG_a") \n\t" | |||
| "add $16, %%"REG_a" \n\t" | |||
| "jng 1b \n\t" | |||
| "movd 124(%0, %3), %%mm0 \n\t" | |||
| "movq %%mm7, %%mm6 \n\t" | |||
| "psrlq $32, %%mm7 \n\t" | |||
| "pxor %%mm6, %%mm7 \n\t" | |||
| "movq %%mm7, %%mm6 \n\t" | |||
| "psrlq $16, %%mm7 \n\t" | |||
| "pxor %%mm6, %%mm7 \n\t" | |||
| "pslld $31, %%mm7 \n\t" | |||
| "psrlq $15, %%mm7 \n\t" | |||
| "pxor %%mm7, %%mm0 \n\t" | |||
| "movd %%mm0, 124(%0, %3) \n\t" | |||
| ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "r" (-2*nCoeffs) | |||
| : "%"REG_a, "memory" | |||
| ); | |||
| "pcmpeqw %%mm7, %%mm7 \n\t" | |||
| "psrlq $48, %%mm7 \n\t" | |||
| "movd %2, %%mm6 \n\t" | |||
| "packssdw %%mm6, %%mm6 \n\t" | |||
| "packssdw %%mm6, %%mm6 \n\t" | |||
| "mov %3, %%"REG_a" \n\t" | |||
| ".balign 16 \n\t" | |||
| "1: \n\t" | |||
| "movq (%0, %%"REG_a"), %%mm0 \n\t" | |||
| "movq 8(%0, %%"REG_a"), %%mm1 \n\t" | |||
| "movq (%1, %%"REG_a"), %%mm4 \n\t" | |||
| "movq 8(%1, %%"REG_a"), %%mm5 \n\t" | |||
| "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i] | |||
| "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i] | |||
| "pxor %%mm2, %%mm2 \n\t" | |||
| "pxor %%mm3, %%mm3 \n\t" | |||
| "pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0 | |||
| "pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0 | |||
| "pxor %%mm2, %%mm0 \n\t" | |||
| "pxor %%mm3, %%mm1 \n\t" | |||
| "psubw %%mm2, %%mm0 \n\t" // abs(block[i]) | |||
| "psubw %%mm3, %%mm1 \n\t" // abs(block[i]) | |||
| "paddw %%mm0, %%mm0 \n\t" // abs(block[i])*2 | |||
| "paddw %%mm1, %%mm1 \n\t" // abs(block[i])*2 | |||
| "pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*2*q | |||
| "pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*2*q | |||
| "paddw %%mm4, %%mm0 \n\t" // (abs(block[i])*2 + 1)*q | |||
| "paddw %%mm5, %%mm1 \n\t" // (abs(block[i])*2 + 1)*q | |||
| "pxor %%mm4, %%mm4 \n\t" | |||
| "pxor %%mm5, %%mm5 \n\t" // FIXME slow | |||
| "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0 | |||
| "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0 | |||
| "psrlw $4, %%mm0 \n\t" | |||
| "psrlw $4, %%mm1 \n\t" | |||
| "pxor %%mm2, %%mm0 \n\t" | |||
| "pxor %%mm3, %%mm1 \n\t" | |||
| "psubw %%mm2, %%mm0 \n\t" | |||
| "psubw %%mm3, %%mm1 \n\t" | |||
| "pandn %%mm0, %%mm4 \n\t" | |||
| "pandn %%mm1, %%mm5 \n\t" | |||
| "pxor %%mm4, %%mm7 \n\t" | |||
| "pxor %%mm5, %%mm7 \n\t" | |||
| "movq %%mm4, (%0, %%"REG_a") \n\t" | |||
| "movq %%mm5, 8(%0, %%"REG_a") \n\t" | |||
| "add $16, %%"REG_a" \n\t" | |||
| "jng 1b \n\t" | |||
| "movd 124(%0, %3), %%mm0 \n\t" | |||
| "movq %%mm7, %%mm6 \n\t" | |||
| "psrlq $32, %%mm7 \n\t" | |||
| "pxor %%mm6, %%mm7 \n\t" | |||
| "movq %%mm7, %%mm6 \n\t" | |||
| "psrlq $16, %%mm7 \n\t" | |||
| "pxor %%mm6, %%mm7 \n\t" | |||
| "pslld $31, %%mm7 \n\t" | |||
| "psrlq $15, %%mm7 \n\t" | |||
| "pxor %%mm7, %%mm0 \n\t" | |||
| "movd %%mm0, 124(%0, %3) \n\t" | |||
| ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "r" (-2*nCoeffs) | |||
| : "%"REG_a, "memory" | |||
| ); | |||
| } | |||
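dct_unquantize_mpeg2_inter_mmx uses the MPEG-2 formula ((2*|level| + 1) * qscale * quant_matrix[i]) >> 4 without oddification; instead, the tail after the loop performs MPEG-2 mismatch control. %mm7 starts out as -1 in its low word, every output word is XORed into it, the register is folded down to a single parity bit, and that bit is XORed into the least significant bit of the last coefficient, so the LSB of block[63] is toggled exactly when the sum of all dequantised coefficients would otherwise be even. A scalar model of the fix-up (illustrative, not part of the patch):

#include <stdint.h>

/* Scalar model of MPEG-2 mismatch control: the sum of all 64 dequantised
 * coefficients must be odd; if it is even, flip the LSB of block[63]. */
static void mpeg2_mismatch_ref(int16_t block[64])
{
    int sum = 0;
    for (int i = 0; i < 64; i++)
        sum += block[i];
    if ((sum & 1) == 0)
        block[63] ^= 1;
}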
| /* draw the edges of width 'w' of an image of size width, height | |||
| @@ -488,79 +488,79 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w) | |||
| ptr = buf; | |||
| if(w==8) | |||
| { | |||
| asm volatile( | |||
| "1: \n\t" | |||
| "movd (%0), %%mm0 \n\t" | |||
| "punpcklbw %%mm0, %%mm0 \n\t" | |||
| "punpcklwd %%mm0, %%mm0 \n\t" | |||
| "punpckldq %%mm0, %%mm0 \n\t" | |||
| "movq %%mm0, -8(%0) \n\t" | |||
| "movq -8(%0, %2), %%mm1 \n\t" | |||
| "punpckhbw %%mm1, %%mm1 \n\t" | |||
| "punpckhwd %%mm1, %%mm1 \n\t" | |||
| "punpckhdq %%mm1, %%mm1 \n\t" | |||
| "movq %%mm1, (%0, %2) \n\t" | |||
| "add %1, %0 \n\t" | |||
| "cmp %3, %0 \n\t" | |||
| " jb 1b \n\t" | |||
| : "+r" (ptr) | |||
| : "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height) | |||
| ); | |||
| asm volatile( | |||
| "1: \n\t" | |||
| "movd (%0), %%mm0 \n\t" | |||
| "punpcklbw %%mm0, %%mm0 \n\t" | |||
| "punpcklwd %%mm0, %%mm0 \n\t" | |||
| "punpckldq %%mm0, %%mm0 \n\t" | |||
| "movq %%mm0, -8(%0) \n\t" | |||
| "movq -8(%0, %2), %%mm1 \n\t" | |||
| "punpckhbw %%mm1, %%mm1 \n\t" | |||
| "punpckhwd %%mm1, %%mm1 \n\t" | |||
| "punpckhdq %%mm1, %%mm1 \n\t" | |||
| "movq %%mm1, (%0, %2) \n\t" | |||
| "add %1, %0 \n\t" | |||
| "cmp %3, %0 \n\t" | |||
| " jb 1b \n\t" | |||
| : "+r" (ptr) | |||
| : "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height) | |||
| ); | |||
| } | |||
| else | |||
| { | |||
| asm volatile( | |||
| "1: \n\t" | |||
| "movd (%0), %%mm0 \n\t" | |||
| "punpcklbw %%mm0, %%mm0 \n\t" | |||
| "punpcklwd %%mm0, %%mm0 \n\t" | |||
| "punpckldq %%mm0, %%mm0 \n\t" | |||
| "movq %%mm0, -8(%0) \n\t" | |||
| "movq %%mm0, -16(%0) \n\t" | |||
| "movq -8(%0, %2), %%mm1 \n\t" | |||
| "punpckhbw %%mm1, %%mm1 \n\t" | |||
| "punpckhwd %%mm1, %%mm1 \n\t" | |||
| "punpckhdq %%mm1, %%mm1 \n\t" | |||
| "movq %%mm1, (%0, %2) \n\t" | |||
| "movq %%mm1, 8(%0, %2) \n\t" | |||
| "add %1, %0 \n\t" | |||
| "cmp %3, %0 \n\t" | |||
| " jb 1b \n\t" | |||
| : "+r" (ptr) | |||
| : "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height) | |||
| ); | |||
| asm volatile( | |||
| "1: \n\t" | |||
| "movd (%0), %%mm0 \n\t" | |||
| "punpcklbw %%mm0, %%mm0 \n\t" | |||
| "punpcklwd %%mm0, %%mm0 \n\t" | |||
| "punpckldq %%mm0, %%mm0 \n\t" | |||
| "movq %%mm0, -8(%0) \n\t" | |||
| "movq %%mm0, -16(%0) \n\t" | |||
| "movq -8(%0, %2), %%mm1 \n\t" | |||
| "punpckhbw %%mm1, %%mm1 \n\t" | |||
| "punpckhwd %%mm1, %%mm1 \n\t" | |||
| "punpckhdq %%mm1, %%mm1 \n\t" | |||
| "movq %%mm1, (%0, %2) \n\t" | |||
| "movq %%mm1, 8(%0, %2) \n\t" | |||
| "add %1, %0 \n\t" | |||
| "cmp %3, %0 \n\t" | |||
| " jb 1b \n\t" | |||
| : "+r" (ptr) | |||
| : "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height) | |||
| ); | |||
| } | |||
| for(i=0;i<w;i+=4) { | |||
| /* top and bottom (and hopefully also the corners) */ | |||
| ptr= buf - (i + 1) * wrap - w; | |||
| asm volatile( | |||
| "1: \n\t" | |||
| "movq (%1, %0), %%mm0 \n\t" | |||
| "movq %%mm0, (%0) \n\t" | |||
| "movq %%mm0, (%0, %2) \n\t" | |||
| "movq %%mm0, (%0, %2, 2) \n\t" | |||
| "movq %%mm0, (%0, %3) \n\t" | |||
| "add $8, %0 \n\t" | |||
| "cmp %4, %0 \n\t" | |||
| " jb 1b \n\t" | |||
| : "+r" (ptr) | |||
| : "r" ((long)buf - (long)ptr - w), "r" ((long)-wrap), "r" ((long)-wrap*3), "r" (ptr+width+2*w) | |||
| ); | |||
| ptr= last_line + (i + 1) * wrap - w; | |||
| asm volatile( | |||
| "1: \n\t" | |||
| "movq (%1, %0), %%mm0 \n\t" | |||
| "movq %%mm0, (%0) \n\t" | |||
| "movq %%mm0, (%0, %2) \n\t" | |||
| "movq %%mm0, (%0, %2, 2) \n\t" | |||
| "movq %%mm0, (%0, %3) \n\t" | |||
| "add $8, %0 \n\t" | |||
| "cmp %4, %0 \n\t" | |||
| " jb 1b \n\t" | |||
| : "+r" (ptr) | |||
| : "r" ((long)last_line - (long)ptr - w), "r" ((long)wrap), "r" ((long)wrap*3), "r" (ptr+width+2*w) | |||
| ); | |||
| ptr= buf - (i + 1) * wrap - w; | |||
| asm volatile( | |||
| "1: \n\t" | |||
| "movq (%1, %0), %%mm0 \n\t" | |||
| "movq %%mm0, (%0) \n\t" | |||
| "movq %%mm0, (%0, %2) \n\t" | |||
| "movq %%mm0, (%0, %2, 2) \n\t" | |||
| "movq %%mm0, (%0, %3) \n\t" | |||
| "add $8, %0 \n\t" | |||
| "cmp %4, %0 \n\t" | |||
| " jb 1b \n\t" | |||
| : "+r" (ptr) | |||
| : "r" ((long)buf - (long)ptr - w), "r" ((long)-wrap), "r" ((long)-wrap*3), "r" (ptr+width+2*w) | |||
| ); | |||
| ptr= last_line + (i + 1) * wrap - w; | |||
| asm volatile( | |||
| "1: \n\t" | |||
| "movq (%1, %0), %%mm0 \n\t" | |||
| "movq %%mm0, (%0) \n\t" | |||
| "movq %%mm0, (%0, %2) \n\t" | |||
| "movq %%mm0, (%0, %2, 2) \n\t" | |||
| "movq %%mm0, (%0, %3) \n\t" | |||
| "add $8, %0 \n\t" | |||
| "cmp %4, %0 \n\t" | |||
| " jb 1b \n\t" | |||
| : "+r" (ptr) | |||
| : "r" ((long)last_line - (long)ptr - w), "r" ((long)wrap), "r" ((long)wrap*3), "r" (ptr+width+2*w) | |||
| ); | |||
| } | |||
| } | |||
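draw_edges_mmx pads the picture for unrestricted motion vectors: the first and last pixel of every row are splatted across a whole quadword with the punpck chains and written into the left and right margins, then the already-widened top and bottom rows are replicated upwards and downwards w times, which also fills the corners. A scalar model (illustrative, not part of the patch):

#include <stdint.h>
#include <string.h>

/* Scalar model of draw_edges(): replicate the border pixels of a
 * width x height image with line stride `wrap` into a margin of w pixels. */
static void draw_edges_ref(uint8_t *buf, int wrap, int width, int height, int w)
{
    for (int y = 0; y < height; y++) {               /* left/right columns */
        uint8_t *row = buf + y * wrap;
        memset(row - w, row[0], w);
        memset(row + width, row[width - 1], w);
    }
    for (int i = 0; i < w; i++) {                    /* top/bottom rows    */
        memcpy(buf - (i + 1) * wrap - w, buf - w, width + 2 * w);
        memcpy(buf + (height + i) * wrap - w,
               buf + (height - 1) * wrap - w, width + 2 * w);
    }
}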
| @@ -572,47 +572,47 @@ static void denoise_dct_mmx(MpegEncContext *s, DCTELEM *block){ | |||
| s->dct_count[intra]++; | |||
| asm volatile( | |||
| "pxor %%mm7, %%mm7 \n\t" | |||
| "1: \n\t" | |||
| "pxor %%mm0, %%mm0 \n\t" | |||
| "pxor %%mm1, %%mm1 \n\t" | |||
| "movq (%0), %%mm2 \n\t" | |||
| "movq 8(%0), %%mm3 \n\t" | |||
| "pcmpgtw %%mm2, %%mm0 \n\t" | |||
| "pcmpgtw %%mm3, %%mm1 \n\t" | |||
| "pxor %%mm0, %%mm2 \n\t" | |||
| "pxor %%mm1, %%mm3 \n\t" | |||
| "psubw %%mm0, %%mm2 \n\t" | |||
| "psubw %%mm1, %%mm3 \n\t" | |||
| "movq %%mm2, %%mm4 \n\t" | |||
| "movq %%mm3, %%mm5 \n\t" | |||
| "psubusw (%2), %%mm2 \n\t" | |||
| "psubusw 8(%2), %%mm3 \n\t" | |||
| "pxor %%mm0, %%mm2 \n\t" | |||
| "pxor %%mm1, %%mm3 \n\t" | |||
| "psubw %%mm0, %%mm2 \n\t" | |||
| "psubw %%mm1, %%mm3 \n\t" | |||
| "movq %%mm2, (%0) \n\t" | |||
| "movq %%mm3, 8(%0) \n\t" | |||
| "movq %%mm4, %%mm2 \n\t" | |||
| "movq %%mm5, %%mm3 \n\t" | |||
| "punpcklwd %%mm7, %%mm4 \n\t" | |||
| "punpckhwd %%mm7, %%mm2 \n\t" | |||
| "punpcklwd %%mm7, %%mm5 \n\t" | |||
| "punpckhwd %%mm7, %%mm3 \n\t" | |||
| "paddd (%1), %%mm4 \n\t" | |||
| "paddd 8(%1), %%mm2 \n\t" | |||
| "paddd 16(%1), %%mm5 \n\t" | |||
| "paddd 24(%1), %%mm3 \n\t" | |||
| "movq %%mm4, (%1) \n\t" | |||
| "movq %%mm2, 8(%1) \n\t" | |||
| "movq %%mm5, 16(%1) \n\t" | |||
| "movq %%mm3, 24(%1) \n\t" | |||
| "add $16, %0 \n\t" | |||
| "add $32, %1 \n\t" | |||
| "add $16, %2 \n\t" | |||
| "cmp %3, %0 \n\t" | |||
| " jb 1b \n\t" | |||
| "pxor %%mm7, %%mm7 \n\t" | |||
| "1: \n\t" | |||
| "pxor %%mm0, %%mm0 \n\t" | |||
| "pxor %%mm1, %%mm1 \n\t" | |||
| "movq (%0), %%mm2 \n\t" | |||
| "movq 8(%0), %%mm3 \n\t" | |||
| "pcmpgtw %%mm2, %%mm0 \n\t" | |||
| "pcmpgtw %%mm3, %%mm1 \n\t" | |||
| "pxor %%mm0, %%mm2 \n\t" | |||
| "pxor %%mm1, %%mm3 \n\t" | |||
| "psubw %%mm0, %%mm2 \n\t" | |||
| "psubw %%mm1, %%mm3 \n\t" | |||
| "movq %%mm2, %%mm4 \n\t" | |||
| "movq %%mm3, %%mm5 \n\t" | |||
| "psubusw (%2), %%mm2 \n\t" | |||
| "psubusw 8(%2), %%mm3 \n\t" | |||
| "pxor %%mm0, %%mm2 \n\t" | |||
| "pxor %%mm1, %%mm3 \n\t" | |||
| "psubw %%mm0, %%mm2 \n\t" | |||
| "psubw %%mm1, %%mm3 \n\t" | |||
| "movq %%mm2, (%0) \n\t" | |||
| "movq %%mm3, 8(%0) \n\t" | |||
| "movq %%mm4, %%mm2 \n\t" | |||
| "movq %%mm5, %%mm3 \n\t" | |||
| "punpcklwd %%mm7, %%mm4 \n\t" | |||
| "punpckhwd %%mm7, %%mm2 \n\t" | |||
| "punpcklwd %%mm7, %%mm5 \n\t" | |||
| "punpckhwd %%mm7, %%mm3 \n\t" | |||
| "paddd (%1), %%mm4 \n\t" | |||
| "paddd 8(%1), %%mm2 \n\t" | |||
| "paddd 16(%1), %%mm5 \n\t" | |||
| "paddd 24(%1), %%mm3 \n\t" | |||
| "movq %%mm4, (%1) \n\t" | |||
| "movq %%mm2, 8(%1) \n\t" | |||
| "movq %%mm5, 16(%1) \n\t" | |||
| "movq %%mm3, 24(%1) \n\t" | |||
| "add $16, %0 \n\t" | |||
| "add $32, %1 \n\t" | |||
| "add $16, %2 \n\t" | |||
| "cmp %3, %0 \n\t" | |||
| " jb 1b \n\t" | |||
| : "+r" (block), "+r" (sum), "+r" (offset) | |||
| : "r"(block+64) | |||
| ); | |||
| @@ -626,47 +626,47 @@ static void denoise_dct_sse2(MpegEncContext *s, DCTELEM *block){ | |||
| s->dct_count[intra]++; | |||
| asm volatile( | |||
| "pxor %%xmm7, %%xmm7 \n\t" | |||
| "1: \n\t" | |||
| "pxor %%xmm0, %%xmm0 \n\t" | |||
| "pxor %%xmm1, %%xmm1 \n\t" | |||
| "movdqa (%0), %%xmm2 \n\t" | |||
| "movdqa 16(%0), %%xmm3 \n\t" | |||
| "pcmpgtw %%xmm2, %%xmm0 \n\t" | |||
| "pcmpgtw %%xmm3, %%xmm1 \n\t" | |||
| "pxor %%xmm0, %%xmm2 \n\t" | |||
| "pxor %%xmm1, %%xmm3 \n\t" | |||
| "psubw %%xmm0, %%xmm2 \n\t" | |||
| "psubw %%xmm1, %%xmm3 \n\t" | |||
| "movdqa %%xmm2, %%xmm4 \n\t" | |||
| "movdqa %%xmm3, %%xmm5 \n\t" | |||
| "psubusw (%2), %%xmm2 \n\t" | |||
| "psubusw 16(%2), %%xmm3 \n\t" | |||
| "pxor %%xmm0, %%xmm2 \n\t" | |||
| "pxor %%xmm1, %%xmm3 \n\t" | |||
| "psubw %%xmm0, %%xmm2 \n\t" | |||
| "psubw %%xmm1, %%xmm3 \n\t" | |||
| "movdqa %%xmm2, (%0) \n\t" | |||
| "movdqa %%xmm3, 16(%0) \n\t" | |||
| "movdqa %%xmm4, %%xmm6 \n\t" | |||
| "movdqa %%xmm5, %%xmm0 \n\t" | |||
| "punpcklwd %%xmm7, %%xmm4 \n\t" | |||
| "punpckhwd %%xmm7, %%xmm6 \n\t" | |||
| "punpcklwd %%xmm7, %%xmm5 \n\t" | |||
| "punpckhwd %%xmm7, %%xmm0 \n\t" | |||
| "paddd (%1), %%xmm4 \n\t" | |||
| "paddd 16(%1), %%xmm6 \n\t" | |||
| "paddd 32(%1), %%xmm5 \n\t" | |||
| "paddd 48(%1), %%xmm0 \n\t" | |||
| "movdqa %%xmm4, (%1) \n\t" | |||
| "movdqa %%xmm6, 16(%1) \n\t" | |||
| "movdqa %%xmm5, 32(%1) \n\t" | |||
| "movdqa %%xmm0, 48(%1) \n\t" | |||
| "add $32, %0 \n\t" | |||
| "add $64, %1 \n\t" | |||
| "add $32, %2 \n\t" | |||
| "cmp %3, %0 \n\t" | |||
| " jb 1b \n\t" | |||
| "pxor %%xmm7, %%xmm7 \n\t" | |||
| "1: \n\t" | |||
| "pxor %%xmm0, %%xmm0 \n\t" | |||
| "pxor %%xmm1, %%xmm1 \n\t" | |||
| "movdqa (%0), %%xmm2 \n\t" | |||
| "movdqa 16(%0), %%xmm3 \n\t" | |||
| "pcmpgtw %%xmm2, %%xmm0 \n\t" | |||
| "pcmpgtw %%xmm3, %%xmm1 \n\t" | |||
| "pxor %%xmm0, %%xmm2 \n\t" | |||
| "pxor %%xmm1, %%xmm3 \n\t" | |||
| "psubw %%xmm0, %%xmm2 \n\t" | |||
| "psubw %%xmm1, %%xmm3 \n\t" | |||
| "movdqa %%xmm2, %%xmm4 \n\t" | |||
| "movdqa %%xmm3, %%xmm5 \n\t" | |||
| "psubusw (%2), %%xmm2 \n\t" | |||
| "psubusw 16(%2), %%xmm3 \n\t" | |||
| "pxor %%xmm0, %%xmm2 \n\t" | |||
| "pxor %%xmm1, %%xmm3 \n\t" | |||
| "psubw %%xmm0, %%xmm2 \n\t" | |||
| "psubw %%xmm1, %%xmm3 \n\t" | |||
| "movdqa %%xmm2, (%0) \n\t" | |||
| "movdqa %%xmm3, 16(%0) \n\t" | |||
| "movdqa %%xmm4, %%xmm6 \n\t" | |||
| "movdqa %%xmm5, %%xmm0 \n\t" | |||
| "punpcklwd %%xmm7, %%xmm4 \n\t" | |||
| "punpckhwd %%xmm7, %%xmm6 \n\t" | |||
| "punpcklwd %%xmm7, %%xmm5 \n\t" | |||
| "punpckhwd %%xmm7, %%xmm0 \n\t" | |||
| "paddd (%1), %%xmm4 \n\t" | |||
| "paddd 16(%1), %%xmm6 \n\t" | |||
| "paddd 32(%1), %%xmm5 \n\t" | |||
| "paddd 48(%1), %%xmm0 \n\t" | |||
| "movdqa %%xmm4, (%1) \n\t" | |||
| "movdqa %%xmm6, 16(%1) \n\t" | |||
| "movdqa %%xmm5, 32(%1) \n\t" | |||
| "movdqa %%xmm0, 48(%1) \n\t" | |||
| "add $32, %0 \n\t" | |||
| "add $64, %1 \n\t" | |||
| "add $32, %2 \n\t" | |||
| "cmp %3, %0 \n\t" | |||
| " jb 1b \n\t" | |||
| : "+r" (block), "+r" (sum), "+r" (offset) | |||
| : "r"(block+64) | |||
| ); | |||
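denoise_dct_mmx and denoise_dct_sse2 perform the same per-coefficient update at 8 respectively 16 words per iteration: the absolute value of each coefficient is added to a 32-bit per-position statistic (the sum operand) and the coefficient itself is shrunk towards zero by a per-position offset, with unsigned saturation so it can never cross zero. A scalar model (illustrative, not part of the patch; parameter names mirror the asm operands):

#include <stdint.h>
#include <stdlib.h>

/* Scalar model of denoise_dct_mmx()/denoise_dct_sse2(). */
static void denoise_dct_ref(int16_t block[64], int sum[64],
                            const uint16_t offset[64])
{
    for (int i = 0; i < 64; i++) {
        int level = block[i];
        int a = abs(level);
        sum[i] += a;                                  /* statistics first  */
        a = a > offset[i] ? a - offset[i] : 0;        /* PSUBUSW shrink    */
        block[i] = level < 0 ? -a : a;                /* restore the sign  */
    }
}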
| @@ -705,10 +705,10 @@ void MPV_common_init_mmx(MpegEncContext *s) | |||
| draw_edges = draw_edges_mmx; | |||
| if (mm_flags & MM_SSE2) { | |||
| s->denoise_dct= denoise_dct_sse2; | |||
| } else { | |||
| s->denoise_dct= denoise_dct_mmx; | |||
| } | |||
| s->denoise_dct= denoise_dct_sse2; | |||
| } else { | |||
| s->denoise_dct= denoise_dct_mmx; | |||
| } | |||
| if(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX){ | |||
| if(mm_flags & MM_SSE2){ | |||
| @@ -21,26 +21,26 @@ | |||
| #undef PMAXW | |||
| #ifdef HAVE_MMX2 | |||
| #define SPREADW(a) "pshufw $0, " #a ", " #a " \n\t" | |||
| #define PMAXW(a,b) "pmaxsw " #a ", " #b " \n\t" | |||
| #define PMAXW(a,b) "pmaxsw " #a ", " #b " \n\t" | |||
| #define PMAX(a,b) \ | |||
| "pshufw $0x0E," #a ", " #b " \n\t"\ | |||
| PMAXW(b, a)\ | |||
| "pshufw $0x01," #a ", " #b " \n\t"\ | |||
| PMAXW(b, a) | |||
| "pshufw $0x0E," #a ", " #b " \n\t"\ | |||
| PMAXW(b, a)\ | |||
| "pshufw $0x01," #a ", " #b " \n\t"\ | |||
| PMAXW(b, a) | |||
| #else | |||
| #define SPREADW(a) \ | |||
| "punpcklwd " #a ", " #a " \n\t"\ | |||
| "punpcklwd " #a ", " #a " \n\t" | |||
| "punpcklwd " #a ", " #a " \n\t"\ | |||
| "punpcklwd " #a ", " #a " \n\t" | |||
| #define PMAXW(a,b) \ | |||
| "psubusw " #a ", " #b " \n\t"\ | |||
| "paddw " #a ", " #b " \n\t" | |||
| "psubusw " #a ", " #b " \n\t"\ | |||
| "paddw " #a ", " #b " \n\t" | |||
| #define PMAX(a,b) \ | |||
| "movq " #a ", " #b " \n\t"\ | |||
| "psrlq $32, " #a " \n\t"\ | |||
| PMAXW(b, a)\ | |||
| "movq " #a ", " #b " \n\t"\ | |||
| "psrlq $16, " #a " \n\t"\ | |||
| PMAXW(b, a) | |||
| "movq " #a ", " #b " \n\t"\ | |||
| "psrlq $32, " #a " \n\t"\ | |||
| PMAXW(b, a)\ | |||
| "movq " #a ", " #b " \n\t"\ | |||
| "psrlq $16, " #a " \n\t"\ | |||
| PMAXW(b, a) | |||
| #endif | |||
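The #else branch emulates a per-word maximum on plain MMX: for unsigned 16-bit lanes, saturate(b - a) + a equals max(a, b), so PSUBUSW followed by PADDW stands in for pmaxsw (the values tracked here are small non-negative zig-zag positions, so signed and unsigned maxima agree). PMAX then reduces the four lanes to one by shifting halves down and re-applying PMAXW. The core identity in scalar form (illustrative, not part of the patch):

#include <stdint.h>

/* Scalar model of the MMX PMAXW fallback: unsigned saturating subtract
 * followed by an add yields the per-word maximum. */
static uint16_t pmaxw_ref(uint16_t a, uint16_t b)
{
    uint16_t d = b > a ? (uint16_t)(b - a) : 0;       /* PSUBUSW */
    return (uint16_t)(a + d);                         /* PADDW   */
}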
| @@ -71,18 +71,18 @@ static int RENAME(dct_quantize)(MpegEncContext *s, | |||
| if (!s->h263_aic) { | |||
| #if 1 | |||
| asm volatile ( | |||
| "mul %%ecx \n\t" | |||
| : "=d" (level), "=a"(dummy) | |||
| : "a" ((block[0]>>2) + q), "c" (inverse[q<<1]) | |||
| "mul %%ecx \n\t" | |||
| : "=d" (level), "=a"(dummy) | |||
| : "a" ((block[0]>>2) + q), "c" (inverse[q<<1]) | |||
| ); | |||
| #else | |||
| asm volatile ( | |||
| "xorl %%edx, %%edx \n\t" | |||
| "divw %%cx \n\t" | |||
| "movzwl %%ax, %%eax \n\t" | |||
| : "=a" (level) | |||
| : "a" ((block[0]>>2) + q), "c" (q<<1) | |||
| : "%edx" | |||
| "xorl %%edx, %%edx \n\t" | |||
| "divw %%cx \n\t" | |||
| "movzwl %%ax, %%eax \n\t" | |||
| : "=a" (level) | |||
| : "a" ((block[0]>>2) + q), "c" (q<<1) | |||
| : "%edx" | |||
| ); | |||
| #endif | |||
| } else | |||
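The intra-DC quantisation above replaces a division by 2*qscale with a multiply by a precomputed 32-bit reciprocal: mul %%ecx forms the 64-bit product of (block[0]>>2) + q and inverse[q<<1] (roughly 2^32/(2*q)), and the quotient drops out of the high half in %edx, which is what the disabled #else branch computes directly with divw. A scalar model (illustrative, not part of the patch; inverse_of stands in for the codec's precomputed inverse[] table, whose exact rounding may differ):

#include <stdint.h>

static inline uint32_t inverse_of(uint32_t d)         /* approx. 2^32 / d */
{
    return (uint32_t)((1ULL << 32) / d);
}

/* Scalar model of the DC quantisation: ((block[0]>>2) + q) / (2*q),
 * computed as a reciprocal multiply; the high 32 bits of the product are
 * the quotient, matching what "mul %%ecx" leaves in %edx. */
static inline int quant_dc_ref(int dc, int q)
{
    uint32_t n = (uint32_t)((dc >> 2) + q);
    return (int)(((uint64_t)n * inverse_of(2 * (uint32_t)q)) >> 32);
}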
| @@ -103,94 +103,94 @@ static int RENAME(dct_quantize)(MpegEncContext *s, | |||
| if((s->out_format == FMT_H263 || s->out_format == FMT_H261) && s->mpeg_quant==0){ | |||
| asm volatile( | |||
| "movd %%"REG_a", %%mm3 \n\t" // last_non_zero_p1 | |||
| "movd %%"REG_a", %%mm3 \n\t" // last_non_zero_p1 | |||
| SPREADW(%%mm3) | |||
| "pxor %%mm7, %%mm7 \n\t" // 0 | |||
| "pxor %%mm4, %%mm4 \n\t" // 0 | |||
| "movq (%2), %%mm5 \n\t" // qmat[0] | |||
| "pxor %%mm6, %%mm6 \n\t" | |||
| "psubw (%3), %%mm6 \n\t" // -bias[0] | |||
| "mov $-128, %%"REG_a" \n\t" | |||
| ".balign 16 \n\t" | |||
| "1: \n\t" | |||
| "pxor %%mm1, %%mm1 \n\t" // 0 | |||
| "movq (%1, %%"REG_a"), %%mm0 \n\t" // block[i] | |||
| "pcmpgtw %%mm0, %%mm1 \n\t" // block[i] <= 0 ? 0xFF : 0x00 | |||
| "pxor %%mm1, %%mm0 \n\t" | |||
| "psubw %%mm1, %%mm0 \n\t" // ABS(block[i]) | |||
| "psubusw %%mm6, %%mm0 \n\t" // ABS(block[i]) + bias[0] | |||
| "pmulhw %%mm5, %%mm0 \n\t" // (ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16 | |||
| "por %%mm0, %%mm4 \n\t" | |||
| "pxor %%mm1, %%mm0 \n\t" | |||
| "psubw %%mm1, %%mm0 \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i]) | |||
| "movq %%mm0, (%5, %%"REG_a") \n\t" | |||
| "pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 0xFF : 0x00 | |||
| "movq (%4, %%"REG_a"), %%mm1 \n\t" | |||
| "movq %%mm7, (%1, %%"REG_a") \n\t" // 0 | |||
| "pandn %%mm1, %%mm0 \n\t" | |||
| PMAXW(%%mm0, %%mm3) | |||
| "add $8, %%"REG_a" \n\t" | |||
| " js 1b \n\t" | |||
| PMAX(%%mm3, %%mm0) | |||
| "movd %%mm3, %%"REG_a" \n\t" | |||
| "movzb %%al, %%"REG_a" \n\t" // last_non_zero_p1 | |||
| : "+a" (last_non_zero_p1) | |||
| "pxor %%mm7, %%mm7 \n\t" // 0 | |||
| "pxor %%mm4, %%mm4 \n\t" // 0 | |||
| "movq (%2), %%mm5 \n\t" // qmat[0] | |||
| "pxor %%mm6, %%mm6 \n\t" | |||
| "psubw (%3), %%mm6 \n\t" // -bias[0] | |||
| "mov $-128, %%"REG_a" \n\t" | |||
| ".balign 16 \n\t" | |||
| "1: \n\t" | |||
| "pxor %%mm1, %%mm1 \n\t" // 0 | |||
| "movq (%1, %%"REG_a"), %%mm0 \n\t" // block[i] | |||
| "pcmpgtw %%mm0, %%mm1 \n\t" // block[i] <= 0 ? 0xFF : 0x00 | |||
| "pxor %%mm1, %%mm0 \n\t" | |||
| "psubw %%mm1, %%mm0 \n\t" // ABS(block[i]) | |||
| "psubusw %%mm6, %%mm0 \n\t" // ABS(block[i]) + bias[0] | |||
| "pmulhw %%mm5, %%mm0 \n\t" // (ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16 | |||
| "por %%mm0, %%mm4 \n\t" | |||
| "pxor %%mm1, %%mm0 \n\t" | |||
| "psubw %%mm1, %%mm0 \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i]) | |||
| "movq %%mm0, (%5, %%"REG_a") \n\t" | |||
| "pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 0xFF : 0x00 | |||
| "movq (%4, %%"REG_a"), %%mm1 \n\t" | |||
| "movq %%mm7, (%1, %%"REG_a") \n\t" // 0 | |||
| "pandn %%mm1, %%mm0 \n\t" | |||
| PMAXW(%%mm0, %%mm3) | |||
| "add $8, %%"REG_a" \n\t" | |||
| " js 1b \n\t" | |||
| PMAX(%%mm3, %%mm0) | |||
| "movd %%mm3, %%"REG_a" \n\t" | |||
| "movzb %%al, %%"REG_a" \n\t" // last_non_zero_p1 | |||
| : "+a" (last_non_zero_p1) | |||
| : "r" (block+64), "r" (qmat), "r" (bias), | |||
| "r" (inv_zigzag_direct16+64), "r" (temp_block+64) | |||
| ); | |||
| // note: the asm is split because gcc doesn't like that many operands ... | |||
| asm volatile( | |||
| "movd %1, %%mm1 \n\t" // max_qcoeff | |||
| SPREADW(%%mm1) | |||
| "psubusw %%mm1, %%mm4 \n\t" | |||
| "packuswb %%mm4, %%mm4 \n\t" | |||
| "movd %%mm4, %0 \n\t" // *overflow | |||
| "movd %1, %%mm1 \n\t" // max_qcoeff | |||
| SPREADW(%%mm1) | |||
| "psubusw %%mm1, %%mm4 \n\t" | |||
| "packuswb %%mm4, %%mm4 \n\t" | |||
| "movd %%mm4, %0 \n\t" // *overflow | |||
| : "=g" (*overflow) | |||
| : "g" (s->max_qcoeff) | |||
| ); | |||
| }else{ // FMT_H263 | |||
| asm volatile( | |||
| "movd %%"REG_a", %%mm3 \n\t" // last_non_zero_p1 | |||
| "movd %%"REG_a", %%mm3 \n\t" // last_non_zero_p1 | |||
| SPREADW(%%mm3) | |||
| "pxor %%mm7, %%mm7 \n\t" // 0 | |||
| "pxor %%mm4, %%mm4 \n\t" // 0 | |||
| "mov $-128, %%"REG_a" \n\t" | |||
| ".balign 16 \n\t" | |||
| "1: \n\t" | |||
| "pxor %%mm1, %%mm1 \n\t" // 0 | |||
| "movq (%1, %%"REG_a"), %%mm0 \n\t" // block[i] | |||
| "pcmpgtw %%mm0, %%mm1 \n\t" // block[i] <= 0 ? 0xFF : 0x00 | |||
| "pxor %%mm1, %%mm0 \n\t" | |||
| "psubw %%mm1, %%mm0 \n\t" // ABS(block[i]) | |||
| "movq (%3, %%"REG_a"), %%mm6 \n\t" // bias[0] | |||
| "paddusw %%mm6, %%mm0 \n\t" // ABS(block[i]) + bias[0] | |||
| "movq (%2, %%"REG_a"), %%mm5 \n\t" // qmat[i] | |||
| "pmulhw %%mm5, %%mm0 \n\t" // (ABS(block[i])*qmat[0] + bias[0]*qmat[0])>>16 | |||
| "por %%mm0, %%mm4 \n\t" | |||
| "pxor %%mm1, %%mm0 \n\t" | |||
| "psubw %%mm1, %%mm0 \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i]) | |||
| "movq %%mm0, (%5, %%"REG_a") \n\t" | |||
| "pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 0xFF : 0x00 | |||
| "movq (%4, %%"REG_a"), %%mm1 \n\t" | |||
| "movq %%mm7, (%1, %%"REG_a") \n\t" // 0 | |||
| "pandn %%mm1, %%mm0 \n\t" | |||
| PMAXW(%%mm0, %%mm3) | |||
| "add $8, %%"REG_a" \n\t" | |||
| " js 1b \n\t" | |||
| PMAX(%%mm3, %%mm0) | |||
| "movd %%mm3, %%"REG_a" \n\t" | |||
| "movzb %%al, %%"REG_a" \n\t" // last_non_zero_p1 | |||
| : "+a" (last_non_zero_p1) | |||
| "pxor %%mm7, %%mm7 \n\t" // 0 | |||
| "pxor %%mm4, %%mm4 \n\t" // 0 | |||
| "mov $-128, %%"REG_a" \n\t" | |||
| ".balign 16 \n\t" | |||
| "1: \n\t" | |||
| "pxor %%mm1, %%mm1 \n\t" // 0 | |||
| "movq (%1, %%"REG_a"), %%mm0 \n\t" // block[i] | |||
| "pcmpgtw %%mm0, %%mm1 \n\t" // block[i] <= 0 ? 0xFF : 0x00 | |||
| "pxor %%mm1, %%mm0 \n\t" | |||
| "psubw %%mm1, %%mm0 \n\t" // ABS(block[i]) | |||
| "movq (%3, %%"REG_a"), %%mm6 \n\t" // bias[0] | |||
| "paddusw %%mm6, %%mm0 \n\t" // ABS(block[i]) + bias[0] | |||
| "movq (%2, %%"REG_a"), %%mm5 \n\t" // qmat[i] | |||
| "pmulhw %%mm5, %%mm0 \n\t" // (ABS(block[i])*qmat[0] + bias[0]*qmat[0])>>16 | |||
| "por %%mm0, %%mm4 \n\t" | |||
| "pxor %%mm1, %%mm0 \n\t" | |||
| "psubw %%mm1, %%mm0 \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i]) | |||
| "movq %%mm0, (%5, %%"REG_a") \n\t" | |||
| "pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 0xFF : 0x00 | |||
| "movq (%4, %%"REG_a"), %%mm1 \n\t" | |||
| "movq %%mm7, (%1, %%"REG_a") \n\t" // 0 | |||
| "pandn %%mm1, %%mm0 \n\t" | |||
| PMAXW(%%mm0, %%mm3) | |||
| "add $8, %%"REG_a" \n\t" | |||
| " js 1b \n\t" | |||
| PMAX(%%mm3, %%mm0) | |||
| "movd %%mm3, %%"REG_a" \n\t" | |||
| "movzb %%al, %%"REG_a" \n\t" // last_non_zero_p1 | |||
| : "+a" (last_non_zero_p1) | |||
| : "r" (block+64), "r" (qmat+64), "r" (bias+64), | |||
| "r" (inv_zigzag_direct16+64), "r" (temp_block+64) | |||
| ); | |||
| // note: the asm is split because gcc doesn't like that many operands ... | |||
| asm volatile( | |||
| "movd %1, %%mm1 \n\t" // max_qcoeff | |||
| SPREADW(%%mm1) | |||
| "psubusw %%mm1, %%mm4 \n\t" | |||
| "packuswb %%mm4, %%mm4 \n\t" | |||
| "movd %%mm4, %0 \n\t" // *overflow | |||
| "movd %1, %%mm1 \n\t" // max_qcoeff | |||
| SPREADW(%%mm1) | |||
| "psubusw %%mm1, %%mm4 \n\t" | |||
| "packuswb %%mm4, %%mm4 \n\t" | |||
| "movd %%mm4, %0 \n\t" // *overflow | |||
| : "=g" (*overflow) | |||
| : "g" (s->max_qcoeff) | |||
| ); | |||
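
Taken together, the two asm branches above implement the same per-coefficient scheme; below is a hedged plain-C sketch (bias/sign handling simplified, and the zig-zag bookkeeping done via inv_zigzag_direct16 in the asm is reduced to plain index order). Operand ranges are assumed to fit 16 bits, as they do for the pmulhw-based code.

    #include <stdint.h>
    #include <stdlib.h>

    static int dct_quantize_sketch(int16_t block[64], const uint16_t qmat[64],
                                   const uint16_t bias[64], int16_t out[64],
                                   int max_qcoeff, int *overflow)
    {
        int i, last_non_zero_p1 = 0;

        *overflow = 0;
        for (i = 0; i < 64; i++) {
            int sign  = block[i] < 0 ? -1 : 0;
            /* scale |coefficient| + bias by the 16-bit fixed-point qmat entry */
            int level = (int)(((unsigned)(abs(block[i]) + bias[i]) * qmat[i]) >> 16);

            if (level > max_qcoeff)
                *overflow = 1;            /* the asm ORs all levels into mm4 and
                                             tests them against max_qcoeff once */
            out[i]   = (int16_t)((level ^ sign) - sign);  /* restore the sign   */
            block[i] = 0;                                 /* the asm zeroes the input */
            if (level)
                last_non_zero_p1 = i + 1; /* tracked in zig-zag order in the asm */
        }
        return last_non_zero_p1;
    }
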
| @@ -257,13 +257,13 @@ enum PixelFormat avcodec_get_pix_fmt(const char* name) | |||
| for (i=0; i < PIX_FMT_NB; i++) | |||
| if (!strcmp(pix_fmt_info[i].name, name)) | |||
| break; | |||
| break; | |||
| return i; | |||
| } | |||
| /* Picture fields are filled with 'ptr' addresses. Also returns the size. */ | |||
| int avpicture_fill(AVPicture *picture, uint8_t *ptr, | |||
| int pix_fmt, int width, int height) | |||
| int pix_fmt, int width, int height) | |||
| { | |||
| int size, w2, h2, size2; | |||
| PixFmtInfo *pinfo; | |||
| @@ -373,36 +373,36 @@ int avpicture_layout(const AVPicture* src, int pix_fmt, int width, int height, | |||
| pix_fmt == PIX_FMT_RGB565 || | |||
| pix_fmt == PIX_FMT_RGB555) | |||
| w = width * 2; | |||
| else if (pix_fmt == PIX_FMT_UYVY411) | |||
| w = width + width/2; | |||
| else if (pix_fmt == PIX_FMT_PAL8) | |||
| w = width; | |||
| else | |||
| w = width * (pf->depth * pf->nb_channels / 8); | |||
| data_planes = 1; | |||
| h = height; | |||
| else if (pix_fmt == PIX_FMT_UYVY411) | |||
| w = width + width/2; | |||
| else if (pix_fmt == PIX_FMT_PAL8) | |||
| w = width; | |||
| else | |||
| w = width * (pf->depth * pf->nb_channels / 8); | |||
| data_planes = 1; | |||
| h = height; | |||
| } else { | |||
| data_planes = pf->nb_channels; | |||
| w = (width*pf->depth + 7)/8; | |||
| h = height; | |||
| w = (width*pf->depth + 7)/8; | |||
| h = height; | |||
| } | |||
| for (i=0; i<data_planes; i++) { | |||
| if (i == 1) { | |||
| w = width >> pf->x_chroma_shift; | |||
| h = height >> pf->y_chroma_shift; | |||
| } | |||
| w = width >> pf->x_chroma_shift; | |||
| h = height >> pf->y_chroma_shift; | |||
| } | |||
| s = src->data[i]; | |||
| for(j=0; j<h; j++) { | |||
| memcpy(dest, s, w); | |||
| dest += w; | |||
| s += src->linesize[i]; | |||
| } | |||
| for(j=0; j<h; j++) { | |||
| memcpy(dest, s, w); | |||
| dest += w; | |||
| s += src->linesize[i]; | |||
| } | |||
| } | |||
| if (pf->pixel_type == FF_PIXEL_PALETTE) | |||
| memcpy((unsigned char *)(((size_t)dest + 3) & ~3), src->data[1], 256 * 4); | |||
| memcpy((unsigned char *)(((size_t)dest + 3) & ~3), src->data[1], 256 * 4); | |||
| return size; | |||
| } | |||
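
A hedged usage sketch of the helpers touched above. It assumes the classic AVPicture API of this tree (avpicture_get_size(), avpicture_fill(), avpicture_layout(), av_malloc()/av_free()); the dest/dest_size parameters of avpicture_layout() are not visible in this hunk and are taken from the header.

    #include "avcodec.h"   /* AVPicture, avpicture_* helpers, av_malloc()/av_free() */

    static int serialize_yuv420p(int w, int h, unsigned char *dest, int dest_size)
    {
        AVPicture pic;
        int size = avpicture_get_size(PIX_FMT_YUV420P, w, h);
        uint8_t *buf = av_malloc(size);

        if (!buf)
            return -1;
        avpicture_fill(&pic, buf, PIX_FMT_YUV420P, w, h);  /* wrap buf as Y/U/V planes */
        /* ... write pixel data into pic.data[0..2] here ... */
        size = avpicture_layout(&pic, PIX_FMT_YUV420P, w, h, dest, dest_size);
        av_free(buf);
        return size;
    }
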
| @@ -486,9 +486,9 @@ static int avg_bits_per_pixel(int pix_fmt) | |||
| case PIX_FMT_RGB555: | |||
| bits = 16; | |||
| break; | |||
| case PIX_FMT_UYVY411: | |||
| bits = 12; | |||
| break; | |||
| case PIX_FMT_UYVY411: | |||
| bits = 12; | |||
| break; | |||
| default: | |||
| bits = pf->depth * pf->nb_channels; | |||
| break; | |||
| @@ -604,9 +604,9 @@ void img_copy(AVPicture *dst, const AVPicture *src, | |||
| case PIX_FMT_RGB555: | |||
| bits = 16; | |||
| break; | |||
| case PIX_FMT_UYVY411: | |||
| bits = 12; | |||
| break; | |||
| case PIX_FMT_UYVY411: | |||
| bits = 12; | |||
| break; | |||
| default: | |||
| bits = pf->depth * pf->nb_channels; | |||
| break; | |||
| @@ -910,11 +910,11 @@ static void uyvy411_to_yuv411p(AVPicture *dst, const AVPicture *src, | |||
| cr = cr1; | |||
| for(w = width; w >= 4; w -= 4) { | |||
| cb[0] = p[0]; | |||
| lum[0] = p[1]; | |||
| lum[0] = p[1]; | |||
| lum[1] = p[2]; | |||
| cr[0] = p[3]; | |||
| lum[2] = p[4]; | |||
| lum[3] = p[5]; | |||
| lum[2] = p[4]; | |||
| lum[3] = p[5]; | |||
| p += 6; | |||
| lum += 4; | |||
| cb++; | |||
| @@ -996,7 +996,7 @@ static void yuv420p_to_uyvy422(AVPicture *dst, const AVPicture *src, | |||
| #define SCALEBITS 10 | |||
| #define ONE_HALF (1 << (SCALEBITS - 1)) | |||
| #define FIX(x) ((int) ((x) * (1<<SCALEBITS) + 0.5)) | |||
| #define FIX(x) ((int) ((x) * (1<<SCALEBITS) + 0.5)) | |||
| #define YUV_TO_RGB1_CCIR(cb1, cr1)\ | |||
| {\ | |||
| @@ -1046,7 +1046,7 @@ static void yuv420p_to_uyvy422(AVPicture *dst, const AVPicture *src, | |||
| static inline int C_JPEG_TO_CCIR(int y) { | |||
| y = (((y - 128) * FIX(112.0/127.0) + (ONE_HALF + (128 << SCALEBITS))) >> SCALEBITS); | |||
| if (y < 16) | |||
| y = 16; | |||
| y = 16; | |||
| return y; | |||
| } | |||
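
A small worked example of the SCALEBITS fixed-point convention used above (illustrative only, not part of the patch): FIX() turns a fractional constant into an integer scaled by 2^SCALEBITS, and a product is brought back to pixel range by adding ONE_HALF and shifting right. C_JPEG_TO_CCIR() above follows the same convention when it rescales full-range JPEG chroma and clamps the low end at 16.

    #include <stdio.h>

    #define SCALEBITS 10
    #define ONE_HALF  (1 << (SCALEBITS - 1))
    #define FIX(x)    ((int) ((x) * (1 << SCALEBITS) + 0.5))

    int main(void)
    {
        int r = 200, g = 100, b = 50;
        /* rounded luma approximation: y ~ 0.299 R + 0.587 G + 0.114 B */
        int y = (FIX(0.299) * r + FIX(0.587) * g + FIX(0.114) * b + ONE_HALF) >> SCALEBITS;

        printf("FIX(0.299) = %d, y = %d\n", FIX(0.299), y);   /* 306, 124 */
        return 0;
    }
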
| @@ -1681,7 +1681,7 @@ static void gray_to_monoblack(AVPicture *dst, const AVPicture *src, | |||
| typedef struct ConvertEntry { | |||
| void (*convert)(AVPicture *dst, | |||
| const AVPicture *src, int width, int height); | |||
| const AVPicture *src, int width, int height); | |||
| } ConvertEntry; | |||
| /* Add each new conversion function in this table. In order to be able | |||
| @@ -1721,7 +1721,7 @@ static ConvertEntry convert_table[PIX_FMT_NB][PIX_FMT_NB] = { | |||
| [PIX_FMT_RGBA32] = { | |||
| .convert = yuv420p_to_rgba32 | |||
| }, | |||
| [PIX_FMT_UYVY422] = { | |||
| [PIX_FMT_UYVY422] = { | |||
| .convert = yuv420p_to_uyvy422, | |||
| }, | |||
| }, | |||
| @@ -2224,7 +2224,7 @@ static int get_alpha_info_pal8(const AVPicture *src, int width, int height) | |||
| * @return ORed mask of FF_ALPHA_xxx constants | |||
| */ | |||
| int img_get_alpha_info(const AVPicture *src, | |||
| int pix_fmt, int width, int height) | |||
| int pix_fmt, int width, int height) | |||
| { | |||
| PixFmtInfo *pf = &pix_fmt_info[pix_fmt]; | |||
| int ret; | |||
| @@ -2300,10 +2300,10 @@ int img_get_alpha_info(const AVPicture *src, | |||
| /* filter parameters: [-1 4 2 4 -1] // 8 */ | |||
| static void deinterlace_line(uint8_t *dst, | |||
| const uint8_t *lum_m4, const uint8_t *lum_m3, | |||
| const uint8_t *lum_m2, const uint8_t *lum_m1, | |||
| const uint8_t *lum, | |||
| int size) | |||
| const uint8_t *lum_m4, const uint8_t *lum_m3, | |||
| const uint8_t *lum_m2, const uint8_t *lum_m1, | |||
| const uint8_t *lum, | |||
| int size) | |||
| { | |||
| #ifndef HAVE_MMX | |||
| uint8_t *cm = cropTbl + MAX_NEG_CROP; | |||
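
For one output sample, the [-1 4 2 4 -1] / 8 kernel noted above amounts to the following (a sketch with explicit clamping instead of the cropTbl lookup; rounding before the shift assumed, as in the C reference path):

    #include <stdint.h>

    static inline uint8_t deint_sample(uint8_t m4, uint8_t m3, uint8_t m2,
                                       uint8_t m1, uint8_t p0)
    {
        int v = (-(int)m4 + 4 * m3 + 2 * m2 + 4 * m1 - (int)p0 + 4) >> 3;

        if (v < 0)   v = 0;
        if (v > 255) v = 255;
        return (uint8_t)v;
    }
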
| @@ -2421,7 +2421,7 @@ static void deinterlace_bottom_field(uint8_t *dst, int dst_wrap, | |||
| } | |||
| static void deinterlace_bottom_field_inplace(uint8_t *src1, int src_wrap, | |||
| int width, int height) | |||
| int width, int height) | |||
| { | |||
| uint8_t *src_m1, *src_0, *src_p1, *src_p2; | |||
| int y; | |||
| @@ -2455,7 +2455,7 @@ int avpicture_deinterlace(AVPicture *dst, const AVPicture *src, | |||
| if (pix_fmt != PIX_FMT_YUV420P && | |||
| pix_fmt != PIX_FMT_YUV422P && | |||
| pix_fmt != PIX_FMT_YUV444P && | |||
| pix_fmt != PIX_FMT_YUV411P) | |||
| pix_fmt != PIX_FMT_YUV411P) | |||
| return -1; | |||
| if ((width & 3) != 0 || (height & 3) != 0) | |||
| return -1; | |||
| @@ -821,7 +821,7 @@ static void glue(RGB_NAME, _to_pal8)(AVPicture *dst, const AVPicture *src, | |||
| #ifdef RGBA_IN | |||
| static int glue(get_alpha_info_, RGB_NAME)(const AVPicture *src, | |||
| int width, int height) | |||
| int width, int height) | |||
| { | |||
| const unsigned char *p; | |||
| int src_wrap, ret, x, y; | |||
| @@ -64,8 +64,8 @@ static inline int get_phase(int pos) | |||
| /* This function must be optimized */ | |||
| static void h_resample_fast(uint8_t *dst, int dst_width, const uint8_t *src, | |||
| int src_width, int src_start, int src_incr, | |||
| int16_t *filters) | |||
| int src_width, int src_start, int src_incr, | |||
| int16_t *filters) | |||
| { | |||
| int src_pos, phase, sum, i; | |||
| const uint8_t *s; | |||
| @@ -108,7 +108,7 @@ static void h_resample_fast(uint8_t *dst, int dst_width, const uint8_t *src, | |||
| /* This function must be optimized */ | |||
| static void v_resample(uint8_t *dst, int dst_width, const uint8_t *src, | |||
| int wrap, int16_t *filter) | |||
| int wrap, int16_t *filter) | |||
| { | |||
| int sum, i; | |||
| const uint8_t *s; | |||
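
A scalar sketch of the vertical polyphase step that v_resample() above implements: each output pixel is a weighted sum of the same column in consecutive source lines. The tap count and fixed-point shift here are assumptions for illustration, not values taken from this hunk.

    #include <stdint.h>

    static void v_resample_ref(uint8_t *dst, int dst_width, const uint8_t *src,
                               int wrap, const int16_t *filter, int ntaps, int shift)
    {
        int i, x;

        for (x = 0; x < dst_width; x++) {
            int sum = 0;
            for (i = 0; i < ntaps; i++)
                sum += src[x + i * wrap] * filter[i];
            sum >>= shift;
            if (sum < 0)   sum = 0;
            if (sum > 255) sum = 255;
            dst[x] = (uint8_t)sum;
        }
    }
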
| @@ -167,7 +167,7 @@ static void v_resample(uint8_t *dst, int dst_width, const uint8_t *src, | |||
| /* XXX: do four pixels at a time */ | |||
| static void h_resample_fast4_mmx(uint8_t *dst, int dst_width, | |||
| const uint8_t *src, int src_width, | |||
| const uint8_t *src, int src_width, | |||
| int src_start, int src_incr, int16_t *filters) | |||
| { | |||
| int src_pos, phase; | |||
| @@ -212,7 +212,7 @@ static void h_resample_fast4_mmx(uint8_t *dst, int dst_width, | |||
| } | |||
| static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src, | |||
| int wrap, int16_t *filter) | |||
| int wrap, int16_t *filter) | |||
| { | |||
| int sum, i, v; | |||
| const uint8_t *s; | |||
| @@ -277,18 +277,18 @@ static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src, | |||
| #endif | |||
| #ifdef HAVE_ALTIVEC | |||
| typedef union { | |||
| typedef union { | |||
| vector unsigned char v; | |||
| unsigned char c[16]; | |||
| } vec_uc_t; | |||
| typedef union { | |||
| typedef union { | |||
| vector signed short v; | |||
| signed short s[8]; | |||
| } vec_ss_t; | |||
| void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src, | |||
| int wrap, int16_t *filter) | |||
| int wrap, int16_t *filter) | |||
| { | |||
| int sum, i; | |||
| const uint8_t *s; | |||
| @@ -405,7 +405,7 @@ void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src, | |||
| /* slow version to handle limit cases. Does not need optimisation */ | |||
| static void h_resample_slow(uint8_t *dst, int dst_width, | |||
| const uint8_t *src, int src_width, | |||
| const uint8_t *src, int src_width, | |||
| int src_start, int src_incr, int16_t *filters) | |||
| { | |||
| int src_pos, phase, sum, j, v, i; | |||
| @@ -441,8 +441,8 @@ static void h_resample_slow(uint8_t *dst, int dst_width, | |||
| } | |||
| static void h_resample(uint8_t *dst, int dst_width, const uint8_t *src, | |||
| int src_width, int src_start, int src_incr, | |||
| int16_t *filters) | |||
| int src_width, int src_start, int src_incr, | |||
| int16_t *filters) | |||
| { | |||
| int n, src_end; | |||
| @@ -559,7 +559,7 @@ ImgReSampleContext *img_resample_full_init(int owidth, int oheight, | |||
| ImgReSampleContext *s; | |||
| if (!owidth || !oheight || !iwidth || !iheight) | |||
| return NULL; | |||
| return NULL; | |||
| s = av_mallocz(sizeof(ImgReSampleContext)); | |||
| if (!s) | |||
| @@ -70,13 +70,13 @@ static void build_modpred(Indeo3DecodeContext *s) | |||
| for (i=0; i < 128; ++i) { | |||
| s->ModPred[i+0*128] = (i > 126) ? 254 : 2*((i + 1) - ((i + 1) % 2)); | |||
| s->ModPred[i+1*128] = (i == 7) ? 20 : ((i == 119 || i == 120) | |||
| ? 236 : 2*((i + 2) - ((i + 1) % 3))); | |||
| ? 236 : 2*((i + 2) - ((i + 1) % 3))); | |||
| s->ModPred[i+2*128] = (i > 125) ? 248 : 2*((i + 2) - ((i + 2) % 4)); | |||
| s->ModPred[i+3*128] = 2*((i + 1) - ((i - 3) % 5)); | |||
| s->ModPred[i+3*128] = 2*((i + 1) - ((i - 3) % 5)); | |||
| s->ModPred[i+4*128] = (i == 8) ? 20 : 2*((i + 1) - ((i - 3) % 6)); | |||
| s->ModPred[i+5*128] = 2*((i + 4) - ((i + 3) % 7)); | |||
| s->ModPred[i+5*128] = 2*((i + 4) - ((i + 3) % 7)); | |||
| s->ModPred[i+6*128] = (i > 123) ? 240 : 2*((i + 4) - ((i + 4) % 8)); | |||
| s->ModPred[i+7*128] = 2*((i + 5) - ((i + 4) % 9)); | |||
| s->ModPred[i+7*128] = 2*((i + 5) - ((i + 4) % 9)); | |||
| } | |||
| s->corrector_type = (unsigned short *) av_malloc (24 * 256 * sizeof(unsigned short)); | |||
| @@ -84,8 +84,8 @@ static void build_modpred(Indeo3DecodeContext *s) | |||
| for (i=0; i < 24; ++i) { | |||
| for (j=0; j < 256; ++j) { | |||
| s->corrector_type[i*256+j] = (j < corrector_type_0[i]) | |||
| ? 1 : ((j < 248 || (i == 16 && j == 248)) | |||
| ? 0 : corrector_type_2[j - 248]); | |||
| ? 1 : ((j < 248 || (i == 16 && j == 248)) | |||
| ? 0 : corrector_type_2[j - 248]); | |||
| } | |||
| } | |||
| } | |||
| @@ -83,10 +83,10 @@ | |||
| */ | |||
| #if CONST_BITS == 8 | |||
| #define FIX_0_382683433 ((int32_t) 98) /* FIX(0.382683433) */ | |||
| #define FIX_0_541196100 ((int32_t) 139) /* FIX(0.541196100) */ | |||
| #define FIX_0_707106781 ((int32_t) 181) /* FIX(0.707106781) */ | |||
| #define FIX_1_306562965 ((int32_t) 334) /* FIX(1.306562965) */ | |||
| #define FIX_0_382683433 ((int32_t) 98) /* FIX(0.382683433) */ | |||
| #define FIX_0_541196100 ((int32_t) 139) /* FIX(0.541196100) */ | |||
| #define FIX_0_707106781 ((int32_t) 181) /* FIX(0.707106781) */ | |||
| #define FIX_1_306562965 ((int32_t) 334) /* FIX(1.306562965) */ | |||
| #else | |||
| #define FIX_0_382683433 FIX(0.382683433) | |||
| #define FIX_0_541196100 FIX(0.541196100) | |||
| @@ -135,7 +135,7 @@ static always_inline void row_fdct(DCTELEM * data){ | |||
| /* Even part */ | |||
| tmp10 = tmp0 + tmp3; /* phase 2 */ | |||
| tmp10 = tmp0 + tmp3; /* phase 2 */ | |||
| tmp13 = tmp0 - tmp3; | |||
| tmp11 = tmp1 + tmp2; | |||
| tmp12 = tmp1 - tmp2; | |||
| @@ -144,30 +144,30 @@ static always_inline void row_fdct(DCTELEM * data){ | |||
| dataptr[4] = tmp10 - tmp11; | |||
| z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */ | |||
| dataptr[2] = tmp13 + z1; /* phase 5 */ | |||
| dataptr[2] = tmp13 + z1; /* phase 5 */ | |||
| dataptr[6] = tmp13 - z1; | |||
| /* Odd part */ | |||
| tmp10 = tmp4 + tmp5; /* phase 2 */ | |||
| tmp10 = tmp4 + tmp5; /* phase 2 */ | |||
| tmp11 = tmp5 + tmp6; | |||
| tmp12 = tmp6 + tmp7; | |||
| /* The rotator is modified from fig 4-8 to avoid extra negations. */ | |||
| z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */ | |||
| z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */ | |||
| z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */ | |||
| z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */ | |||
| z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */ | |||
| z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */ | |||
| z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */ | |||
| z11 = tmp7 + z3; /* phase 5 */ | |||
| z11 = tmp7 + z3; /* phase 5 */ | |||
| z13 = tmp7 - z3; | |||
| dataptr[5] = z13 + z2; /* phase 6 */ | |||
| dataptr[5] = z13 + z2; /* phase 6 */ | |||
| dataptr[3] = z13 - z2; | |||
| dataptr[1] = z11 + z4; | |||
| dataptr[7] = z11 - z4; | |||
| dataptr += DCTSIZE; /* advance pointer to next row */ | |||
| dataptr += DCTSIZE; /* advance pointer to next row */ | |||
| } | |||
| } | |||
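
As a quick illustration of the CONST_BITS == 8 constants above (a sketch, not part of the patch): MULTIPLY(x, FIX_0_707106781) is roughly (x * 181) >> 8, since 181 / 256 ≈ 0.7071; the real MULTIPLY also rounds via DESCALE.

    #include <stdio.h>

    int main(void)
    {
        int x = 1000;
        int approx = (x * 181) >> 8;                        /* 707 */

        printf("%d vs %.1f\n", approx, x * 0.707106781);    /* 707 vs 707.1 */
        return 0;
    }
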
| @@ -202,7 +202,7 @@ fdct_ifast (DCTELEM * data) | |||
| /* Even part */ | |||
| tmp10 = tmp0 + tmp3; /* phase 2 */ | |||
| tmp10 = tmp0 + tmp3; /* phase 2 */ | |||
| tmp13 = tmp0 - tmp3; | |||
| tmp11 = tmp1 + tmp2; | |||
| tmp12 = tmp1 - tmp2; | |||
| @@ -216,7 +216,7 @@ fdct_ifast (DCTELEM * data) | |||
| /* Odd part */ | |||
| tmp10 = tmp4 + tmp5; /* phase 2 */ | |||
| tmp10 = tmp4 + tmp5; /* phase 2 */ | |||
| tmp11 = tmp5 + tmp6; | |||
| tmp12 = tmp6 + tmp7; | |||
| @@ -226,7 +226,7 @@ fdct_ifast (DCTELEM * data) | |||
| z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */ | |||
| z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */ | |||
| z11 = tmp7 + z3; /* phase 5 */ | |||
| z11 = tmp7 + z3; /* phase 5 */ | |||
| z13 = tmp7 - z3; | |||
| dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */ | |||
| @@ -234,7 +234,7 @@ fdct_ifast (DCTELEM * data) | |||
| dataptr[DCTSIZE*1] = z11 + z4; | |||
| dataptr[DCTSIZE*7] = z11 - z4; | |||
| dataptr++; /* advance pointer to next column */ | |||
| dataptr++; /* advance pointer to next column */ | |||
| } | |||
| } | |||
| @@ -293,7 +293,7 @@ fdct_ifast248 (DCTELEM * data) | |||
| dataptr[DCTSIZE*3] = tmp13 + z1; | |||
| dataptr[DCTSIZE*7] = tmp13 - z1; | |||
| dataptr++; /* advance pointer to next column */ | |||
| dataptr++; /* advance pointer to next column */ | |||
| } | |||
| } | |||
| @@ -92,10 +92,10 @@ | |||
| #if BITS_IN_JSAMPLE == 8 | |||
| #define CONST_BITS 13 | |||
| #define PASS1_BITS 4 /* set this to 2 if 16x16 multiplies are faster */ | |||
| #define PASS1_BITS 4 /* set this to 2 if 16x16 multiplies are faster */ | |||
| #else | |||
| #define CONST_BITS 13 | |||
| #define PASS1_BITS 1 /* lose a little precision to avoid overflow */ | |||
| #define PASS1_BITS 1 /* lose a little precision to avoid overflow */ | |||
| #endif | |||
| /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus | |||
| @@ -106,18 +106,18 @@ | |||
| */ | |||
| #if CONST_BITS == 13 | |||
| #define FIX_0_298631336 ((int32_t) 2446) /* FIX(0.298631336) */ | |||
| #define FIX_0_390180644 ((int32_t) 3196) /* FIX(0.390180644) */ | |||
| #define FIX_0_541196100 ((int32_t) 4433) /* FIX(0.541196100) */ | |||
| #define FIX_0_765366865 ((int32_t) 6270) /* FIX(0.765366865) */ | |||
| #define FIX_0_899976223 ((int32_t) 7373) /* FIX(0.899976223) */ | |||
| #define FIX_1_175875602 ((int32_t) 9633) /* FIX(1.175875602) */ | |||
| #define FIX_1_501321110 ((int32_t) 12299) /* FIX(1.501321110) */ | |||
| #define FIX_1_847759065 ((int32_t) 15137) /* FIX(1.847759065) */ | |||
| #define FIX_1_961570560 ((int32_t) 16069) /* FIX(1.961570560) */ | |||
| #define FIX_2_053119869 ((int32_t) 16819) /* FIX(2.053119869) */ | |||
| #define FIX_2_562915447 ((int32_t) 20995) /* FIX(2.562915447) */ | |||
| #define FIX_3_072711026 ((int32_t) 25172) /* FIX(3.072711026) */ | |||
| #define FIX_0_298631336 ((int32_t) 2446) /* FIX(0.298631336) */ | |||
| #define FIX_0_390180644 ((int32_t) 3196) /* FIX(0.390180644) */ | |||
| #define FIX_0_541196100 ((int32_t) 4433) /* FIX(0.541196100) */ | |||
| #define FIX_0_765366865 ((int32_t) 6270) /* FIX(0.765366865) */ | |||
| #define FIX_0_899976223 ((int32_t) 7373) /* FIX(0.899976223) */ | |||
| #define FIX_1_175875602 ((int32_t) 9633) /* FIX(1.175875602) */ | |||
| #define FIX_1_501321110 ((int32_t) 12299) /* FIX(1.501321110) */ | |||
| #define FIX_1_847759065 ((int32_t) 15137) /* FIX(1.847759065) */ | |||
| #define FIX_1_961570560 ((int32_t) 16069) /* FIX(1.961570560) */ | |||
| #define FIX_2_053119869 ((int32_t) 16819) /* FIX(2.053119869) */ | |||
| #define FIX_2_562915447 ((int32_t) 20995) /* FIX(2.562915447) */ | |||
| #define FIX_3_072711026 ((int32_t) 25172) /* FIX(3.072711026) */ | |||
| #else | |||
| #define FIX_0_298631336 FIX(0.298631336) | |||
| #define FIX_0_390180644 FIX(0.390180644) | |||
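
A quick check of the CONST_BITS == 13 table above (illustrative only): FIX(0.541196100) = round(0.541196100 * 2^13) = round(4433.48) = 4433, matching FIX_0_541196100.

    #include <assert.h>

    int main(void)
    {
        /* 0.541196100 * 8192 = 4433.48..., which rounds to 4433 */
        assert((int) (0.541196100 * 8192 + 0.5) == 4433);
        return 0;
    }
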
| @@ -185,9 +185,9 @@ static always_inline void row_fdct(DCTELEM * data){ | |||
| z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); | |||
| dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), | |||
| CONST_BITS-PASS1_BITS); | |||
| CONST_BITS-PASS1_BITS); | |||
| dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), | |||
| CONST_BITS-PASS1_BITS); | |||
| CONST_BITS-PASS1_BITS); | |||
| /* Odd part per figure 8 --- note paper omits factor of sqrt(2). | |||
| * cK represents cos(K*pi/16). | |||
| @@ -217,7 +217,7 @@ static always_inline void row_fdct(DCTELEM * data){ | |||
| dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS); | |||
| dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS); | |||
| dataptr += DCTSIZE; /* advance pointer to next row */ | |||
| dataptr += DCTSIZE; /* advance pointer to next row */ | |||
| } | |||
| } | |||
| @@ -267,9 +267,9 @@ ff_jpeg_fdct_islow (DCTELEM * data) | |||
| z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); | |||
| dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), | |||
| CONST_BITS+PASS1_BITS); | |||
| CONST_BITS+PASS1_BITS); | |||
| dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), | |||
| CONST_BITS+PASS1_BITS); | |||
| CONST_BITS+PASS1_BITS); | |||
| /* Odd part per figure 8 --- note paper omits factor of sqrt(2). | |||
| * cK represents cos(K*pi/16). | |||
| @@ -295,15 +295,15 @@ ff_jpeg_fdct_islow (DCTELEM * data) | |||
| z4 += z5; | |||
| dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, | |||
| CONST_BITS+PASS1_BITS); | |||
| CONST_BITS+PASS1_BITS); | |||
| dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, | |||
| CONST_BITS+PASS1_BITS); | |||
| CONST_BITS+PASS1_BITS); | |||
| dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, | |||
| CONST_BITS+PASS1_BITS); | |||
| CONST_BITS+PASS1_BITS); | |||
| dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, | |||
| CONST_BITS+PASS1_BITS); | |||
| CONST_BITS+PASS1_BITS); | |||
| dataptr++; /* advance pointer to next column */ | |||
| dataptr++; /* advance pointer to next column */ | |||
| } | |||
| } | |||
| @@ -350,9 +350,9 @@ ff_fdct248_islow (DCTELEM * data) | |||
| z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); | |||
| dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), | |||
| CONST_BITS+PASS1_BITS); | |||
| CONST_BITS+PASS1_BITS); | |||
| dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), | |||
| CONST_BITS+PASS1_BITS); | |||
| CONST_BITS+PASS1_BITS); | |||
| tmp10 = tmp4 + tmp7; | |||
| tmp11 = tmp5 + tmp6; | |||
| @@ -364,10 +364,10 @@ ff_fdct248_islow (DCTELEM * data) | |||
| z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); | |||
| dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), | |||
| CONST_BITS+PASS1_BITS); | |||
| CONST_BITS+PASS1_BITS); | |||
| dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), | |||
| CONST_BITS+PASS1_BITS); | |||
| CONST_BITS+PASS1_BITS); | |||
| dataptr++; /* advance pointer to next column */ | |||
| dataptr++; /* advance pointer to next column */ | |||
| } | |||
| } | |||
| @@ -81,8 +81,8 @@ | |||
| */ | |||
| typedef struct LclContext { | |||
| AVCodecContext *avctx; | |||
| AVFrame pic; | |||
| AVCodecContext *avctx; | |||
| AVFrame pic; | |||
| PutBitContext pb; | |||
| // Image type | |||
| @@ -198,8 +198,8 @@ static unsigned int mszh_decomp(unsigned char * srcptr, int srclen, unsigned cha | |||
| */ | |||
| static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size) | |||
| { | |||
| LclContext * const c = (LclContext *)avctx->priv_data; | |||
| unsigned char *encoded = (unsigned char *)buf; | |||
| LclContext * const c = (LclContext *)avctx->priv_data; | |||
| unsigned char *encoded = (unsigned char *)buf; | |||
| unsigned int pixel_ptr; | |||
| int row, col; | |||
| unsigned char *outptr; | |||
| @@ -214,15 +214,15 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8 | |||
| #endif | |||
| unsigned int len = buf_size; | |||
| if(c->pic.data[0]) | |||
| avctx->release_buffer(avctx, &c->pic); | |||
| if(c->pic.data[0]) | |||
| avctx->release_buffer(avctx, &c->pic); | |||
| c->pic.reference = 0; | |||
| c->pic.buffer_hints = FF_BUFFER_HINTS_VALID; | |||
| if(avctx->get_buffer(avctx, &c->pic) < 0){ | |||
| av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); | |||
| return -1; | |||
| } | |||
| c->pic.reference = 0; | |||
| c->pic.buffer_hints = FF_BUFFER_HINTS_VALID; | |||
| if(avctx->get_buffer(avctx, &c->pic) < 0){ | |||
| av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); | |||
| return -1; | |||
| } | |||
| outptr = c->pic.data[0]; // Output image pointer | |||
| @@ -358,7 +358,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8 | |||
| pixel_ptr = row * width * 3; | |||
| yq = encoded[pixel_ptr++]; | |||
| uqvq = encoded[pixel_ptr++]; | |||
| uqvq+=(encoded[pixel_ptr++] << 8); | |||
| uqvq+=(encoded[pixel_ptr++] << 8); | |||
| for (col = 1; col < width; col++) { | |||
| encoded[pixel_ptr] = yq -= encoded[pixel_ptr]; | |||
| uqvq -= (encoded[pixel_ptr+1] | (encoded[pixel_ptr+2]<<8)); | |||
| @@ -588,8 +588,8 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, | |||
| c->zstream.avail_in = avctx->width*3; | |||
| zret = deflate(&(c->zstream), Z_NO_FLUSH); | |||
| if (zret != Z_OK) { | |||
| av_log(avctx, AV_LOG_ERROR, "Deflate error: %d\n", zret); | |||
| return -1; | |||
| av_log(avctx, AV_LOG_ERROR, "Deflate error: %d\n", zret); | |||
| return -1; | |||
| } | |||
| } | |||
| zret = deflate(&(c->zstream), Z_FINISH); | |||
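
A minimal zlib sketch mirroring the pattern above (input fed with Z_NO_FLUSH, stream terminated with Z_FINISH); it assumes zlib is available and trims the error handling that the encoder keeps.

    #include <zlib.h>
    #include <string.h>

    static int compress_buf(const unsigned char *in, unsigned in_len,
                            unsigned char *out, unsigned out_len)
    {
        z_stream zs;

        memset(&zs, 0, sizeof(zs));
        if (deflateInit(&zs, Z_DEFAULT_COMPRESSION) != Z_OK)
            return -1;
        zs.next_in   = (Bytef *)in;   zs.avail_in  = in_len;
        zs.next_out  = out;           zs.avail_out = out_len;
        if (deflate(&zs, Z_NO_FLUSH) != Z_OK)       { deflateEnd(&zs); return -1; }
        if (deflate(&zs, Z_FINISH) != Z_STREAM_END) { deflateEnd(&zs); return -1; }
        deflateEnd(&zs);
        return (int)zs.total_out;
    }
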
| @@ -714,7 +714,7 @@ static int decode_init(AVCodecContext *avctx) | |||
| break; | |||
| default: | |||
| if ((c->compression < Z_NO_COMPRESSION) || (c->compression > Z_BEST_COMPRESSION)) { | |||
| av_log(avctx, AV_LOG_ERROR, "Unsupported compression level for ZLIB: (%d).\n", c->compression); | |||
| av_log(avctx, AV_LOG_ERROR, "Unsupported compression level for ZLIB: (%d).\n", c->compression); | |||
| return 1; | |||
| } | |||
| av_log(avctx, AV_LOG_INFO, "Compression level for ZLIB: (%d).\n", c->compression); | |||
| @@ -851,15 +851,15 @@ static int encode_init(AVCodecContext *avctx) | |||
| */ | |||
| static int decode_end(AVCodecContext *avctx) | |||
| { | |||
| LclContext * const c = (LclContext *)avctx->priv_data; | |||
| LclContext * const c = (LclContext *)avctx->priv_data; | |||
| if (c->pic.data[0]) | |||
| avctx->release_buffer(avctx, &c->pic); | |||
| if (c->pic.data[0]) | |||
| avctx->release_buffer(avctx, &c->pic); | |||
| #ifdef CONFIG_ZLIB | |||
| inflateEnd(&(c->zstream)); | |||
| #endif | |||
| return 0; | |||
| return 0; | |||
| } | |||
| @@ -883,28 +883,28 @@ static int encode_end(AVCodecContext *avctx) | |||
| } | |||
| AVCodec mszh_decoder = { | |||
| "mszh", | |||
| CODEC_TYPE_VIDEO, | |||
| CODEC_ID_MSZH, | |||
| sizeof(LclContext), | |||
| decode_init, | |||
| NULL, | |||
| decode_end, | |||
| decode_frame, | |||
| CODEC_CAP_DR1, | |||
| "mszh", | |||
| CODEC_TYPE_VIDEO, | |||
| CODEC_ID_MSZH, | |||
| sizeof(LclContext), | |||
| decode_init, | |||
| NULL, | |||
| decode_end, | |||
| decode_frame, | |||
| CODEC_CAP_DR1, | |||
| }; | |||
| AVCodec zlib_decoder = { | |||
| "zlib", | |||
| CODEC_TYPE_VIDEO, | |||
| CODEC_ID_ZLIB, | |||
| sizeof(LclContext), | |||
| decode_init, | |||
| NULL, | |||
| decode_end, | |||
| decode_frame, | |||
| CODEC_CAP_DR1, | |||
| "zlib", | |||
| CODEC_TYPE_VIDEO, | |||
| CODEC_ID_ZLIB, | |||
| sizeof(LclContext), | |||
| decode_init, | |||
| NULL, | |||
| decode_end, | |||
| decode_frame, | |||
| CODEC_CAP_DR1, | |||
| }; | |||
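
For readability, the same registration written with C99 designated initializers, to make the positional slots above self-describing. The field names are assumed from this tree's AVCodec definition (name, type, id, priv_data_size, init, encode, close, decode, capabilities); the NULL slot is the encode callback.

    AVCodec zlib_decoder_annotated = {
        .name           = "zlib",
        .type           = CODEC_TYPE_VIDEO,
        .id             = CODEC_ID_ZLIB,
        .priv_data_size = sizeof(LclContext),
        .init           = decode_init,
        .encode         = NULL,
        .close          = decode_end,
        .decode         = decode_frame,
        .capabilities   = CODEC_CAP_DR1,
    };
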
| #ifdef CONFIG_ENCODERS | |||
| @@ -42,7 +42,7 @@ void pp_postprocess(uint8_t * src[3], int srcStride[3], | |||
| uint8_t * dst[3], int dstStride[3], | |||
| int horizontalSize, int verticalSize, | |||
| QP_STORE_T *QP_store, int QP_stride, | |||
| pp_mode_t *mode, pp_context_t *ppContext, int pict_type); | |||
| pp_mode_t *mode, pp_context_t *ppContext, int pict_type); | |||
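
A hedged usage sketch for the prototype above. The helper names (pp_get_mode_by_name_and_quality(), pp_get_context(), pp_free_mode(), pp_free_context()) and the PP_QUALITY_MAX / PP_FORMAT_420 constants are taken from libpostproc's public API as declared in its header, not from this hunk; check postprocess.h before relying on them.

    #include <stdint.h>
    #include <postprocess.h>   /* assumed libpostproc public header */

    static void postprocess_frame(uint8_t *src[3], int srcStride[3],
                                  uint8_t *dst[3], int dstStride[3],
                                  int width, int height,
                                  QP_STORE_T *qp_store, int qp_stride, int pict_type)
    {
        pp_mode_t    *mode = pp_get_mode_by_name_and_quality("hb:a,vb:a,dr:a",
                                                             PP_QUALITY_MAX);
        pp_context_t *ctx  = pp_get_context(width, height, PP_FORMAT_420);

        if (mode && ctx)
            pp_postprocess(src, srcStride, dst, dstStride,
                           width, height, qp_store, qp_stride,
                           mode, ctx, pict_type);
        if (mode) pp_free_mode(mode);
        if (ctx)  pp_free_context(ctx);
    }
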
| /** | |||
| @@ -26,35 +26,35 @@ | |||
| #endif | |||
| #define ALTIVEC_TRANSPOSE_8x8_SHORT(src_a,src_b,src_c,src_d,src_e,src_f,src_g,src_h) \ | |||
| do { \ | |||
| __typeof__(src_a) tempA1, tempB1, tempC1, tempD1; \ | |||
| __typeof__(src_a) tempE1, tempF1, tempG1, tempH1; \ | |||
| __typeof__(src_a) tempA2, tempB2, tempC2, tempD2; \ | |||
| __typeof__(src_a) tempE2, tempF2, tempG2, tempH2; \ | |||
| tempA1 = vec_mergeh (src_a, src_e); \ | |||
| tempB1 = vec_mergel (src_a, src_e); \ | |||
| tempC1 = vec_mergeh (src_b, src_f); \ | |||
| tempD1 = vec_mergel (src_b, src_f); \ | |||
| tempE1 = vec_mergeh (src_c, src_g); \ | |||
| tempF1 = vec_mergel (src_c, src_g); \ | |||
| tempG1 = vec_mergeh (src_d, src_h); \ | |||
| tempH1 = vec_mergel (src_d, src_h); \ | |||
| tempA2 = vec_mergeh (tempA1, tempE1); \ | |||
| tempB2 = vec_mergel (tempA1, tempE1); \ | |||
| tempC2 = vec_mergeh (tempB1, tempF1); \ | |||
| tempD2 = vec_mergel (tempB1, tempF1); \ | |||
| tempE2 = vec_mergeh (tempC1, tempG1); \ | |||
| tempF2 = vec_mergel (tempC1, tempG1); \ | |||
| tempG2 = vec_mergeh (tempD1, tempH1); \ | |||
| tempH2 = vec_mergel (tempD1, tempH1); \ | |||
| src_a = vec_mergeh (tempA2, tempE2); \ | |||
| src_b = vec_mergel (tempA2, tempE2); \ | |||
| src_c = vec_mergeh (tempB2, tempF2); \ | |||
| src_d = vec_mergel (tempB2, tempF2); \ | |||
| src_e = vec_mergeh (tempC2, tempG2); \ | |||
| src_f = vec_mergel (tempC2, tempG2); \ | |||
| src_g = vec_mergeh (tempD2, tempH2); \ | |||
| src_h = vec_mergel (tempD2, tempH2); \ | |||
| do { \ | |||
| __typeof__(src_a) tempA1, tempB1, tempC1, tempD1; \ | |||
| __typeof__(src_a) tempE1, tempF1, tempG1, tempH1; \ | |||
| __typeof__(src_a) tempA2, tempB2, tempC2, tempD2; \ | |||
| __typeof__(src_a) tempE2, tempF2, tempG2, tempH2; \ | |||
| tempA1 = vec_mergeh (src_a, src_e); \ | |||
| tempB1 = vec_mergel (src_a, src_e); \ | |||
| tempC1 = vec_mergeh (src_b, src_f); \ | |||
| tempD1 = vec_mergel (src_b, src_f); \ | |||
| tempE1 = vec_mergeh (src_c, src_g); \ | |||
| tempF1 = vec_mergel (src_c, src_g); \ | |||
| tempG1 = vec_mergeh (src_d, src_h); \ | |||
| tempH1 = vec_mergel (src_d, src_h); \ | |||
| tempA2 = vec_mergeh (tempA1, tempE1); \ | |||
| tempB2 = vec_mergel (tempA1, tempE1); \ | |||
| tempC2 = vec_mergeh (tempB1, tempF1); \ | |||
| tempD2 = vec_mergel (tempB1, tempF1); \ | |||
| tempE2 = vec_mergeh (tempC1, tempG1); \ | |||
| tempF2 = vec_mergel (tempC1, tempG1); \ | |||
| tempG2 = vec_mergeh (tempD1, tempH1); \ | |||
| tempH2 = vec_mergel (tempD1, tempH1); \ | |||
| src_a = vec_mergeh (tempA2, tempE2); \ | |||
| src_b = vec_mergel (tempA2, tempE2); \ | |||
| src_c = vec_mergeh (tempB2, tempF2); \ | |||
| src_d = vec_mergel (tempB2, tempF2); \ | |||
| src_e = vec_mergeh (tempC2, tempG2); \ | |||
| src_f = vec_mergel (tempC2, tempG2); \ | |||
| src_g = vec_mergeh (tempD2, tempH2); \ | |||
| src_h = vec_mergel (tempD2, tempH2); \ | |||
| } while (0) | |||
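
A scalar reference for what ALTIVEC_TRANSPOSE_8x8_SHORT achieves (a sketch): transpose an 8x8 block of shorts via a temporary. The vector macro above reaches the same result with three rounds of vec_mergeh/vec_mergel interleaving, keeping everything in registers.

    static void transpose8x8_ref(short m[8][8])
    {
        int i, j;
        short t[8][8];

        for (i = 0; i < 8; i++)
            for (j = 0; j < 8; j++)
                t[j][i] = m[i][j];
        for (i = 0; i < 8; i++)
            for (j = 0; j < 8; j++)
                m[i][j] = t[i][j];
    }
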
| @@ -94,25 +94,25 @@ static inline int vertClassify_altivec(uint8_t src[], int stride, PPContext *c) | |||
| vector signed short v_srcAss0, v_srcAss1, v_srcAss2, v_srcAss3, v_srcAss4, v_srcAss5, v_srcAss6, v_srcAss7; | |||
| #define LOAD_LINE(i) \ | |||
| register int j##i = i * stride; \ | |||
| vector unsigned char perm##i = vec_lvsl(j##i, src2); \ | |||
| const vector unsigned char v_srcA1##i = vec_ld(j##i, src2); \ | |||
| vector unsigned char v_srcA2##i; \ | |||
| if (two_vectors) \ | |||
| v_srcA2##i = vec_ld(j##i + 16, src2); \ | |||
| const vector unsigned char v_srcA##i = \ | |||
| vec_perm(v_srcA1##i, v_srcA2##i, perm##i); \ | |||
| #define LOAD_LINE(i) \ | |||
| register int j##i = i * stride; \ | |||
| vector unsigned char perm##i = vec_lvsl(j##i, src2); \ | |||
| const vector unsigned char v_srcA1##i = vec_ld(j##i, src2); \ | |||
| vector unsigned char v_srcA2##i; \ | |||
| if (two_vectors) \ | |||
| v_srcA2##i = vec_ld(j##i + 16, src2); \ | |||
| const vector unsigned char v_srcA##i = \ | |||
| vec_perm(v_srcA1##i, v_srcA2##i, perm##i); \ | |||
| v_srcAss##i = \ | |||
| (vector signed short)vec_mergeh((vector signed char)zero, \ | |||
| (vector signed char)v_srcA##i) | |||
| (vector signed short)vec_mergeh((vector signed char)zero, \ | |||
| (vector signed char)v_srcA##i) | |||
| #define LOAD_LINE_ALIGNED(i) \ | |||
| register int j##i = i * stride; \ | |||
| const vector unsigned char v_srcA##i = vec_ld(j##i, src2); \ | |||
| v_srcAss##i = \ | |||
| (vector signed short)vec_mergeh((vector signed char)zero, \ | |||
| (vector signed char)v_srcA##i) | |||
| (vector signed short)vec_mergeh((vector signed char)zero, \ | |||
| (vector signed char)v_srcA##i) | |||
| // special casing the aligned case is worthwhile, as all calls from | |||
| // the (transposed) horizontal deblocks will be aligned, in addition | |||
| @@ -139,15 +139,15 @@ static inline int vertClassify_altivec(uint8_t src[], int stride, PPContext *c) | |||
| #undef LOAD_LINE | |||
| #undef LOAD_LINE_ALIGNED | |||
| #define ITER(i, j) \ | |||
| const vector signed short v_diff##i = \ | |||
| vec_sub(v_srcAss##i, v_srcAss##j); \ | |||
| const vector signed short v_sum##i = \ | |||
| vec_add(v_diff##i, v_dcOffset); \ | |||
| const vector signed short v_comp##i = \ | |||
| (vector signed short)vec_cmplt((vector unsigned short)v_sum##i, \ | |||
| v_dcThreshold); \ | |||
| const vector signed short v_part##i = vec_and(mask, v_comp##i); \ | |||
| #define ITER(i, j) \ | |||
| const vector signed short v_diff##i = \ | |||
| vec_sub(v_srcAss##i, v_srcAss##j); \ | |||
| const vector signed short v_sum##i = \ | |||
| vec_add(v_diff##i, v_dcOffset); \ | |||
| const vector signed short v_comp##i = \ | |||
| (vector signed short)vec_cmplt((vector unsigned short)v_sum##i, \ | |||
| v_dcThreshold); \ | |||
| const vector signed short v_part##i = vec_and(mask, v_comp##i); \ | |||
| v_numEq = vec_sum4s(v_part##i, v_numEq); | |||
| ITER(0, 1); | |||
| @@ -167,13 +167,13 @@ static inline int vertClassify_altivec(uint8_t src[], int stride, PPContext *c) | |||
| if (numEq > c->ppMode.flatnessThreshold) | |||
| { | |||
| const vector unsigned char mmoP1 = (const vector unsigned char) | |||
| AVV(0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, | |||
| 0x00, 0x01, 0x12, 0x13, 0x08, 0x09, 0x1A, 0x1B); | |||
| AVV(0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, | |||
| 0x00, 0x01, 0x12, 0x13, 0x08, 0x09, 0x1A, 0x1B); | |||
| const vector unsigned char mmoP2 = (const vector unsigned char) | |||
| AVV(0x04, 0x05, 0x16, 0x17, 0x0C, 0x0D, 0x1E, 0x1F, | |||
| 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f); | |||
| AVV(0x04, 0x05, 0x16, 0x17, 0x0C, 0x0D, 0x1E, 0x1F, | |||
| 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f); | |||
| const vector unsigned char mmoP = (const vector unsigned char) | |||
| vec_lvsl(8, (unsigned char*)0); | |||
| vec_lvsl(8, (unsigned char*)0); | |||
| vector signed short mmoL1 = vec_perm(v_srcAss0, v_srcAss2, mmoP1); | |||
| vector signed short mmoL2 = vec_perm(v_srcAss4, v_srcAss6, mmoP2); | |||
| @@ -185,9 +185,9 @@ static inline int vertClassify_altivec(uint8_t src[], int stride, PPContext *c) | |||
| vector unsigned short mmoSum = (vector unsigned short)vec_add(mmoDiff, v2QP); | |||
| if (vec_any_gt(mmoSum, v4QP)) | |||
| return 0; | |||
| return 0; | |||
| else | |||
| return 1; | |||
| return 1; | |||
| } | |||
| else return 2; | |||
| } | |||
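
A scalar sketch of the classification above, assuming the dcOffset/dcThreshold/flatnessThreshold fields of PPContext: count near-equal vertical neighbours over the eight columns of the block (seven neighbour pairs assumed); a flat block returns 1 and everything else 2, with the QP-based 0-vs-1 refinement of the vector code omitted.

    #include <stdint.h>

    static int vert_classify_ref(const uint8_t *src, int stride,
                                 int dcOffset, int dcThreshold, int flatnessThreshold)
    {
        int x, y, numEq = 0;

        for (y = 0; y < 7; y++)
            for (x = 0; x < 8; x++)
                numEq += ((unsigned)(src[y * stride + x] - src[(y + 1) * stride + x]
                                     + dcOffset) < (unsigned)dcThreshold);
        if (numEq > flatnessThreshold)
            return 1;   /* flat block; QP refinement (0 vs 1) omitted here */
        return 2;
    }
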
| @@ -218,21 +218,21 @@ static inline void doVertLowPass_altivec(uint8_t *src, int stride, PPContext *c) | |||
| vector unsigned char vbT0, vbT1, vbT2, vbT3, vbT4, vbT5, vbT6, vbT7, vbT8, vbT9; | |||
| #define LOAD_LINE(i) \ | |||
| const vector unsigned char perml##i = \ | |||
| vec_lvsl(i * stride, src2); \ | |||
| const vector unsigned char perml##i = \ | |||
| vec_lvsl(i * stride, src2); \ | |||
| vbA##i = vec_ld(i * stride, src2); \ | |||
| vbB##i = vec_ld(i * stride + 16, src2); \ | |||
| vbT##i = vec_perm(vbA##i, vbB##i, perml##i); \ | |||
| vb##i = \ | |||
| (vector signed short)vec_mergeh((vector unsigned char)zero, \ | |||
| (vector unsigned char)vbT##i) | |||
| (vector signed short)vec_mergeh((vector unsigned char)zero, \ | |||
| (vector unsigned char)vbT##i) | |||
| #define LOAD_LINE_ALIGNED(i) \ | |||
| register int j##i = i * stride; \ | |||
| vbT##i = vec_ld(j##i, src2); \ | |||
| vb##i = \ | |||
| (vector signed short)vec_mergeh((vector signed char)zero, \ | |||
| (vector signed char)vbT##i) | |||
| (vector signed short)vec_mergeh((vector signed char)zero, \ | |||
| (vector signed char)vbT##i) | |||
| // special casing the aligned case is worthwhile, as all calls from | |||
| // the (transposed) horizontal deblocks will be aligned, in addition | |||
| @@ -308,11 +308,11 @@ static inline void doVertLowPass_altivec(uint8_t *src, int stride, PPContext *c) | |||
| const vector signed short temp91 = vec_sub(v_sumsB8, vb5); | |||
| const vector signed short v_sumsB9 = vec_add(temp91, v_last); | |||
| #define COMPUTE_VR(i, j, k) \ | |||
| const vector signed short temps1##i = \ | |||
| vec_add(v_sumsB##i, v_sumsB##k); \ | |||
| const vector signed short temps2##i = \ | |||
| vec_mladd(vb##j, (vector signed short)v_2, temps1##i); \ | |||
| #define COMPUTE_VR(i, j, k) \ | |||
| const vector signed short temps1##i = \ | |||
| vec_add(v_sumsB##i, v_sumsB##k); \ | |||
| const vector signed short temps2##i = \ | |||
| vec_mladd(vb##j, (vector signed short)v_2, temps1##i); \ | |||
| const vector signed short vr##j = vec_sra(temps2##i, v_4) | |||
| COMPUTE_VR(0, 1, 2); | |||
| @@ -326,31 +326,31 @@ static inline void doVertLowPass_altivec(uint8_t *src, int stride, PPContext *c) | |||
| const vector signed char neg1 = vec_splat_s8(-1); | |||
| const vector unsigned char permHH = (const vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, | |||
| 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F); | |||
| #define PACK_AND_STORE(i) \ | |||
| const vector unsigned char perms##i = \ | |||
| vec_lvsr(i * stride, src2); \ | |||
| const vector unsigned char vf##i = \ | |||
| vec_packsu(vr##i, (vector signed short)zero); \ | |||
| const vector unsigned char vg##i = \ | |||
| vec_perm(vf##i, vbT##i, permHH); \ | |||
| const vector unsigned char mask##i = \ | |||
| vec_perm((vector unsigned char)zero, (vector unsigned char)neg1, perms##i); \ | |||
| const vector unsigned char vg2##i = \ | |||
| vec_perm(vg##i, vg##i, perms##i); \ | |||
| const vector unsigned char svA##i = \ | |||
| vec_sel(vbA##i, vg2##i, mask##i); \ | |||
| const vector unsigned char svB##i = \ | |||
| vec_sel(vg2##i, vbB##i, mask##i); \ | |||
| vec_st(svA##i, i * stride, src2); \ | |||
| 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F); | |||
| #define PACK_AND_STORE(i) \ | |||
| const vector unsigned char perms##i = \ | |||
| vec_lvsr(i * stride, src2); \ | |||
| const vector unsigned char vf##i = \ | |||
| vec_packsu(vr##i, (vector signed short)zero); \ | |||
| const vector unsigned char vg##i = \ | |||
| vec_perm(vf##i, vbT##i, permHH); \ | |||
| const vector unsigned char mask##i = \ | |||
| vec_perm((vector unsigned char)zero, (vector unsigned char)neg1, perms##i); \ | |||
| const vector unsigned char vg2##i = \ | |||
| vec_perm(vg##i, vg##i, perms##i); \ | |||
| const vector unsigned char svA##i = \ | |||
| vec_sel(vbA##i, vg2##i, mask##i); \ | |||
| const vector unsigned char svB##i = \ | |||
| vec_sel(vg2##i, vbB##i, mask##i); \ | |||
| vec_st(svA##i, i * stride, src2); \ | |||
| vec_st(svB##i, i * stride + 16, src2) | |||
| #define PACK_AND_STORE_ALIGNED(i) \ | |||
| const vector unsigned char vf##i = \ | |||
| vec_packsu(vr##i, (vector signed short)zero); \ | |||
| const vector unsigned char vg##i = \ | |||
| vec_perm(vf##i, vbT##i, permHH); \ | |||
| #define PACK_AND_STORE_ALIGNED(i) \ | |||
| const vector unsigned char vf##i = \ | |||
| vec_packsu(vr##i, (vector signed short)zero); \ | |||
| const vector unsigned char vg##i = \ | |||
| vec_perm(vf##i, vbT##i, permHH); \ | |||
| vec_st(vg##i, i * stride, src2) | |||
| // special casing the aligned case is worthwhile, as all calls from | |||
| @@ -398,17 +398,17 @@ static inline void doVertDefFilter_altivec(uint8_t src[], int stride, PPContext | |||
| vqp = vec_splat(vqp, 0); | |||
| #define LOAD_LINE(i) \ | |||
| const vector unsigned char perm##i = \ | |||
| vec_lvsl(i * stride, src2); \ | |||
| const vector unsigned char vbA##i = \ | |||
| vec_ld(i * stride, src2); \ | |||
| const vector unsigned char vbB##i = \ | |||
| vec_ld(i * stride + 16, src2); \ | |||
| const vector unsigned char vbT##i = \ | |||
| vec_perm(vbA##i, vbB##i, perm##i); \ | |||
| const vector signed short vb##i = \ | |||
| (vector signed short)vec_mergeh((vector unsigned char)zero, \ | |||
| (vector unsigned char)vbT##i) | |||
| const vector unsigned char perm##i = \ | |||
| vec_lvsl(i * stride, src2); \ | |||
| const vector unsigned char vbA##i = \ | |||
| vec_ld(i * stride, src2); \ | |||
| const vector unsigned char vbB##i = \ | |||
| vec_ld(i * stride + 16, src2); \ | |||
| const vector unsigned char vbT##i = \ | |||
| vec_perm(vbA##i, vbB##i, perm##i); \ | |||
| const vector signed short vb##i = \ | |||
| (vector signed short)vec_mergeh((vector unsigned char)zero, \ | |||
| (vector unsigned char)vbT##i) | |||
| src2 += stride*3; | |||
| @@ -426,7 +426,7 @@ static inline void doVertDefFilter_altivec(uint8_t src[], int stride, PPContext | |||
| const vector signed short v_2 = vec_splat_s16(2); | |||
| const vector signed short v_5 = vec_splat_s16(5); | |||
| const vector signed short v_32 = vec_sl(v_1, | |||
| (vector unsigned short)v_5); | |||
| (vector unsigned short)v_5); | |||
| /* middle energy */ | |||
| const vector signed short l3minusl6 = vec_sub(vb3, vb6); | |||
| const vector signed short l5minusl4 = vec_sub(vb5, vb4); | |||
| @@ -483,22 +483,22 @@ static inline void doVertDefFilter_altivec(uint8_t src[], int stride, PPContext | |||
| const vector signed char neg1 = vec_splat_s8(-1); | |||
| const vector unsigned char permHH = (const vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, | |||
| 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F); | |||
| #define STORE(i) \ | |||
| const vector unsigned char perms##i = \ | |||
| vec_lvsr(i * stride, src2); \ | |||
| const vector unsigned char vg##i = \ | |||
| vec_perm(st##i, vbT##i, permHH); \ | |||
| const vector unsigned char mask##i = \ | |||
| vec_perm((vector unsigned char)zero, (vector unsigned char)neg1, perms##i); \ | |||
| const vector unsigned char vg2##i = \ | |||
| vec_perm(vg##i, vg##i, perms##i); \ | |||
| const vector unsigned char svA##i = \ | |||
| vec_sel(vbA##i, vg2##i, mask##i); \ | |||
| const vector unsigned char svB##i = \ | |||
| vec_sel(vg2##i, vbB##i, mask##i); \ | |||
| vec_st(svA##i, i * stride, src2); \ | |||
| 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F); | |||
| #define STORE(i) \ | |||
| const vector unsigned char perms##i = \ | |||
| vec_lvsr(i * stride, src2); \ | |||
| const vector unsigned char vg##i = \ | |||
| vec_perm(st##i, vbT##i, permHH); \ | |||
| const vector unsigned char mask##i = \ | |||
| vec_perm((vector unsigned char)zero, (vector unsigned char)neg1, perms##i); \ | |||
| const vector unsigned char vg2##i = \ | |||
| vec_perm(vg##i, vg##i, perms##i); \ | |||
| const vector unsigned char svA##i = \ | |||
| vec_sel(vbA##i, vg2##i, mask##i); \ | |||
| const vector unsigned char svB##i = \ | |||
| vec_sel(vg2##i, vbB##i, mask##i); \ | |||
| vec_st(svA##i, i * stride, src2); \ | |||
| vec_st(svB##i, i * stride + 16, src2) | |||
| STORE(4); | |||
| @@ -522,11 +522,11 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) { | |||
| dt[0] = deringThreshold; | |||
| v_dt = vec_splat(vec_ld(0, dt), 0); | |||
| #define LOAD_LINE(i) \ | |||
| const vector unsigned char perm##i = \ | |||
| vec_lvsl(i * stride, srcCopy); \ | |||
| vector unsigned char sA##i = vec_ld(i * stride, srcCopy); \ | |||
| vector unsigned char sB##i = vec_ld(i * stride + 16, srcCopy); \ | |||
| #define LOAD_LINE(i) \ | |||
| const vector unsigned char perm##i = \ | |||
| vec_lvsl(i * stride, srcCopy); \ | |||
| vector unsigned char sA##i = vec_ld(i * stride, srcCopy); \ | |||
| vector unsigned char sB##i = vec_ld(i * stride + 16, srcCopy); \ | |||
| vector unsigned char src##i = vec_perm(sA##i, sB##i, perm##i) | |||
| LOAD_LINE(0); | |||
| @@ -545,13 +545,13 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) { | |||
| { | |||
| const vector unsigned char trunc_perm = (vector unsigned char) | |||
| AVV(0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, | |||
| 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18); | |||
| 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18); | |||
| const vector unsigned char trunc_src12 = vec_perm(src1, src2, trunc_perm); | |||
| const vector unsigned char trunc_src34 = vec_perm(src3, src4, trunc_perm); | |||
| const vector unsigned char trunc_src56 = vec_perm(src5, src6, trunc_perm); | |||
| const vector unsigned char trunc_src78 = vec_perm(src7, src8, trunc_perm); | |||
| #define EXTRACT(op) do { \ | |||
| #define EXTRACT(op) do { \ | |||
| const vector unsigned char s##op##_1 = vec_##op(trunc_src12, trunc_src34); \ | |||
| const vector unsigned char s##op##_2 = vec_##op(trunc_src56, trunc_src78); \ | |||
| const vector unsigned char s##op##_6 = vec_##op(s##op##_1, s##op##_2); \ | |||
| @@ -584,29 +584,29 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) { | |||
| { | |||
| const vector unsigned short mask1 = (vector unsigned short) | |||
| AVV(0x0001, 0x0002, 0x0004, 0x0008, | |||
| 0x0010, 0x0020, 0x0040, 0x0080); | |||
| 0x0010, 0x0020, 0x0040, 0x0080); | |||
| const vector unsigned short mask2 = (vector unsigned short) | |||
| AVV(0x0100, 0x0200, 0x0000, 0x0000, | |||
| 0x0000, 0x0000, 0x0000, 0x0000); | |||
| 0x0000, 0x0000, 0x0000, 0x0000); | |||
| const vector unsigned int vuint32_16 = vec_sl(vec_splat_u32(1), vec_splat_u32(4)); | |||
| const vector unsigned int vuint32_1 = vec_splat_u32(1); | |||
| #define COMPARE(i) \ | |||
| vector signed int sum##i; \ | |||
| do { \ | |||
| const vector unsigned char cmp##i = \ | |||
| (vector unsigned char)vec_cmpgt(src##i, v_avg); \ | |||
| const vector unsigned short cmpHi##i = \ | |||
| (vector unsigned short)vec_mergeh(cmp##i, cmp##i); \ | |||
| const vector unsigned short cmpLi##i = \ | |||
| (vector unsigned short)vec_mergel(cmp##i, cmp##i); \ | |||
| const vector signed short cmpHf##i = \ | |||
| (vector signed short)vec_and(cmpHi##i, mask1); \ | |||
| const vector signed short cmpLf##i = \ | |||
| (vector signed short)vec_and(cmpLi##i, mask2); \ | |||
| const vector signed int sump##i = vec_sum4s(cmpHf##i, zero); \ | |||
| const vector signed int sumq##i = vec_sum4s(cmpLf##i, sump##i); \ | |||
| #define COMPARE(i) \ | |||
| vector signed int sum##i; \ | |||
| do { \ | |||
| const vector unsigned char cmp##i = \ | |||
| (vector unsigned char)vec_cmpgt(src##i, v_avg); \ | |||
| const vector unsigned short cmpHi##i = \ | |||
| (vector unsigned short)vec_mergeh(cmp##i, cmp##i); \ | |||
| const vector unsigned short cmpLi##i = \ | |||
| (vector unsigned short)vec_mergel(cmp##i, cmp##i); \ | |||
| const vector signed short cmpHf##i = \ | |||
| (vector signed short)vec_and(cmpHi##i, mask1); \ | |||
| const vector signed short cmpLf##i = \ | |||
| (vector signed short)vec_and(cmpLi##i, mask2); \ | |||
| const vector signed int sump##i = vec_sum4s(cmpHf##i, zero); \ | |||
| const vector signed int sumq##i = vec_sum4s(cmpLf##i, sump##i); \ | |||
| sum##i = vec_sums(sumq##i, zero); } while (0) | |||
| COMPARE(0); | |||
| @@ -643,11 +643,11 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) { | |||
| const vector signed int t2B = vec_or(sumB, tB); | |||
| const vector signed int t2C = vec_or(sumC, tC); | |||
| const vector signed int t3A = vec_and(vec_sra(t2A, vuint32_1), | |||
| vec_sl(t2A, vuint32_1)); | |||
| vec_sl(t2A, vuint32_1)); | |||
| const vector signed int t3B = vec_and(vec_sra(t2B, vuint32_1), | |||
| vec_sl(t2B, vuint32_1)); | |||
| vec_sl(t2B, vuint32_1)); | |||
| const vector signed int t3C = vec_and(vec_sra(t2C, vuint32_1), | |||
| vec_sl(t2C, vuint32_1)); | |||
| vec_sl(t2C, vuint32_1)); | |||
| const vector signed int yA = vec_and(t2A, t3A); | |||
| const vector signed int yB = vec_and(t2B, t3B); | |||
| const vector signed int yC = vec_and(t2C, t3C); | |||
| @@ -659,15 +659,15 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) { | |||
| const vector signed int sumBd4 = vec_perm(yB, yC, strangeperm1); | |||
| const vector signed int sumBd8 = vec_perm(yB, yC, strangeperm2); | |||
| const vector signed int sumAp = vec_and(yA, | |||
| vec_and(sumAd4,sumAd8)); | |||
| vec_and(sumAd4,sumAd8)); | |||
| const vector signed int sumBp = vec_and(yB, | |||
| vec_and(sumBd4,sumBd8)); | |||
| vec_and(sumBd4,sumBd8)); | |||
| sumA2 = vec_or(sumAp, | |||
| vec_sra(sumAp, | |||
| vuint32_16)); | |||
| vec_sra(sumAp, | |||
| vuint32_16)); | |||
| sumB2 = vec_or(sumBp, | |||
| vec_sra(sumBp, | |||
| vuint32_16)); | |||
| vec_sra(sumBp, | |||
| vuint32_16)); | |||
| } | |||
| vec_st(sumA2, 0, S); | |||
| vec_st(sumB2, 16, S); | |||
| @@ -686,84 +686,84 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) { | |||
| const vector unsigned char permA1 = (vector unsigned char) | |||
| AVV(0x00, 0x01, 0x02, 0x10, 0x11, 0x12, 0x1F, 0x1F, | |||
| 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F); | |||
| 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F); | |||
| const vector unsigned char permA2 = (vector unsigned char) | |||
| AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x10, 0x11, | |||
| 0x12, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F); | |||
| 0x12, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F); | |||
| const vector unsigned char permA1inc = (vector unsigned char) | |||
| AVV(0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, | |||
| 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); | |||
| 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); | |||
| const vector unsigned char permA2inc = (vector unsigned char) | |||
| AVV(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, | |||
| 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); | |||
| 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); | |||
| const vector unsigned char magic = (vector unsigned char) | |||
| AVV(0x01, 0x02, 0x01, 0x02, 0x04, 0x02, 0x01, 0x02, | |||
| 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); | |||
| 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); | |||
| const vector unsigned char extractPerm = (vector unsigned char) | |||
| AVV(0x10, 0x10, 0x10, 0x01, 0x10, 0x10, 0x10, 0x01, | |||
| 0x10, 0x10, 0x10, 0x01, 0x10, 0x10, 0x10, 0x01); | |||
| 0x10, 0x10, 0x10, 0x01, 0x10, 0x10, 0x10, 0x01); | |||
| const vector unsigned char extractPermInc = (vector unsigned char) | |||
| AVV(0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, | |||
| 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01); | |||
| 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01); | |||
| const vector unsigned char identity = vec_lvsl(0,(unsigned char *)0); | |||
| const vector unsigned char tenRight = (vector unsigned char) | |||
| AVV(0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |||
| 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); | |||
| 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); | |||
| const vector unsigned char eightLeft = (vector unsigned char) | |||
| AVV(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |||
| 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08); | |||
| 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08); | |||
| #define F_INIT(i) \ | |||
| vector unsigned char tenRightM##i = tenRight; \ | |||
| vector unsigned char permA1M##i = permA1; \ | |||
| vector unsigned char permA2M##i = permA2; \ | |||
| #define F_INIT(i) \ | |||
| vector unsigned char tenRightM##i = tenRight; \ | |||
| vector unsigned char permA1M##i = permA1; \ | |||
| vector unsigned char permA2M##i = permA2; \ | |||
| vector unsigned char extractPermM##i = extractPerm | |||
| #define F2(i, j, k, l) \ | |||
| if (S[i] & (1 << (l+1))) { \ | |||
| const vector unsigned char a_##j##_A##l = \ | |||
| vec_perm(src##i, src##j, permA1M##i); \ | |||
| const vector unsigned char a_##j##_B##l = \ | |||
| vec_perm(a_##j##_A##l, src##k, permA2M##i); \ | |||
| const vector signed int a_##j##_sump##l = \ | |||
| (vector signed int)vec_msum(a_##j##_B##l, magic, \ | |||
| (vector unsigned int)zero); \ | |||
| vector signed int F_##j##_##l = \ | |||
| vec_sr(vec_sums(a_##j##_sump##l, vsint32_8), vuint32_4); \ | |||
| F_##j##_##l = vec_splat(F_##j##_##l, 3); \ | |||
| const vector signed int p_##j##_##l = \ | |||
| (vector signed int)vec_perm(src##j, \ | |||
| (vector unsigned char)zero, \ | |||
| extractPermM##i); \ | |||
| const vector signed int sum_##j##_##l = vec_add( p_##j##_##l, vQP2); \ | |||
| const vector signed int diff_##j##_##l = vec_sub( p_##j##_##l, vQP2); \ | |||
| vector signed int newpm_##j##_##l; \ | |||
| if (vec_all_lt(sum_##j##_##l, F_##j##_##l)) \ | |||
| newpm_##j##_##l = sum_##j##_##l; \ | |||
| else if (vec_all_gt(diff_##j##_##l, F_##j##_##l)) \ | |||
| newpm_##j##_##l = diff_##j##_##l; \ | |||
| else newpm_##j##_##l = F_##j##_##l; \ | |||
| const vector unsigned char newpm2_##j##_##l = \ | |||
| vec_splat((vector unsigned char)newpm_##j##_##l, 15); \ | |||
| const vector unsigned char mask##j##l = vec_add(identity, \ | |||
| tenRightM##i); \ | |||
| src##j = vec_perm(src##j, newpm2_##j##_##l, mask##j##l); \ | |||
| } \ | |||
| permA1M##i = vec_add(permA1M##i, permA1inc); \ | |||
| permA2M##i = vec_add(permA2M##i, permA2inc); \ | |||
| tenRightM##i = vec_sro(tenRightM##i, eightLeft); \ | |||
| #define F2(i, j, k, l) \ | |||
| if (S[i] & (1 << (l+1))) { \ | |||
| const vector unsigned char a_##j##_A##l = \ | |||
| vec_perm(src##i, src##j, permA1M##i); \ | |||
| const vector unsigned char a_##j##_B##l = \ | |||
| vec_perm(a_##j##_A##l, src##k, permA2M##i); \ | |||
| const vector signed int a_##j##_sump##l = \ | |||
| (vector signed int)vec_msum(a_##j##_B##l, magic, \ | |||
| (vector unsigned int)zero); \ | |||
| vector signed int F_##j##_##l = \ | |||
| vec_sr(vec_sums(a_##j##_sump##l, vsint32_8), vuint32_4); \ | |||
| F_##j##_##l = vec_splat(F_##j##_##l, 3); \ | |||
| const vector signed int p_##j##_##l = \ | |||
| (vector signed int)vec_perm(src##j, \ | |||
| (vector unsigned char)zero, \ | |||
| extractPermM##i); \ | |||
| const vector signed int sum_##j##_##l = vec_add( p_##j##_##l, vQP2);\ | |||
| const vector signed int diff_##j##_##l = vec_sub( p_##j##_##l, vQP2);\ | |||
| vector signed int newpm_##j##_##l; \ | |||
| if (vec_all_lt(sum_##j##_##l, F_##j##_##l)) \ | |||
| newpm_##j##_##l = sum_##j##_##l; \ | |||
| else if (vec_all_gt(diff_##j##_##l, F_##j##_##l)) \ | |||
| newpm_##j##_##l = diff_##j##_##l; \ | |||
| else newpm_##j##_##l = F_##j##_##l; \ | |||
| const vector unsigned char newpm2_##j##_##l = \ | |||
| vec_splat((vector unsigned char)newpm_##j##_##l, 15); \ | |||
| const vector unsigned char mask##j##l = vec_add(identity, \ | |||
| tenRightM##i); \ | |||
| src##j = vec_perm(src##j, newpm2_##j##_##l, mask##j##l); \ | |||
| } \ | |||
| permA1M##i = vec_add(permA1M##i, permA1inc); \ | |||
| permA2M##i = vec_add(permA2M##i, permA2inc); \ | |||
| tenRightM##i = vec_sro(tenRightM##i, eightLeft); \ | |||
| extractPermM##i = vec_add(extractPermM##i, extractPermInc) | |||
| #define ITER(i, j, k) \ | |||
| F_INIT(i); \ | |||
| F2(i, j, k, 0); \ | |||
| F2(i, j, k, 1); \ | |||
| F2(i, j, k, 2); \ | |||
| F2(i, j, k, 3); \ | |||
| F2(i, j, k, 4); \ | |||
| F2(i, j, k, 5); \ | |||
| F2(i, j, k, 6); \ | |||
| #define ITER(i, j, k) \ | |||
| F_INIT(i); \ | |||
| F2(i, j, k, 0); \ | |||
| F2(i, j, k, 1); \ | |||
| F2(i, j, k, 2); \ | |||
| F2(i, j, k, 3); \ | |||
| F2(i, j, k, 4); \ | |||
| F2(i, j, k, 5); \ | |||
| F2(i, j, k, 6); \ | |||
| F2(i, j, k, 7) | |||
| ITER(0, 1, 2); | |||
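
For reference, the F2 macro above boils down to a clamp: the filtered value F may replace the pixel only if it stays within a QP-derived window around the original value (vQP2 in the code). A scalar sketch of that step, with qp2 standing in for whatever bound vQP2 was built from:

    /* scalar sketch of the clamp done by F2: the filtered value f may move
       the pixel p by at most +/- qp2 */
    static inline int dering_clamp(int f, int p, int qp2)
    {
        if (f > p + qp2) return p + qp2;
        if (f < p - qp2) return p - qp2;
        return f;
    }
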
| @@ -777,16 +777,16 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) { | |||
| const vector signed char neg1 = vec_splat_s8(-1); | |||
| #define STORE_LINE(i) \ | |||
| const vector unsigned char permST##i = \ | |||
| vec_lvsr(i * stride, srcCopy); \ | |||
| const vector unsigned char maskST##i = \ | |||
| vec_perm((vector unsigned char)zero, \ | |||
| (vector unsigned char)neg1, permST##i); \ | |||
| src##i = vec_perm(src##i ,src##i, permST##i); \ | |||
| sA##i= vec_sel(sA##i, src##i, maskST##i); \ | |||
| sB##i= vec_sel(src##i, sB##i, maskST##i); \ | |||
| vec_st(sA##i, i * stride, srcCopy); \ | |||
| #define STORE_LINE(i) \ | |||
| const vector unsigned char permST##i = \ | |||
| vec_lvsr(i * stride, srcCopy); \ | |||
| const vector unsigned char maskST##i = \ | |||
| vec_perm((vector unsigned char)zero, \ | |||
| (vector unsigned char)neg1, permST##i); \ | |||
| src##i = vec_perm(src##i ,src##i, permST##i); \ | |||
| sA##i= vec_sel(sA##i, src##i, maskST##i); \ | |||
| sB##i= vec_sel(src##i, sB##i, maskST##i); \ | |||
| vec_st(sA##i, i * stride, srcCopy); \ | |||
| vec_st(sB##i, i * stride + 16, srcCopy) | |||
| STORE_LINE(1); | |||
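
STORE_LINE writes a 16-byte result to a possibly unaligned address by loading the two aligned quadwords that cover it, merging the new bytes in with a vec_sel mask derived from vec_lvsr, and storing both quadwords back. A plain-C restatement of that read-modify-write pattern (a sketch of the idea only, not the AltiVec path; like the vector code it touches the whole 32-byte aligned window around the destination):

    #include <stdint.h>
    #include <string.h>

    /* merge 16 new bytes into memory at an arbitrary address by rewriting
       the two aligned 16-byte blocks that cover it */
    static void store16_rmw(uint8_t *dst, const uint8_t newbytes[16])
    {
        uint8_t *block = (uint8_t *)((uintptr_t)dst & ~(uintptr_t)15);
        size_t   off   = (uintptr_t)dst & 15;
        uint8_t  tmp[32];

        memcpy(tmp, block, 32);          /* load the two covering blocks */
        memcpy(tmp + off, newbytes, 16); /* the vec_sel merge            */
        memcpy(block, tmp, 32);          /* the two vec_st stores        */
    }
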
| @@ -808,7 +808,7 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) { | |||
| #define do_a_deblock_altivec(a...) do_a_deblock_C(a) | |||
| static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, | |||
| uint8_t *tempBlured, uint32_t *tempBluredPast, int *maxNoise) | |||
| uint8_t *tempBlured, uint32_t *tempBluredPast, int *maxNoise) | |||
| { | |||
| const vector signed int zero = vec_splat_s32(0); | |||
| const vector signed short vsint16_1 = vec_splat_s16(1); | |||
| @@ -820,16 +820,16 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, | |||
| tempBluredPast[128]= maxNoise[1]; | |||
| tempBluredPast[129]= maxNoise[2]; | |||
| #define LOAD_LINE(src, i) \ | |||
| register int j##src##i = i * stride; \ | |||
| vector unsigned char perm##src##i = vec_lvsl(j##src##i, src); \ | |||
| const vector unsigned char v_##src##A1##i = vec_ld(j##src##i, src); \ | |||
| #define LOAD_LINE(src, i) \ | |||
| register int j##src##i = i * stride; \ | |||
| vector unsigned char perm##src##i = vec_lvsl(j##src##i, src); \ | |||
| const vector unsigned char v_##src##A1##i = vec_ld(j##src##i, src); \ | |||
| const vector unsigned char v_##src##A2##i = vec_ld(j##src##i + 16, src); \ | |||
| const vector unsigned char v_##src##A##i = \ | |||
| vec_perm(v_##src##A1##i, v_##src##A2##i, perm##src##i); \ | |||
| vector signed short v_##src##Ass##i = \ | |||
| (vector signed short)vec_mergeh((vector signed char)zero, \ | |||
| (vector signed char)v_##src##A##i) | |||
| const vector unsigned char v_##src##A##i = \ | |||
| vec_perm(v_##src##A1##i, v_##src##A2##i, perm##src##i); \ | |||
| vector signed short v_##src##Ass##i = \ | |||
| (vector signed short)vec_mergeh((vector signed char)zero, \ | |||
| (vector signed char)v_##src##A##i) | |||
| LOAD_LINE(src, 0); | |||
| LOAD_LINE(src, 1); | |||
| @@ -850,10 +850,10 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, | |||
| LOAD_LINE(tempBlured, 7); | |||
| #undef LOAD_LINE | |||
| #define ACCUMULATE_DIFFS(i) \ | |||
| vector signed short v_d##i = vec_sub(v_tempBluredAss##i, \ | |||
| v_srcAss##i); \ | |||
| v_dp = vec_msums(v_d##i, v_d##i, v_dp); \ | |||
| #define ACCUMULATE_DIFFS(i) \ | |||
| vector signed short v_d##i = vec_sub(v_tempBluredAss##i, \ | |||
| v_srcAss##i); \ | |||
| v_dp = vec_msums(v_d##i, v_d##i, v_dp); \ | |||
| v_sysdp = vec_msums(v_d##i, vsint16_1, v_sysdp) | |||
| ACCUMULATE_DIFFS(0); | |||
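
ACCUMULATE_DIFFS builds two per-block statistics: v_dp collects the sum of squared differences and v_sysdp the plain sum of differences between the incoming block and the stored blurred block. The scalar equivalent for one 8x8 block looks like this:

    /* scalar equivalent of the ACCUMULATE_DIFFS pass for one 8x8 block */
    static void block_diff_stats(const uint8_t *src, int src_stride,
                                 const uint8_t *blurred, int blurred_stride,
                                 int *ssd, int *sum)
    {
        int x, y, d, dp = 0, sysdp = 0;
        for (y = 0; y < 8; y++)
            for (x = 0; x < 8; x++) {
                d = blurred[y * blurred_stride + x] - src[y * src_stride + x];
                dp    += d * d;   /* the vec_msums(d, d, ...) term         */
                sysdp += d;       /* the vec_msums(d, vsint16_1, ...) term */
            }
        *ssd = dp;
        *sum = sysdp;
    }
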
| @@ -916,12 +916,12 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, | |||
| const vector signed short vsint16_4 = vec_splat_s16(4); | |||
| const vector unsigned short vuint16_3 = vec_splat_u16(3); | |||
| #define OP(i) \ | |||
| const vector signed short v_temp##i = \ | |||
| vec_mladd(v_tempBluredAss##i, \ | |||
| vsint16_7, v_srcAss##i); \ | |||
| const vector signed short v_temp2##i = \ | |||
| vec_add(v_temp##i, vsint16_4); \ | |||
| #define OP(i) \ | |||
| const vector signed short v_temp##i = \ | |||
| vec_mladd(v_tempBluredAss##i, \ | |||
| vsint16_7, v_srcAss##i); \ | |||
| const vector signed short v_temp2##i = \ | |||
| vec_add(v_temp##i, vsint16_4); \ | |||
| v_tempBluredAss##i = vec_sr(v_temp2##i, vuint16_3) | |||
| OP(0); | |||
| @@ -937,12 +937,12 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, | |||
| const vector signed short vsint16_3 = vec_splat_s16(3); | |||
| const vector signed short vsint16_2 = vec_splat_s16(2); | |||
| #define OP(i) \ | |||
| const vector signed short v_temp##i = \ | |||
| vec_mladd(v_tempBluredAss##i, \ | |||
| vsint16_3, v_srcAss##i); \ | |||
| const vector signed short v_temp2##i = \ | |||
| vec_add(v_temp##i, vsint16_2); \ | |||
| #define OP(i) \ | |||
| const vector signed short v_temp##i = \ | |||
| vec_mladd(v_tempBluredAss##i, \ | |||
| vsint16_3, v_srcAss##i); \ | |||
| const vector signed short v_temp2##i = \ | |||
| vec_add(v_temp##i, vsint16_2); \ | |||
| v_tempBluredAss##i = vec_sr(v_temp2##i, (vector unsigned short)vsint16_2) | |||
| OP(0); | |||
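
The two OP variants above are the temporal blend itself: the stored value is mixed with the new pixel as either (7*old + new + 4) >> 3 or (3*old + new + 2) >> 2, the choice depending on the measured block noise (that selection logic sits outside these hunks). A scalar sketch with the shift as a parameter:

    /* scalar sketch of the temporal blend: shift = 3 gives the 7:1 mix,
       shift = 2 the 3:1 mix used for noisier blocks */
    static inline int temporal_blend(int old_val, int new_val, int shift)
    {
        int w     = (1 << shift) - 1;   /* 7 or 3 */
        int round = 1 << (shift - 1);   /* 4 or 2 */
        return (w * old_val + new_val + round) >> shift;
    }
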
| @@ -959,24 +959,24 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, | |||
| const vector signed char neg1 = vec_splat_s8(-1); | |||
| const vector unsigned char permHH = (const vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, | |||
| 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F); | |||
| #define PACK_AND_STORE(src, i) \ | |||
| const vector unsigned char perms##src##i = \ | |||
| vec_lvsr(i * stride, src); \ | |||
| const vector unsigned char vf##src##i = \ | |||
| vec_packsu(v_tempBluredAss##i, (vector signed short)zero); \ | |||
| const vector unsigned char vg##src##i = \ | |||
| vec_perm(vf##src##i, v_##src##A##i, permHH); \ | |||
| const vector unsigned char mask##src##i = \ | |||
| 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F); | |||
| #define PACK_AND_STORE(src, i) \ | |||
| const vector unsigned char perms##src##i = \ | |||
| vec_lvsr(i * stride, src); \ | |||
| const vector unsigned char vf##src##i = \ | |||
| vec_packsu(v_tempBluredAss##i, (vector signed short)zero); \ | |||
| const vector unsigned char vg##src##i = \ | |||
| vec_perm(vf##src##i, v_##src##A##i, permHH); \ | |||
| const vector unsigned char mask##src##i = \ | |||
| vec_perm((vector unsigned char)zero, (vector unsigned char)neg1, perms##src##i); \ | |||
| const vector unsigned char vg2##src##i = \ | |||
| vec_perm(vg##src##i, vg##src##i, perms##src##i); \ | |||
| const vector unsigned char svA##src##i = \ | |||
| vec_sel(v_##src##A1##i, vg2##src##i, mask##src##i); \ | |||
| const vector unsigned char svB##src##i = \ | |||
| vec_sel(vg2##src##i, v_##src##A2##i, mask##src##i); \ | |||
| vec_st(svA##src##i, i * stride, src); \ | |||
| const vector unsigned char vg2##src##i = \ | |||
| vec_perm(vg##src##i, vg##src##i, perms##src##i); \ | |||
| const vector unsigned char svA##src##i = \ | |||
| vec_sel(v_##src##A1##i, vg2##src##i, mask##src##i); \ | |||
| const vector unsigned char svB##src##i = \ | |||
| vec_sel(vg2##src##i, v_##src##A2##i, mask##src##i); \ | |||
| vec_st(svA##src##i, i * stride, src); \ | |||
| vec_st(svB##src##i, i * stride + 16, src) | |||
| PACK_AND_STORE(src, 0); | |||
| @@ -1001,14 +1001,14 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, | |||
| static inline void transpose_16x8_char_toPackedAlign_altivec(unsigned char* dst, unsigned char* src, int stride) { | |||
| const vector unsigned char zero = vec_splat_u8(0); | |||
| #define LOAD_DOUBLE_LINE(i, j) \ | |||
| vector unsigned char perm1##i = vec_lvsl(i * stride, src); \ | |||
| vector unsigned char perm2##i = vec_lvsl(j * stride, src); \ | |||
| vector unsigned char srcA##i = vec_ld(i * stride, src); \ | |||
| #define LOAD_DOUBLE_LINE(i, j) \ | |||
| vector unsigned char perm1##i = vec_lvsl(i * stride, src); \ | |||
| vector unsigned char perm2##i = vec_lvsl(j * stride, src); \ | |||
| vector unsigned char srcA##i = vec_ld(i * stride, src); \ | |||
| vector unsigned char srcB##i = vec_ld(i * stride + 16, src); \ | |||
| vector unsigned char srcC##i = vec_ld(j * stride, src); \ | |||
| vector unsigned char srcC##i = vec_ld(j * stride, src); \ | |||
| vector unsigned char srcD##i = vec_ld(j * stride+ 16, src); \ | |||
| vector unsigned char src##i = vec_perm(srcA##i, srcB##i, perm1##i); \ | |||
| vector unsigned char src##i = vec_perm(srcA##i, srcB##i, perm1##i); \ | |||
| vector unsigned char src##j = vec_perm(srcC##i, srcD##i, perm2##i) | |||
| LOAD_DOUBLE_LINE(0, 1); | |||
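
transpose_16x8_char_toPackedAlign_altivec gathers two unaligned source rows per LOAD_DOUBLE_LINE and then transposes them with merge operations that fall outside this hunk. Functionally the result is a byte transpose into a packed, aligned scratch buffer; a scalar sketch of the assumed layout (16 pixels wide, 8 rows tall, packed destination rows 16 bytes apart):

    /* scalar view of the transpose; dst rows are 16 bytes apart */
    static void transpose_16x8_scalar(uint8_t *dst, const uint8_t *src, int stride)
    {
        int x, y;
        for (y = 0; y < 8; y++)
            for (x = 0; x < 16; x++)
                dst[x * 16 + y] = src[y * stride + x];
    }
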
| @@ -1107,10 +1107,10 @@ static inline void transpose_8x16_char_fromPackedAlign_altivec(unsigned char* ds | |||
| const vector unsigned char zero = vec_splat_u8(0); | |||
| const vector unsigned char magic_perm = (const vector unsigned char) | |||
| AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, | |||
| 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F); | |||
| 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F); | |||
| #define LOAD_DOUBLE_LINE(i, j) \ | |||
| vector unsigned char src##i = vec_ld(i * 16, src); \ | |||
| #define LOAD_DOUBLE_LINE(i, j) \ | |||
| vector unsigned char src##i = vec_ld(i * 16, src); \ | |||
| vector unsigned char src##j = vec_ld(j * 16, src) | |||
| LOAD_DOUBLE_LINE(0, 1); | |||
| @@ -1169,24 +1169,24 @@ static inline void transpose_8x16_char_fromPackedAlign_altivec(unsigned char* ds | |||
| const vector signed char neg1 = vec_splat_s8(-1); | |||
| #define STORE_DOUBLE_LINE(i, j) \ | |||
| vector unsigned char dstA##i = vec_ld(i * stride, dst); \ | |||
| vector unsigned char dstB##i = vec_ld(i * stride + 16, dst); \ | |||
| vector unsigned char dstA##j = vec_ld(j * stride, dst); \ | |||
| vector unsigned char dstB##j = vec_ld(j * stride+ 16, dst); \ | |||
| vector unsigned char align##i = vec_lvsr(i * stride, dst); \ | |||
| vector unsigned char align##j = vec_lvsr(j * stride, dst); \ | |||
| vector unsigned char mask##i = vec_perm(zero, (vector unsigned char)neg1, align##i); \ | |||
| vector unsigned char mask##j = vec_perm(zero, (vector unsigned char)neg1, align##j); \ | |||
| vector unsigned char dstR##i = vec_perm(temp##i, temp##i, align##i); \ | |||
| vector unsigned char dstR##j = vec_perm(temp##j, temp##j, align##j); \ | |||
| vector unsigned char dstAF##i = vec_sel(dstA##i, dstR##i, mask##i); \ | |||
| vector unsigned char dstBF##i = vec_sel(dstR##i, dstB##i, mask##i); \ | |||
| vector unsigned char dstAF##j = vec_sel(dstA##j, dstR##j, mask##j); \ | |||
| vector unsigned char dstBF##j = vec_sel(dstR##j, dstB##j, mask##j); \ | |||
| vec_st(dstAF##i, i * stride, dst); \ | |||
| vec_st(dstBF##i, i * stride + 16, dst); \ | |||
| vec_st(dstAF##j, j * stride, dst); \ | |||
| #define STORE_DOUBLE_LINE(i, j) \ | |||
| vector unsigned char dstA##i = vec_ld(i * stride, dst); \ | |||
| vector unsigned char dstB##i = vec_ld(i * stride + 16, dst); \ | |||
| vector unsigned char dstA##j = vec_ld(j * stride, dst); \ | |||
| vector unsigned char dstB##j = vec_ld(j * stride+ 16, dst); \ | |||
| vector unsigned char align##i = vec_lvsr(i * stride, dst); \ | |||
| vector unsigned char align##j = vec_lvsr(j * stride, dst); \ | |||
| vector unsigned char mask##i = vec_perm(zero, (vector unsigned char)neg1, align##i); \ | |||
| vector unsigned char mask##j = vec_perm(zero, (vector unsigned char)neg1, align##j); \ | |||
| vector unsigned char dstR##i = vec_perm(temp##i, temp##i, align##i); \ | |||
| vector unsigned char dstR##j = vec_perm(temp##j, temp##j, align##j); \ | |||
| vector unsigned char dstAF##i = vec_sel(dstA##i, dstR##i, mask##i); \ | |||
| vector unsigned char dstBF##i = vec_sel(dstR##i, dstB##i, mask##i); \ | |||
| vector unsigned char dstAF##j = vec_sel(dstA##j, dstR##j, mask##j); \ | |||
| vector unsigned char dstBF##j = vec_sel(dstR##j, dstB##j, mask##j); \ | |||
| vec_st(dstAF##i, i * stride, dst); \ | |||
| vec_st(dstBF##i, i * stride + 16, dst); \ | |||
| vec_st(dstAF##j, j * stride, dst); \ | |||
| vec_st(dstBF##j, j * stride + 16, dst) | |||
| STORE_DOUBLE_LINE(0,1); | |||
| @@ -21,42 +21,42 @@ | |||
| * internal api header. | |||
| */ | |||
| #define V_DEBLOCK 0x01 | |||
| #define H_DEBLOCK 0x02 | |||
| #define DERING 0x04 | |||
| #define LEVEL_FIX 0x08 ///< Brightness & Contrast | |||
| #define LUM_V_DEBLOCK V_DEBLOCK // 1 | |||
| #define LUM_H_DEBLOCK H_DEBLOCK // 2 | |||
| #define CHROM_V_DEBLOCK (V_DEBLOCK<<4) // 16 | |||
| #define CHROM_H_DEBLOCK (H_DEBLOCK<<4) // 32 | |||
| #define LUM_DERING DERING // 4 | |||
| #define CHROM_DERING (DERING<<4) // 64 | |||
| #define LUM_LEVEL_FIX LEVEL_FIX // 8 | |||
| #define CHROM_LEVEL_FIX (LEVEL_FIX<<4) // 128 (not implemented yet) | |||
| #define V_DEBLOCK 0x01 | |||
| #define H_DEBLOCK 0x02 | |||
| #define DERING 0x04 | |||
| #define LEVEL_FIX 0x08 ///< Brightness & Contrast | |||
| #define LUM_V_DEBLOCK V_DEBLOCK // 1 | |||
| #define LUM_H_DEBLOCK H_DEBLOCK // 2 | |||
| #define CHROM_V_DEBLOCK (V_DEBLOCK<<4) // 16 | |||
| #define CHROM_H_DEBLOCK (H_DEBLOCK<<4) // 32 | |||
| #define LUM_DERING DERING // 4 | |||
| #define CHROM_DERING (DERING<<4) // 64 | |||
| #define LUM_LEVEL_FIX LEVEL_FIX // 8 | |||
| #define CHROM_LEVEL_FIX (LEVEL_FIX<<4) // 128 (not implemented yet) | |||
| // Experimental vertical filters | |||
| #define V_X1_FILTER 0x0200 // 512 | |||
| #define V_A_DEBLOCK 0x0400 | |||
| #define V_X1_FILTER 0x0200 // 512 | |||
| #define V_A_DEBLOCK 0x0400 | |||
| // Experimental horizontal filters | |||
| #define H_X1_FILTER 0x2000 // 8192 | |||
| #define H_A_DEBLOCK 0x4000 | |||
| #define H_X1_FILTER 0x2000 // 8192 | |||
| #define H_A_DEBLOCK 0x4000 | |||
| /// select between full y range (255-0) or standard one (234-16) | |||
| #define FULL_Y_RANGE 0x8000 // 32768 | |||
| #define FULL_Y_RANGE 0x8000 // 32768 | |||
| //Deinterlacing Filters | |||
| #define LINEAR_IPOL_DEINT_FILTER 0x10000 // 65536 | |||
| #define LINEAR_BLEND_DEINT_FILTER 0x20000 // 131072 | |||
| #define CUBIC_BLEND_DEINT_FILTER 0x8000 // (not implemented yet) | |||
| #define CUBIC_IPOL_DEINT_FILTER 0x40000 // 262144 | |||
| #define MEDIAN_DEINT_FILTER 0x80000 // 524288 | |||
| #define FFMPEG_DEINT_FILTER 0x400000 | |||
| #define LOWPASS5_DEINT_FILTER 0x800000 | |||
| #define LINEAR_IPOL_DEINT_FILTER 0x10000 // 65536 | |||
| #define LINEAR_BLEND_DEINT_FILTER 0x20000 // 131072 | |||
| #define CUBIC_BLEND_DEINT_FILTER 0x8000 // (not implemented yet) | |||
| #define CUBIC_IPOL_DEINT_FILTER 0x40000 // 262144 | |||
| #define MEDIAN_DEINT_FILTER 0x80000 // 524288 | |||
| #define FFMPEG_DEINT_FILTER 0x400000 | |||
| #define LOWPASS5_DEINT_FILTER 0x800000 | |||
| #define TEMP_NOISE_FILTER 0x100000 | |||
| #define FORCE_QUANT 0x200000 | |||
| #define TEMP_NOISE_FILTER 0x100000 | |||
| #define FORCE_QUANT 0x200000 | |||
| //use if you want faster postprocessing code | |||
| //cannot differentiate between chroma & luma filters (both on or both off) | |||
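
These flags are what postprocessing modes are composed from: the low bits select the luma filters, the same bits shifted left by four select their chroma counterparts, and the higher bits add the optional filters. For example, a hypothetical mode that deblocks in both directions and derings, for luma and chroma alike, would be built like this (flag values as defined above):

    /* hypothetical mode: deblock both directions plus dering, luma and chroma */
    int lumMode   = LUM_V_DEBLOCK   | LUM_H_DEBLOCK   | LUM_DERING;   /* 0x01|0x02|0x04 = 0x07 */
    int chromMode = CHROM_V_DEBLOCK | CHROM_H_DEBLOCK | CHROM_DERING; /* 0x10|0x20|0x40 = 0x70 */
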
| @@ -66,8 +66,8 @@ | |||
| #if 1 | |||
| static inline int CLIP(int a){ | |||
| if(a&256) return ((a)>>31)^(-1); | |||
| else return a; | |||
| if(a&256) return ((a)>>31)^(-1); | |||
| else return a; | |||
| } | |||
| //#define CLIP(a) (((a)&256) ? ((a)>>31)^(-1) : (a)) | |||
| #elif 0 | |||
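
The CLIP above is a branch-reduced clamp that is only valid for inputs in the 9-bit-extended range [-256, 511]: if bit 8 is set the value is outside [0, 255], and (a>>31)^(-1) then yields 0 for negative inputs and -1 (0xFF when truncated to a byte) for overflows. A small self-check, assuming the usual arithmetic right shift on negative ints and that callers take the result modulo 256:

    #include <assert.h>
    #include <stdint.h>

    static inline int CLIP_trick(int a)
    {
        if (a & 256) return ((a) >> 31) ^ (-1);
        else         return a;
    }

    int main(void)
    {
        int a;
        for (a = -256; a < 512; a++) {
            int ref = a < 0 ? 0 : a > 255 ? 255 : a;  /* straightforward clamp */
            assert((uint8_t)CLIP_trick(a) == (uint8_t)ref);
        }
        return 0;
    }
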
| @@ -79,92 +79,92 @@ static inline int CLIP(int a){ | |||
| * Postprocessing filter. | |||
| */ | |||
| struct PPFilter{ | |||
| char *shortName; | |||
| char *longName; | |||
| int chromDefault; ///< is chrominance filtering on by default if this filter is manually activated | |||
| int minLumQuality; ///< minimum quality to turn luminance filtering on | |||
| int minChromQuality; ///< minimum quality to turn chrominance filtering on | |||
| int mask; ///< Bitmask to turn this filter on | |||
| char *shortName; | |||
| char *longName; | |||
| int chromDefault; ///< is chrominance filtering on by default if this filter is manually activated | |||
| int minLumQuality; ///< minimum quality to turn luminance filtering on | |||
| int minChromQuality; ///< minimum quality to turn chrominance filtering on | |||
| int mask; ///< Bitmask to turn this filter on | |||
| }; | |||
| /** | |||
| * Postprocessing mode. | |||
| */ | |||
| typedef struct PPMode{ | |||
| int lumMode; ///< activates filters for luminance | |||
| int chromMode; ///< activates filters for chrominance | |||
| int error; ///< non-zero on error | |||
| int lumMode; ///< activates filters for luminance | |||
| int chromMode; ///< activates filters for chrominance | |||
| int error; ///< non-zero on error | |||
| int minAllowedY; ///< for brightness correction | |||
| int maxAllowedY; ///< for brightness correction | |||
| float maxClippedThreshold; ///< amount of "black" you are willing to lose to get a brightness-corrected picture | |||
| int minAllowedY; ///< for brightness correction | |||
| int maxAllowedY; ///< for brightness correction | |||
| float maxClippedThreshold; ///< amount of "black" you are willing to lose to get a brightness-corrected picture | |||
| int maxTmpNoise[3]; ///< for Temporal Noise Reducing filter (Maximal sum of abs differences) | |||
| int maxTmpNoise[3]; ///< for Temporal Noise Reducing filter (Maximal sum of abs differences) | |||
| int baseDcDiff; | |||
| int flatnessThreshold; | |||
| int baseDcDiff; | |||
| int flatnessThreshold; | |||
| int forcedQuant; ///< quantizer if FORCE_QUANT is used | |||
| int forcedQuant; ///< quantizer if FORCE_QUANT is used | |||
| } PPMode; | |||
| /** | |||
| * postprocess context. | |||
| */ | |||
| typedef struct PPContext{ | |||
| uint8_t *tempBlocks; ///<used for the horizontal code | |||
| uint8_t *tempBlocks; ///<used for the horizontal code | |||
| /** | |||
| * luma histogram. | |||
| * we need 64 bits here, otherwise we're going to have a problem | |||
| * after watching a black picture for 5 hours | |||
| */ | |||
| uint64_t *yHistogram; | |||
| /** | |||
| * luma histogram. | |||
| * we need 64 bits here, otherwise we're going to have a problem | |||
| * after watching a black picture for 5 hours | |||
| */ | |||
| uint64_t *yHistogram; | |||
| uint64_t __attribute__((aligned(8))) packedYOffset; | |||
| uint64_t __attribute__((aligned(8))) packedYScale; | |||
| uint64_t __attribute__((aligned(8))) packedYOffset; | |||
| uint64_t __attribute__((aligned(8))) packedYScale; | |||
| /** Temporal noise reducing buffers */ | |||
| uint8_t *tempBlured[3]; | |||
| int32_t *tempBluredPast[3]; | |||
| /** Temporal noise reducing buffers */ | |||
| uint8_t *tempBlured[3]; | |||
| int32_t *tempBluredPast[3]; | |||
| /** Temporary buffers for handling the last row(s) */ | |||
| uint8_t *tempDst; | |||
| uint8_t *tempSrc; | |||
| /** Temporary buffers for handling the last row(s) */ | |||
| uint8_t *tempDst; | |||
| uint8_t *tempSrc; | |||
| uint8_t *deintTemp; | |||
| uint8_t *deintTemp; | |||
| uint64_t __attribute__((aligned(8))) pQPb; | |||
| uint64_t __attribute__((aligned(8))) pQPb2; | |||
| uint64_t __attribute__((aligned(8))) pQPb; | |||
| uint64_t __attribute__((aligned(8))) pQPb2; | |||
| uint64_t __attribute__((aligned(8))) mmxDcOffset[64]; | |||
| uint64_t __attribute__((aligned(8))) mmxDcThreshold[64]; | |||
| uint64_t __attribute__((aligned(8))) mmxDcOffset[64]; | |||
| uint64_t __attribute__((aligned(8))) mmxDcThreshold[64]; | |||
| QP_STORE_T *stdQPTable; ///< used to fix MPEG2 style qscale | |||
| QP_STORE_T *nonBQPTable; | |||
| QP_STORE_T *forcedQPTable; | |||
| QP_STORE_T *stdQPTable; ///< used to fix MPEG2 style qscale | |||
| QP_STORE_T *nonBQPTable; | |||
| QP_STORE_T *forcedQPTable; | |||
| int QP; | |||
| int nonBQP; | |||
| int QP; | |||
| int nonBQP; | |||
| int frameNum; | |||
| int frameNum; | |||
| int cpuCaps; | |||
| int cpuCaps; | |||
| int qpStride; ///<size of qp buffers (needed to realloc them if needed) | |||
| int stride; ///<size of some buffers (needed to realloc them if needed) | |||
| int qpStride; ///<size of qp buffers (needed to realloc them if needed) | |||
| int stride; ///<size of some buffers (needed to realloc them if needed) | |||
| int hChromaSubSample; | |||
| int vChromaSubSample; | |||
| int hChromaSubSample; | |||
| int vChromaSubSample; | |||
| PPMode ppMode; | |||
| PPMode ppMode; | |||
| } PPContext; | |||
| static inline void linecpy(void *dest, void *src, int lines, int stride) | |||
| { | |||
| if (stride > 0) { | |||
| memcpy(dest, src, lines*stride); | |||
| } else { | |||
| memcpy(dest+(lines-1)*stride, src+(lines-1)*stride, -lines*stride); | |||
| } | |||
| if (stride > 0) { | |||
| memcpy(dest, src, lines*stride); | |||
| } else { | |||
| memcpy(dest+(lines-1)*stride, src+(lines-1)*stride, -lines*stride); | |||
| } | |||
| } | |||
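
linecpy exists so callers can copy an image region whose stride is negative (a bottom-up view): in that case the block really starts at the last logical line, which is the lowest address, and its total size is -lines*stride. A hypothetical usage sketch; both calls copy exactly the same 64 bytes:

    #include <stdint.h>

    static void linecpy_demo(void)
    {
        uint8_t srcbuf[4 * 16] = {0}, dstbuf[4 * 16];
        int stride = 16;

        /* top-down view: pointers to the first line, positive stride */
        linecpy(dstbuf, srcbuf, 4, stride);

        /* bottom-up view: pointers to the last line, negative stride */
        linecpy(dstbuf + 3 * stride, srcbuf + 3 * stride, 4, -stride);
    }
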
| @@ -64,8 +64,8 @@ void *av_malloc(unsigned int size) | |||
| Indeed, we should align it: | |||
| on 4 for 386 | |||
| on 16 for 486 | |||
| on 32 for 586, PPro - k6-III | |||
| on 64 for K7 (maybe for P3 too). | |||
| on 32 for 586, PPro - k6-III | |||
| on 64 for K7 (maybe for P3 too). | |||
| Because L1 and L2 caches are aligned on those values. | |||
| But I don't want to code such logic here! | |||
| */ | |||
| @@ -76,13 +76,13 @@ void *av_malloc(unsigned int size) | |||
| Why not larger? Because I didn't see a difference in benchmarks ... | |||
| */ | |||
| /* benchmarks with p3 | |||
| memalign(64)+1 3071,3051,3032 | |||
| memalign(64)+2 3051,3032,3041 | |||
| memalign(64)+4 2911,2896,2915 | |||
| memalign(64)+8 2545,2554,2550 | |||
| memalign(64)+16 2543,2572,2563 | |||
| memalign(64)+32 2546,2545,2571 | |||
| memalign(64)+64 2570,2533,2558 | |||
| memalign(64)+1 3071,3051,3032 | |||
| memalign(64)+2 3051,3032,3041 | |||
| memalign(64)+4 2911,2896,2915 | |||
| memalign(64)+8 2545,2554,2550 | |||
| memalign(64)+16 2543,2572,2563 | |||
| memalign(64)+32 2546,2545,2571 | |||
| memalign(64)+64 2570,2533,2558 | |||
| By the way, malloc seems to do 8-byte alignment by default here | |||
| */ | |||
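
The comments above argue for 64-byte alignment (the cache-line size of the CPUs listed) and record an experiment with deliberately mis-aligned offsets. For context, a generic sketch of getting such alignment out of plain malloc - the usual over-allocate-and-round trick, not what libavutil actually does:

    #include <stdlib.h>
    #include <stdint.h>

    /* over-allocate, round the pointer up to 64 bytes, and stash the original
       pointer just below the returned block so it can be freed later */
    static void *malloc_aligned64(size_t size)
    {
        void *base = malloc(size + 64 + sizeof(void *));
        uintptr_t p;
        if (!base)
            return NULL;
        p = ((uintptr_t)base + sizeof(void *) + 63) & ~(uintptr_t)63;
        ((void **)p)[-1] = base;
        return (void *)p;
    }

    static void free_aligned64(void *ptr)
    {
        if (ptr)
            free(((void **)ptr)[-1]);
    }
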
| @@ -54,26 +54,26 @@ typedef struct MJpegContext { | |||
| /* JPEG marker codes */ | |||
| typedef enum { | |||
| /* start of frame */ | |||
| SOF0 = 0xc0, /* baseline */ | |||
| SOF1 = 0xc1, /* extended sequential, huffman */ | |||
| SOF2 = 0xc2, /* progressive, huffman */ | |||
| SOF3 = 0xc3, /* lossless, huffman */ | |||
| SOF0 = 0xc0, /* baseline */ | |||
| SOF1 = 0xc1, /* extended sequential, huffman */ | |||
| SOF2 = 0xc2, /* progressive, huffman */ | |||
| SOF3 = 0xc3, /* lossless, huffman */ | |||
| SOF5 = 0xc5, /* differential sequential, huffman */ | |||
| SOF6 = 0xc6, /* differential progressive, huffman */ | |||
| SOF7 = 0xc7, /* differential lossless, huffman */ | |||
| JPG = 0xc8, /* reserved for JPEG extension */ | |||
| SOF9 = 0xc9, /* extended sequential, arithmetic */ | |||
| SOF10 = 0xca, /* progressive, arithmetic */ | |||
| SOF11 = 0xcb, /* lossless, arithmetic */ | |||
| SOF5 = 0xc5, /* differential sequential, huffman */ | |||
| SOF6 = 0xc6, /* differential progressive, huffman */ | |||
| SOF7 = 0xc7, /* differential lossless, huffman */ | |||
| JPG = 0xc8, /* reserved for JPEG extension */ | |||
| SOF9 = 0xc9, /* extended sequential, arithmetic */ | |||
| SOF10 = 0xca, /* progressive, arithmetic */ | |||
| SOF11 = 0xcb, /* lossless, arithmetic */ | |||
| SOF13 = 0xcd, /* differential sequential, arithmetic */ | |||
| SOF14 = 0xce, /* differential progressive, arithmetic */ | |||
| SOF15 = 0xcf, /* differential lossless, arithmetic */ | |||
| SOF13 = 0xcd, /* differential sequential, arithmetic */ | |||
| SOF14 = 0xce, /* differential progressive, arithmetic */ | |||
| SOF15 = 0xcf, /* differential lossless, arithmetic */ | |||
| DHT = 0xc4, /* define huffman tables */ | |||
| DHT = 0xc4, /* define huffman tables */ | |||
| DAC = 0xcc, /* define arithmetic-coding conditioning */ | |||
| DAC = 0xcc, /* define arithmetic-coding conditioning */ | |||
| /* restart with modulo 8 count "m" */ | |||
| RST0 = 0xd0, | |||
| @@ -85,14 +85,14 @@ typedef enum { | |||
| RST6 = 0xd6, | |||
| RST7 = 0xd7, | |||
| SOI = 0xd8, /* start of image */ | |||
| EOI = 0xd9, /* end of image */ | |||
| SOS = 0xda, /* start of scan */ | |||
| DQT = 0xdb, /* define quantization tables */ | |||
| DNL = 0xdc, /* define number of lines */ | |||
| DRI = 0xdd, /* define restart interval */ | |||
| DHP = 0xde, /* define hierarchical progression */ | |||
| EXP = 0xdf, /* expand reference components */ | |||
| SOI = 0xd8, /* start of image */ | |||
| EOI = 0xd9, /* end of image */ | |||
| SOS = 0xda, /* start of scan */ | |||
| DQT = 0xdb, /* define quantization tables */ | |||
| DNL = 0xdc, /* define number of lines */ | |||
| DRI = 0xdd, /* define restart interval */ | |||
| DHP = 0xde, /* define hierarchical progression */ | |||
| EXP = 0xdf, /* expand reference components */ | |||
| APP0 = 0xe0, | |||
| APP1 = 0xe1, | |||
| @@ -118,17 +118,17 @@ typedef enum { | |||
| JPG4 = 0xf4, | |||
| JPG5 = 0xf5, | |||
| JPG6 = 0xf6, | |||
| SOF48 = 0xf7, ///< JPEG-LS | |||
| LSE = 0xf8, ///< JPEG-LS extension parameters | |||
| SOF48 = 0xf7, ///< JPEG-LS | |||
| LSE = 0xf8, ///< JPEG-LS extension parameters | |||
| JPG9 = 0xf9, | |||
| JPG10 = 0xfa, | |||
| JPG11 = 0xfb, | |||
| JPG12 = 0xfc, | |||
| JPG13 = 0xfd, | |||
| COM = 0xfe, /* comment */ | |||
| COM = 0xfe, /* comment */ | |||
| TEM = 0x01, /* temporary private use for arithmetic coding */ | |||
| TEM = 0x01, /* temporary private use for arithmetic coding */ | |||
| /* 0x02 -> 0xbf reserved */ | |||
| } JPEG_MARKER; | |||
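
Every one of these markers appears in the stream as 0xFF followed by the marker byte; except for SOI, EOI, TEM and RST0-RST7, the marker is then followed by a 16-bit big-endian segment length that counts the two length bytes themselves. A minimal segment walker built on just that rule (an illustrative sketch, independent of the decoder further below):

    #include <stddef.h>
    #include <stdint.h>

    /* walk marker segments in buf[0..len); returns the number of markers
       seen, stopping at SOS (scan data follows) or EOI */
    static int walk_jpeg_markers(const uint8_t *buf, size_t len)
    {
        size_t i = 0;
        int count = 0;
        uint8_t m;

        while (i + 1 < len) {
            if (buf[i] != 0xff) { i++; continue; }
            m = buf[i + 1];
            if (m == 0xff || m == 0x00) { i++; continue; } /* fill / stuffing */
            count++;
            i += 2;
            if (m == 0xd8 /* SOI */ || m == 0x01 /* TEM */ ||
                (m >= 0xd0 && m <= 0xd7) /* RSTn */)
                continue;                                  /* no length field */
            if (m == 0xd9 /* EOI */ || m == 0xda /* SOS */)
                break;
            if (i + 1 >= len)
                break;
            i += ((size_t)buf[i] << 8) | buf[i + 1];       /* length includes itself */
        }
        return count;
    }
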
| @@ -583,7 +583,7 @@ void mjpeg_picture_trailer(MpegEncContext *s) | |||
| } | |||
| static inline void mjpeg_encode_dc(MpegEncContext *s, int val, | |||
| uint8_t *huff_size, uint16_t *huff_code) | |||
| uint8_t *huff_size, uint16_t *huff_code) | |||
| { | |||
| int mant, nbits; | |||
| @@ -935,10 +935,10 @@ static int mjpeg_decode_init(AVCodecContext *avctx) | |||
| if (avctx->flags & CODEC_FLAG_EXTERN_HUFF) | |||
| { | |||
| av_log(avctx, AV_LOG_INFO, "mjpeg: using external huffman table\n"); | |||
| init_get_bits(&s->gb, avctx->extradata, avctx->extradata_size*8); | |||
| mjpeg_decode_dht(s); | |||
| /* should check for errors here, but this is currently not done */ | |||
| av_log(avctx, AV_LOG_INFO, "mjpeg: using external huffman table\n"); | |||
| init_get_bits(&s->gb, avctx->extradata, avctx->extradata_size*8); | |||
| mjpeg_decode_dht(s); | |||
| /* should check for errors here, but this is currently not done */ | |||
| } | |||
| return 0; | |||
| @@ -1017,10 +1017,10 @@ static int mjpeg_decode_dqt(MJpegDecodeContext *s) | |||
| while (len >= 65) { | |||
| /* only 8 bit precision handled */ | |||
| if (get_bits(&s->gb, 4) != 0) | |||
| { | |||
| dprintf("dqt: 16bit precision\n"); | |||
| { | |||
| dprintf("dqt: 16bit precision\n"); | |||
| return -1; | |||
| } | |||
| } | |||
| index = get_bits(&s->gb, 4); | |||
| if (index >= 4) | |||
| return -1; | |||
| @@ -1028,14 +1028,14 @@ static int mjpeg_decode_dqt(MJpegDecodeContext *s) | |||
| /* read quant table */ | |||
| for(i=0;i<64;i++) { | |||
| j = s->scantable.permutated[i]; | |||
| s->quant_matrixes[index][j] = get_bits(&s->gb, 8); | |||
| s->quant_matrixes[index][j] = get_bits(&s->gb, 8); | |||
| } | |||
| //XXX FIXME finetune, and perhaps add dc too | |||
| s->qscale[index]= FFMAX( | |||
| s->quant_matrixes[index][s->scantable.permutated[1]], | |||
| s->quant_matrixes[index][s->scantable.permutated[8]]) >> 1; | |||
| dprintf("qscale[%d]: %d\n", index, s->qscale[index]); | |||
| dprintf("qscale[%d]: %d\n", index, s->qscale[index]); | |||
| len -= 65; | |||
| } | |||
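
For reference, the DQT payload being parsed here is: a 16-bit length, then one or more tables, each introduced by a precision/index nibble pair and followed by 64 quantizer values in zig-zag order (only 8-bit precision is accepted above). The same parse as a standalone sketch over a raw byte buffer, without the bitreader or the scantable permutation:

    #include <stddef.h>
    #include <stdint.h>

    /* parse 8-bit-precision DQT tables from a raw segment (data points at the
       16-bit length that follows the 0xFFDB marker); returns 0 on success */
    static int parse_dqt(const uint8_t *data, size_t size, uint8_t quant[4][64])
    {
        size_t len, pos = 2;
        int i, precision, index;

        if (size < 2)
            return -1;
        len = ((size_t)data[0] << 8) | data[1];   /* includes these two bytes */
        if (len < 2 || len > size)
            return -1;
        while (len - pos >= 65) {
            precision = data[pos] >> 4;
            index     = data[pos] & 0x0f;
            pos++;
            if (precision != 0 || index >= 4)
                return -1;                        /* only 8-bit tables here */
            for (i = 0; i < 64; i++)
                quant[index][i] = data[pos++];    /* still in zig-zag order */
        }
        return 0;
    }
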
| @@ -1132,7 +1132,7 @@ static int mjpeg_decode_sof(MJpegDecodeContext *s) | |||
| if (s->quant_index[i] >= 4) | |||
| return -1; | |||
| dprintf("component %d %d:%d id: %d quant:%d\n", i, s->h_count[i], | |||
| s->v_count[i], s->component_id[i], s->quant_index[i]); | |||
| s->v_count[i], s->component_id[i], s->quant_index[i]); | |||
| } | |||
| if(s->v_max==1 && s->h_max==1 && s->lossless==1) s->rgb=1; | |||
| @@ -1151,7 +1151,7 @@ static int mjpeg_decode_sof(MJpegDecodeContext *s) | |||
| s->org_height != 0 && | |||
| s->height < ((s->org_height * 3) / 4)) { | |||
| s->interlaced = 1; | |||
| // s->bottom_field = (s->interlace_polarity) ? 1 : 0; | |||
| // s->bottom_field = (s->interlace_polarity) ? 1 : 0; | |||
| s->bottom_field = 0; | |||
| s->avctx->height *= 2; | |||
| } | |||
| @@ -1202,7 +1202,7 @@ static int mjpeg_decode_sof(MJpegDecodeContext *s) | |||
| if (len != (8+(3*nb_components))) | |||
| { | |||
| dprintf("decode_sof0: error, len(%d) mismatch\n", len); | |||
| dprintf("decode_sof0: error, len(%d) mismatch\n", len); | |||
| } | |||
| return 0; | |||
| @@ -1214,7 +1214,7 @@ static inline int mjpeg_decode_dc(MJpegDecodeContext *s, int dc_index) | |||
| code = get_vlc2(&s->gb, s->vlcs[0][dc_index].table, 9, 2); | |||
| if (code < 0) | |||
| { | |||
| dprintf("mjpeg_decode_dc: bad vlc: %d:%d (%p)\n", 0, dc_index, | |||
| dprintf("mjpeg_decode_dc: bad vlc: %d:%d (%p)\n", 0, dc_index, | |||
| &s->vlcs[0][dc_index]); | |||
| return 0xffff; | |||
| } | |||
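
The VLC lookup above only yields the DC category, i.e. how many additional bits follow; the difference value itself is those bits run through the standard JPEG EXTEND step (ITU-T T.81, F.2.2.1), which the rest of the function handles outside this hunk. That step as a standalone sketch:

    /* map an nbits-wide "additional bits" field to a signed DC difference;
       e.g. nbits = 3 maps raw 0..3 to -7..-4 and raw 4..7 to 4..7 */
    static int jpeg_extend(int raw, int nbits)
    {
        if (nbits == 0)
            return 0;
        if (raw < (1 << (nbits - 1)))     /* MSB clear: negative value */
            raw -= (1 << nbits) - 1;
        return raw;
    }
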
| @@ -1247,7 +1247,7 @@ static int decode_block(MJpegDecodeContext *s, DCTELEM *block, | |||
| ac_vlc = &s->vlcs[1][ac_index]; | |||
| i = 1; | |||
| for(;;) { | |||
| code = get_vlc2(&s->gb, s->vlcs[1][ac_index].table, 9, 2); | |||
| code = get_vlc2(&s->gb, s->vlcs[1][ac_index].table, 9, 2); | |||
| if (code < 0) { | |||
| dprintf("error ac\n"); | |||
| @@ -1452,7 +1452,7 @@ static int mjpeg_decode_scan(MJpegDecodeContext *s){ | |||
| dprintf("error y=%d x=%d\n", mb_y, mb_x); | |||
| return -1; | |||
| } | |||
| // dprintf("mb: %d %d processed\n", mb_y, mb_x); | |||
| // dprintf("mb: %d %d processed\n", mb_y, mb_x); | |||
| ptr = s->picture.data[c] + | |||
| (((s->linesize[c] * (v * mb_y + y) * 8) + | |||
| (h * mb_x + x) * 8) >> s->avctx->lowres); | |||
| @@ -1491,29 +1491,29 @@ static int mjpeg_decode_sos(MJpegDecodeContext *s) | |||
| nb_components = get_bits(&s->gb, 8); | |||
| if (len != 6+2*nb_components) | |||
| { | |||
| dprintf("decode_sos: invalid len (%d)\n", len); | |||
| return -1; | |||
| dprintf("decode_sos: invalid len (%d)\n", len); | |||
| return -1; | |||
| } | |||
| /* XXX: only interleaved scan accepted */ | |||
| if (nb_components != s->nb_components) | |||
| { | |||
| dprintf("decode_sos: components(%d) mismatch\n", nb_components); | |||
| dprintf("decode_sos: components(%d) mismatch\n", nb_components); | |||
| return -1; | |||
| } | |||
| vmax = 0; | |||
| hmax = 0; | |||
| for(i=0;i<nb_components;i++) { | |||
| id = get_bits(&s->gb, 8) - 1; | |||
| dprintf("component: %d\n", id); | |||
| dprintf("component: %d\n", id); | |||
| /* find component index */ | |||
| for(index=0;index<s->nb_components;index++) | |||
| if (id == s->component_id[index]) | |||
| break; | |||
| if (index == s->nb_components) | |||
| { | |||
| dprintf("decode_sos: index(%d) out of components\n", index); | |||
| { | |||
| dprintf("decode_sos: index(%d) out of components\n", index); | |||
| return -1; | |||
| } | |||
| } | |||
| s->comp_index[i] = index; | |||
| @@ -1524,26 +1524,26 @@ static int mjpeg_decode_sos(MJpegDecodeContext *s) | |||
| s->dc_index[i] = get_bits(&s->gb, 4); | |||
| s->ac_index[i] = get_bits(&s->gb, 4); | |||
| if (s->dc_index[i] < 0 || s->ac_index[i] < 0 || | |||
| s->dc_index[i] >= 4 || s->ac_index[i] >= 4) | |||
| goto out_of_range; | |||
| if (s->dc_index[i] < 0 || s->ac_index[i] < 0 || | |||
| s->dc_index[i] >= 4 || s->ac_index[i] >= 4) | |||
| goto out_of_range; | |||
| #if 0 //buggy | |||
| switch(s->start_code) | |||
| { | |||
| case SOF0: | |||
| if (dc_index[i] > 1 || ac_index[i] > 1) | |||
| goto out_of_range; | |||
| break; | |||
| case SOF1: | |||
| case SOF2: | |||
| if (dc_index[i] > 3 || ac_index[i] > 3) | |||
| goto out_of_range; | |||
| break; | |||
| case SOF3: | |||
| if (dc_index[i] > 3 || ac_index[i] != 0) | |||
| goto out_of_range; | |||
| break; | |||
| } | |||
| switch(s->start_code) | |||
| { | |||
| case SOF0: | |||
| if (dc_index[i] > 1 || ac_index[i] > 1) | |||
| goto out_of_range; | |||
| break; | |||
| case SOF1: | |||
| case SOF2: | |||
| if (dc_index[i] > 3 || ac_index[i] > 3) | |||
| goto out_of_range; | |||
| break; | |||
| case SOF3: | |||
| if (dc_index[i] > 3 || ac_index[i] != 0) | |||
| goto out_of_range; | |||
| break; | |||
| } | |||
| #endif | |||
| } | |||
| @@ -1605,7 +1605,7 @@ static int mjpeg_decode_sos(MJpegDecodeContext *s) | |||
| static int mjpeg_decode_dri(MJpegDecodeContext *s) | |||
| { | |||
| if (get_bits(&s->gb, 16) != 4) | |||
| return -1; | |||
| return -1; | |||
| s->restart_interval = get_bits(&s->gb, 16); | |||
| s->restart_count = 0; | |||
| dprintf("restart interval: %d\n", s->restart_interval); | |||
| @@ -1619,7 +1619,7 @@ static int mjpeg_decode_app(MJpegDecodeContext *s) | |||
| len = get_bits(&s->gb, 16); | |||
| if (len < 5) | |||
| return -1; | |||
| return -1; | |||
| if(8*len + get_bits_count(&s->gb) > s->gb.size_in_bits) | |||
| return -1; | |||
| @@ -1636,35 +1636,35 @@ static int mjpeg_decode_app(MJpegDecodeContext *s) | |||
| information, but it is always present in AVID-created files */ | |||
| if (id == ff_get_fourcc("AVI1")) | |||
| { | |||
| /* structure: | |||
| 4bytes AVI1 | |||
| 1bytes polarity | |||
| 1bytes always zero | |||
| 4bytes field_size | |||
| 4bytes field_size_less_padding | |||
| */ | |||
| s->buggy_avid = 1; | |||
| // if (s->first_picture) | |||
| // printf("mjpeg: workarounding buggy AVID\n"); | |||
| s->interlace_polarity = get_bits(&s->gb, 8); | |||
| /* structure: | |||
| 4bytes AVI1 | |||
| 1bytes polarity | |||
| 1bytes always zero | |||
| 4bytes field_size | |||
| 4bytes field_size_less_padding | |||
| */ | |||
| s->buggy_avid = 1; | |||
| // if (s->first_picture) | |||
| // printf("mjpeg: workarounding buggy AVID\n"); | |||
| s->interlace_polarity = get_bits(&s->gb, 8); | |||
| #if 0 | |||
| skip_bits(&s->gb, 8); | |||
| skip_bits(&s->gb, 32); | |||
| skip_bits(&s->gb, 32); | |||
| len -= 10; | |||
| skip_bits(&s->gb, 8); | |||
| skip_bits(&s->gb, 32); | |||
| skip_bits(&s->gb, 32); | |||
| len -= 10; | |||
| #endif | |||
| // if (s->interlace_polarity) | |||
| // printf("mjpeg: interlace polarity: %d\n", s->interlace_polarity); | |||
| goto out; | |||
| // if (s->interlace_polarity) | |||
| // printf("mjpeg: interlace polarity: %d\n", s->interlace_polarity); | |||
| goto out; | |||
| } | |||
| // len -= 2; | |||
| if (id == ff_get_fourcc("JFIF")) | |||
| { | |||
| int t_w, t_h, v1, v2; | |||
| skip_bits(&s->gb, 8); /* the trailing zero-byte */ | |||
| v1= get_bits(&s->gb, 8); | |||
| int t_w, t_h, v1, v2; | |||
| skip_bits(&s->gb, 8); /* the trailing zero-byte */ | |||
| v1= get_bits(&s->gb, 8); | |||
| v2= get_bits(&s->gb, 8); | |||
| skip_bits(&s->gb, 8); | |||
| @@ -1678,37 +1678,37 @@ static int mjpeg_decode_app(MJpegDecodeContext *s) | |||
| s->avctx->sample_aspect_ratio.den | |||
| ); | |||
| t_w = get_bits(&s->gb, 8); | |||
| t_h = get_bits(&s->gb, 8); | |||
| if (t_w && t_h) | |||
| { | |||
| /* skip thumbnail */ | |||
| if (len-10-(t_w*t_h*3) > 0) | |||
| len -= t_w*t_h*3; | |||
| } | |||
| len -= 10; | |||
| goto out; | |||
| t_w = get_bits(&s->gb, 8); | |||
| t_h = get_bits(&s->gb, 8); | |||
| if (t_w && t_h) | |||
| { | |||
| /* skip thumbnail */ | |||
| if (len-10-(t_w*t_h*3) > 0) | |||
| len -= t_w*t_h*3; | |||
| } | |||
| len -= 10; | |||
| goto out; | |||
| } | |||
| if (id == ff_get_fourcc("Adob") && (get_bits(&s->gb, 8) == 'e')) | |||
| { | |||
| if (s->avctx->debug & FF_DEBUG_PICT_INFO) | |||
| av_log(s->avctx, AV_LOG_INFO, "mjpeg: Adobe header found\n"); | |||
| skip_bits(&s->gb, 16); /* version */ | |||
| skip_bits(&s->gb, 16); /* flags0 */ | |||
| skip_bits(&s->gb, 16); /* flags1 */ | |||
| skip_bits(&s->gb, 8); /* transform */ | |||
| len -= 7; | |||
| goto out; | |||
| skip_bits(&s->gb, 16); /* version */ | |||
| skip_bits(&s->gb, 16); /* flags0 */ | |||
| skip_bits(&s->gb, 16); /* flags1 */ | |||
| skip_bits(&s->gb, 8); /* transform */ | |||
| len -= 7; | |||
| goto out; | |||
| } | |||
| if (id == ff_get_fourcc("LJIF")){ | |||
| if (s->avctx->debug & FF_DEBUG_PICT_INFO) | |||
| av_log(s->avctx, AV_LOG_INFO, "Pegasus lossless jpeg header found\n"); | |||
| skip_bits(&s->gb, 16); /* version ? */ | |||
| skip_bits(&s->gb, 16); /* unknown, always 0? */ | |||
| skip_bits(&s->gb, 16); /* unknown, always 0? */ | |||
| skip_bits(&s->gb, 16); /* unknown, always 0? */ | |||
| skip_bits(&s->gb, 16); /* version ? */ | |||
| skip_bits(&s->gb, 16); /* unknown, always 0? */ | |||
| skip_bits(&s->gb, 16); /* unknown, always 0? */ | |||
| skip_bits(&s->gb, 16); /* unknown, always 0? */ | |||
| switch( get_bits(&s->gb, 8)){ | |||
| case 1: | |||
| s->rgb= 1; | |||
| @@ -1728,32 +1728,32 @@ static int mjpeg_decode_app(MJpegDecodeContext *s) | |||
| /* Apple MJPEG-A */ | |||
| if ((s->start_code == APP1) && (len > (0x28 - 8))) | |||
| { | |||
| id = (get_bits(&s->gb, 16) << 16) | get_bits(&s->gb, 16); | |||
| id = be2me_32(id); | |||
| len -= 4; | |||
| if (id == ff_get_fourcc("mjpg")) /* Apple MJPEG-A */ | |||
| { | |||
| id = (get_bits(&s->gb, 16) << 16) | get_bits(&s->gb, 16); | |||
| id = be2me_32(id); | |||
| len -= 4; | |||
| if (id == ff_get_fourcc("mjpg")) /* Apple MJPEG-A */ | |||
| { | |||
| #if 0 | |||
| skip_bits(&s->gb, 32); /* field size */ | |||
| skip_bits(&s->gb, 32); /* pad field size */ | |||
| skip_bits(&s->gb, 32); /* next off */ | |||
| skip_bits(&s->gb, 32); /* quant off */ | |||
| skip_bits(&s->gb, 32); /* huff off */ | |||
| skip_bits(&s->gb, 32); /* image off */ | |||
| skip_bits(&s->gb, 32); /* scan off */ | |||
| skip_bits(&s->gb, 32); /* data off */ | |||
| skip_bits(&s->gb, 32); /* field size */ | |||
| skip_bits(&s->gb, 32); /* pad field size */ | |||
| skip_bits(&s->gb, 32); /* next off */ | |||
| skip_bits(&s->gb, 32); /* quant off */ | |||
| skip_bits(&s->gb, 32); /* huff off */ | |||
| skip_bits(&s->gb, 32); /* image off */ | |||
| skip_bits(&s->gb, 32); /* scan off */ | |||
| skip_bits(&s->gb, 32); /* data off */ | |||
| #endif | |||
| if (s->avctx->debug & FF_DEBUG_PICT_INFO) | |||
| av_log(s->avctx, AV_LOG_INFO, "mjpeg: Apple MJPEG-A header found\n"); | |||
| } | |||
| av_log(s->avctx, AV_LOG_INFO, "mjpeg: Apple MJPEG-A header found\n"); | |||
| } | |||
| } | |||
| out: | |||
| /* slow but needed for extreme adobe jpegs */ | |||
| if (len < 0) | |||
| av_log(s->avctx, AV_LOG_ERROR, "mjpeg: error, decode_app parser read over the end\n"); | |||
| av_log(s->avctx, AV_LOG_ERROR, "mjpeg: error, decode_app parser read over the end\n"); | |||
| while(--len > 0) | |||
| skip_bits(&s->gb, 8); | |||
| skip_bits(&s->gb, 8); | |||
| return 0; | |||
| } | |||
| @@ -1762,32 +1762,32 @@ static int mjpeg_decode_com(MJpegDecodeContext *s) | |||
| { | |||
| int len = get_bits(&s->gb, 16); | |||
| if (len >= 2 && 8*len - 16 + get_bits_count(&s->gb) <= s->gb.size_in_bits) { | |||
| uint8_t *cbuf = av_malloc(len - 1); | |||
| if (cbuf) { | |||
| int i; | |||
| for (i = 0; i < len - 2; i++) | |||
| cbuf[i] = get_bits(&s->gb, 8); | |||
| if (i > 0 && cbuf[i-1] == '\n') | |||
| cbuf[i-1] = 0; | |||
| else | |||
| cbuf[i] = 0; | |||
| uint8_t *cbuf = av_malloc(len - 1); | |||
| if (cbuf) { | |||
| int i; | |||
| for (i = 0; i < len - 2; i++) | |||
| cbuf[i] = get_bits(&s->gb, 8); | |||
| if (i > 0 && cbuf[i-1] == '\n') | |||
| cbuf[i-1] = 0; | |||
| else | |||
| cbuf[i] = 0; | |||
| if(s->avctx->debug & FF_DEBUG_PICT_INFO) | |||
| av_log(s->avctx, AV_LOG_INFO, "mjpeg comment: '%s'\n", cbuf); | |||
| /* buggy avid, it puts EOI only at every 10th frame */ | |||
| if (!strcmp(cbuf, "AVID")) | |||
| { | |||
| s->buggy_avid = 1; | |||
| // if (s->first_picture) | |||
| // printf("mjpeg: workarounding buggy AVID\n"); | |||
| } | |||
| /* buggy avid, it puts EOI only at every 10th frame */ | |||
| if (!strcmp(cbuf, "AVID")) | |||
| { | |||
| s->buggy_avid = 1; | |||
| // if (s->first_picture) | |||
| // printf("mjpeg: workarounding buggy AVID\n"); | |||
| } | |||
| else if(!strcmp(cbuf, "CS=ITU601")){ | |||
| s->cs_itu601= 1; | |||
| } | |||
| av_free(cbuf); | |||
| } | |||
| av_free(cbuf); | |||
| } | |||
| } | |||
| return 0; | |||
| @@ -1830,13 +1830,13 @@ static int find_marker(uint8_t **pbuf_ptr, uint8_t *buf_end) | |||
| buf_ptr = *pbuf_ptr; | |||
| while (buf_ptr < buf_end) { | |||
| v = *buf_ptr++; | |||
| v2 = *buf_ptr; | |||
| v2 = *buf_ptr; | |||
| if ((v == 0xff) && (v2 >= 0xc0) && (v2 <= 0xfe) && buf_ptr < buf_end) { | |||
| val = *buf_ptr++; | |||
| goto found; | |||
| val = *buf_ptr++; | |||
| goto found; | |||
| } | |||
| #ifdef DEBUG | |||
| skipped++; | |||
| skipped++; | |||
| #endif | |||
| } | |||
| val = -1; | |||
| @@ -1862,74 +1862,74 @@ static int mjpeg_decode_frame(AVCodecContext *avctx, | |||
| while (buf_ptr < buf_end) { | |||
| /* find start next marker */ | |||
| start_code = find_marker(&buf_ptr, buf_end); | |||
| { | |||
| /* EOF */ | |||
| { | |||
| /* EOF */ | |||
| if (start_code < 0) { | |||
| goto the_end; | |||
| goto the_end; | |||
| } else { | |||
| dprintf("marker=%x avail_size_in_buf=%d\n", start_code, buf_end - buf_ptr); | |||
| if ((buf_end - buf_ptr) > s->buffer_size) | |||
| { | |||
| av_free(s->buffer); | |||
| s->buffer_size = buf_end-buf_ptr; | |||
| if ((buf_end - buf_ptr) > s->buffer_size) | |||
| { | |||
| av_free(s->buffer); | |||
| s->buffer_size = buf_end-buf_ptr; | |||
| s->buffer = av_malloc(s->buffer_size + FF_INPUT_BUFFER_PADDING_SIZE); | |||
| dprintf("buffer too small, expanding to %d bytes\n", | |||
| s->buffer_size); | |||
| } | |||
| /* unescape buffer of SOS */ | |||
| if (start_code == SOS) | |||
| { | |||
| uint8_t *src = buf_ptr; | |||
| uint8_t *dst = s->buffer; | |||
| while (src<buf_end) | |||
| { | |||
| uint8_t x = *(src++); | |||
| *(dst++) = x; | |||
| if (x == 0xff) | |||
| { | |||
| dprintf("buffer too small, expanding to %d bytes\n", | |||
| s->buffer_size); | |||
| } | |||
| /* unescape buffer of SOS */ | |||
| if (start_code == SOS) | |||
| { | |||
| uint8_t *src = buf_ptr; | |||
| uint8_t *dst = s->buffer; | |||
| while (src<buf_end) | |||
| { | |||
| uint8_t x = *(src++); | |||
| *(dst++) = x; | |||
| if (x == 0xff) | |||
| { | |||
| while(src<buf_end && x == 0xff) | |||
| x = *(src++); | |||
| if (x >= 0xd0 && x <= 0xd7) | |||
| *(dst++) = x; | |||
| else if (x) | |||
| break; | |||
| } | |||
| } | |||
| init_get_bits(&s->gb, s->buffer, (dst - s->buffer)*8); | |||
| dprintf("escaping removed %d bytes\n", | |||
| (buf_end - buf_ptr) - (dst - s->buffer)); | |||
| } | |||
| else | |||
| init_get_bits(&s->gb, buf_ptr, (buf_end - buf_ptr)*8); | |||
| s->start_code = start_code; | |||
| if (x >= 0xd0 && x <= 0xd7) | |||
| *(dst++) = x; | |||
| else if (x) | |||
| break; | |||
| } | |||
| } | |||
| init_get_bits(&s->gb, s->buffer, (dst - s->buffer)*8); | |||
| dprintf("escaping removed %d bytes\n", | |||
| (buf_end - buf_ptr) - (dst - s->buffer)); | |||
| } | |||
| else | |||
| init_get_bits(&s->gb, buf_ptr, (buf_end - buf_ptr)*8); | |||
| s->start_code = start_code; | |||
| if(s->avctx->debug & FF_DEBUG_STARTCODE){ | |||
| av_log(s->avctx, AV_LOG_DEBUG, "startcode: %X\n", start_code); | |||
| } | |||
| /* process markers */ | |||
| if (start_code >= 0xd0 && start_code <= 0xd7) { | |||
| dprintf("restart marker: %d\n", start_code&0x0f); | |||
| /* APP fields */ | |||
| } else if (start_code >= APP0 && start_code <= APP15) { | |||
| mjpeg_decode_app(s); | |||
| /* Comment */ | |||
| } else if (start_code == COM){ | |||
| mjpeg_decode_com(s); | |||
| } | |||
| /* process markers */ | |||
| if (start_code >= 0xd0 && start_code <= 0xd7) { | |||
| dprintf("restart marker: %d\n", start_code&0x0f); | |||
| /* APP fields */ | |||
| } else if (start_code >= APP0 && start_code <= APP15) { | |||
| mjpeg_decode_app(s); | |||
| /* Comment */ | |||
| } else if (start_code == COM){ | |||
| mjpeg_decode_com(s); | |||
| } | |||
| switch(start_code) { | |||
| case SOI: | |||
| s->restart_interval = 0; | |||
| s->restart_interval = 0; | |||
| reset_ls_coding_parameters(s, 1); | |||
| s->restart_count = 0; | |||
| s->restart_count = 0; | |||
| /* nothing to do on SOI */ | |||
| break; | |||
| case DQT: | |||
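
The SOS branch in the hunk above copies the scan data into s->buffer while undoing JPEG byte stuffing: a 0x00 following 0xFF is dropped, runs of 0xFF fill bytes collapse, restart markers are kept in place, and any other marker ends the scan. The same loop lifted out as a standalone helper (a sketch mirroring the code above, not an exported API):

    #include <stddef.h>
    #include <stdint.h>

    /* unescape JPEG entropy-coded data; returns the number of bytes written */
    static size_t unescape_scan(uint8_t *dst, const uint8_t *src, size_t len)
    {
        size_t i = 0, j = 0;
        uint8_t x;

        while (i < len) {
            x = src[i++];
            dst[j++] = x;
            if (x == 0xff) {
                while (i < len && (x = src[i++]) == 0xff)
                    ;                      /* collapse fill bytes */
                if (x >= 0xd0 && x <= 0xd7)
                    dst[j++] = x;          /* keep RSTn inside the scan */
                else if (x)
                    break;                 /* a real marker: scan is over */
                /* x == 0: stuffed data byte, the 0x00 is simply dropped */
            }
        }
        return j;
    }
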
| @@ -1944,12 +1944,12 @@ static int mjpeg_decode_frame(AVCodecContext *avctx, | |||
| case SOF0: | |||
| s->lossless=0; | |||
| if (mjpeg_decode_sof(s) < 0) | |||
| return -1; | |||
| return -1; | |||
| break; | |||
| case SOF3: | |||
| s->lossless=1; | |||
| if (mjpeg_decode_sof(s) < 0) | |||
| return -1; | |||
| return -1; | |||
| break; | |||
| case SOF48: | |||
| s->lossless=1; | |||
| @@ -1961,11 +1961,11 @@ static int mjpeg_decode_frame(AVCodecContext *avctx, | |||
| if (decode_lse(s) < 0) | |||
| return -1; | |||
| break; | |||
| case EOI: | |||
| if ((s->buggy_avid && !s->interlaced) || s->restart_interval) | |||
| case EOI: | |||
| if ((s->buggy_avid && !s->interlaced) || s->restart_interval) | |||
| break; | |||
| eoi_parser: | |||
| { | |||
| { | |||
| if (s->interlaced) { | |||
| s->bottom_field ^= 1; | |||
| /* if not bottom field, do not output image yet */ | |||
| @@ -1987,41 +1987,41 @@ eoi_parser: | |||
| goto the_end; | |||
| } | |||
| break; | |||
| break; | |||
| case SOS: | |||
| mjpeg_decode_sos(s); | |||
| /* buggy avid puts EOI every 10-20th frame */ | |||
| /* if restart period is over process EOI */ | |||
| if ((s->buggy_avid && !s->interlaced) || s->restart_interval) | |||
| goto eoi_parser; | |||
| /* buggy avid puts EOI every 10-20th frame */ | |||
| /* if restart period is over process EOI */ | |||
| if ((s->buggy_avid && !s->interlaced) || s->restart_interval) | |||
| goto eoi_parser; | |||
| break; | |||
| case DRI: | |||
| mjpeg_decode_dri(s); | |||
| break; | |||
| case SOF1: | |||
| case SOF2: | |||
| case SOF5: | |||
| case SOF6: | |||
| case SOF7: | |||
| case SOF9: | |||
| case SOF10: | |||
| case SOF11: | |||
| case SOF13: | |||
| case SOF14: | |||
| case SOF15: | |||
| case JPG: | |||
| av_log(s->avctx, AV_LOG_ERROR, "mjpeg: unsupported coding type (%x)\n", start_code); | |||
| break; | |||
| case DRI: | |||
| mjpeg_decode_dri(s); | |||
| break; | |||
| case SOF1: | |||
| case SOF2: | |||
| case SOF5: | |||
| case SOF6: | |||
| case SOF7: | |||
| case SOF9: | |||
| case SOF10: | |||
| case SOF11: | |||
| case SOF13: | |||
| case SOF14: | |||
| case SOF15: | |||
| case JPG: | |||
| av_log(s->avctx, AV_LOG_ERROR, "mjpeg: unsupported coding type (%x)\n", start_code); | |||
| break; | |||
| // default: | |||
| // printf("mjpeg: unsupported marker (%x)\n", start_code); | |||
| // break; | |||
| // default: | |||
| // printf("mjpeg: unsupported marker (%x)\n", start_code); | |||
| // break; | |||
| } | |||
| not_the_end: | |||
| /* end of start code processing */ | |||
| buf_ptr += (get_bits_count(&s->gb)+7)/8; | |||
| dprintf("marker parser used %d bytes (%d bits)\n", | |||
| (get_bits_count(&s->gb)+7)/8, get_bits_count(&s->gb)); | |||
| /* end of start code processing */ | |||
| buf_ptr += (get_bits_count(&s->gb)+7)/8; | |||
| dprintf("marker parser used %d bytes (%d bits)\n", | |||
| (get_bits_count(&s->gb)+7)/8, get_bits_count(&s->gb)); | |||
| } | |||
| } | |||
| } | |||
| @@ -2057,8 +2057,8 @@ read_header: | |||
| if (get_bits_long(&hgb, 32) != be2me_32(ff_get_fourcc("mjpg"))) | |||
| { | |||
| dprintf("not mjpeg-b (bad fourcc)\n"); | |||
| return 0; | |||
| dprintf("not mjpeg-b (bad fourcc)\n"); | |||
| return 0; | |||
| } | |||
| field_size = get_bits_long(&hgb, 32); /* field size */ | |||
| @@ -2067,34 +2067,34 @@ read_header: | |||
| second_field_offs = get_bits_long(&hgb, 32); | |||
| dprintf("second field offs: 0x%x\n", second_field_offs); | |||
| if (second_field_offs) | |||
| s->interlaced = 1; | |||
| s->interlaced = 1; | |||
| dqt_offs = get_bits_long(&hgb, 32); | |||
| dprintf("dqt offs: 0x%x\n", dqt_offs); | |||
| if (dqt_offs) | |||
| { | |||
| init_get_bits(&s->gb, buf+dqt_offs, (buf_end - (buf+dqt_offs))*8); | |||
| s->start_code = DQT; | |||
| mjpeg_decode_dqt(s); | |||
| init_get_bits(&s->gb, buf+dqt_offs, (buf_end - (buf+dqt_offs))*8); | |||
| s->start_code = DQT; | |||
| mjpeg_decode_dqt(s); | |||
| } | |||
| dht_offs = get_bits_long(&hgb, 32); | |||
| dprintf("dht offs: 0x%x\n", dht_offs); | |||
| if (dht_offs) | |||
| { | |||
| init_get_bits(&s->gb, buf+dht_offs, (buf_end - (buf+dht_offs))*8); | |||
| s->start_code = DHT; | |||
| mjpeg_decode_dht(s); | |||
| init_get_bits(&s->gb, buf+dht_offs, (buf_end - (buf+dht_offs))*8); | |||
| s->start_code = DHT; | |||
| mjpeg_decode_dht(s); | |||
| } | |||
| sof_offs = get_bits_long(&hgb, 32); | |||
| dprintf("sof offs: 0x%x\n", sof_offs); | |||
| if (sof_offs) | |||
| { | |||
| init_get_bits(&s->gb, buf+sof_offs, (buf_end - (buf+sof_offs))*8); | |||
| s->start_code = SOF0; | |||
| if (mjpeg_decode_sof(s) < 0) | |||
| return -1; | |||
| init_get_bits(&s->gb, buf+sof_offs, (buf_end - (buf+sof_offs))*8); | |||
| s->start_code = SOF0; | |||
| if (mjpeg_decode_sof(s) < 0) | |||
| return -1; | |||
| } | |||
| sos_offs = get_bits_long(&hgb, 32); | |||
| @@ -2103,22 +2103,22 @@ read_header: | |||
| dprintf("sod offs: 0x%x\n", sod_offs); | |||
| if (sos_offs) | |||
| { | |||
| // init_get_bits(&s->gb, buf+sos_offs, (buf_end - (buf+sos_offs))*8); | |||
| init_get_bits(&s->gb, buf+sos_offs, field_size*8); | |||
| s->mjpb_skiptosod = (sod_offs - sos_offs - show_bits(&s->gb, 16)); | |||
| s->start_code = SOS; | |||
| mjpeg_decode_sos(s); | |||
| // init_get_bits(&s->gb, buf+sos_offs, (buf_end - (buf+sos_offs))*8); | |||
| init_get_bits(&s->gb, buf+sos_offs, field_size*8); | |||
| s->mjpb_skiptosod = (sod_offs - sos_offs - show_bits(&s->gb, 16)); | |||
| s->start_code = SOS; | |||
| mjpeg_decode_sos(s); | |||
| } | |||
| if (s->interlaced) { | |||
| s->bottom_field ^= 1; | |||
| /* if not bottom field, do not output image yet */ | |||
| if (s->bottom_field && second_field_offs) | |||
| { | |||
| buf_ptr = buf + second_field_offs; | |||
| second_field_offs = 0; | |||
| goto read_header; | |||
| } | |||
| { | |||
| buf_ptr = buf + second_field_offs; | |||
| second_field_offs = 0; | |||
| goto read_header; | |||
| } | |||
| } | |||
| //XXX FIXME factorize, this looks very similar to the EOI code | |||
| @@ -2153,7 +2153,7 @@ static int sp5x_decode_frame(AVCodecContext *avctx, | |||
| int i = 0, j = 0; | |||
| if (!avctx->width || !avctx->height) | |||
| return -1; | |||
| return -1; | |||
| buf_ptr = buf; | |||
| buf_end = buf + buf_size; | |||
| @@ -2161,7 +2161,7 @@ static int sp5x_decode_frame(AVCodecContext *avctx, | |||
| #if 1 | |||
| recoded = av_mallocz(buf_size + 1024); | |||
| if (!recoded) | |||
| return -1; | |||
| return -1; | |||
| /* SOI */ | |||
| recoded[j++] = 0xFF; | |||
| @@ -2187,9 +2187,9 @@ static int sp5x_decode_frame(AVCodecContext *avctx, | |||
| for (i = 14; i < buf_size && j < buf_size+1024-2; i++) | |||
| { | |||
| recoded[j++] = buf[i]; | |||
| if (buf[i] == 0xff) | |||
| recoded[j++] = 0; | |||
| recoded[j++] = buf[i]; | |||
| if (buf[i] == 0xff) | |||
| recoded[j++] = 0; | |||
| } | |||
| /* EOI */ | |||
| @@ -2229,33 +2229,33 @@ static int sp5x_decode_frame(AVCodecContext *avctx, | |||
| if (avctx->get_buffer(avctx, &s->picture) < 0) | |||
| { | |||
| av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); | |||
| return -1; | |||
| return -1; | |||
| } | |||
| s->picture.pict_type = I_TYPE; | |||
| s->picture.key_frame = 1; | |||
| for (i = 0; i < 3; i++) | |||
| s->linesize[i] = s->picture.linesize[i] << s->interlaced; | |||
| s->linesize[i] = s->picture.linesize[i] << s->interlaced; | |||
| /* DQT */ | |||
| for (i = 0; i < 64; i++) | |||
| { | |||
| j = s->scantable.permutated[i]; | |||
| s->quant_matrixes[0][j] = sp5x_quant_table[(qscale * 2) + i]; | |||
| j = s->scantable.permutated[i]; | |||
| s->quant_matrixes[0][j] = sp5x_quant_table[(qscale * 2) + i]; | |||
| } | |||
| s->qscale[0] = FFMAX( | |||
| s->quant_matrixes[0][s->scantable.permutated[1]], | |||
| s->quant_matrixes[0][s->scantable.permutated[8]]) >> 1; | |||
| s->quant_matrixes[0][s->scantable.permutated[1]], | |||
| s->quant_matrixes[0][s->scantable.permutated[8]]) >> 1; | |||
| for (i = 0; i < 64; i++) | |||
| { | |||
| j = s->scantable.permutated[i]; | |||
| s->quant_matrixes[1][j] = sp5x_quant_table[(qscale * 2) + 1 + i]; | |||
| j = s->scantable.permutated[i]; | |||
| s->quant_matrixes[1][j] = sp5x_quant_table[(qscale * 2) + 1 + i]; | |||
| } | |||
| s->qscale[1] = FFMAX( | |||
| s->quant_matrixes[1][s->scantable.permutated[1]], | |||
| s->quant_matrixes[1][s->scantable.permutated[8]]) >> 1; | |||
| s->quant_matrixes[1][s->scantable.permutated[1]], | |||
| s->quant_matrixes[1][s->scantable.permutated[8]]) >> 1; | |||
| /* DHT */ | |||
| @@ -2282,7 +2282,7 @@ static int sp5x_decode_frame(AVCodecContext *avctx, | |||
| s->ac_index[2] = 1; | |||
| for (i = 0; i < 3; i++) | |||
| s->last_dc[i] = 1024; | |||
| s->last_dc[i] = 1024; | |||
| s->mb_width = (s->width * s->h_max * 8 -1) / (s->h_max * 8); | |||
| s->mb_height = (s->height * s->v_max * 8 -1) / (s->v_max * 8); | |||
| @@ -61,7 +61,7 @@ static void add_pixels_clamped_mlib(const DCTELEM *block, uint8_t *pixels, int l | |||
| /* put block, width 16 pixel, height 8/16 */ | |||
| static void put_pixels16_mlib (uint8_t * dest, const uint8_t * ref, | |||
| int stride, int height) | |||
| int stride, int height) | |||
| { | |||
| switch (height) { | |||
| case 8: | |||
| @@ -78,7 +78,7 @@ static void put_pixels16_mlib (uint8_t * dest, const uint8_t * ref, | |||
| } | |||
| static void put_pixels16_x2_mlib (uint8_t * dest, const uint8_t * ref, | |||
| int stride, int height) | |||
| int stride, int height) | |||
| { | |||
| switch (height) { | |||
| case 8: | |||
| @@ -95,7 +95,7 @@ static void put_pixels16_x2_mlib (uint8_t * dest, const uint8_t * ref, | |||
| } | |||
| static void put_pixels16_y2_mlib (uint8_t * dest, const uint8_t * ref, | |||
| int stride, int height) | |||
| int stride, int height) | |||
| { | |||
| switch (height) { | |||
| case 8: | |||
| @@ -112,7 +112,7 @@ static void put_pixels16_y2_mlib (uint8_t * dest, const uint8_t * ref, | |||
| } | |||
| static void put_pixels16_xy2_mlib(uint8_t * dest, const uint8_t * ref, | |||
| int stride, int height) | |||
| int stride, int height) | |||
| { | |||
| switch (height) { | |||
| case 8: | |||
| @@ -131,7 +131,7 @@ static void put_pixels16_xy2_mlib(uint8_t * dest, const uint8_t * ref, | |||
| /* put block, width 8 pixel, height 4/8/16 */ | |||
| static void put_pixels8_mlib (uint8_t * dest, const uint8_t * ref, | |||
| int stride, int height) | |||
| int stride, int height) | |||
| { | |||
| switch (height) { | |||
| case 4: | |||
| @@ -152,7 +152,7 @@ static void put_pixels8_mlib (uint8_t * dest, const uint8_t * ref, | |||
| } | |||
| static void put_pixels8_x2_mlib (uint8_t * dest, const uint8_t * ref, | |||
| int stride, int height) | |||
| int stride, int height) | |||
| { | |||
| switch (height) { | |||
| case 4: | |||
| @@ -173,7 +173,7 @@ static void put_pixels8_x2_mlib (uint8_t * dest, const uint8_t * ref, | |||
| } | |||
| static void put_pixels8_y2_mlib (uint8_t * dest, const uint8_t * ref, | |||
| int stride, int height) | |||
| int stride, int height) | |||
| { | |||
| switch (height) { | |||
| case 4: | |||
| @@ -194,7 +194,7 @@ static void put_pixels8_y2_mlib (uint8_t * dest, const uint8_t * ref, | |||
| } | |||
| static void put_pixels8_xy2_mlib(uint8_t * dest, const uint8_t * ref, | |||
| int stride, int height) | |||
| int stride, int height) | |||
| { | |||
| switch (height) { | |||
| case 4: | |||
| @@ -217,7 +217,7 @@ static void put_pixels8_xy2_mlib(uint8_t * dest, const uint8_t * ref, | |||
| /* average block, width 16 pixel, height 8/16 */ | |||
| static void avg_pixels16_mlib (uint8_t * dest, const uint8_t * ref, | |||
| int stride, int height) | |||
| int stride, int height) | |||
| { | |||
| switch (height) { | |||
| case 8: | |||
| @@ -234,7 +234,7 @@ static void avg_pixels16_mlib (uint8_t * dest, const uint8_t * ref, | |||
| } | |||
| static void avg_pixels16_x2_mlib (uint8_t * dest, const uint8_t * ref, | |||
| int stride, int height) | |||
| int stride, int height) | |||
| { | |||
| switch (height) { | |||
| case 8: | |||
| @@ -251,7 +251,7 @@ static void avg_pixels16_x2_mlib (uint8_t * dest, const uint8_t * ref, | |||
| } | |||
| static void avg_pixels16_y2_mlib (uint8_t * dest, const uint8_t * ref, | |||
| int stride, int height) | |||
| int stride, int height) | |||
| { | |||
| switch (height) { | |||
| case 8: | |||
| @@ -268,7 +268,7 @@ static void avg_pixels16_y2_mlib (uint8_t * dest, const uint8_t * ref, | |||
| } | |||
| static void avg_pixels16_xy2_mlib(uint8_t * dest, const uint8_t * ref, | |||
| int stride, int height) | |||
| int stride, int height) | |||
| { | |||
| switch (height) { | |||
| case 8: | |||
| @@ -287,7 +287,7 @@ static void avg_pixels16_xy2_mlib(uint8_t * dest, const uint8_t * ref, | |||
| /* average block, width 8 pixel, height 4/8/16 */ | |||
| static void avg_pixels8_mlib (uint8_t * dest, const uint8_t * ref, | |||
| int stride, int height) | |||
| int stride, int height) | |||
| { | |||
| switch (height) { | |||
| case 4: | |||
| @@ -308,7 +308,7 @@ static void avg_pixels8_mlib (uint8_t * dest, const uint8_t * ref, | |||
| } | |||
| static void avg_pixels8_x2_mlib (uint8_t * dest, const uint8_t * ref, | |||
| int stride, int height) | |||
| int stride, int height) | |||
| { | |||
| switch (height) { | |||
| case 4: | |||
| @@ -329,7 +329,7 @@ static void avg_pixels8_x2_mlib (uint8_t * dest, const uint8_t * ref, | |||
| } | |||
| static void avg_pixels8_y2_mlib (uint8_t * dest, const uint8_t * ref, | |||
| int stride, int height) | |||
| int stride, int height) | |||
| { | |||
| switch (height) { | |||
| case 4: | |||
| @@ -350,7 +350,7 @@ static void avg_pixels8_y2_mlib (uint8_t * dest, const uint8_t * ref, | |||
| } | |||
| static void avg_pixels8_xy2_mlib(uint8_t * dest, const uint8_t * ref, | |||
| int stride, int height) | |||
| int stride, int height) | |||
| { | |||
| switch (height) { | |||
| case 4: | |||
| @@ -450,7 +450,7 @@ void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx) | |||
| void MPV_common_init_mlib(MpegEncContext *s) | |||
| { | |||
| if(s->avctx->dct_algo==FF_DCT_AUTO || s->avctx->dct_algo==FF_DCT_MLIB){ | |||
| s->dsp.fdct = ff_fdct_mlib; | |||
| s->dsp.fdct = ff_fdct_mlib; | |||
| } | |||
| if(s->avctx->idct_algo==FF_IDCT_AUTO || s->avctx->idct_algo==FF_IDCT_MLIB){ | |||
| @@ -45,7 +45,7 @@ | |||
| #define P_MV1 P[9] | |||
| static inline int sad_hpel_motion_search(MpegEncContext * s, | |||
| int *mx_ptr, int *my_ptr, int dmin, | |||
| int *mx_ptr, int *my_ptr, int dmin, | |||
| int src_index, int ref_index, | |||
| int size, int h); | |||
| @@ -293,25 +293,25 @@ static int pix_dev(uint8_t * pix, int line_size, int mean) | |||
| s = 0; | |||
| for (i = 0; i < 16; i++) { | |||
| for (j = 0; j < 16; j += 8) { | |||
| s += ABS(pix[0]-mean); | |||
| s += ABS(pix[1]-mean); | |||
| s += ABS(pix[2]-mean); | |||
| s += ABS(pix[3]-mean); | |||
| s += ABS(pix[4]-mean); | |||
| s += ABS(pix[5]-mean); | |||
| s += ABS(pix[6]-mean); | |||
| s += ABS(pix[7]-mean); | |||
| pix += 8; | |||
| } | |||
| pix += line_size - 16; | |||
| for (j = 0; j < 16; j += 8) { | |||
| s += ABS(pix[0]-mean); | |||
| s += ABS(pix[1]-mean); | |||
| s += ABS(pix[2]-mean); | |||
| s += ABS(pix[3]-mean); | |||
| s += ABS(pix[4]-mean); | |||
| s += ABS(pix[5]-mean); | |||
| s += ABS(pix[6]-mean); | |||
| s += ABS(pix[7]-mean); | |||
| pix += 8; | |||
| } | |||
| pix += line_size - 16; | |||
| } | |||
| return s; | |||
| } | |||
| #endif | |||
| static inline void no_motion_search(MpegEncContext * s, | |||
| int *mx_ptr, int *my_ptr) | |||
| int *mx_ptr, int *my_ptr) | |||
| { | |||
| *mx_ptr = 16 * s->mb_x; | |||
| *my_ptr = 16 * s->mb_y; | |||
| @@ -328,35 +328,35 @@ static int full_motion_search(MpegEncContext * s, | |||
| xx = 16 * s->mb_x; | |||
| yy = 16 * s->mb_y; | |||
| x1 = xx - range + 1; /* we lose one pixel to avoid boundary problems with half-pixel prediction */ | |||
| x1 = xx - range + 1; /* we lose one pixel to avoid boundary problems with half-pixel prediction */ | |||
| if (x1 < xmin) | |||
| x1 = xmin; | |||
| x1 = xmin; | |||
| x2 = xx + range - 1; | |||
| if (x2 > xmax) | |||
| x2 = xmax; | |||
| x2 = xmax; | |||
| y1 = yy - range + 1; | |||
| if (y1 < ymin) | |||
| y1 = ymin; | |||
| y1 = ymin; | |||
| y2 = yy + range - 1; | |||
| if (y2 > ymax) | |||
| y2 = ymax; | |||
| y2 = ymax; | |||
| pix = s->new_picture.data[0] + (yy * s->linesize) + xx; | |||
| dmin = 0x7fffffff; | |||
| mx = 0; | |||
| my = 0; | |||
| for (y = y1; y <= y2; y++) { | |||
| for (x = x1; x <= x2; x++) { | |||
| d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, | |||
| s->linesize, 16); | |||
| if (d < dmin || | |||
| (d == dmin && | |||
| (abs(x - xx) + abs(y - yy)) < | |||
| (abs(mx - xx) + abs(my - yy)))) { | |||
| dmin = d; | |||
| mx = x; | |||
| my = y; | |||
| } | |||
| } | |||
| for (x = x1; x <= x2; x++) { | |||
| d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, | |||
| s->linesize, 16); | |||
| if (d < dmin || | |||
| (d == dmin && | |||
| (abs(x - xx) + abs(y - yy)) < | |||
| (abs(mx - xx) + abs(my - yy)))) { | |||
| dmin = d; | |||
| mx = x; | |||
| my = y; | |||
| } | |||
| } | |||
| } | |||
| *mx_ptr = mx; | |||
| @@ -364,8 +364,8 @@ static int full_motion_search(MpegEncContext * s, | |||
| #if 0 | |||
| if (*mx_ptr < -(2 * range) || *mx_ptr >= (2 * range) || | |||
| *my_ptr < -(2 * range) || *my_ptr >= (2 * range)) { | |||
| fprintf(stderr, "error %d %d\n", *mx_ptr, *my_ptr); | |||
| *my_ptr < -(2 * range) || *my_ptr >= (2 * range)) { | |||
| fprintf(stderr, "error %d %d\n", *mx_ptr, *my_ptr); | |||
| } | |||
| #endif | |||
| return dmin; | |||
| @@ -386,22 +386,22 @@ static int log_motion_search(MpegEncContext * s, | |||
| /* Left limit */ | |||
| x1 = xx - range; | |||
| if (x1 < xmin) | |||
| x1 = xmin; | |||
| x1 = xmin; | |||
| /* Right limit */ | |||
| x2 = xx + range; | |||
| if (x2 > xmax) | |||
| x2 = xmax; | |||
| x2 = xmax; | |||
| /* Upper limit */ | |||
| y1 = yy - range; | |||
| if (y1 < ymin) | |||
| y1 = ymin; | |||
| y1 = ymin; | |||
| /* Lower limit */ | |||
| y2 = yy + range; | |||
| if (y2 > ymax) | |||
| y2 = ymax; | |||
| y2 = ymax; | |||
| pix = s->new_picture.data[0] + (yy * s->linesize) + xx; | |||
| dmin = 0x7fffffff; | |||
| @@ -409,34 +409,34 @@ static int log_motion_search(MpegEncContext * s, | |||
| my = 0; | |||
| do { | |||
| for (y = y1; y <= y2; y += range) { | |||
| for (x = x1; x <= x2; x += range) { | |||
| d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16); | |||
| if (d < dmin || (d == dmin && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) { | |||
| dmin = d; | |||
| mx = x; | |||
| my = y; | |||
| } | |||
| } | |||
| } | |||
| range = range >> 1; | |||
| x1 = mx - range; | |||
| if (x1 < xmin) | |||
| x1 = xmin; | |||
| x2 = mx + range; | |||
| if (x2 > xmax) | |||
| x2 = xmax; | |||
| y1 = my - range; | |||
| if (y1 < ymin) | |||
| y1 = ymin; | |||
| y2 = my + range; | |||
| if (y2 > ymax) | |||
| y2 = ymax; | |||
| for (y = y1; y <= y2; y += range) { | |||
| for (x = x1; x <= x2; x += range) { | |||
| d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16); | |||
| if (d < dmin || (d == dmin && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) { | |||
| dmin = d; | |||
| mx = x; | |||
| my = y; | |||
| } | |||
| } | |||
| } | |||
| range = range >> 1; | |||
| x1 = mx - range; | |||
| if (x1 < xmin) | |||
| x1 = xmin; | |||
| x2 = mx + range; | |||
| if (x2 > xmax) | |||
| x2 = xmax; | |||
| y1 = my - range; | |||
| if (y1 < ymin) | |||
| y1 = ymin; | |||
| y2 = my + range; | |||
| if (y2 > ymax) | |||
| y2 = ymax; | |||
| } while (range >= 1); | |||
| @@ -462,22 +462,22 @@ static int phods_motion_search(MpegEncContext * s, | |||
| /* Left limit */ | |||
| x1 = xx - range; | |||
| if (x1 < xmin) | |||
| x1 = xmin; | |||
| x1 = xmin; | |||
| /* Right limit */ | |||
| x2 = xx + range; | |||
| if (x2 > xmax) | |||
| x2 = xmax; | |||
| x2 = xmax; | |||
| /* Upper limit */ | |||
| y1 = yy - range; | |||
| if (y1 < ymin) | |||
| y1 = ymin; | |||
| y1 = ymin; | |||
| /* Lower limit */ | |||
| y2 = yy + range; | |||
| if (y2 > ymax) | |||
| y2 = ymax; | |||
| y2 = ymax; | |||
| pix = s->new_picture.data[0] + (yy * s->linesize) + xx; | |||
| mx = 0; | |||
| @@ -489,43 +489,43 @@ static int phods_motion_search(MpegEncContext * s, | |||
| dminx = 0x7fffffff; | |||
| dminy = 0x7fffffff; | |||
| lastx = x; | |||
| for (x = x1; x <= x2; x += range) { | |||
| d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16); | |||
| if (d < dminx || (d == dminx && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) { | |||
| dminx = d; | |||
| mx = x; | |||
| } | |||
| } | |||
| x = lastx; | |||
| for (y = y1; y <= y2; y += range) { | |||
| d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16); | |||
| if (d < dminy || (d == dminy && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) { | |||
| dminy = d; | |||
| my = y; | |||
| } | |||
| } | |||
| range = range >> 1; | |||
| x = mx; | |||
| y = my; | |||
| x1 = mx - range; | |||
| if (x1 < xmin) | |||
| x1 = xmin; | |||
| x2 = mx + range; | |||
| if (x2 > xmax) | |||
| x2 = xmax; | |||
| y1 = my - range; | |||
| if (y1 < ymin) | |||
| y1 = ymin; | |||
| y2 = my + range; | |||
| if (y2 > ymax) | |||
| y2 = ymax; | |||
| lastx = x; | |||
| for (x = x1; x <= x2; x += range) { | |||
| d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16); | |||
| if (d < dminx || (d == dminx && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) { | |||
| dminx = d; | |||
| mx = x; | |||
| } | |||
| } | |||
| x = lastx; | |||
| for (y = y1; y <= y2; y += range) { | |||
| d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16); | |||
| if (d < dminy || (d == dminy && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) { | |||
| dminy = d; | |||
| my = y; | |||
| } | |||
| } | |||
| range = range >> 1; | |||
| x = mx; | |||
| y = my; | |||
| x1 = mx - range; | |||
| if (x1 < xmin) | |||
| x1 = xmin; | |||
| x2 = mx + range; | |||
| if (x2 > xmax) | |||
| x2 = xmax; | |||
| y1 = my - range; | |||
| if (y1 < ymin) | |||
| y1 = ymin; | |||
| y2 = my + range; | |||
| if (y2 > ymax) | |||
| y2 = ymax; | |||
| } while (range >= 1); | |||
| @@ -550,7 +550,7 @@ static int phods_motion_search(MpegEncContext * s, | |||
| } | |||
| static inline int sad_hpel_motion_search(MpegEncContext * s, | |||
| int *mx_ptr, int *my_ptr, int dmin, | |||
| int *mx_ptr, int *my_ptr, int dmin, | |||
| int src_index, int ref_index, | |||
| int size, int h) | |||
| { | |||
| @@ -1190,24 +1190,24 @@ void ff_estimate_p_frame_motion(MpegEncContext * s, | |||
| switch(s->me_method) { | |||
| case ME_ZERO: | |||
| default: | |||
| no_motion_search(s, &mx, &my); | |||
| no_motion_search(s, &mx, &my); | |||
| mx-= mb_x*16; | |||
| my-= mb_y*16; | |||
| dmin = 0; | |||
| break; | |||
| #if 0 | |||
| case ME_FULL: | |||
| dmin = full_motion_search(s, &mx, &my, range, ref_picture); | |||
| dmin = full_motion_search(s, &mx, &my, range, ref_picture); | |||
| mx-= mb_x*16; | |||
| my-= mb_y*16; | |||
| break; | |||
| case ME_LOG: | |||
| dmin = log_motion_search(s, &mx, &my, range / 2, ref_picture); | |||
| dmin = log_motion_search(s, &mx, &my, range / 2, ref_picture); | |||
| mx-= mb_x*16; | |||
| my-= mb_y*16; | |||
| break; | |||
| case ME_PHODS: | |||
| dmin = phods_motion_search(s, &mx, &my, range / 2, ref_picture); | |||
| dmin = phods_motion_search(s, &mx, &my, range / 2, ref_picture); | |||
| mx-= mb_x*16; | |||
| my-= mb_y*16; | |||
| break; | |||
| @@ -1264,7 +1264,7 @@ void ff_estimate_p_frame_motion(MpegEncContext * s, | |||
| #if 0 | |||
| printf("varc=%4d avg_var=%4d (sum=%4d) vard=%4d mx=%2d my=%2d\n", | |||
| varc, s->avg_mb_var, sum, vard, mx - xx, my - yy); | |||
| varc, s->avg_mb_var, sum, vard, mx - xx, my - yy); | |||
| #endif | |||
| if(mb_type){ | |||
| if (vard <= 64 || vard < varc) | |||
| @@ -1479,24 +1479,24 @@ static int ff_estimate_motion_b(MpegEncContext * s, | |||
| switch(s->me_method) { | |||
| case ME_ZERO: | |||
| default: | |||
| no_motion_search(s, &mx, &my); | |||
| no_motion_search(s, &mx, &my); | |||
| dmin = 0; | |||
| mx-= mb_x*16; | |||
| my-= mb_y*16; | |||
| break; | |||
| #if 0 | |||
| case ME_FULL: | |||
| dmin = full_motion_search(s, &mx, &my, range, ref_picture); | |||
| dmin = full_motion_search(s, &mx, &my, range, ref_picture); | |||
| mx-= mb_x*16; | |||
| my-= mb_y*16; | |||
| break; | |||
| case ME_LOG: | |||
| dmin = log_motion_search(s, &mx, &my, range / 2, ref_picture); | |||
| dmin = log_motion_search(s, &mx, &my, range / 2, ref_picture); | |||
| mx-= mb_x*16; | |||
| my-= mb_y*16; | |||
| break; | |||
| case ME_PHODS: | |||
| dmin = phods_motion_search(s, &mx, &my, range / 2, ref_picture); | |||
| dmin = phods_motion_search(s, &mx, &my, range / 2, ref_picture); | |||
| mx-= mb_x*16; | |||
| my-= mb_y*16; | |||
| break; | |||
| @@ -45,7 +45,7 @@ | |||
| #if 0 | |||
| static int hpel_motion_search)(MpegEncContext * s, | |||
| int *mx_ptr, int *my_ptr, int dmin, | |||
| int *mx_ptr, int *my_ptr, int dmin, | |||
| uint8_t *ref_data[3], | |||
| int size) | |||
| { | |||
| @@ -113,7 +113,7 @@ static int hpel_motion_search)(MpegEncContext * s, | |||
| #else | |||
| static int hpel_motion_search(MpegEncContext * s, | |||
| int *mx_ptr, int *my_ptr, int dmin, | |||
| int *mx_ptr, int *my_ptr, int dmin, | |||
| int src_index, int ref_index, | |||
| int size, int h) | |||
| { | |||
| @@ -271,7 +271,7 @@ int inline ff_get_mb_score(MpegEncContext * s, int mx, int my, int src_index, | |||
| } | |||
| static int qpel_motion_search(MpegEncContext * s, | |||
| int *mx_ptr, int *my_ptr, int dmin, | |||
| int *mx_ptr, int *my_ptr, int dmin, | |||
| int src_index, int ref_index, | |||
| int size, int h) | |||
| { | |||
| @@ -1005,7 +1005,7 @@ static int epzs_motion_search4(MpegEncContext * s, | |||
| //printf("%d %d %d %d //",xmin, ymin, xmax, ymax); | |||
| /* first line */ | |||
| if (s->first_slice_line) { | |||
| CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift) | |||
| CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift) | |||
| CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16, | |||
| (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16) | |||
| CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift) | |||
| @@ -1067,7 +1067,7 @@ static int epzs_motion_search2(MpegEncContext * s, | |||
| //printf("%d %d %d %d //",xmin, ymin, xmax, ymax); | |||
| /* first line */ | |||
| if (s->first_slice_line) { | |||
| CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift) | |||
| CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift) | |||
| CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16, | |||
| (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16) | |||
| CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift) | |||
| @@ -28,51 +28,51 @@ | |||
| #define BUFFER_SIZE (2*MPA_FRAME_SIZE) | |||
| typedef struct Mp3AudioContext { | |||
| lame_global_flags *gfp; | |||
| int stereo; | |||
| lame_global_flags *gfp; | |||
| int stereo; | |||
| uint8_t buffer[BUFFER_SIZE]; | |||
| int buffer_index; | |||
| } Mp3AudioContext; | |||
| static int MP3lame_encode_init(AVCodecContext *avctx) | |||
| { | |||
| Mp3AudioContext *s = avctx->priv_data; | |||
| if (avctx->channels > 2) | |||
| return -1; | |||
| s->stereo = avctx->channels > 1 ? 1 : 0; | |||
| if ((s->gfp = lame_init()) == NULL) | |||
| goto err; | |||
| lame_set_in_samplerate(s->gfp, avctx->sample_rate); | |||
| lame_set_out_samplerate(s->gfp, avctx->sample_rate); | |||
| lame_set_num_channels(s->gfp, avctx->channels); | |||
| /* lame 3.91 dies on quality != 5 */ | |||
| lame_set_quality(s->gfp, 5); | |||
| /* lame 3.91 doesn't work in mono */ | |||
| lame_set_mode(s->gfp, JOINT_STEREO); | |||
| lame_set_brate(s->gfp, avctx->bit_rate/1000); | |||
| Mp3AudioContext *s = avctx->priv_data; | |||
| if (avctx->channels > 2) | |||
| return -1; | |||
| s->stereo = avctx->channels > 1 ? 1 : 0; | |||
| if ((s->gfp = lame_init()) == NULL) | |||
| goto err; | |||
| lame_set_in_samplerate(s->gfp, avctx->sample_rate); | |||
| lame_set_out_samplerate(s->gfp, avctx->sample_rate); | |||
| lame_set_num_channels(s->gfp, avctx->channels); | |||
| /* lame 3.91 dies on quality != 5 */ | |||
| lame_set_quality(s->gfp, 5); | |||
| /* lame 3.91 doesn't work in mono */ | |||
| lame_set_mode(s->gfp, JOINT_STEREO); | |||
| lame_set_brate(s->gfp, avctx->bit_rate/1000); | |||
| if(avctx->flags & CODEC_FLAG_QSCALE) { | |||
| lame_set_brate(s->gfp, 0); | |||
| lame_set_VBR(s->gfp, vbr_default); | |||
| lame_set_VBR_q(s->gfp, avctx->global_quality / (float)FF_QP2LAMBDA); | |||
| } | |||
| lame_set_bWriteVbrTag(s->gfp,0); | |||
| if (lame_init_params(s->gfp) < 0) | |||
| goto err_close; | |||
| if (lame_init_params(s->gfp) < 0) | |||
| goto err_close; | |||
| avctx->frame_size = lame_get_framesize(s->gfp); | |||
| avctx->frame_size = lame_get_framesize(s->gfp); | |||
| avctx->coded_frame= avcodec_alloc_frame(); | |||
| avctx->coded_frame->key_frame= 1; | |||
| return 0; | |||
| return 0; | |||
| err_close: | |||
| lame_close(s->gfp); | |||
| lame_close(s->gfp); | |||
| err: | |||
| return -1; | |||
| return -1; | |||
| } | |||
| static const int sSampleRates[3] = { | |||
| @@ -136,11 +136,11 @@ static int mp3len(void *data, int *samplesPerFrame, int *sampleRate) | |||
| int MP3lame_encode_frame(AVCodecContext *avctx, | |||
| unsigned char *frame, int buf_size, void *data) | |||
| { | |||
| Mp3AudioContext *s = avctx->priv_data; | |||
| int len; | |||
| int lame_result; | |||
| Mp3AudioContext *s = avctx->priv_data; | |||
| int len; | |||
| int lame_result; | |||
| /* lame 3.91 dies on '1-channel interleaved' data */ | |||
| /* lame 3.91 dies on '1-channel interleaved' data */ | |||
| if(data){ | |||
| if (s->stereo) { | |||
| @@ -198,12 +198,12 @@ int MP3lame_encode_frame(AVCodecContext *avctx, | |||
| int MP3lame_encode_close(AVCodecContext *avctx) | |||
| { | |||
| Mp3AudioContext *s = avctx->priv_data; | |||
| Mp3AudioContext *s = avctx->priv_data; | |||
| av_freep(&avctx->coded_frame); | |||
| lame_close(s->gfp); | |||
| return 0; | |||
| lame_close(s->gfp); | |||
| return 0; | |||
| } | |||
| @@ -35,14 +35,14 @@ | |||
| /* Start codes. */ | |||
| #define SEQ_END_CODE 0x000001b7 | |||
| #define SEQ_START_CODE 0x000001b3 | |||
| #define GOP_START_CODE 0x000001b8 | |||
| #define PICTURE_START_CODE 0x00000100 | |||
| #define SLICE_MIN_START_CODE 0x00000101 | |||
| #define SLICE_MAX_START_CODE 0x000001af | |||
| #define EXT_START_CODE 0x000001b5 | |||
| #define USER_START_CODE 0x000001b2 | |||
| #define SEQ_END_CODE 0x000001b7 | |||
| #define SEQ_START_CODE 0x000001b3 | |||
| #define GOP_START_CODE 0x000001b8 | |||
| #define PICTURE_START_CODE 0x00000100 | |||
| #define SLICE_MIN_START_CODE 0x00000101 | |||
| #define SLICE_MAX_START_CODE 0x000001af | |||
| #define EXT_START_CODE 0x000001b5 | |||
| #define USER_START_CODE 0x000001b2 | |||
| #define DC_VLC_BITS 9 | |||
| #define MV_VLC_BITS 9 | |||
| @@ -89,7 +89,7 @@ const enum PixelFormat pixfmt_yuv_444[]= {PIX_FMT_YUV444P,-1}; | |||
| const enum PixelFormat pixfmt_xvmc_mpg2_420[] = { | |||
| PIX_FMT_XVMC_MPEG2_IDCT, | |||
| PIX_FMT_XVMC_MPEG2_MC, | |||
| -1}; | |||
| -1}; | |||
| #ifdef CONFIG_ENCODERS | |||
| static uint8_t (*mv_penalty)[MAX_MV*2+1]= NULL; | |||
| static uint8_t fcode_tab[MAX_MV*2+1]; | |||
| @@ -166,7 +166,7 @@ static void init_uni_ac_vlc(RLTable *rl, uint32_t *uni_ac_vlc_bits, uint8_t *uni | |||
| code= rl->index_run[0][run] + alevel - 1; | |||
| if (code < 111 /* rl->n */) { | |||
| /* store the vlc & sign at once */ | |||
| /* store the vlc & sign at once */ | |||
| len= mpeg1_vlc[code][1]+1; | |||
| bits= (mpeg1_vlc[code][0]<<1) + sign; | |||
| } else { | |||
| @@ -764,38 +764,38 @@ void ff_mpeg1_encode_init(MpegEncContext *s) | |||
| if(!done){ | |||
| int f_code; | |||
| int mv; | |||
| int i; | |||
| int i; | |||
| done=1; | |||
| init_rl(&rl_mpeg1, 1); | |||
| for(i=0; i<64; i++) | |||
| { | |||
| mpeg1_max_level[0][i]= rl_mpeg1.max_level[0][i]; | |||
| mpeg1_index_run[0][i]= rl_mpeg1.index_run[0][i]; | |||
| } | |||
| for(i=0; i<64; i++) | |||
| { | |||
| mpeg1_max_level[0][i]= rl_mpeg1.max_level[0][i]; | |||
| mpeg1_index_run[0][i]= rl_mpeg1.index_run[0][i]; | |||
| } | |||
| init_uni_ac_vlc(&rl_mpeg1, uni_mpeg1_ac_vlc_bits, uni_mpeg1_ac_vlc_len); | |||
| /* build unified dc encoding tables */ | |||
| for(i=-255; i<256; i++) | |||
| { | |||
| int adiff, index; | |||
| int bits, code; | |||
| int diff=i; | |||
| /* build unified dc encoding tables */ | |||
| for(i=-255; i<256; i++) | |||
| { | |||
| int adiff, index; | |||
| int bits, code; | |||
| int diff=i; | |||
| adiff = ABS(diff); | |||
| if(diff<0) diff--; | |||
| index = av_log2(2*adiff); | |||
| adiff = ABS(diff); | |||
| if(diff<0) diff--; | |||
| index = av_log2(2*adiff); | |||
| bits= vlc_dc_lum_bits[index] + index; | |||
| code= (vlc_dc_lum_code[index]<<index) + (diff & ((1 << index) - 1)); | |||
| mpeg1_lum_dc_uni[i+255]= bits + (code<<8); | |||
| bits= vlc_dc_lum_bits[index] + index; | |||
| code= (vlc_dc_lum_code[index]<<index) + (diff & ((1 << index) - 1)); | |||
| mpeg1_lum_dc_uni[i+255]= bits + (code<<8); | |||
| bits= vlc_dc_chroma_bits[index] + index; | |||
| code= (vlc_dc_chroma_code[index]<<index) + (diff & ((1 << index) - 1)); | |||
| mpeg1_chr_dc_uni[i+255]= bits + (code<<8); | |||
| } | |||
| bits= vlc_dc_chroma_bits[index] + index; | |||
| code= (vlc_dc_chroma_code[index]<<index) + (diff & ((1 << index) - 1)); | |||
| mpeg1_chr_dc_uni[i+255]= bits + (code<<8); | |||
| } | |||
| mv_penalty= av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) ); | |||
| @@ -873,14 +873,14 @@ static inline void encode_dc(MpegEncContext *s, int diff, int component) | |||
| }else{ | |||
| if (component == 0) { | |||
| put_bits( | |||
| &s->pb, | |||
| mpeg1_lum_dc_uni[diff+255]&0xFF, | |||
| mpeg1_lum_dc_uni[diff+255]>>8); | |||
| &s->pb, | |||
| mpeg1_lum_dc_uni[diff+255]&0xFF, | |||
| mpeg1_lum_dc_uni[diff+255]>>8); | |||
| } else { | |||
| put_bits( | |||
| &s->pb, | |||
| mpeg1_chr_dc_uni[diff+255]&0xFF, | |||
| mpeg1_chr_dc_uni[diff+255]>>8); | |||
| mpeg1_chr_dc_uni[diff+255]&0xFF, | |||
| mpeg1_chr_dc_uni[diff+255]>>8); | |||
| } | |||
| } | |||
| } | |||
| @@ -946,10 +946,10 @@ static void mpeg1_encode_block(MpegEncContext *s, | |||
| // code = get_rl_index(rl, 0, run, alevel); | |||
| if (alevel <= mpeg1_max_level[0][run]){ | |||
| code= mpeg1_index_run[0][run] + alevel - 1; | |||
| /* store the vlc & sign at once */ | |||
| /* store the vlc & sign at once */ | |||
| put_bits(&s->pb, mpeg1_vlc[code][1]+1, (mpeg1_vlc[code][0]<<1) + sign); | |||
| } else { | |||
| /* escape seems to be pretty rare (<5%), so I don't optimize it */ | |||
| /* escape seems to be pretty rare (<5%), so I don't optimize it */ | |||
| put_bits(&s->pb, mpeg1_vlc[111/*rl->n*/][1], mpeg1_vlc[111/*rl->n*/][0]); | |||
| /* escape: only clip in this case */ | |||
| put_bits(&s->pb, 6, run); | |||
| @@ -1376,8 +1376,8 @@ static int mpeg_decode_mb(MpegEncContext *s, | |||
| return -1; | |||
| } | |||
| if(mb_block_count > 6){ | |||
| cbp<<= mb_block_count-6; | |||
| cbp |= get_bits(&s->gb, mb_block_count-6); | |||
| cbp<<= mb_block_count-6; | |||
| cbp |= get_bits(&s->gb, mb_block_count-6); | |||
| } | |||
| #ifdef HAVE_XVMC | |||
| @@ -2074,7 +2074,7 @@ static int mpeg_decode_postinit(AVCodecContext *avctx){ | |||
| uint8_t old_permutation[64]; | |||
| if ( | |||
| (s1->mpeg_enc_ctx_allocated == 0)|| | |||
| (s1->mpeg_enc_ctx_allocated == 0)|| | |||
| avctx->coded_width != s->width || | |||
| avctx->coded_height != s->height|| | |||
| s1->save_aspect_info != s->aspect_ratio_info|| | |||
| @@ -2088,8 +2088,8 @@ static int mpeg_decode_postinit(AVCodecContext *avctx){ | |||
| s->parse_context= pc; | |||
| } | |||
| if( (s->width == 0 )||(s->height == 0)) | |||
| return -2; | |||
| if( (s->width == 0 )||(s->height == 0)) | |||
| return -2; | |||
| avcodec_set_dimensions(avctx, s->width, s->height); | |||
| avctx->bit_rate = s->bit_rate; | |||
| @@ -2129,7 +2129,7 @@ static int mpeg_decode_postinit(AVCodecContext *avctx){ | |||
| mpeg2_aspect[s->aspect_ratio_info], | |||
| (AVRational){s1->pan_scan.width, s1->pan_scan.height} | |||
| ); | |||
| } | |||
| } | |||
| }else{ | |||
| s->avctx->sample_aspect_ratio= | |||
| mpeg2_aspect[s->aspect_ratio_info]; | |||
| @@ -2312,16 +2312,16 @@ static void mpeg_decode_picture_display_extension(Mpeg1Context *s1) | |||
| nofco = 1; | |||
| if(s->progressive_sequence){ | |||
| if(s->repeat_first_field){ | |||
| nofco++; | |||
| if(s->top_field_first) | |||
| nofco++; | |||
| } | |||
| nofco++; | |||
| if(s->top_field_first) | |||
| nofco++; | |||
| } | |||
| }else{ | |||
| if(s->picture_structure == PICT_FRAME){ | |||
| nofco++; | |||
| if(s->repeat_first_field) | |||
| nofco++; | |||
| } | |||
| if(s->repeat_first_field) | |||
| nofco++; | |||
| } | |||
| } | |||
| for(i=0; i<nofco; i++){ | |||
| s1->pan_scan.position[i][0]= get_sbits(&s->gb, 16); | |||
| @@ -2985,8 +2985,8 @@ static void mpeg_decode_gop(AVCodecContext *avctx, | |||
| if(s->avctx->debug & FF_DEBUG_PICT_INFO) | |||
| av_log(s->avctx, AV_LOG_DEBUG, "GOP (%2d:%02d:%02d.[%02d]) broken_link=%d\n", | |||
| time_code_hours, time_code_minutes, time_code_seconds, | |||
| time_code_pictures, broken_link); | |||
| time_code_hours, time_code_minutes, time_code_seconds, | |||
| time_code_pictures, broken_link); | |||
| } | |||
| /** | |||
| * finds the end of the current frame in the bitstream. | |||
| @@ -3044,13 +3044,13 @@ static int mpeg_decode_frame(AVCodecContext *avctx, | |||
| dprintf("fill_buffer\n"); | |||
| if (buf_size == 0) { | |||
| /* special case for last picture */ | |||
| if (s2->low_delay==0 && s2->next_picture_ptr) { | |||
| *picture= *(AVFrame*)s2->next_picture_ptr; | |||
| s2->next_picture_ptr= NULL; | |||
| /* special case for last picture */ | |||
| if (s2->low_delay==0 && s2->next_picture_ptr) { | |||
| *picture= *(AVFrame*)s2->next_picture_ptr; | |||
| s2->next_picture_ptr= NULL; | |||
| *data_size = sizeof(AVFrame); | |||
| } | |||
| *data_size = sizeof(AVFrame); | |||
| } | |||
| return 0; | |||
| } | |||
| @@ -3111,13 +3111,13 @@ static int mpeg_decode_frame(AVCodecContext *avctx, | |||
| switch(start_code) { | |||
| case SEQ_START_CODE: | |||
| mpeg1_decode_sequence(avctx, buf_ptr, | |||
| input_size); | |||
| input_size); | |||
| break; | |||
| case PICTURE_START_CODE: | |||
| /* we have a complete image : we try to decompress it */ | |||
| mpeg1_decode_picture(avctx, | |||
| buf_ptr, input_size); | |||
| buf_ptr, input_size); | |||
| break; | |||
| case EXT_START_CODE: | |||
| mpeg_decode_extension(avctx, | |||
| @@ -4,14 +4,14 @@ | |||
| */ | |||
| const int16_t ff_mpeg1_default_intra_matrix[64] = { | |||
| 8, 16, 19, 22, 26, 27, 29, 34, | |||
| 16, 16, 22, 24, 27, 29, 34, 37, | |||
| 19, 22, 26, 27, 29, 34, 34, 38, | |||
| 22, 22, 26, 27, 29, 34, 37, 40, | |||
| 22, 26, 27, 29, 32, 35, 40, 48, | |||
| 26, 27, 29, 32, 35, 40, 48, 58, | |||
| 26, 27, 29, 34, 38, 46, 56, 69, | |||
| 27, 29, 35, 38, 46, 56, 69, 83 | |||
| 8, 16, 19, 22, 26, 27, 29, 34, | |||
| 16, 16, 22, 24, 27, 29, 34, 37, | |||
| 19, 22, 26, 27, 29, 34, 34, 38, | |||
| 22, 22, 26, 27, 29, 34, 37, 40, | |||
| 22, 26, 27, 29, 32, 35, 40, 48, | |||
| 26, 27, 29, 32, 35, 40, 48, 58, | |||
| 26, 27, 29, 34, 38, 46, 56, 69, | |||
| 27, 29, 35, 38, 46, 56, 69, 83 | |||
| }; | |||
| const int16_t ff_mpeg1_default_non_intra_matrix[64] = { | |||
| @@ -748,7 +748,7 @@ static void encode_frame(MpegAudioContext *s, | |||
| } | |||
| static int MPA_encode_frame(AVCodecContext *avctx, | |||
| unsigned char *frame, int buf_size, void *data) | |||
| unsigned char *frame, int buf_size, void *data) | |||
| { | |||
| MpegAudioContext *s = avctx->priv_data; | |||
| short *samples = data; | |||
| @@ -55,7 +55,7 @@ int l2_select_table(int bitrate, int nb_channels, int freq, int lsf); | |||
| int mpa_decode_header(AVCodecContext *avctx, uint32_t head); | |||
| void ff_mpa_synth_init(MPA_INT *window); | |||
| void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset, | |||
| MPA_INT *window, int *dither_state, | |||
| MPA_INT *window, int *dither_state, | |||
| OUT_INT *samples, int incr, | |||
| int32_t sb_samples[SBLIMIT]); | |||
| @@ -64,7 +64,7 @@ static always_inline int MULH(int a, int b){ | |||
| struct GranuleDef; | |||
| typedef struct MPADecodeContext { | |||
| uint8_t inbuf1[2][MPA_MAX_CODED_FRAME_SIZE + BACKSTEP_SIZE]; /* input buffer */ | |||
| uint8_t inbuf1[2][MPA_MAX_CODED_FRAME_SIZE + BACKSTEP_SIZE]; /* input buffer */ | |||
| int inbuf_index; | |||
| uint8_t *inbuf_ptr, *inbuf; | |||
| int frame_size; | |||
| @@ -340,13 +340,13 @@ static int decode_init(AVCodecContext * avctx) | |||
| scale_factor_mult[i][2]); | |||
| } | |||
| ff_mpa_synth_init(window); | |||
| ff_mpa_synth_init(window); | |||
| /* huffman decode tables */ | |||
| huff_code_table[0] = NULL; | |||
| for(i=1;i<16;i++) { | |||
| const HuffTable *h = &mpa_huff_tables[i]; | |||
| int xsize, x, y; | |||
| int xsize, x, y; | |||
| unsigned int n; | |||
| uint8_t *code_table; | |||
| @@ -378,11 +378,11 @@ static int decode_init(AVCodecContext * avctx) | |||
| band_index_long[i][22] = k; | |||
| } | |||
| /* compute n ^ (4/3) and store it in mantissa/exp format */ | |||
| table_4_3_exp= av_mallocz_static(TABLE_4_3_SIZE * sizeof(table_4_3_exp[0])); | |||
| /* compute n ^ (4/3) and store it in mantissa/exp format */ | |||
| table_4_3_exp= av_mallocz_static(TABLE_4_3_SIZE * sizeof(table_4_3_exp[0])); | |||
| if(!table_4_3_exp) | |||
| return -1; | |||
| table_4_3_value= av_mallocz_static(TABLE_4_3_SIZE * sizeof(table_4_3_value[0])); | |||
| return -1; | |||
| table_4_3_value= av_mallocz_static(TABLE_4_3_SIZE * sizeof(table_4_3_value[0])); | |||
| if(!table_4_3_value) | |||
| return -1; | |||
| @@ -844,7 +844,7 @@ void ff_mpa_synth_init(MPA_INT *window) | |||
| 32 samples. */ | |||
| /* XXX: optimize by avoiding ring buffer usage */ | |||
| void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset, | |||
| MPA_INT *window, int *dither_state, | |||
| MPA_INT *window, int *dither_state, | |||
| OUT_INT *samples, int incr, | |||
| int32_t sb_samples[SBLIMIT]) | |||
| { | |||
| @@ -2440,8 +2440,8 @@ static int mp_decode_frame(MPADecodeContext *s, | |||
| samples_ptr = samples + ch; | |||
| for(i=0;i<nb_frames;i++) { | |||
| ff_mpa_synth_filter(s->synth_buf[ch], &(s->synth_buf_offset[ch]), | |||
| window, &s->dither_state, | |||
| samples_ptr, s->nb_channels, | |||
| window, &s->dither_state, | |||
| samples_ptr, s->nb_channels, | |||
| s->sb_samples[ch][i]); | |||
| samples_ptr += 32 * s->nb_channels; | |||
| } | |||
| @@ -2453,8 +2453,8 @@ static int mp_decode_frame(MPADecodeContext *s, | |||
| } | |||
| static int decode_frame(AVCodecContext * avctx, | |||
| void *data, int *data_size, | |||
| uint8_t * buf, int buf_size) | |||
| void *data, int *data_size, | |||
| uint8_t * buf, int buf_size) | |||
| { | |||
| MPADecodeContext *s = avctx->priv_data; | |||
| uint32_t header; | |||
| @@ -2464,8 +2464,8 @@ static int decode_frame(AVCodecContext * avctx, | |||
| buf_ptr = buf; | |||
| while (buf_size > 0) { | |||
| len = s->inbuf_ptr - s->inbuf; | |||
| if (s->frame_size == 0) { | |||
| len = s->inbuf_ptr - s->inbuf; | |||
| if (s->frame_size == 0) { | |||
| /* special case for next header for first frame in free | |||
| format case (XXX: find a simpler method) */ | |||
| if (s->free_format_next_header != 0) { | |||
| @@ -2477,34 +2477,34 @@ static int decode_frame(AVCodecContext * avctx, | |||
| s->free_format_next_header = 0; | |||
| goto got_header; | |||
| } | |||
| /* no header seen : find one. We need at least HEADER_SIZE | |||
| /* no header seen : find one. We need at least HEADER_SIZE | |||
| bytes to parse it */ | |||
| len = HEADER_SIZE - len; | |||
| if (len > buf_size) | |||
| len = buf_size; | |||
| if (len > 0) { | |||
| memcpy(s->inbuf_ptr, buf_ptr, len); | |||
| buf_ptr += len; | |||
| buf_size -= len; | |||
| s->inbuf_ptr += len; | |||
| } | |||
| if ((s->inbuf_ptr - s->inbuf) >= HEADER_SIZE) { | |||
| len = HEADER_SIZE - len; | |||
| if (len > buf_size) | |||
| len = buf_size; | |||
| if (len > 0) { | |||
| memcpy(s->inbuf_ptr, buf_ptr, len); | |||
| buf_ptr += len; | |||
| buf_size -= len; | |||
| s->inbuf_ptr += len; | |||
| } | |||
| if ((s->inbuf_ptr - s->inbuf) >= HEADER_SIZE) { | |||
| got_header: | |||
| header = (s->inbuf[0] << 24) | (s->inbuf[1] << 16) | | |||
| (s->inbuf[2] << 8) | s->inbuf[3]; | |||
| header = (s->inbuf[0] << 24) | (s->inbuf[1] << 16) | | |||
| (s->inbuf[2] << 8) | s->inbuf[3]; | |||
| if (ff_mpa_check_header(header) < 0) { | |||
| /* no sync found : move by one byte (inefficient, but simple!) */ | |||
| memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1); | |||
| s->inbuf_ptr--; | |||
| if (ff_mpa_check_header(header) < 0) { | |||
| /* no sync found : move by one byte (inefficient, but simple!) */ | |||
| memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1); | |||
| s->inbuf_ptr--; | |||
| dprintf("skip %x\n", header); | |||
| /* reset free format frame size to give a chance | |||
| to get a new bitrate */ | |||
| s->free_format_frame_size = 0; | |||
| } else { | |||
| if (decode_header(s, header) == 1) { | |||
| } else { | |||
| if (decode_header(s, header) == 1) { | |||
| /* free format: prepare to compute frame size */ | |||
| s->frame_size = -1; | |||
| s->frame_size = -1; | |||
| } | |||
| /* update codec info */ | |||
| avctx->sample_rate = s->sample_rate; | |||
| @@ -2525,18 +2525,18 @@ static int decode_frame(AVCodecContext * avctx, | |||
| avctx->frame_size = 1152; | |||
| break; | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } else if (s->frame_size == -1) { | |||
| /* free format : find next sync to compute frame size */ | |||
| len = MPA_MAX_CODED_FRAME_SIZE - len; | |||
| if (len > buf_size) | |||
| len = buf_size; | |||
| len = MPA_MAX_CODED_FRAME_SIZE - len; | |||
| if (len > buf_size) | |||
| len = buf_size; | |||
| if (len == 0) { | |||
| /* frame too long: resync */ | |||
| /* frame too long: resync */ | |||
| s->frame_size = 0; | |||
| memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1); | |||
| s->inbuf_ptr--; | |||
| memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1); | |||
| s->inbuf_ptr--; | |||
| } else { | |||
| uint8_t *p, *pend; | |||
| uint32_t header1; | |||
| @@ -2580,17 +2580,17 @@ static int decode_frame(AVCodecContext * avctx, | |||
| s->inbuf_ptr += len; | |||
| buf_size -= len; | |||
| } | |||
| } else if (len < s->frame_size) { | |||
| } else if (len < s->frame_size) { | |||
| if (s->frame_size > MPA_MAX_CODED_FRAME_SIZE) | |||
| s->frame_size = MPA_MAX_CODED_FRAME_SIZE; | |||
| len = s->frame_size - len; | |||
| if (len > buf_size) | |||
| len = buf_size; | |||
| memcpy(s->inbuf_ptr, buf_ptr, len); | |||
| buf_ptr += len; | |||
| s->inbuf_ptr += len; | |||
| buf_size -= len; | |||
| } | |||
| len = s->frame_size - len; | |||
| if (len > buf_size) | |||
| len = buf_size; | |||
| memcpy(s->inbuf_ptr, buf_ptr, len); | |||
| buf_ptr += len; | |||
| s->inbuf_ptr += len; | |||
| buf_size -= len; | |||
| } | |||
| next_data: | |||
| if (s->frame_size > 0 && | |||
| (s->inbuf_ptr - s->inbuf) >= s->frame_size) { | |||
| @@ -2601,22 +2601,22 @@ static int decode_frame(AVCodecContext * avctx, | |||
| } else { | |||
| out_size = mp_decode_frame(s, out_samples); | |||
| } | |||
| s->inbuf_ptr = s->inbuf; | |||
| s->frame_size = 0; | |||
| s->inbuf_ptr = s->inbuf; | |||
| s->frame_size = 0; | |||
| if(out_size>=0) | |||
| *data_size = out_size; | |||
| *data_size = out_size; | |||
| else | |||
| av_log(avctx, AV_LOG_DEBUG, "Error while decoding mpeg audio frame\n"); //FIXME return -1 / but also return the number of bytes consumed | |||
| break; | |||
| } | |||
| break; | |||
| } | |||
| } | |||
| return buf_ptr - buf; | |||
| } | |||
| static int decode_frame_adu(AVCodecContext * avctx, | |||
| void *data, int *data_size, | |||
| uint8_t * buf, int buf_size) | |||
| void *data, int *data_size, | |||
| uint8_t * buf, int buf_size) | |||
| { | |||
| MPADecodeContext *s = avctx->priv_data; | |||
| uint32_t header; | |||
| @@ -2747,8 +2747,8 @@ static int decode_close_mp3on4(AVCodecContext * avctx) | |||
| static int decode_frame_mp3on4(AVCodecContext * avctx, | |||
| void *data, int *data_size, | |||
| uint8_t * buf, int buf_size) | |||
| void *data, int *data_size, | |||
| uint8_t * buf, int buf_size) | |||
| { | |||
| MP3On4DecodeContext *s = avctx->priv_data; | |||
| MPADecodeContext *m; | |||
| @@ -354,7 +354,7 @@ static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){ | |||
| r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic); | |||
| if(r<0 || !pic->age || !pic->type || !pic->data[0]){ | |||
| av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]); | |||
| av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]); | |||
| return -1; | |||
| } | |||
| @@ -913,7 +913,7 @@ int MPV_encode_init(AVCodecContext *avctx) | |||
| s->width = avctx->width; | |||
| s->height = avctx->height; | |||
| if(avctx->gop_size > 600){ | |||
| av_log(avctx, AV_LOG_ERROR, "Warning keyframe interval too large! reducing it ...\n"); | |||
| av_log(avctx, AV_LOG_ERROR, "Warning keyframe interval too large! reducing it ...\n"); | |||
| avctx->gop_size=600; | |||
| } | |||
| s->gop_size = avctx->gop_size; | |||
| @@ -1120,7 +1120,7 @@ int MPV_encode_init(AVCodecContext *avctx) | |||
| s->out_format = FMT_MJPEG; | |||
| s->intra_only = 1; /* force intra only for jpeg */ | |||
| s->mjpeg_write_tables = avctx->codec->id != CODEC_ID_JPEGLS; | |||
| s->mjpeg_data_only_frames = 0; /* write all the needed headers */ | |||
| s->mjpeg_data_only_frames = 0; /* write all the needed headers */ | |||
| s->mjpeg_vsample[0] = 1<<chroma_v_shift; | |||
| s->mjpeg_vsample[1] = 1; | |||
| s->mjpeg_vsample[2] = 1; | |||
| @@ -1143,24 +1143,24 @@ int MPV_encode_init(AVCodecContext *avctx) | |||
| return -1; | |||
| } | |||
| s->out_format = FMT_H263; | |||
| s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0; | |||
| s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0; | |||
| avctx->delay=0; | |||
| s->low_delay=1; | |||
| break; | |||
| case CODEC_ID_H263P: | |||
| s->out_format = FMT_H263; | |||
| s->h263_plus = 1; | |||
| /* Fx */ | |||
| /* Fx */ | |||
| s->umvplus = (avctx->flags & CODEC_FLAG_H263P_UMV) ? 1:0; | |||
| s->h263_aic= (avctx->flags & CODEC_FLAG_H263P_AIC) ? 1:0; | |||
| s->modified_quant= s->h263_aic; | |||
| s->alt_inter_vlc= (avctx->flags & CODEC_FLAG_H263P_AIV) ? 1:0; | |||
| s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0; | |||
| s->loop_filter= (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1:0; | |||
| s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus; | |||
| s->h263_aic= (avctx->flags & CODEC_FLAG_H263P_AIC) ? 1:0; | |||
| s->modified_quant= s->h263_aic; | |||
| s->alt_inter_vlc= (avctx->flags & CODEC_FLAG_H263P_AIV) ? 1:0; | |||
| s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0; | |||
| s->loop_filter= (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1:0; | |||
| s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus; | |||
| s->h263_slice_structured= (s->flags & CODEC_FLAG_H263P_SLICE_STRUCT) ? 1:0; | |||
| /* /Fx */ | |||
| /* /Fx */ | |||
| /* These are just to be sure */ | |||
| avctx->delay=0; | |||
| s->low_delay=1; | |||
| @@ -2473,7 +2473,7 @@ static inline void gmc1_motion(MpegEncContext *s, | |||
| dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2); | |||
| if (s->no_rounding){ | |||
| s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16); | |||
| s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16); | |||
| }else{ | |||
| s->dsp.put_pixels_tab [0][dxy](dest_y, ptr, linesize, 16); | |||
| } | |||
| @@ -4148,7 +4148,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) | |||
| } | |||
| } | |||
| s->dsp.get_pixels(s->block[0], ptr_y , wrap_y); | |||
| s->dsp.get_pixels(s->block[0], ptr_y , wrap_y); | |||
| s->dsp.get_pixels(s->block[1], ptr_y + 8, wrap_y); | |||
| s->dsp.get_pixels(s->block[2], ptr_y + dct_offset , wrap_y); | |||
| s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y); | |||
| @@ -4157,7 +4157,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) | |||
| skip_dct[4]= 1; | |||
| skip_dct[5]= 1; | |||
| }else{ | |||
| s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c); | |||
| s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c); | |||
| s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c); | |||
| } | |||
| }else{ | |||
| @@ -4170,7 +4170,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) | |||
| dest_cr = s->dest[2]; | |||
| if ((!s->no_rounding) || s->pict_type==B_TYPE){ | |||
| op_pix = s->dsp.put_pixels_tab; | |||
| op_pix = s->dsp.put_pixels_tab; | |||
| op_qpix= s->dsp.put_qpel_pixels_tab; | |||
| }else{ | |||
| op_pix = s->dsp.put_no_rnd_pixels_tab; | |||
| @@ -4208,7 +4208,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) | |||
| } | |||
| } | |||
| s->dsp.diff_pixels(s->block[0], ptr_y , dest_y , wrap_y); | |||
| s->dsp.diff_pixels(s->block[0], ptr_y , dest_y , wrap_y); | |||
| s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y); | |||
| s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset , dest_y + dct_offset , wrap_y); | |||
| s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y); | |||
| @@ -4223,7 +4223,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) | |||
| /* pre quantization */ | |||
| if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){ | |||
| //FIXME optimize | |||
| if(s->dsp.sad[1](NULL, ptr_y , dest_y , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1; | |||
| if(s->dsp.sad[1](NULL, ptr_y , dest_y , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1; | |||
| if(s->dsp.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20*s->qscale) skip_dct[1]= 1; | |||
| if(s->dsp.sad[1](NULL, ptr_y +dct_offset , dest_y +dct_offset , wrap_y, 8) < 20*s->qscale) skip_dct[2]= 1; | |||
| if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1; | |||
| @@ -6265,7 +6265,7 @@ static int dct_quantize_c(MpegEncContext *s, | |||
| /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */ | |||
| if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM) | |||
| ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero); | |||
| ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero); | |||
| return last_non_zero; | |||
| } | |||
| @@ -126,7 +126,7 @@ typedef struct ScanTable{ | |||
| uint8_t permutated[64]; | |||
| uint8_t raster_end[64]; | |||
| #ifdef ARCH_POWERPC | |||
| /** Used by dct_quantize_altivec to find last-non-zero */ | |||
| /** Used by dct_quantize_altivec to find last-non-zero */ | |||
| uint8_t __align8 inverse[64]; | |||
| #endif | |||
| } ScanTable; | |||
| @@ -181,7 +181,7 @@ typedef struct Picture{ | |||
| uint16_t *mb_var; ///< Table for MB variances | |||
| uint16_t *mc_mb_var; ///< Table for motion compensated MB variances | |||
| uint8_t *mb_mean; ///< Table for MB luminance | |||
| int32_t *mb_cmp_score; ///< Table for MB cmp scores, for mb decision FIXME remove | |||
| int32_t *mb_cmp_score; ///< Table for MB cmp scores, for mb decision FIXME remove | |||
| int b_frame_score; /* */ | |||
| } Picture; | |||
| @@ -245,7 +245,7 @@ typedef struct MotionEstContext{ | |||
| uint8_t (*mv_penalty)[MAX_MV*2+1]; ///< amount of bits needed to encode a MV | |||
| uint8_t *current_mv_penalty; | |||
| int (*sub_motion_search)(struct MpegEncContext * s, | |||
| int *mx_ptr, int *my_ptr, int dmin, | |||
| int *mx_ptr, int *my_ptr, int dmin, | |||
| int src_index, int ref_index, | |||
| int size, int h); | |||
| }MotionEstContext; | |||
| @@ -544,24 +544,24 @@ void msmpeg4_encode_mb(MpegEncContext * s, | |||
| handle_slices(s); | |||
| if (!s->mb_intra) { | |||
| /* compute cbp */ | |||
| /* compute cbp */ | |||
| set_stat(ST_INTER_MB); | |||
| cbp = 0; | |||
| for (i = 0; i < 6; i++) { | |||
| if (s->block_last_index[i] >= 0) | |||
| cbp |= 1 << (5 - i); | |||
| } | |||
| if (s->use_skip_mb_code && (cbp | motion_x | motion_y) == 0) { | |||
| /* skip macroblock */ | |||
| put_bits(&s->pb, 1, 1); | |||
| cbp = 0; | |||
| for (i = 0; i < 6; i++) { | |||
| if (s->block_last_index[i] >= 0) | |||
| cbp |= 1 << (5 - i); | |||
| } | |||
| if (s->use_skip_mb_code && (cbp | motion_x | motion_y) == 0) { | |||
| /* skip macroblock */ | |||
| put_bits(&s->pb, 1, 1); | |||
| s->last_bits++; | |||
| s->misc_bits++; | |||
| s->misc_bits++; | |||
| s->skip_count++; | |||
| return; | |||
| } | |||
| return; | |||
| } | |||
| if (s->use_skip_mb_code) | |||
| put_bits(&s->pb, 1, 0); /* mb coded */ | |||
| put_bits(&s->pb, 1, 0); /* mb coded */ | |||
| if(s->msmpeg4_version<=2){ | |||
| put_bits(&s->pb, | |||
| @@ -599,10 +599,10 @@ void msmpeg4_encode_mb(MpegEncContext * s, | |||
| } | |||
| s->p_tex_bits += get_bits_diff(s); | |||
| } else { | |||
| /* compute cbp */ | |||
| cbp = 0; | |||
| /* compute cbp */ | |||
| cbp = 0; | |||
| coded_cbp = 0; | |||
| for (i = 0; i < 6; i++) { | |||
| for (i = 0; i < 6; i++) { | |||
| int val, pred; | |||
| val = (s->block_last_index[i] >= 1); | |||
| cbp |= val << (5 - i); | |||
| @@ -613,7 +613,7 @@ void msmpeg4_encode_mb(MpegEncContext * s, | |||
| val = val ^ pred; | |||
| } | |||
| coded_cbp |= val << (5 - i); | |||
| } | |||
| } | |||
| #if 0 | |||
| if (coded_cbp) | |||
| printf("cbp=%x %x\n", cbp, coded_cbp); | |||
| @@ -625,12 +625,12 @@ void msmpeg4_encode_mb(MpegEncContext * s, | |||
| v2_intra_cbpc[cbp&3][1], v2_intra_cbpc[cbp&3][0]); | |||
| } else { | |||
| if (s->use_skip_mb_code) | |||
| put_bits(&s->pb, 1, 0); /* mb coded */ | |||
| put_bits(&s->pb, 1, 0); /* mb coded */ | |||
| put_bits(&s->pb, | |||
| v2_mb_type[(cbp&3) + 4][1], | |||
| v2_mb_type[(cbp&3) + 4][0]); | |||
| } | |||
| put_bits(&s->pb, 1, 0); /* no AC prediction yet */ | |||
| put_bits(&s->pb, 1, 0); /* no AC prediction yet */ | |||
| put_bits(&s->pb, | |||
| cbpy_tab[cbp>>2][1], | |||
| cbpy_tab[cbp>>2][0]); | |||
| @@ -641,13 +641,13 @@ void msmpeg4_encode_mb(MpegEncContext * s, | |||
| ff_msmp4_mb_i_table[coded_cbp][1], ff_msmp4_mb_i_table[coded_cbp][0]); | |||
| } else { | |||
| if (s->use_skip_mb_code) | |||
| put_bits(&s->pb, 1, 0); /* mb coded */ | |||
| put_bits(&s->pb, 1, 0); /* mb coded */ | |||
| put_bits(&s->pb, | |||
| table_mb_non_intra[cbp][1], | |||
| table_mb_non_intra[cbp][0]); | |||
| } | |||
| set_stat(ST_INTRA_MB); | |||
| put_bits(&s->pb, 1, 0); /* no AC prediction yet */ | |||
| put_bits(&s->pb, 1, 0); /* no AC prediction yet */ | |||
| if(s->inter_intra_pred){ | |||
| s->h263_aic_dir=0; | |||
| put_bits(&s->pb, table_inter_intra[s->h263_aic_dir][1], table_inter_intra[s->h263_aic_dir][0]); | |||
| @@ -702,9 +702,9 @@ static inline int msmpeg4_pred_dc(MpegEncContext * s, int n, | |||
| /* find prediction */ | |||
| if (n < 4) { | |||
| scale = s->y_dc_scale; | |||
| scale = s->y_dc_scale; | |||
| } else { | |||
| scale = s->c_dc_scale; | |||
| scale = s->c_dc_scale; | |||
| } | |||
| wrap = s->block_wrap[n]; | |||
| @@ -727,22 +727,22 @@ static inline int msmpeg4_pred_dc(MpegEncContext * s, int n, | |||
| to problems if Q could vary !) */ | |||
| #if (defined(ARCH_X86) || defined(ARCH_X86_64)) && !defined PIC | |||
| asm volatile( | |||
| "movl %3, %%eax \n\t" | |||
| "shrl $1, %%eax \n\t" | |||
| "addl %%eax, %2 \n\t" | |||
| "addl %%eax, %1 \n\t" | |||
| "addl %0, %%eax \n\t" | |||
| "mull %4 \n\t" | |||
| "movl %%edx, %0 \n\t" | |||
| "movl %1, %%eax \n\t" | |||
| "mull %4 \n\t" | |||
| "movl %%edx, %1 \n\t" | |||
| "movl %2, %%eax \n\t" | |||
| "mull %4 \n\t" | |||
| "movl %%edx, %2 \n\t" | |||
| : "+b" (a), "+c" (b), "+D" (c) | |||
| : "g" (scale), "S" (inverse[scale]) | |||
| : "%eax", "%edx" | |||
| "movl %3, %%eax \n\t" | |||
| "shrl $1, %%eax \n\t" | |||
| "addl %%eax, %2 \n\t" | |||
| "addl %%eax, %1 \n\t" | |||
| "addl %0, %%eax \n\t" | |||
| "mull %4 \n\t" | |||
| "movl %%edx, %0 \n\t" | |||
| "movl %1, %%eax \n\t" | |||
| "mull %4 \n\t" | |||
| "movl %%edx, %1 \n\t" | |||
| "movl %2, %%eax \n\t" | |||
| "mull %4 \n\t" | |||
| "movl %%edx, %2 \n\t" | |||
| : "+b" (a), "+c" (b), "+D" (c) | |||
| : "g" (scale), "S" (inverse[scale]) | |||
| : "%eax", "%edx" | |||
| ); | |||
| #else | |||
| /* #elif defined (ARCH_ALPHA) */ | |||
| @@ -750,13 +750,13 @@ static inline int msmpeg4_pred_dc(MpegEncContext * s, int n, | |||
| common case. But they are costly everywhere... | |||
| */ | |||
| if (scale == 8) { | |||
| a = (a + (8 >> 1)) / 8; | |||
| b = (b + (8 >> 1)) / 8; | |||
| c = (c + (8 >> 1)) / 8; | |||
| a = (a + (8 >> 1)) / 8; | |||
| b = (b + (8 >> 1)) / 8; | |||
| c = (c + (8 >> 1)) / 8; | |||
| } else { | |||
| a = FASTDIV((a + (scale >> 1)), scale); | |||
| b = FASTDIV((b + (scale >> 1)), scale); | |||
| c = FASTDIV((c + (scale >> 1)), scale); | |||
| a = FASTDIV((a + (scale >> 1)), scale); | |||
| b = FASTDIV((b + (scale >> 1)), scale); | |||
| c = FASTDIV((c + (scale >> 1)), scale); | |||
| } | |||
| #endif | |||
| /* XXX: WARNING: they did not choose the same test as MPEG4. This | |||
| @@ -957,17 +957,17 @@ static inline void msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int | |||
| /* AC coefs */ | |||
| last_non_zero = i - 1; | |||
| for (; i <= last_index; i++) { | |||
| j = scantable[i]; | |||
| level = block[j]; | |||
| if (level) { | |||
| run = i - last_non_zero - 1; | |||
| last = (i == last_index); | |||
| sign = 0; | |||
| slevel = level; | |||
| if (level < 0) { | |||
| sign = 1; | |||
| level = -level; | |||
| } | |||
| j = scantable[i]; | |||
| level = block[j]; | |||
| if (level) { | |||
| run = i - last_non_zero - 1; | |||
| last = (i == last_index); | |||
| sign = 0; | |||
| slevel = level; | |||
| if (level < 0) { | |||
| sign = 1; | |||
| level = -level; | |||
| } | |||
| if(level<=MAX_LEVEL && run<=MAX_RUN){ | |||
| s->ac_stats[s->mb_intra][n>3][level][run][last]++; | |||
| @@ -1030,8 +1030,8 @@ else | |||
| } else { | |||
| put_bits(&s->pb, 1, sign); | |||
| } | |||
| last_non_zero = i; | |||
| } | |||
| last_non_zero = i; | |||
| } | |||
| } | |||
| } | |||
| @@ -1064,7 +1064,7 @@ static void init_h263_dc_for_msmpeg4(void) | |||
| v = abs(level); | |||
| while (v) { | |||
| v >>= 1; | |||
| size++; | |||
| size++; | |||
| } | |||
| if (level < 0) | |||
| @@ -1301,11 +1301,11 @@ return -1; | |||
| } | |||
| s->no_rounding = 1; | |||
| if(s->avctx->debug&FF_DEBUG_PICT_INFO) | |||
| av_log(s->avctx, AV_LOG_DEBUG, "qscale:%d rlc:%d rl:%d dc:%d mbrl:%d slice:%d \n", | |||
| s->qscale, | |||
| s->rl_chroma_table_index, | |||
| s->rl_table_index, | |||
| s->dc_table_index, | |||
| av_log(s->avctx, AV_LOG_DEBUG, "qscale:%d rlc:%d rl:%d dc:%d mbrl:%d slice:%d \n", | |||
| s->qscale, | |||
| s->rl_chroma_table_index, | |||
| s->rl_table_index, | |||
| s->dc_table_index, | |||
| s->per_mb_rl_table, | |||
| s->slice_height); | |||
| } else { | |||
| @@ -1349,20 +1349,20 @@ return -1; | |||
| } | |||
| if(s->avctx->debug&FF_DEBUG_PICT_INFO) | |||
| av_log(s->avctx, AV_LOG_DEBUG, "skip:%d rl:%d rlc:%d dc:%d mv:%d mbrl:%d qp:%d \n", | |||
| s->use_skip_mb_code, | |||
| s->rl_table_index, | |||
| s->rl_chroma_table_index, | |||
| s->dc_table_index, | |||
| s->mv_table_index, | |||
| av_log(s->avctx, AV_LOG_DEBUG, "skip:%d rl:%d rlc:%d dc:%d mv:%d mbrl:%d qp:%d \n", | |||
| s->use_skip_mb_code, | |||
| s->rl_table_index, | |||
| s->rl_chroma_table_index, | |||
| s->dc_table_index, | |||
| s->mv_table_index, | |||
| s->per_mb_rl_table, | |||
| s->qscale); | |||
| if(s->flipflop_rounding){ | |||
| s->no_rounding ^= 1; | |||
| }else{ | |||
| s->no_rounding = 0; | |||
| } | |||
| if(s->flipflop_rounding){ | |||
| s->no_rounding ^= 1; | |||
| }else{ | |||
| s->no_rounding = 0; | |||
| } | |||
| } | |||
| //printf("%d %d %d %d %d\n", s->pict_type, s->bit_rate, s->inter_intra_pred, s->width, s->height); | |||
| @@ -1557,10 +1557,10 @@ static int msmpeg4v12_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) | |||
| s->dsp.clear_blocks(s->block[0]); | |||
| for (i = 0; i < 6; i++) { | |||
| if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0) | |||
| { | |||
| { | |||
| av_log(s->avctx, AV_LOG_ERROR, "\nerror while decoding block: %d x %d (%d)\n", s->mb_x, s->mb_y, i); | |||
| return -1; | |||
| } | |||
| } | |||
| } | |||
| return 0; | |||
| } | |||
| @@ -1593,8 +1593,8 @@ static int msmpeg4v34_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) | |||
| code = get_vlc2(&s->gb, mb_non_intra_vlc[DEFAULT_INTER_INDEX].table, MB_NON_INTRA_VLC_BITS, 3); | |||
| if (code < 0) | |||
| return -1; | |||
| //s->mb_intra = (code & 0x40) ? 0 : 1; | |||
| s->mb_intra = (~code & 0x40) >> 6; | |||
| //s->mb_intra = (code & 0x40) ? 0 : 1; | |||
| s->mb_intra = (~code & 0x40) >> 6; | |||
| cbp = code & 0x3f; | |||
| } else { | |||
| @@ -1650,10 +1650,10 @@ static int msmpeg4v34_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) | |||
| s->dsp.clear_blocks(s->block[0]); | |||
| for (i = 0; i < 6; i++) { | |||
| if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0) | |||
| { | |||
| av_log(s->avctx, AV_LOG_ERROR, "\nerror while decoding block: %d x %d (%d)\n", s->mb_x, s->mb_y, i); | |||
| return -1; | |||
| } | |||
| { | |||
| av_log(s->avctx, AV_LOG_ERROR, "\nerror while decoding block: %d x %d (%d)\n", s->mb_x, s->mb_y, i); | |||
| return -1; | |||
| } | |||
| } | |||
| return 0; | |||
| @@ -1672,7 +1672,7 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block, | |||
| qmul=1; | |||
| qadd=0; | |||
| /* DC coef */ | |||
| /* DC coef */ | |||
| set_stat(ST_DC); | |||
| level = msmpeg4_decode_dc(s, n, &dc_pred_dir); | |||
| @@ -1808,8 +1808,8 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block, | |||
| } | |||
| } | |||
| #endif | |||
| //level = level * qmul + (level>0) * qadd - (level<=0) * qadd ; | |||
| if (level>0) level= level * qmul + qadd; | |||
| //level = level * qmul + (level>0) * qadd - (level<=0) * qadd ; | |||
| if (level>0) level= level * qmul + qadd; | |||
| else level= level * qmul - qadd; | |||
| #if 0 // waste of time too :( | |||
| if(level>2048 || level<-2048){ | |||
| @@ -45,7 +45,7 @@ Theora_decode_frame(AVCodecContext *ctx, void *outdata, int *outdata_size, | |||
| thc->op.bytes = buf_size; | |||
| if(theora_decode_packetin(&thc->state, &thc->op)) | |||
| return -1; | |||
| return -1; | |||
| theora_decode_YUVout(&thc->state, &yuv); | |||
| @@ -78,7 +78,7 @@ Theora_decode_init(AVCodecContext *ctx) | |||
| uint8_t *cdp; | |||
| if(ctx->extradata_size < 6) | |||
| return -1; | |||
| return -1; | |||
| theora_info_init(&thc->info); | |||
| @@ -87,25 +87,25 @@ Theora_decode_init(AVCodecContext *ctx) | |||
| size = ctx->extradata_size; | |||
| for(i = 0; i < 3; i++){ | |||
| hs = *cdp++ << 8; | |||
| hs += *cdp++; | |||
| size -= 2; | |||
| hs = *cdp++ << 8; | |||
| hs += *cdp++; | |||
| size -= 2; | |||
| if(hs > size){ | |||
| av_log(ctx, AV_LOG_ERROR, "extradata too small: %i > %i\n", | |||
| if(hs > size){ | |||
| av_log(ctx, AV_LOG_ERROR, "extradata too small: %i > %i\n", | |||
| hs, size); | |||
| return -1; | |||
| } | |||
| op.packet = cdp; | |||
| op.bytes = hs; | |||
| op.b_o_s = !i; | |||
| if(theora_decode_header(&thc->info, &thc->comment, &op)) | |||
| return -1; | |||
| op.packetno++; | |||
| cdp += hs; | |||
| size -= hs; | |||
| return -1; | |||
| } | |||
| op.packet = cdp; | |||
| op.bytes = hs; | |||
| op.b_o_s = !i; | |||
| if(theora_decode_header(&thc->info, &thc->comment, &op)) | |||
| return -1; | |||
| op.packetno++; | |||
| cdp += hs; | |||
| size -= hs; | |||
| } | |||
| theora_decode_init(&thc->state, &thc->info); | |||
| @@ -40,13 +40,13 @@ static int oggvorbis_init_encoder(vorbis_info *vi, AVCodecContext *avccontext) { | |||
| return (vorbis_encode_setup_managed(vi, avccontext->channels, | |||
| avccontext->sample_rate, -1, avccontext->bit_rate, -1) || | |||
| vorbis_encode_ctl(vi, OV_ECTL_RATEMANAGE_AVG, NULL) || | |||
| vorbis_encode_setup_init(vi)) ; | |||
| vorbis_encode_ctl(vi, OV_ECTL_RATEMANAGE_AVG, NULL) || | |||
| vorbis_encode_setup_init(vi)) ; | |||
| #else | |||
| /* constant bitrate */ | |||
| return vorbis_encode_init(vi, avccontext->channels, | |||
| avccontext->sample_rate, -1, avccontext->bit_rate, -1) ; | |||
| avccontext->sample_rate, -1, avccontext->bit_rate, -1) ; | |||
| #endif | |||
| } | |||
| @@ -58,8 +58,8 @@ static int oggvorbis_encode_init(AVCodecContext *avccontext) { | |||
| vorbis_info_init(&context->vi) ; | |||
| if(oggvorbis_init_encoder(&context->vi, avccontext) < 0) { | |||
| av_log(avccontext, AV_LOG_ERROR, "oggvorbis_encode_init: init_encoder failed") ; | |||
| return -1 ; | |||
| av_log(avccontext, AV_LOG_ERROR, "oggvorbis_encode_init: init_encoder failed") ; | |||
| return -1 ; | |||
| } | |||
| vorbis_analysis_init(&context->vd, &context->vi) ; | |||
| vorbis_block_init(&context->vd, &context->vb) ; | |||
| @@ -101,8 +101,8 @@ static int oggvorbis_encode_init(AVCodecContext *avccontext) { | |||
| static int oggvorbis_encode_frame(AVCodecContext *avccontext, | |||
| unsigned char *packets, | |||
| int buf_size, void *data) | |||
| unsigned char *packets, | |||
| int buf_size, void *data) | |||
| { | |||
| OggVorbisContext *context = avccontext->priv_data ; | |||
| float **buffer ; | |||
| @@ -113,22 +113,22 @@ static int oggvorbis_encode_frame(AVCodecContext *avccontext, | |||
| buffer = vorbis_analysis_buffer(&context->vd, samples) ; | |||
| if(context->vi.channels == 1) { | |||
| for(l = 0 ; l < samples ; l++) | |||
| buffer[0][l]=audio[l]/32768.f; | |||
| for(l = 0 ; l < samples ; l++) | |||
| buffer[0][l]=audio[l]/32768.f; | |||
| } else { | |||
| for(l = 0 ; l < samples ; l++){ | |||
| buffer[0][l]=audio[l*2]/32768.f; | |||
| buffer[1][l]=audio[l*2+1]/32768.f; | |||
| } | |||
| for(l = 0 ; l < samples ; l++){ | |||
| buffer[0][l]=audio[l*2]/32768.f; | |||
| buffer[1][l]=audio[l*2+1]/32768.f; | |||
| } | |||
| } | |||
| vorbis_analysis_wrote(&context->vd, samples) ; | |||
| while(vorbis_analysis_blockout(&context->vd, &context->vb) == 1) { | |||
| vorbis_analysis(&context->vb, NULL); | |||
| vorbis_bitrate_addblock(&context->vb) ; | |||
| vorbis_analysis(&context->vb, NULL); | |||
| vorbis_bitrate_addblock(&context->vb) ; | |||
| while(vorbis_bitrate_flushpacket(&context->vd, &op)) { | |||
| while(vorbis_bitrate_flushpacket(&context->vd, &op)) { | |||
| if(op.bytes==1) //I'd love to say this is a hack, but sadly it's not; apparently the end-of-stream decision is in libogg | |||
| continue; | |||
| memcpy(context->buffer + context->buffer_index, &op, sizeof(ogg_packet)); | |||
| @@ -136,7 +136,7 @@ static int oggvorbis_encode_frame(AVCodecContext *avccontext, | |||
| memcpy(context->buffer + context->buffer_index, op.packet, op.bytes); | |||
| context->buffer_index += op.bytes; | |||
| // av_log(avccontext, AV_LOG_DEBUG, "e%d / %d\n", context->buffer_index, op.bytes); | |||
| } | |||
| } | |||
| } | |||
| l=0; | |||
| @@ -268,19 +268,19 @@ static inline int conv(int samples, float **pcm, char *buf, int channels) { | |||
| float *mono ; | |||
| for(i = 0 ; i < channels ; i++){ | |||
| ptr = &data[i]; | |||
| mono = pcm[i] ; | |||
| ptr = &data[i]; | |||
| mono = pcm[i] ; | |||
| for(j = 0 ; j < samples ; j++) { | |||
| for(j = 0 ; j < samples ; j++) { | |||
| val = mono[j] * 32767.f; | |||
| val = mono[j] * 32767.f; | |||
| if(val > 32767) val = 32767 ; | |||
| if(val < -32768) val = -32768 ; | |||
| if(val > 32767) val = 32767 ; | |||
| if(val < -32768) val = -32768 ; | |||
| *ptr = val ; | |||
| ptr += channels; | |||
| } | |||
| *ptr = val ; | |||
| ptr += channels; | |||
| } | |||
| } | |||
| return 0 ; | |||
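conv() above turns libvorbis' planar float output back into interleaved 16-bit PCM: scale by 32767, clamp to the int16 range, and write each channel with a stride of `channels`. A scalar sketch of the same clamp-and-interleave step, with illustrative names not taken from the source:

    #include <stdint.h>

    /* Scale planar float PCM in [-1, 1] to int16 and interleave the channels,
     * mirroring the clamping done in conv() above. */
    static void float_to_s16_interleaved(int16_t *out, float *const *in,
                                         int samples, int channels)
    {
        int ch, i;
        for (ch = 0; ch < channels; ch++) {
            int16_t *p = out + ch;              /* this channel's first slot */
            for (i = 0; i < samples; i++) {
                int v = (int)(in[ch][i] * 32767.f);
                if (v >  32767) v =  32767;     /* clamp to the int16 range */
                if (v < -32768) v = -32768;
                *p = (int16_t)v;
                p += channels;                  /* step to the next frame */
            }
        }
    }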
| @@ -311,15 +311,15 @@ static int oggvorbis_decode_frame(AVCodecContext *avccontext, | |||
| av_log(avccontext, AV_LOG_DEBUG, "\n");*/ | |||
| if(vorbis_synthesis(&context->vb, op) == 0) | |||
| vorbis_synthesis_blockin(&context->vd, &context->vb) ; | |||
| vorbis_synthesis_blockin(&context->vd, &context->vb) ; | |||
| total_samples = 0 ; | |||
| total_bytes = 0 ; | |||
| while((samples = vorbis_synthesis_pcmout(&context->vd, &pcm)) > 0) { | |||
| conv(samples, pcm, (char*)data + total_bytes, context->vi.channels) ; | |||
| total_bytes += samples * 2 * context->vi.channels ; | |||
| total_samples += samples ; | |||
| conv(samples, pcm, (char*)data + total_bytes, context->vi.channels) ; | |||
| total_bytes += samples * 2 * context->vi.channels ; | |||
| total_samples += samples ; | |||
| vorbis_synthesis_read(&context->vd, samples) ; | |||
| } | |||
| @@ -191,11 +191,11 @@ void av_parser_close(AVCodecParserContext *s) | |||
| //#define END_NOT_FOUND (-100) | |||
| #define PICTURE_START_CODE 0x00000100 | |||
| #define SEQ_START_CODE 0x000001b3 | |||
| #define EXT_START_CODE 0x000001b5 | |||
| #define SLICE_MIN_START_CODE 0x00000101 | |||
| #define SLICE_MAX_START_CODE 0x000001af | |||
| #define PICTURE_START_CODE 0x00000100 | |||
| #define SEQ_START_CODE 0x000001b3 | |||
| #define EXT_START_CODE 0x000001b5 | |||
| #define SLICE_MIN_START_CODE 0x00000101 | |||
| #define SLICE_MAX_START_CODE 0x000001af | |||
| typedef struct ParseContext1{ | |||
| ParseContext pc; | |||
| @@ -571,7 +571,7 @@ static int mpeg4video_split(AVCodecContext *avctx, | |||
| /*************************/ | |||
| typedef struct MpegAudioParseContext { | |||
| uint8_t inbuf[MPA_MAX_CODED_FRAME_SIZE]; /* input buffer */ | |||
| uint8_t inbuf[MPA_MAX_CODED_FRAME_SIZE]; /* input buffer */ | |||
| uint8_t *inbuf_ptr; | |||
| int frame_size; | |||
| int free_format_frame_size; | |||
| @@ -608,8 +608,8 @@ static int mpegaudio_parse(AVCodecParserContext *s1, | |||
| *poutbuf_size = 0; | |||
| buf_ptr = buf; | |||
| while (buf_size > 0) { | |||
| len = s->inbuf_ptr - s->inbuf; | |||
| if (s->frame_size == 0) { | |||
| len = s->inbuf_ptr - s->inbuf; | |||
| if (s->frame_size == 0) { | |||
| /* special case for next header for first frame in free | |||
| format case (XXX: find a simpler method) */ | |||
| if (s->free_format_next_header != 0) { | |||
| @@ -621,34 +621,34 @@ static int mpegaudio_parse(AVCodecParserContext *s1, | |||
| s->free_format_next_header = 0; | |||
| goto got_header; | |||
| } | |||
| /* no header seen : find one. We need at least MPA_HEADER_SIZE | |||
| /* no header seen : find one. We need at least MPA_HEADER_SIZE | |||
| bytes to parse it */ | |||
| len = MPA_HEADER_SIZE - len; | |||
| if (len > buf_size) | |||
| len = buf_size; | |||
| if (len > 0) { | |||
| memcpy(s->inbuf_ptr, buf_ptr, len); | |||
| buf_ptr += len; | |||
| buf_size -= len; | |||
| s->inbuf_ptr += len; | |||
| } | |||
| if ((s->inbuf_ptr - s->inbuf) >= MPA_HEADER_SIZE) { | |||
| len = MPA_HEADER_SIZE - len; | |||
| if (len > buf_size) | |||
| len = buf_size; | |||
| if (len > 0) { | |||
| memcpy(s->inbuf_ptr, buf_ptr, len); | |||
| buf_ptr += len; | |||
| buf_size -= len; | |||
| s->inbuf_ptr += len; | |||
| } | |||
| if ((s->inbuf_ptr - s->inbuf) >= MPA_HEADER_SIZE) { | |||
| got_header: | |||
| sr= avctx->sample_rate; | |||
| header = (s->inbuf[0] << 24) | (s->inbuf[1] << 16) | | |||
| (s->inbuf[2] << 8) | s->inbuf[3]; | |||
| header = (s->inbuf[0] << 24) | (s->inbuf[1] << 16) | | |||
| (s->inbuf[2] << 8) | s->inbuf[3]; | |||
| ret = mpa_decode_header(avctx, header); | |||
| if (ret < 0) { | |||
| s->header_count= -2; | |||
| /* no sync found : move by one byte (inefficient, but simple!) */ | |||
| memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1); | |||
| s->inbuf_ptr--; | |||
| /* no sync found : move by one byte (inefficient, but simple!) */ | |||
| memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1); | |||
| s->inbuf_ptr--; | |||
| dprintf("skip %x\n", header); | |||
| /* reset free format frame size to give a chance | |||
| to get a new bitrate */ | |||
| s->free_format_frame_size = 0; | |||
| } else { | |||
| } else { | |||
| if((header&SAME_HEADER_MASK) != (s->header&SAME_HEADER_MASK) && s->header) | |||
| s->header_count= -3; | |||
| s->header= header; | |||
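The header handling above assembles a 32-bit big-endian value from the first four buffered bytes and, when mpa_decode_header() rejects it, slides the buffer forward by one byte and tries again. A stripped-down sketch of that byte-wise resync over a flat buffer; looks_like_header() here is only a placeholder that checks the 11 sync bits, standing in for the real validation:

    #include <stdint.h>
    #include <stddef.h>

    /* Placeholder check: an MPEG audio frame header starts with 11 sync bits set.
     * The real parser validates much more than this (mpa_decode_header above). */
    static int looks_like_header(uint32_t h)
    {
        return (h & 0xffe00000u) == 0xffe00000u;
    }

    /* Slide through the buffer one byte at a time, rebuilding the 32-bit
     * big-endian header at each offset, until a plausible one is found. */
    static long find_sync(const uint8_t *buf, size_t size)
    {
        size_t i;
        for (i = 0; i + 4 <= size; i++) {
            uint32_t h = ((uint32_t)buf[i]     << 24) |
                         ((uint32_t)buf[i + 1] << 16) |
                         ((uint32_t)buf[i + 2] <<  8) |
                          (uint32_t)buf[i + 3];
            if (looks_like_header(h))
                return (long)i;
        }
        return -1;
    }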
| @@ -657,26 +657,26 @@ static int mpegaudio_parse(AVCodecParserContext *s1, | |||
| #if 0 | |||
| /* free format: prepare to compute frame size */ | |||
| if (decode_header(s, header) == 1) { | |||
| s->frame_size = -1; | |||
| if (decode_header(s, header) == 1) { | |||
| s->frame_size = -1; | |||
| } | |||
| #endif | |||
| } | |||
| } | |||
| if(s->header_count <= 0) | |||
| avctx->sample_rate= sr; //FIXME ugly | |||
| } | |||
| } | |||
| } else | |||
| #if 0 | |||
| if (s->frame_size == -1) { | |||
| /* free format : find next sync to compute frame size */ | |||
| len = MPA_MAX_CODED_FRAME_SIZE - len; | |||
| if (len > buf_size) | |||
| len = buf_size; | |||
| len = MPA_MAX_CODED_FRAME_SIZE - len; | |||
| if (len > buf_size) | |||
| len = buf_size; | |||
| if (len == 0) { | |||
| /* frame too long: resync */ | |||
| /* frame too long: resync */ | |||
| s->frame_size = 0; | |||
| memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1); | |||
| s->inbuf_ptr--; | |||
| memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1); | |||
| s->inbuf_ptr--; | |||
| } else { | |||
| uint8_t *p, *pend; | |||
| uint32_t header1; | |||
| @@ -720,19 +720,19 @@ static int mpegaudio_parse(AVCodecParserContext *s1, | |||
| s->inbuf_ptr += len; | |||
| buf_size -= len; | |||
| } | |||
| } else | |||
| } else | |||
| #endif | |||
| if (len < s->frame_size) { | |||
| if (s->frame_size > MPA_MAX_CODED_FRAME_SIZE) | |||
| s->frame_size = MPA_MAX_CODED_FRAME_SIZE; | |||
| len = s->frame_size - len; | |||
| if (len > buf_size) | |||
| len = buf_size; | |||
| memcpy(s->inbuf_ptr, buf_ptr, len); | |||
| buf_ptr += len; | |||
| s->inbuf_ptr += len; | |||
| buf_size -= len; | |||
| } | |||
| len = s->frame_size - len; | |||
| if (len > buf_size) | |||
| len = buf_size; | |||
| memcpy(s->inbuf_ptr, buf_ptr, len); | |||
| buf_ptr += len; | |||
| s->inbuf_ptr += len; | |||
| buf_size -= len; | |||
| } | |||
| // next_data: | |||
| if (s->frame_size > 0 && | |||
| (s->inbuf_ptr - s->inbuf) >= s->frame_size) { | |||
| @@ -740,10 +740,10 @@ static int mpegaudio_parse(AVCodecParserContext *s1, | |||
| *poutbuf = s->inbuf; | |||
| *poutbuf_size = s->inbuf_ptr - s->inbuf; | |||
| } | |||
| s->inbuf_ptr = s->inbuf; | |||
| s->frame_size = 0; | |||
| break; | |||
| } | |||
| s->inbuf_ptr = s->inbuf; | |||
| s->frame_size = 0; | |||
| break; | |||
| } | |||
| } | |||
| return buf_ptr - buf; | |||
| } | |||
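Note that the parser never assumes a whole frame arrives in one call: it copies at most min(bytes still needed, bytes available) into its internal buffer and only emits once frame_size bytes are present, exactly as in the `len = s->frame_size - len` branch above. A minimal sketch of that accumulation pattern with a hypothetical FrameBuf type, not the parser's real context struct:

    #include <string.h>
    #include <stdint.h>

    typedef struct FrameBuf {
        uint8_t data[4096];   /* caller keeps need <= sizeof(data), much as the
                               * parser above clamps to MPA_MAX_CODED_FRAME_SIZE */
        int     have;         /* bytes buffered so far */
        int     need;         /* target frame size, 0 while unknown */
    } FrameBuf;

    /* Feed input into the accumulator; returns how many input bytes were
     * consumed and sets *complete once a full frame has been buffered. */
    static int feed(FrameBuf *fb, const uint8_t *buf, int buf_size, int *complete)
    {
        int len;

        *complete = 0;
        if (fb->need <= 0 || fb->have >= fb->need)
            return 0;                    /* frame size unknown or already full */
        len = fb->need - fb->have;       /* bytes still needed */
        if (len > buf_size)
            len = buf_size;              /* take only what is available */
        memcpy(fb->data + fb->have, buf, len);
        fb->have += len;
        if (fb->have >= fb->need)
            *complete = 1;
        return len;
    }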
| @@ -783,7 +783,7 @@ static int ac3_parse(AVCodecParserContext *s1, | |||
| const uint8_t *buf_ptr; | |||
| int len, sample_rate, bit_rate; | |||
| static const int ac3_channels[8] = { | |||
| 2, 1, 2, 3, 3, 4, 4, 5 | |||
| 2, 1, 2, 3, 3, 4, 4, 5 | |||
| }; | |||
| *poutbuf = NULL; | |||
| @@ -812,7 +812,7 @@ static int ac3_parse(AVCodecParserContext *s1, | |||
| memmove(s->inbuf, s->inbuf + 1, AC3_HEADER_SIZE - 1); | |||
| s->inbuf_ptr--; | |||
| } else { | |||
| s->frame_size = len; | |||
| s->frame_size = len; | |||
| /* update codec info */ | |||
| avctx->sample_rate = sample_rate; | |||
| /* set channels, except if the user explicitly requests 1 or 2 channels, XXX/FIXME this is a bit ugly */ | |||
| @@ -821,7 +821,7 @@ static int ac3_parse(AVCodecParserContext *s1, | |||
| if (s->flags & A52_LFE) | |||
| avctx->channels++; | |||
| } | |||
| avctx->bit_rate = bit_rate; | |||
| avctx->bit_rate = bit_rate; | |||
| avctx->frame_size = 6 * 256; | |||
| } | |||
| } | |||
| @@ -27,48 +27,48 @@ | |||
| /* from g711.c by SUN microsystems (unrestricted use) */ | |||
| #define SIGN_BIT (0x80) /* Sign bit for a A-law byte. */ | |||
| #define QUANT_MASK (0xf) /* Quantization field mask. */ | |||
| #define NSEGS (8) /* Number of A-law segments. */ | |||
| #define SEG_SHIFT (4) /* Left shift for segment number. */ | |||
| #define SEG_MASK (0x70) /* Segment field mask. */ | |||
| #define SIGN_BIT (0x80) /* Sign bit for a A-law byte. */ | |||
| #define QUANT_MASK (0xf) /* Quantization field mask. */ | |||
| #define NSEGS (8) /* Number of A-law segments. */ | |||
| #define SEG_SHIFT (4) /* Left shift for segment number. */ | |||
| #define SEG_MASK (0x70) /* Segment field mask. */ | |||
| #define BIAS (0x84) /* Bias for linear code. */ | |||
| #define BIAS (0x84) /* Bias for linear code. */ | |||
| /* | |||
| * alaw2linear() - Convert an A-law value to 16-bit linear PCM | |||
| * | |||
| */ | |||
| static int alaw2linear(unsigned char a_val) | |||
| static int alaw2linear(unsigned char a_val) | |||
| { | |||
| int t; | |||
| int seg; | |||
| int t; | |||
| int seg; | |||
| a_val ^= 0x55; | |||
| a_val ^= 0x55; | |||
| t = a_val & QUANT_MASK; | |||
| seg = ((unsigned)a_val & SEG_MASK) >> SEG_SHIFT; | |||
| if(seg) t= (t + t + 1 + 32) << (seg + 2); | |||
| else t= (t + t + 1 ) << 3; | |||
| t = a_val & QUANT_MASK; | |||
| seg = ((unsigned)a_val & SEG_MASK) >> SEG_SHIFT; | |||
| if(seg) t= (t + t + 1 + 32) << (seg + 2); | |||
| else t= (t + t + 1 ) << 3; | |||
| return ((a_val & SIGN_BIT) ? t : -t); | |||
| return ((a_val & SIGN_BIT) ? t : -t); | |||
| } | |||
| static int ulaw2linear(unsigned char u_val) | |||
| static int ulaw2linear(unsigned char u_val) | |||
| { | |||
| int t; | |||
| int t; | |||
| /* Complement to obtain normal u-law value. */ | |||
| u_val = ~u_val; | |||
| /* Complement to obtain normal u-law value. */ | |||
| u_val = ~u_val; | |||
| /* | |||
| * Extract and bias the quantization bits. Then | |||
| * shift up by the segment number and subtract out the bias. | |||
| */ | |||
| t = ((u_val & QUANT_MASK) << 3) + BIAS; | |||
| t <<= ((unsigned)u_val & SEG_MASK) >> SEG_SHIFT; | |||
| /* | |||
| * Extract and bias the quantization bits. Then | |||
| * shift up by the segment number and subtract out the bias. | |||
| */ | |||
| t = ((u_val & QUANT_MASK) << 3) + BIAS; | |||
| t <<= ((unsigned)u_val & SEG_MASK) >> SEG_SHIFT; | |||
| return ((u_val & SIGN_BIT) ? (BIAS - t) : (t - BIAS)); | |||
| return ((u_val & SIGN_BIT) ? (BIAS - t) : (t - BIAS)); | |||
| } | |||
| /* 16384 entries per table */ | |||
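alaw2linear() and ulaw2linear() above expand one 8-bit companded sample into 16-bit linear PCM. Since the input is a single byte, decoding is commonly done through a 256-entry table built once from these functions; the 16384-entry tables referenced in the comment appear to serve the reverse, linear-to-companded direction. A hedged sketch of such a decode table (names are illustrative, and it reuses alaw2linear() exactly as defined above):

    #include <stdint.h>

    static int16_t alaw_to_s16[256];     /* illustrative decode table */

    /* Fill the table once, using the alaw2linear() expansion shown above,
     * so per-sample decoding becomes a single table read. */
    static void init_alaw_table(void)
    {
        int v;
        for (v = 0; v < 256; v++)
            alaw_to_s16[v] = (int16_t)alaw2linear((unsigned char)v);
    }

    /* Decode a block of A-law bytes to 16-bit PCM through the table. */
    static void alaw_decode(int16_t *dst, const uint8_t *src, int n)
    {
        int i;
        for (i = 0; i < n; i++)
            dst[i] = alaw_to_s16[src[i]];
    }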
| @@ -209,7 +209,7 @@ static inline void encode_from16(int bps, int le, int us, | |||
| } | |||
| static int pcm_encode_frame(AVCodecContext *avctx, | |||
| unsigned char *frame, int buf_size, void *data) | |||
| unsigned char *frame, int buf_size, void *data) | |||
| { | |||
| int n, sample_size, v; | |||
| short *samples; | |||
| @@ -397,8 +397,8 @@ static inline void decode_to16(int bps, int le, int us, | |||
| } | |||
| static int pcm_decode_frame(AVCodecContext *avctx, | |||
| void *data, int *data_size, | |||
| uint8_t *buf, int buf_size) | |||
| void *data, int *data_size, | |||
| uint8_t *buf, int buf_size) | |||
| { | |||
| PCMDecode *s = avctx->priv_data; | |||
| int n; | |||
| @@ -509,9 +509,9 @@ AVCodec name ## _encoder = { \ | |||
| CODEC_TYPE_AUDIO, \ | |||
| id, \ | |||
| 0, \ | |||
| pcm_encode_init, \ | |||
| pcm_encode_frame, \ | |||
| pcm_encode_close, \ | |||
| pcm_encode_init, \ | |||
| pcm_encode_frame, \ | |||
| pcm_encode_close, \ | |||
| NULL, \ | |||
| }; \ | |||
| AVCodec name ## _decoder = { \ | |||
| @@ -519,7 +519,7 @@ AVCodec name ## _decoder = { \ | |||
| CODEC_TYPE_AUDIO, \ | |||
| id, \ | |||
| sizeof(PCMDecode), \ | |||
| pcm_decode_init, \ | |||
| pcm_decode_init, \ | |||
| NULL, \ | |||
| NULL, \ | |||
| pcm_decode_frame, \ | |||
| @@ -67,7 +67,7 @@ int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h | |||
| /* | |||
| Read unaligned pixels into our vectors. The vectors are as follows: | |||
| pix1v: pix1[0]-pix1[15] | |||
| pix2v: pix2[0]-pix2[15] pix2iv: pix2[1]-pix2[16] | |||
| pix2v: pix2[0]-pix2[15] pix2iv: pix2[1]-pix2[16] | |||
| */ | |||
| tv = (vector unsigned char *) pix1; | |||
| pix1v = vec_perm(tv[0], tv[1], vec_lvsl(0, pix1)); | |||
| @@ -184,7 +184,7 @@ int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int | |||
| fact to avoid a potentially expensive unaligned read, as well | |||
| as some splitting, and vector addition each time around the loop. | |||
| Read unaligned pixels into our vectors. The vectors are as follows: | |||
| pix2v: pix2[0]-pix2[15] pix2iv: pix2[1]-pix2[16] | |||
| pix2v: pix2[0]-pix2[15] pix2iv: pix2[1]-pix2[16] | |||
| Split the pixel vectors into shorts | |||
| */ | |||
| tv = (vector unsigned char *) &pix2[0]; | |||
| @@ -204,7 +204,7 @@ int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int | |||
| /* | |||
| Read unaligned pixels into our vectors. The vectors are as follows: | |||
| pix1v: pix1[0]-pix1[15] | |||
| pix3v: pix3[0]-pix3[15] pix3iv: pix3[1]-pix3[16] | |||
| pix3v: pix3[0]-pix3[15] pix3iv: pix3[1]-pix3[16] | |||
| */ | |||
| tv = (vector unsigned char *) pix1; | |||
| pix1v = vec_perm(tv[0], tv[1], vec_lvsl(0, pix1)); | |||
| @@ -273,7 +273,7 @@ int sad16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | |||
| for(i=0;i<h;i++) { | |||
| /* Read potentially unaligned pixels into t1 and t2 */ | |||
| /* Read potentially unaligned pixels into t1 and t2 */ | |||
| perm1 = vec_lvsl(0, pix1); | |||
| pix1v = (vector unsigned char *) pix1; | |||
| perm2 = vec_lvsl(0, pix2); | |||
| @@ -281,12 +281,12 @@ int sad16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | |||
| t1 = vec_perm(pix1v[0], pix1v[1], perm1); | |||
| t2 = vec_perm(pix2v[0], pix2v[1], perm2); | |||
| /* Calculate a sum of abs differences vector */ | |||
| /* Calculate a sum of abs differences vector */ | |||
| t3 = vec_max(t1, t2); | |||
| t4 = vec_min(t1, t2); | |||
| t5 = vec_sub(t3, t4); | |||
| /* Add each 4 pixel group together and put 4 results into sad */ | |||
| /* Add each 4 pixel group together and put 4 results into sad */ | |||
| sad = vec_sum4s(t5, sad); | |||
| pix1 += line_size; | |||
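The SAD routines above rely on the identity |a - b| = max(a,b) - min(a,b) for unsigned pixels (the vec_max/vec_min/vec_sub triple) and accumulate with vec_sum4s. For comparison, a plain scalar version of the same 16-pixel-wide sum of absolute differences, a reference sketch rather than the dsputil C implementation:

    #include <stdint.h>

    /* Scalar sum of absolute differences over a 16-pixel-wide block, the
     * quantity the vector code computes with vec_max/vec_min/vec_sub and
     * accumulates with vec_sum4s. */
    static int sad16_ref(const uint8_t *pix1, const uint8_t *pix2,
                         int line_size, int h)
    {
        int x, y, sum = 0;
        for (y = 0; y < h; y++) {
            for (x = 0; x < 16; x++) {
                int a = pix1[x], b = pix2[x];
                sum += (a > b) ? a - b : b - a;   /* |a-b| = max - min */
            }
            pix1 += line_size;
            pix2 += line_size;
        }
        return sum;
    }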
| @@ -316,9 +316,9 @@ int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | |||
| permclear = (vector unsigned char)AVV(255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0); | |||
| for(i=0;i<h;i++) { | |||
| /* Read potentially unaligned pixels into t1 and t2 | |||
| Since we're reading 16 pixels, and actually only want 8, | |||
| mask out the last 8 pixels. The 0s don't change the sum. */ | |||
| /* Read potentially unaligned pixels into t1 and t2 | |||
| Since we're reading 16 pixels, and actually only want 8, | |||
| mask out the last 8 pixels. The 0s don't change the sum. */ | |||
| perm1 = vec_lvsl(0, pix1); | |||
| pix1v = (vector unsigned char *) pix1; | |||
| perm2 = vec_lvsl(0, pix2); | |||
| @@ -326,12 +326,12 @@ int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | |||
| t1 = vec_and(vec_perm(pix1v[0], pix1v[1], perm1), permclear); | |||
| t2 = vec_and(vec_perm(pix2v[0], pix2v[1], perm2), permclear); | |||
| /* Calculate a sum of abs differences vector */ | |||
| /* Calculate a sum of abs differences vector */ | |||
| t3 = vec_max(t1, t2); | |||
| t4 = vec_min(t1, t2); | |||
| t5 = vec_sub(t3, t4); | |||
| /* Add each 4 pixel group together and put 4 results into sad */ | |||
| /* Add each 4 pixel group together and put 4 results into sad */ | |||
| sad = vec_sum4s(t5, sad); | |||
| pix1 += line_size; | |||
| @@ -398,9 +398,9 @@ int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | |||
| for(i=0;i<h;i++) { | |||
| /* Read potentially unaligned pixels into t1 and t2 | |||
| Since we're reading 16 pixels, and actually only want 8, | |||
| mask out the last 8 pixels. The 0s don't change the sum. */ | |||
| /* Read potentially unaligned pixels into t1 and t2 | |||
| Since we're reading 16 pixels, and actually only want 8, | |||
| mask out the last 8 pixels. The 0s don't change the sum. */ | |||
| perm1 = vec_lvsl(0, pix1); | |||
| pix1v = (vector unsigned char *) pix1; | |||
| perm2 = vec_lvsl(0, pix2); | |||
| @@ -413,7 +413,7 @@ int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | |||
| of the fact that abs(a-b)^2 = (a-b)^2. | |||
| */ | |||
| /* Calculate abs differences vector */ | |||
| /* Calculate abs differences vector */ | |||
| t3 = vec_max(t1, t2); | |||
| t4 = vec_min(t1, t2); | |||
| t5 = vec_sub(t3, t4); | |||
| @@ -451,7 +451,7 @@ int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | |||
| sum = (vector unsigned int)vec_splat_u32(0); | |||
| for(i=0;i<h;i++) { | |||
| /* Read potentially unaligned pixels into t1 and t2 */ | |||
| /* Read potentially unaligned pixels into t1 and t2 */ | |||
| perm1 = vec_lvsl(0, pix1); | |||
| pix1v = (vector unsigned char *) pix1; | |||
| perm2 = vec_lvsl(0, pix2); | |||
| @@ -464,7 +464,7 @@ int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | |||
| of the fact that abs(a-b)^2 = (a-b)^2. | |||
| */ | |||
| /* Calculate abs differences vector */ | |||
| /* Calculate abs differences vector */ | |||
| t3 = vec_max(t1, t2); | |||
| t4 = vec_min(t1, t2); | |||
| t5 = vec_sub(t3, t4); | |||
| @@ -498,12 +498,12 @@ int pix_sum_altivec(uint8_t * pix, int line_size) | |||
| sad = (vector unsigned int)vec_splat_u32(0); | |||
| for (i = 0; i < 16; i++) { | |||
| /* Read the potentially unaligned 16 pixels into t1 */ | |||
| /* Read the potentially unaligned 16 pixels into t1 */ | |||
| perm = vec_lvsl(0, pix); | |||
| pixv = (vector unsigned char *) pix; | |||
| t1 = vec_perm(pixv[0], pixv[1], perm); | |||
| /* Add each 4 pixel group together and put 4 results into sad */ | |||
| /* Add each 4 pixel group together and put 4 results into sad */ | |||
| sad = vec_sum4s(t1, sad); | |||
| pix += line_size; | |||
| @@ -1335,32 +1335,32 @@ POWERPC_PERF_START_COUNT(altivec_hadamard8_diff8x8_num, 1); | |||
| 0x00, 0x01, 0x02, 0x03, | |||
| 0x04, 0x05, 0x06, 0x07); | |||
| #define ONEITERBUTTERFLY(i, res) \ | |||
| { \ | |||
| register vector unsigned char src1, src2, srcO; \ | |||
| register vector unsigned char dst1, dst2, dstO; \ | |||
| src1 = vec_ld(stride * i, src); \ | |||
| if ((((stride * i) + (unsigned long)src) & 0x0000000F) > 8) \ | |||
| src2 = vec_ld((stride * i) + 16, src); \ | |||
| srcO = vec_perm(src1, src2, vec_lvsl(stride * i, src)); \ | |||
| dst1 = vec_ld(stride * i, dst); \ | |||
| if ((((stride * i) + (unsigned long)dst) & 0x0000000F) > 8) \ | |||
| dst2 = vec_ld((stride * i) + 16, dst); \ | |||
| dstO = vec_perm(dst1, dst2, vec_lvsl(stride * i, dst)); \ | |||
| /* promote the unsigned chars to signed shorts */ \ | |||
| /* we're in the 8x8 function, we only care for the first 8 */ \ | |||
| register vector signed short srcV = \ | |||
| (vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)srcO); \ | |||
| register vector signed short dstV = \ | |||
| (vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)dstO); \ | |||
| /* subtractions inside the first butterfly */ \ | |||
| register vector signed short but0 = vec_sub(srcV, dstV); \ | |||
| register vector signed short op1 = vec_perm(but0, but0, perm1); \ | |||
| register vector signed short but1 = vec_mladd(but0, vprod1, op1); \ | |||
| register vector signed short op2 = vec_perm(but1, but1, perm2); \ | |||
| register vector signed short but2 = vec_mladd(but1, vprod2, op2); \ | |||
| register vector signed short op3 = vec_perm(but2, but2, perm3); \ | |||
| res = vec_mladd(but2, vprod3, op3); \ | |||
| #define ONEITERBUTTERFLY(i, res) \ | |||
| { \ | |||
| register vector unsigned char src1, src2, srcO; \ | |||
| register vector unsigned char dst1, dst2, dstO; \ | |||
| src1 = vec_ld(stride * i, src); \ | |||
| if ((((stride * i) + (unsigned long)src) & 0x0000000F) > 8) \ | |||
| src2 = vec_ld((stride * i) + 16, src); \ | |||
| srcO = vec_perm(src1, src2, vec_lvsl(stride * i, src)); \ | |||
| dst1 = vec_ld(stride * i, dst); \ | |||
| if ((((stride * i) + (unsigned long)dst) & 0x0000000F) > 8) \ | |||
| dst2 = vec_ld((stride * i) + 16, dst); \ | |||
| dstO = vec_perm(dst1, dst2, vec_lvsl(stride * i, dst)); \ | |||
| /* promote the unsigned chars to signed shorts */ \ | |||
| /* we're in the 8x8 function, we only care for the first 8 */ \ | |||
| register vector signed short srcV = \ | |||
| (vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)srcO); \ | |||
| register vector signed short dstV = \ | |||
| (vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)dstO); \ | |||
| /* subtractions inside the first butterfly */ \ | |||
| register vector signed short but0 = vec_sub(srcV, dstV); \ | |||
| register vector signed short op1 = vec_perm(but0, but0, perm1); \ | |||
| register vector signed short but1 = vec_mladd(but0, vprod1, op1); \ | |||
| register vector signed short op2 = vec_perm(but1, but1, perm2); \ | |||
| register vector signed short but2 = vec_mladd(but1, vprod2, op2); \ | |||
| register vector signed short op3 = vec_perm(but2, but2, perm3); \ | |||
| res = vec_mladd(but2, vprod3, op3); \ | |||
| } | |||
| ONEITERBUTTERFLY(0, temp0); | |||
| ONEITERBUTTERFLY(1, temp1); | |||
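ONEITERBUTTERFLY loads one row each of src and dst, widens the bytes to signed shorts, takes their difference, and pushes it through three butterfly stages built from vec_perm/vec_mladd; the remaining stages live further down in the function. As a point of reference, a generic scalar 8x8 Hadamard-difference metric can be written with the classic in-place butterfly passes (a sketch of the general technique, not claimed to be bit-identical to the dsputil function):

    #include <stdint.h>
    #include <stdlib.h>

    /* Generic scalar reference: 2D Walsh-Hadamard transform of the 8x8
     * difference block, then the sum of absolute transformed coefficients. */
    static int hadamard8_diff_ref(const uint8_t *dst, const uint8_t *src, int stride)
    {
        int blk[8][8];
        int i, j, k, sum = 0;

        for (i = 0; i < 8; i++)
            for (j = 0; j < 8; j++)
                blk[i][j] = src[i * stride + j] - dst[i * stride + j];

        /* three butterfly passes across each row */
        for (i = 0; i < 8; i++)
            for (k = 1; k < 8; k <<= 1)
                for (j = 0; j < 8; j++)
                    if (!(j & k)) {
                        int a = blk[i][j], b = blk[i][j + k];
                        blk[i][j]     = a + b;
                        blk[i][j + k] = a - b;
                    }

        /* and the same three passes down each column */
        for (j = 0; j < 8; j++)
            for (k = 1; k < 8; k <<= 1)
                for (i = 0; i < 8; i++)
                    if (!(i & k)) {
                        int a = blk[i][j], b = blk[i + k][j];
                        blk[i][j]     = a + b;
                        blk[i + k][j] = a - b;
                    }

        for (i = 0; i < 8; i++)
            for (j = 0; j < 8; j++)
                sum += abs(blk[i][j]);
        return sum;
    }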
| @@ -1480,26 +1480,26 @@ static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, | |||
| 0x00, 0x01, 0x02, 0x03, | |||
| 0x04, 0x05, 0x06, 0x07); | |||
| #define ONEITERBUTTERFLY(i, res1, res2) \ | |||
| { \ | |||
| #define ONEITERBUTTERFLY(i, res1, res2) \ | |||
| { \ | |||
| register vector unsigned char src1 asm ("v22"), src2 asm ("v23"); \ | |||
| register vector unsigned char dst1 asm ("v24"), dst2 asm ("v25"); \ | |||
| src1 = vec_ld(stride * i, src); \ | |||
| src2 = vec_ld((stride * i) + 16, src); \ | |||
| src1 = vec_ld(stride * i, src); \ | |||
| src2 = vec_ld((stride * i) + 16, src); \ | |||
| register vector unsigned char srcO asm ("v22") = vec_perm(src1, src2, vec_lvsl(stride * i, src)); \ | |||
| dst1 = vec_ld(stride * i, dst); \ | |||
| dst2 = vec_ld((stride * i) + 16, dst); \ | |||
| dst1 = vec_ld(stride * i, dst); \ | |||
| dst2 = vec_ld((stride * i) + 16, dst); \ | |||
| register vector unsigned char dstO asm ("v23") = vec_perm(dst1, dst2, vec_lvsl(stride * i, dst)); \ | |||
| /* promote the unsigned chars to signed shorts */ \ | |||
| /* promote the unsigned chars to signed shorts */ \ | |||
| register vector signed short srcV asm ("v24") = \ | |||
| (vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)srcO); \ | |||
| (vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)srcO); \ | |||
| register vector signed short dstV asm ("v25") = \ | |||
| (vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)dstO); \ | |||
| (vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)dstO); \ | |||
| register vector signed short srcW asm ("v26") = \ | |||
| (vector signed short)vec_mergel((vector signed char)vzero, (vector signed char)srcO); \ | |||
| (vector signed short)vec_mergel((vector signed char)vzero, (vector signed char)srcO); \ | |||
| register vector signed short dstW asm ("v27") = \ | |||
| (vector signed short)vec_mergel((vector signed char)vzero, (vector signed char)dstO); \ | |||
| /* subtractions inside the first butterfly */ \ | |||
| (vector signed short)vec_mergel((vector signed char)vzero, (vector signed char)dstO); \ | |||
| /* subtractions inside the first butterfly */ \ | |||
| register vector signed short but0 asm ("v28") = vec_sub(srcV, dstV); \ | |||
| register vector signed short but0S asm ("v29") = vec_sub(srcW, dstW); \ | |||
| register vector signed short op1 asm ("v30") = vec_perm(but0, but0, perm1); \ | |||
| @@ -1511,9 +1511,9 @@ static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, | |||
| register vector signed short op2S asm ("v27") = vec_perm(but1S, but1S, perm2); \ | |||
| register vector signed short but2S asm ("v28") = vec_mladd(but1S, vprod2, op2S); \ | |||
| register vector signed short op3 asm ("v29") = vec_perm(but2, but2, perm3); \ | |||
| res1 = vec_mladd(but2, vprod3, op3); \ | |||
| res1 = vec_mladd(but2, vprod3, op3); \ | |||
| register vector signed short op3S asm ("v30") = vec_perm(but2S, but2S, perm3); \ | |||
| res2 = vec_mladd(but2S, vprod3, op3S); \ | |||
| res2 = vec_mladd(but2S, vprod3, op3S); \ | |||
| } | |||
| ONEITERBUTTERFLY(0, temp0, temp0S); | |||
| ONEITERBUTTERFLY(1, temp1, temp1S); | |||
| @@ -1623,12 +1623,12 @@ POWERPC_PERF_STOP_COUNT(altivec_hadamard8_diff16_num, 1); | |||
| int has_altivec(void) | |||
| { | |||
| #ifdef __AMIGAOS4__ | |||
| ULONG result = 0; | |||
| extern struct ExecIFace *IExec; | |||
| ULONG result = 0; | |||
| extern struct ExecIFace *IExec; | |||
| IExec->GetCPUInfoTags(GCIT_VectorUnit, &result, TAG_DONE); | |||
| if (result == VECTORTYPE_ALTIVEC) return 1; | |||
| return 0; | |||
| IExec->GetCPUInfoTags(GCIT_VectorUnit, &result, TAG_DONE); | |||
| if (result == VECTORTYPE_ALTIVEC) return 1; | |||
| return 0; | |||
| #else /* __AMIGAOS4__ */ | |||
| #ifdef CONFIG_DARWIN | |||
| @@ -191,33 +191,33 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## CODETYPE(uint8_t *dst, uint | |||
| /* from dsputil.c */ | |||
| static inline void put_pixels8_l2(uint8_t * dst, const uint8_t * src1, const uint8_t * src2, int dst_stride, int src_stride1, int src_stride2, int h) { | |||
| int i; | |||
| for (i = 0; i < h; i++) { | |||
| uint32_t a, b; | |||
| a = (((const struct unaligned_32 *) (&src1[i * src_stride1]))->l); | |||
| b = (((const struct unaligned_32 *) (&src2[i * src_stride2]))->l); | |||
| *((uint32_t *) & dst[i * dst_stride]) = rnd_avg32(a, b); | |||
| a = (((const struct unaligned_32 *) (&src1[i * src_stride1 + 4]))->l); | |||
| b = (((const struct unaligned_32 *) (&src2[i * src_stride2 + 4]))->l); | |||
| *((uint32_t *) & dst[i * dst_stride + 4]) = rnd_avg32(a, b); | |||
| } | |||
| int i; | |||
| for (i = 0; i < h; i++) { | |||
| uint32_t a, b; | |||
| a = (((const struct unaligned_32 *) (&src1[i * src_stride1]))->l); | |||
| b = (((const struct unaligned_32 *) (&src2[i * src_stride2]))->l); | |||
| *((uint32_t *) & dst[i * dst_stride]) = rnd_avg32(a, b); | |||
| a = (((const struct unaligned_32 *) (&src1[i * src_stride1 + 4]))->l); | |||
| b = (((const struct unaligned_32 *) (&src2[i * src_stride2 + 4]))->l); | |||
| *((uint32_t *) & dst[i * dst_stride + 4]) = rnd_avg32(a, b); | |||
| } | |||
| } static inline void avg_pixels8_l2(uint8_t * dst, const uint8_t * src1, const uint8_t * src2, int dst_stride, int src_stride1, int src_stride2, int h) { | |||
| int i; | |||
| for (i = 0; i < h; i++) { | |||
| uint32_t a, b; | |||
| a = (((const struct unaligned_32 *) (&src1[i * src_stride1]))->l); | |||
| b = (((const struct unaligned_32 *) (&src2[i * src_stride2]))->l); | |||
| *((uint32_t *) & dst[i * dst_stride]) = rnd_avg32(*((uint32_t *) & dst[i * dst_stride]), rnd_avg32(a, b)); | |||
| a = (((const struct unaligned_32 *) (&src1[i * src_stride1 + 4]))->l); | |||
| b = (((const struct unaligned_32 *) (&src2[i * src_stride2 + 4]))->l); | |||
| *((uint32_t *) & dst[i * dst_stride + 4]) = rnd_avg32(*((uint32_t *) & dst[i * dst_stride + 4]), rnd_avg32(a, b)); | |||
| } | |||
| int i; | |||
| for (i = 0; i < h; i++) { | |||
| uint32_t a, b; | |||
| a = (((const struct unaligned_32 *) (&src1[i * src_stride1]))->l); | |||
| b = (((const struct unaligned_32 *) (&src2[i * src_stride2]))->l); | |||
| *((uint32_t *) & dst[i * dst_stride]) = rnd_avg32(*((uint32_t *) & dst[i * dst_stride]), rnd_avg32(a, b)); | |||
| a = (((const struct unaligned_32 *) (&src1[i * src_stride1 + 4]))->l); | |||
| b = (((const struct unaligned_32 *) (&src2[i * src_stride2 + 4]))->l); | |||
| *((uint32_t *) & dst[i * dst_stride + 4]) = rnd_avg32(*((uint32_t *) & dst[i * dst_stride + 4]), rnd_avg32(a, b)); | |||
| } | |||
| } static inline void put_pixels16_l2(uint8_t * dst, const uint8_t * src1, const uint8_t * src2, int dst_stride, int src_stride1, int src_stride2, int h) { | |||
| put_pixels8_l2(dst, src1, src2, dst_stride, src_stride1, src_stride2, h); | |||
| put_pixels8_l2(dst + 8, src1 + 8, src2 + 8, dst_stride, src_stride1, src_stride2, h); | |||
| put_pixels8_l2(dst, src1, src2, dst_stride, src_stride1, src_stride2, h); | |||
| put_pixels8_l2(dst + 8, src1 + 8, src2 + 8, dst_stride, src_stride1, src_stride2, h); | |||
| } static inline void avg_pixels16_l2(uint8_t * dst, const uint8_t * src1, const uint8_t * src2, int dst_stride, int src_stride1, int src_stride2, int h) { | |||
| avg_pixels8_l2(dst, src1, src2, dst_stride, src_stride1, src_stride2, h); | |||
| avg_pixels8_l2(dst + 8, src1 + 8, src2 + 8, dst_stride, src_stride1, src_stride2, h); | |||
| avg_pixels8_l2(dst, src1, src2, dst_stride, src_stride1, src_stride2, h); | |||
| avg_pixels8_l2(dst + 8, src1 + 8, src2 + 8, dst_stride, src_stride1, src_stride2, h); | |||
| } | |||
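put_pixels8_l2()/avg_pixels8_l2() above average two rows four bytes at a time through rnd_avg32(), i.e. a rounded per-byte average of two packed 32-bit words computed without unpacking them. The standard bit trick for such a helper, shown here as an assumption about what rnd_avg32() does rather than a quote of it:

    #include <stdint.h>

    /* Rounded per-byte average of two packed 32-bit words, without unpacking:
     * a|b equals (a&b) + (a^b), so subtracting the halved XOR leaves
     * (a&b) + ceil((a^b)/2), i.e. (a+b+1)>>1 for every byte; the 0xFEFEFEFE
     * mask keeps the shifted bits from leaking across byte lanes. */
    static uint32_t rnd_avg32_sketch(uint32_t a, uint32_t b)
    {
        return (a | b) - (((a ^ b) & 0xFEFEFEFEu) >> 1);
    }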
| /* UNIMPLEMENTED YET !! */ | |||
| @@ -87,16 +87,16 @@ void powerpc_display_perf_report(void) | |||
| { | |||
| for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++) | |||
| { | |||
| if (perfdata[j][i][powerpc_data_num] != (unsigned long long)0) | |||
| av_log(NULL, AV_LOG_INFO, | |||
| " Function \"%s\" (pmc%d):\n\tmin: %llu\n\tmax: %llu\n\tavg: %1.2lf (%llu)\n", | |||
| perfname[i], | |||
| j+1, | |||
| perfdata[j][i][powerpc_data_min], | |||
| perfdata[j][i][powerpc_data_max], | |||
| (double)perfdata[j][i][powerpc_data_sum] / | |||
| (double)perfdata[j][i][powerpc_data_num], | |||
| perfdata[j][i][powerpc_data_num]); | |||
| if (perfdata[j][i][powerpc_data_num] != (unsigned long long)0) | |||
| av_log(NULL, AV_LOG_INFO, | |||
| " Function \"%s\" (pmc%d):\n\tmin: %llu\n\tmax: %llu\n\tavg: %1.2lf (%llu)\n", | |||
| perfname[i], | |||
| j+1, | |||
| perfdata[j][i][powerpc_data_min], | |||
| perfdata[j][i][powerpc_data_max], | |||
| (double)perfdata[j][i][powerpc_data_sum] / | |||
| (double)perfdata[j][i][powerpc_data_num], | |||
| perfdata[j][i][powerpc_data_num]); | |||
| } | |||
| } | |||
| } | |||
| @@ -179,7 +179,7 @@ POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz128, 1); | |||
| } | |||
| else | |||
| for ( ; i < sizeof(DCTELEM)*6*64 ; i += 128) { | |||
| asm volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory"); | |||
| asm volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory"); | |||
| } | |||
| #else | |||
| memset(blocks, 0, sizeof(DCTELEM)*6*64); | |||
| @@ -284,25 +284,25 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx) | |||
| c->put_no_rnd_pixels_tab[0][0] = put_pixels16_altivec; | |||
| c->avg_pixels_tab[0][0] = avg_pixels16_altivec; | |||
| c->avg_pixels_tab[1][0] = avg_pixels8_altivec; | |||
| c->avg_pixels_tab[1][3] = avg_pixels8_xy2_altivec; | |||
| c->avg_pixels_tab[1][3] = avg_pixels8_xy2_altivec; | |||
| c->put_pixels_tab[1][3] = put_pixels8_xy2_altivec; | |||
| c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_altivec; | |||
| c->put_pixels_tab[0][3] = put_pixels16_xy2_altivec; | |||
| c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_altivec; | |||
| c->gmc1 = gmc1_altivec; | |||
| c->gmc1 = gmc1_altivec; | |||
| #ifdef CONFIG_DARWIN // ATM gcc-3.3 and gcc-3.4 fail to compile these in linux... | |||
| c->hadamard8_diff[0] = hadamard8_diff16_altivec; | |||
| c->hadamard8_diff[1] = hadamard8_diff8x8_altivec; | |||
| c->hadamard8_diff[0] = hadamard8_diff16_altivec; | |||
| c->hadamard8_diff[1] = hadamard8_diff8x8_altivec; | |||
| #endif | |||
| #ifdef CONFIG_ENCODERS | |||
| if (avctx->dct_algo == FF_DCT_AUTO || | |||
| avctx->dct_algo == FF_DCT_ALTIVEC) | |||
| { | |||
| c->fdct = fdct_altivec; | |||
| } | |||
| if (avctx->dct_algo == FF_DCT_AUTO || | |||
| avctx->dct_algo == FF_DCT_ALTIVEC) | |||
| { | |||
| c->fdct = fdct_altivec; | |||
| } | |||
| #endif //CONFIG_ENCODERS | |||
| if (avctx->lowres==0) | |||
| @@ -325,14 +325,14 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx) | |||
| int i, j; | |||
| for (i = 0 ; i < powerpc_perf_total ; i++) | |||
| { | |||
| for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++) | |||
| { | |||
| perfdata[j][i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFFULL; | |||
| perfdata[j][i][powerpc_data_max] = 0x0000000000000000ULL; | |||
| perfdata[j][i][powerpc_data_sum] = 0x0000000000000000ULL; | |||
| perfdata[j][i][powerpc_data_num] = 0x0000000000000000ULL; | |||
| } | |||
| } | |||
| for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++) | |||
| { | |||
| perfdata[j][i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFFULL; | |||
| perfdata[j][i][powerpc_data_max] = 0x0000000000000000ULL; | |||
| perfdata[j][i][powerpc_data_sum] = 0x0000000000000000ULL; | |||
| perfdata[j][i][powerpc_data_num] = 0x0000000000000000ULL; | |||
| } | |||
| } | |||
| } | |||
| #endif /* POWERPC_PERFORMANCE_REPORT */ | |||
| } else | |||
| @@ -114,10 +114,10 @@ extern unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][ | |||
| #define POWERPC_GET_PMC6(a) do {} while (0) | |||
| #endif | |||
| #endif /* POWERPC_MODE_64BITS */ | |||
| #define POWERPC_PERF_DECLARE(a, cond) \ | |||
| POWERP_PMC_DATATYPE \ | |||
| pmc_start[POWERPC_NUM_PMC_ENABLED], \ | |||
| pmc_stop[POWERPC_NUM_PMC_ENABLED], \ | |||
| #define POWERPC_PERF_DECLARE(a, cond) \ | |||
| POWERP_PMC_DATATYPE \ | |||
| pmc_start[POWERPC_NUM_PMC_ENABLED], \ | |||
| pmc_stop[POWERPC_NUM_PMC_ENABLED], \ | |||
| pmc_loop_index; | |||
| #define POWERPC_PERF_START_COUNT(a, cond) do { \ | |||
| POWERPC_GET_PMC6(pmc_start[5]); \ | |||
| @@ -141,8 +141,8 @@ extern unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][ | |||
| pmc_loop_index++) \ | |||
| { \ | |||
| if (pmc_stop[pmc_loop_index] >= pmc_start[pmc_loop_index]) \ | |||
| { \ | |||
| POWERP_PMC_DATATYPE diff = \ | |||
| { \ | |||
| POWERP_PMC_DATATYPE diff = \ | |||
| pmc_stop[pmc_loop_index] - pmc_start[pmc_loop_index]; \ | |||
| if (diff < perfdata[pmc_loop_index][a][powerpc_data_min]) \ | |||
| perfdata[pmc_loop_index][a][powerpc_data_min] = diff; \ | |||
| @@ -65,8 +65,8 @@ void ff_fft_calc_altivec(FFTContext *s, FFTComplex *z) | |||
| POWERPC_PERF_DECLARE(altivec_fft_num, s->nbits >= 6); | |||
| #ifdef ALTIVEC_USE_REFERENCE_C_CODE | |||
| int ln = s->nbits; | |||
| int j, np, np2; | |||
| int nblocks, nloops; | |||
| int j, np, np2; | |||
| int nblocks, nloops; | |||
| register FFTComplex *p, *q; | |||
| FFTComplex *exptab = s->exptab; | |||
| int l; | |||
| @@ -147,8 +147,8 @@ POWERPC_PERF_STOP_COUNT(altivec_fft_num, s->nbits >= 6); | |||
| #endif | |||
| int ln = s->nbits; | |||
| int j, np, np2; | |||
| int nblocks, nloops; | |||
| int j, np, np2; | |||
| int nblocks, nloops; | |||
| register FFTComplex *p, *q; | |||
| FFTComplex *cptr, *cptr1; | |||
| int k; | |||
| @@ -30,31 +30,31 @@ | |||
| */ | |||
| static inline vector signed char ff_vmrglb (vector signed char const A, | |||
| vector signed char const B) | |||
| vector signed char const B) | |||
| { | |||
| static const vector unsigned char lowbyte = { | |||
| 0x08, 0x18, 0x09, 0x19, 0x0a, 0x1a, 0x0b, 0x1b, | |||
| 0x0c, 0x1c, 0x0d, 0x1d, 0x0e, 0x1e, 0x0f, 0x1f | |||
| 0x08, 0x18, 0x09, 0x19, 0x0a, 0x1a, 0x0b, 0x1b, | |||
| 0x0c, 0x1c, 0x0d, 0x1d, 0x0e, 0x1e, 0x0f, 0x1f | |||
| }; | |||
| return vec_perm (A, B, lowbyte); | |||
| } | |||
| static inline vector signed short ff_vmrglh (vector signed short const A, | |||
| vector signed short const B) | |||
| vector signed short const B) | |||
| { | |||
| static const vector unsigned char lowhalf = { | |||
| 0x08, 0x09, 0x18, 0x19, 0x0a, 0x0b, 0x1a, 0x1b, | |||
| 0x0c, 0x0d, 0x1c, 0x1d, 0x0e, 0x0f, 0x1e, 0x1f | |||
| 0x08, 0x09, 0x18, 0x19, 0x0a, 0x0b, 0x1a, 0x1b, | |||
| 0x0c, 0x0d, 0x1c, 0x1d, 0x0e, 0x0f, 0x1e, 0x1f | |||
| }; | |||
| return vec_perm (A, B, lowhalf); | |||
| } | |||
| static inline vector signed int ff_vmrglw (vector signed int const A, | |||
| vector signed int const B) | |||
| vector signed int const B) | |||
| { | |||
| static const vector unsigned char lowword = { | |||
| 0x08, 0x09, 0x0a, 0x0b, 0x18, 0x19, 0x1a, 0x1b, | |||
| 0x0c, 0x0d, 0x0e, 0x0f, 0x1c, 0x1d, 0x1e, 0x1f | |||
| 0x08, 0x09, 0x0a, 0x0b, 0x18, 0x19, 0x1a, 0x1b, | |||
| 0x0c, 0x0d, 0x0e, 0x0f, 0x1c, 0x1d, 0x1e, 0x1f | |||
| }; | |||
| return vec_perm (A, B, lowword); | |||
| } | |||
| @@ -51,108 +51,108 @@ | |||
| #define vector_s32_t vector signed int | |||
| #define vector_u32_t vector unsigned int | |||
| #define IDCT_HALF \ | |||
| /* 1st stage */ \ | |||
| t1 = vec_mradds (a1, vx7, vx1 ); \ | |||
| t8 = vec_mradds (a1, vx1, vec_subs (zero, vx7)); \ | |||
| t7 = vec_mradds (a2, vx5, vx3); \ | |||
| t3 = vec_mradds (ma2, vx3, vx5); \ | |||
| \ | |||
| /* 2nd stage */ \ | |||
| t5 = vec_adds (vx0, vx4); \ | |||
| t0 = vec_subs (vx0, vx4); \ | |||
| t2 = vec_mradds (a0, vx6, vx2); \ | |||
| t4 = vec_mradds (a0, vx2, vec_subs (zero, vx6)); \ | |||
| t6 = vec_adds (t8, t3); \ | |||
| t3 = vec_subs (t8, t3); \ | |||
| t8 = vec_subs (t1, t7); \ | |||
| t1 = vec_adds (t1, t7); \ | |||
| \ | |||
| /* 3rd stage */ \ | |||
| t7 = vec_adds (t5, t2); \ | |||
| t2 = vec_subs (t5, t2); \ | |||
| t5 = vec_adds (t0, t4); \ | |||
| t0 = vec_subs (t0, t4); \ | |||
| t4 = vec_subs (t8, t3); \ | |||
| t3 = vec_adds (t8, t3); \ | |||
| \ | |||
| /* 4th stage */ \ | |||
| vy0 = vec_adds (t7, t1); \ | |||
| vy7 = vec_subs (t7, t1); \ | |||
| vy1 = vec_mradds (c4, t3, t5); \ | |||
| vy6 = vec_mradds (mc4, t3, t5); \ | |||
| vy2 = vec_mradds (c4, t4, t0); \ | |||
| vy5 = vec_mradds (mc4, t4, t0); \ | |||
| vy3 = vec_adds (t2, t6); \ | |||
| #define IDCT_HALF \ | |||
| /* 1st stage */ \ | |||
| t1 = vec_mradds (a1, vx7, vx1 ); \ | |||
| t8 = vec_mradds (a1, vx1, vec_subs (zero, vx7)); \ | |||
| t7 = vec_mradds (a2, vx5, vx3); \ | |||
| t3 = vec_mradds (ma2, vx3, vx5); \ | |||
| \ | |||
| /* 2nd stage */ \ | |||
| t5 = vec_adds (vx0, vx4); \ | |||
| t0 = vec_subs (vx0, vx4); \ | |||
| t2 = vec_mradds (a0, vx6, vx2); \ | |||
| t4 = vec_mradds (a0, vx2, vec_subs (zero, vx6)); \ | |||
| t6 = vec_adds (t8, t3); \ | |||
| t3 = vec_subs (t8, t3); \ | |||
| t8 = vec_subs (t1, t7); \ | |||
| t1 = vec_adds (t1, t7); \ | |||
| \ | |||
| /* 3rd stage */ \ | |||
| t7 = vec_adds (t5, t2); \ | |||
| t2 = vec_subs (t5, t2); \ | |||
| t5 = vec_adds (t0, t4); \ | |||
| t0 = vec_subs (t0, t4); \ | |||
| t4 = vec_subs (t8, t3); \ | |||
| t3 = vec_adds (t8, t3); \ | |||
| \ | |||
| /* 4th stage */ \ | |||
| vy0 = vec_adds (t7, t1); \ | |||
| vy7 = vec_subs (t7, t1); \ | |||
| vy1 = vec_mradds (c4, t3, t5); \ | |||
| vy6 = vec_mradds (mc4, t3, t5); \ | |||
| vy2 = vec_mradds (c4, t4, t0); \ | |||
| vy5 = vec_mradds (mc4, t4, t0); \ | |||
| vy3 = vec_adds (t2, t6); \ | |||
| vy4 = vec_subs (t2, t6); | |||
| #define IDCT \ | |||
| vector_s16_t vx0, vx1, vx2, vx3, vx4, vx5, vx6, vx7; \ | |||
| vector_s16_t vy0, vy1, vy2, vy3, vy4, vy5, vy6, vy7; \ | |||
| vector_s16_t a0, a1, a2, ma2, c4, mc4, zero, bias; \ | |||
| vector_s16_t t0, t1, t2, t3, t4, t5, t6, t7, t8; \ | |||
| vector_u16_t shift; \ | |||
| \ | |||
| c4 = vec_splat (constants[0], 0); \ | |||
| a0 = vec_splat (constants[0], 1); \ | |||
| a1 = vec_splat (constants[0], 2); \ | |||
| a2 = vec_splat (constants[0], 3); \ | |||
| mc4 = vec_splat (constants[0], 4); \ | |||
| ma2 = vec_splat (constants[0], 5); \ | |||
| bias = (vector_s16_t)vec_splat ((vector_s32_t)constants[0], 3); \ | |||
| \ | |||
| zero = vec_splat_s16 (0); \ | |||
| shift = vec_splat_u16 (4); \ | |||
| \ | |||
| vx0 = vec_mradds (vec_sl (block[0], shift), constants[1], zero); \ | |||
| vx1 = vec_mradds (vec_sl (block[1], shift), constants[2], zero); \ | |||
| vx2 = vec_mradds (vec_sl (block[2], shift), constants[3], zero); \ | |||
| vx3 = vec_mradds (vec_sl (block[3], shift), constants[4], zero); \ | |||
| vx4 = vec_mradds (vec_sl (block[4], shift), constants[1], zero); \ | |||
| vx5 = vec_mradds (vec_sl (block[5], shift), constants[4], zero); \ | |||
| vx6 = vec_mradds (vec_sl (block[6], shift), constants[3], zero); \ | |||
| vx7 = vec_mradds (vec_sl (block[7], shift), constants[2], zero); \ | |||
| \ | |||
| IDCT_HALF \ | |||
| \ | |||
| vx0 = vec_mergeh (vy0, vy4); \ | |||
| vx1 = vec_mergel (vy0, vy4); \ | |||
| vx2 = vec_mergeh (vy1, vy5); \ | |||
| vx3 = vec_mergel (vy1, vy5); \ | |||
| vx4 = vec_mergeh (vy2, vy6); \ | |||
| vx5 = vec_mergel (vy2, vy6); \ | |||
| vx6 = vec_mergeh (vy3, vy7); \ | |||
| vx7 = vec_mergel (vy3, vy7); \ | |||
| \ | |||
| vy0 = vec_mergeh (vx0, vx4); \ | |||
| vy1 = vec_mergel (vx0, vx4); \ | |||
| vy2 = vec_mergeh (vx1, vx5); \ | |||
| vy3 = vec_mergel (vx1, vx5); \ | |||
| vy4 = vec_mergeh (vx2, vx6); \ | |||
| vy5 = vec_mergel (vx2, vx6); \ | |||
| vy6 = vec_mergeh (vx3, vx7); \ | |||
| vy7 = vec_mergel (vx3, vx7); \ | |||
| \ | |||
| vx0 = vec_adds (vec_mergeh (vy0, vy4), bias); \ | |||
| vx1 = vec_mergel (vy0, vy4); \ | |||
| vx2 = vec_mergeh (vy1, vy5); \ | |||
| vx3 = vec_mergel (vy1, vy5); \ | |||
| vx4 = vec_mergeh (vy2, vy6); \ | |||
| vx5 = vec_mergel (vy2, vy6); \ | |||
| vx6 = vec_mergeh (vy3, vy7); \ | |||
| vx7 = vec_mergel (vy3, vy7); \ | |||
| \ | |||
| IDCT_HALF \ | |||
| \ | |||
| shift = vec_splat_u16 (6); \ | |||
| vx0 = vec_sra (vy0, shift); \ | |||
| vx1 = vec_sra (vy1, shift); \ | |||
| vx2 = vec_sra (vy2, shift); \ | |||
| vx3 = vec_sra (vy3, shift); \ | |||
| vx4 = vec_sra (vy4, shift); \ | |||
| vx5 = vec_sra (vy5, shift); \ | |||
| vx6 = vec_sra (vy6, shift); \ | |||
| #define IDCT \ | |||
| vector_s16_t vx0, vx1, vx2, vx3, vx4, vx5, vx6, vx7; \ | |||
| vector_s16_t vy0, vy1, vy2, vy3, vy4, vy5, vy6, vy7; \ | |||
| vector_s16_t a0, a1, a2, ma2, c4, mc4, zero, bias; \ | |||
| vector_s16_t t0, t1, t2, t3, t4, t5, t6, t7, t8; \ | |||
| vector_u16_t shift; \ | |||
| \ | |||
| c4 = vec_splat (constants[0], 0); \ | |||
| a0 = vec_splat (constants[0], 1); \ | |||
| a1 = vec_splat (constants[0], 2); \ | |||
| a2 = vec_splat (constants[0], 3); \ | |||
| mc4 = vec_splat (constants[0], 4); \ | |||
| ma2 = vec_splat (constants[0], 5); \ | |||
| bias = (vector_s16_t)vec_splat ((vector_s32_t)constants[0], 3); \ | |||
| \ | |||
| zero = vec_splat_s16 (0); \ | |||
| shift = vec_splat_u16 (4); \ | |||
| \ | |||
| vx0 = vec_mradds (vec_sl (block[0], shift), constants[1], zero); \ | |||
| vx1 = vec_mradds (vec_sl (block[1], shift), constants[2], zero); \ | |||
| vx2 = vec_mradds (vec_sl (block[2], shift), constants[3], zero); \ | |||
| vx3 = vec_mradds (vec_sl (block[3], shift), constants[4], zero); \ | |||
| vx4 = vec_mradds (vec_sl (block[4], shift), constants[1], zero); \ | |||
| vx5 = vec_mradds (vec_sl (block[5], shift), constants[4], zero); \ | |||
| vx6 = vec_mradds (vec_sl (block[6], shift), constants[3], zero); \ | |||
| vx7 = vec_mradds (vec_sl (block[7], shift), constants[2], zero); \ | |||
| \ | |||
| IDCT_HALF \ | |||
| \ | |||
| vx0 = vec_mergeh (vy0, vy4); \ | |||
| vx1 = vec_mergel (vy0, vy4); \ | |||
| vx2 = vec_mergeh (vy1, vy5); \ | |||
| vx3 = vec_mergel (vy1, vy5); \ | |||
| vx4 = vec_mergeh (vy2, vy6); \ | |||
| vx5 = vec_mergel (vy2, vy6); \ | |||
| vx6 = vec_mergeh (vy3, vy7); \ | |||
| vx7 = vec_mergel (vy3, vy7); \ | |||
| \ | |||
| vy0 = vec_mergeh (vx0, vx4); \ | |||
| vy1 = vec_mergel (vx0, vx4); \ | |||
| vy2 = vec_mergeh (vx1, vx5); \ | |||
| vy3 = vec_mergel (vx1, vx5); \ | |||
| vy4 = vec_mergeh (vx2, vx6); \ | |||
| vy5 = vec_mergel (vx2, vx6); \ | |||
| vy6 = vec_mergeh (vx3, vx7); \ | |||
| vy7 = vec_mergel (vx3, vx7); \ | |||
| \ | |||
| vx0 = vec_adds (vec_mergeh (vy0, vy4), bias); \ | |||
| vx1 = vec_mergel (vy0, vy4); \ | |||
| vx2 = vec_mergeh (vy1, vy5); \ | |||
| vx3 = vec_mergel (vy1, vy5); \ | |||
| vx4 = vec_mergeh (vy2, vy6); \ | |||
| vx5 = vec_mergel (vy2, vy6); \ | |||
| vx6 = vec_mergeh (vy3, vy7); \ | |||
| vx7 = vec_mergel (vy3, vy7); \ | |||
| \ | |||
| IDCT_HALF \ | |||
| \ | |||
| shift = vec_splat_u16 (6); \ | |||
| vx0 = vec_sra (vy0, shift); \ | |||
| vx1 = vec_sra (vy1, shift); \ | |||
| vx2 = vec_sra (vy2, shift); \ | |||
| vx3 = vec_sra (vy3, shift); \ | |||
| vx4 = vec_sra (vy4, shift); \ | |||
| vx5 = vec_sra (vy5, shift); \ | |||
| vx6 = vec_sra (vy6, shift); \ | |||
| vx7 = vec_sra (vy7, shift); | |||
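Both IDCT passes above do their fixed-point arithmetic through vec_mradds, a 16-bit multiply-round-add with saturation. A scalar model of one lane of that primitive, for readability (an approximation based on the usual description of the instruction, not a substitute for the AltiVec specification):

    #include <stdint.h>

    /* Scalar model of one 16-bit lane of vec_mradds(a, b, c):
     * multiply, round at bit 14, arithmetic shift right by 15,
     * add the third operand, saturate to int16. */
    static int16_t mradds_lane(int16_t a, int16_t b, int16_t c)
    {
        int32_t prod = ((int32_t)a * b + 0x4000) >> 15;  /* rounded Q15 product */
        int32_t sum  = prod + c;
        if (sum >  32767) sum =  32767;
        if (sum < -32768) sum = -32768;
        return (int16_t)sum;
    }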
| @@ -180,18 +180,18 @@ POWERPC_PERF_START_COUNT(altivec_idct_put_num, 1); | |||
| #endif | |||
| IDCT | |||
| #define COPY(dest,src) \ | |||
| tmp = vec_packsu (src, src); \ | |||
| vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); \ | |||
| #define COPY(dest,src) \ | |||
| tmp = vec_packsu (src, src); \ | |||
| vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); \ | |||
| vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |||
| COPY (dest, vx0) dest += stride; | |||
| COPY (dest, vx1) dest += stride; | |||
| COPY (dest, vx2) dest += stride; | |||
| COPY (dest, vx3) dest += stride; | |||
| COPY (dest, vx4) dest += stride; | |||
| COPY (dest, vx5) dest += stride; | |||
| COPY (dest, vx6) dest += stride; | |||
| COPY (dest, vx0) dest += stride; | |||
| COPY (dest, vx1) dest += stride; | |||
| COPY (dest, vx2) dest += stride; | |||
| COPY (dest, vx3) dest += stride; | |||
| COPY (dest, vx4) dest += stride; | |||
| COPY (dest, vx5) dest += stride; | |||
| COPY (dest, vx6) dest += stride; | |||
| COPY (dest, vx7) | |||
| POWERPC_PERF_STOP_COUNT(altivec_idct_put_num, 1); | |||
| @@ -225,22 +225,22 @@ POWERPC_PERF_START_COUNT(altivec_idct_add_num, 1); | |||
| perm0 = vec_mergeh (p, p0); | |||
| perm1 = vec_mergeh (p, p1); | |||
| #define ADD(dest,src,perm) \ | |||
| /* *(uint64_t *)&tmp = *(uint64_t *)dest; */ \ | |||
| tmp = vec_ld (0, dest); \ | |||
| tmp2 = (vector_s16_t)vec_perm (tmp, (vector_u8_t)zero, perm); \ | |||
| tmp3 = vec_adds (tmp2, src); \ | |||
| tmp = vec_packsu (tmp3, tmp3); \ | |||
| vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); \ | |||
| #define ADD(dest,src,perm) \ | |||
| /* *(uint64_t *)&tmp = *(uint64_t *)dest; */ \ | |||
| tmp = vec_ld (0, dest); \ | |||
| tmp2 = (vector_s16_t)vec_perm (tmp, (vector_u8_t)zero, perm); \ | |||
| tmp3 = vec_adds (tmp2, src); \ | |||
| tmp = vec_packsu (tmp3, tmp3); \ | |||
| vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); \ | |||
| vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); | |||
| ADD (dest, vx0, perm0) dest += stride; | |||
| ADD (dest, vx1, perm1) dest += stride; | |||
| ADD (dest, vx2, perm0) dest += stride; | |||
| ADD (dest, vx3, perm1) dest += stride; | |||
| ADD (dest, vx4, perm0) dest += stride; | |||
| ADD (dest, vx5, perm1) dest += stride; | |||
| ADD (dest, vx6, perm0) dest += stride; | |||
| ADD (dest, vx0, perm0) dest += stride; | |||
| ADD (dest, vx1, perm1) dest += stride; | |||
| ADD (dest, vx2, perm0) dest += stride; | |||
| ADD (dest, vx3, perm1) dest += stride; | |||
| ADD (dest, vx4, perm0) dest += stride; | |||
| ADD (dest, vx5, perm1) dest += stride; | |||
| ADD (dest, vx6, perm0) dest += stride; | |||
| ADD (dest, vx7, perm1) | |||
| POWERPC_PERF_STOP_COUNT(altivec_idct_add_num, 1); | |||
| @@ -152,9 +152,9 @@ int dct_quantize_altivec(MpegEncContext* s, | |||
| } | |||
| // The following block could exist as a separate altivec dct | |||
| // function. However, if we put it inline, the DCT data can remain | |||
| // in the vector local variables, as floats, which we'll use during the | |||
| // quantize step... | |||
| // function. However, if we put it inline, the DCT data can remain | |||
| // in the vector local variables, as floats, which we'll use during the | |||
| // quantize step... | |||
| { | |||
| const vector float vec_0_298631336 = (vector float)FOUROF(0.298631336f); | |||
| const vector float vec_0_390180644 = (vector float)FOUROF(-0.390180644f); | |||
| @@ -206,11 +206,11 @@ int dct_quantize_altivec(MpegEncContext* s, | |||
| z1 = vec_madd(vec_add(tmp12, tmp13), vec_0_541196100, (vector float)zero); | |||
| // dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), | |||
| // CONST_BITS-PASS1_BITS); | |||
| // CONST_BITS-PASS1_BITS); | |||
| row2 = vec_madd(tmp13, vec_0_765366865, z1); | |||
| // dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), | |||
| // CONST_BITS-PASS1_BITS); | |||
| // CONST_BITS-PASS1_BITS); | |||
| row6 = vec_madd(tmp12, vec_1_847759065, z1); | |||
| z1 = vec_add(tmp4, tmp7); // z1 = tmp4 + tmp7; | |||
| @@ -315,7 +315,7 @@ int dct_quantize_altivec(MpegEncContext* s, | |||
| } | |||
| // Load the bias vector (We add 0.5 to the bias so that we're | |||
| // rounding when we convert to int, instead of flooring.) | |||
| // rounding when we convert to int, instead of flooring.) | |||
| { | |||
| vector signed int biasInt; | |||
| const vector float negOneFloat = (vector float)FOUROF(-1.0f); | |||
| @@ -80,7 +80,7 @@ static void get_pixels_mmi(DCTELEM *block, const uint8_t *pixels, int line_size) | |||
| "pextlb $10, $0, $10 \n\t" | |||
| "sq $10, 80(%1) \n\t" | |||
| "pextlb $8, $0, $8 \n\t" | |||
| "sq $8, 96(%1) \n\t" | |||
| "sq $8, 96(%1) \n\t" | |||
| "pextlb $9, $0, $9 \n\t" | |||
| "sq $9, 112(%1) \n\t" | |||
| ".set pop \n\t" | |||
| @@ -112,7 +112,7 @@ static void put_pixels16_mmi(uint8_t *block, const uint8_t *pixels, int line_siz | |||
| asm volatile ( | |||
| ".set push \n\t" | |||
| ".set mips3 \n\t" | |||
| "1: \n\t" | |||
| "1: \n\t" | |||
| "ldr $8, 0(%1) \n\t" | |||
| "add $11, %1, %3 \n\t" | |||
| "ldl $8, 7(%1) \n\t" | |||
| @@ -133,7 +133,7 @@ static void put_pixels16_mmi(uint8_t *block, const uint8_t *pixels, int line_siz | |||
| "bgtz %2, 1b \n\t" | |||
| ".set pop \n\t" | |||
| : "+r" (block), "+r" (pixels), "+r" (h) : "r" (line_size) | |||
| : "$8", "$9", "$10", "$11", "$12", "$13", "memory" ); | |||
| : "$8", "$9", "$10", "$11", "$12", "$13", "memory" ); | |||
| } | |||
| @@ -15,32 +15,32 @@ | |||
| #include "../dsputil.h" | |||
| #include "mmi.h" | |||
| #define BITS_INV_ACC 5 // 4 or 5 for IEEE | |||
| #define SHIFT_INV_ROW (16 - BITS_INV_ACC) | |||
| #define BITS_INV_ACC 5 // 4 or 5 for IEEE | |||
| #define SHIFT_INV_ROW (16 - BITS_INV_ACC) | |||
| #define SHIFT_INV_COL (1 + BITS_INV_ACC) | |||
| #define TG1 6518 | |||
| #define TG2 13573 | |||
| #define TG3 21895 | |||
| #define CS4 23170 | |||
| #define TG1 6518 | |||
| #define TG2 13573 | |||
| #define TG3 21895 | |||
| #define CS4 23170 | |||
| #define ROUNDER_0 0 | |||
| #define ROUNDER_1 16 | |||
| #define ROUNDER_0 0 | |||
| #define ROUNDER_1 16 | |||
| #define TAB_i_04 (32+0) | |||
| #define TAB_i_17 (32+64) | |||
| #define TAB_i_26 (32+128) | |||
| #define TAB_i_35 (32+192) | |||
| #define TAB_i_04 (32+0) | |||
| #define TAB_i_17 (32+64) | |||
| #define TAB_i_26 (32+128) | |||
| #define TAB_i_35 (32+192) | |||
| #define TG_1_16 (32+256+0) | |||
| #define TG_2_16 (32+256+16) | |||
| #define TG_3_16 (32+256+32) | |||
| #define COS_4_16 (32+256+48) | |||
| #define TG_1_16 (32+256+0) | |||
| #define TG_2_16 (32+256+16) | |||
| #define TG_3_16 (32+256+32) | |||
| #define COS_4_16 (32+256+48) | |||
| #define CLIPMAX (32+256+64+0) | |||
| #define CLIPMAX (32+256+64+0) | |||
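The TG*/CS4 values above read as Q15 fixed-point encodings of the tangents and cosine this IDCT needs: TGk ≈ 2^15·tan(kπ/16) and CS4 ≈ 2^15·cos(π/4). A small self-check sketch of that interpretation (the rounding and the interpretation itself are assumptions inferred from the numbers, not stated in the source):

    #include <math.h>
    #include <stdio.h>

    /* Print the Q15 (x * 2^15) encodings of tan(k*pi/16) and cos(pi/4);
     * rounded to the nearest integer they reproduce TG1=6518, TG2=13573,
     * TG3=21895 and CS4=23170 as defined above. */
    int main(void)
    {
        const double pi = 3.14159265358979323846;
        int k;
        for (k = 1; k <= 3; k++)
            printf("TG%d = %.0f\n", k, floor(tan(k * pi / 16.0) * 32768.0 + 0.5));
        printf("CS4 = %.0f\n", floor(cos(pi / 4.0) * 32768.0 + 0.5));
        return 0;
    }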
| static short consttable[] align16 = { | |||
| /* rounder 0*/ // assume SHIFT_INV_ROW == 11 | |||
| /* rounder 0*/ // assume SHIFT_INV_ROW == 11 | |||
| 0x3ff, 1, 0x3ff, 1, 0x3ff, 1, 0x3ff, 1, | |||
| /* rounder 1*/ | |||
| 0x3ff, 0, 0x3ff, 0, 0x3ff, 0, 0x3ff, 0, | |||
| @@ -75,274 +75,274 @@ static short consttable[] align16 = { | |||
| #define DCT_8_INV_ROW1(blk, rowoff, taboff, rnd, outreg) { \ | |||
| lq(blk, rowoff, $16); /* r16 = x7 x5 x3 x1 x6 x4 x2 x0 */ \ | |||
| /*slot*/ \ | |||
| lq($24, 0+taboff, $17); /* r17 = w */ \ | |||
| /*delay slot $16*/ \ | |||
| lq($24, 16+taboff, $18);/* r18 = w */ \ | |||
| prevh($16, $2); /* r2 = x1 x3 x5 x7 x0 x2 x4 x6 */ \ | |||
| lq($24, 32+taboff, $19);/* r19 = w */ \ | |||
| phmadh($17, $16, $17); /* r17 = b1"b0'a1"a0' */ \ | |||
| lq($24, 48+taboff, $20);/* r20 = w */ \ | |||
| phmadh($18, $2, $18); /* r18 = b1'b0"a1'a0" */ \ | |||
| phmadh($19, $16, $19); /* r19 = b3"b2'a3"a2' */ \ | |||
| phmadh($20, $2, $20); /* r20 = b3'b2"a3'a2" */ \ | |||
| paddw($17, $18, $17); /* r17 = (b1)(b0)(a1)(a0) */ \ | |||
| paddw($19, $20, $19); /* r19 = (b3)(b2)(a3)(a2) */ \ | |||
| pcpyld($19, $17, $18); /* r18 = (a3)(a2)(a1)(a0) */ \ | |||
| pcpyud($17, $19, $20); /* r20 = (b3)(b2)(b1)(b0) */ \ | |||
| paddw($18, rnd, $18); /* r18 = (a3)(a2)(a1)(a0) */\ | |||
| paddw($18, $20, $17); /* r17 = ()()()(a0+b0) */ \ | |||
| psubw($18, $20, $20); /* r20 = ()()()(a0-b0) */ \ | |||
| psraw($17, SHIFT_INV_ROW, $17); /* r17 = (y3 y2 y1 y0) */ \ | |||
| psraw($20, SHIFT_INV_ROW, $20); /* r20 = (y4 y5 y6 y7) */ \ | |||
| ppach($20, $17, outreg);/* out = y4 y5 y6 y7 y3 y2 y1 y0 Note order */ \ | |||
| lq(blk, rowoff, $16); /* r16 = x7 x5 x3 x1 x6 x4 x2 x0 */ \ | |||
| /*slot*/ \ | |||
| lq($24, 0+taboff, $17); /* r17 = w */ \ | |||
| /*delay slot $16*/ \ | |||
| lq($24, 16+taboff, $18);/* r18 = w */ \ | |||
| prevh($16, $2); /* r2 = x1 x3 x5 x7 x0 x2 x4 x6 */ \ | |||
| lq($24, 32+taboff, $19);/* r19 = w */ \ | |||
| phmadh($17, $16, $17); /* r17 = b1"b0'a1"a0' */ \ | |||
| lq($24, 48+taboff, $20);/* r20 = w */ \ | |||
| phmadh($18, $2, $18); /* r18 = b1'b0"a1'a0" */ \ | |||
| phmadh($19, $16, $19); /* r19 = b3"b2'a3"a2' */ \ | |||
| phmadh($20, $2, $20); /* r20 = b3'b2"a3'a2" */ \ | |||
| paddw($17, $18, $17); /* r17 = (b1)(b0)(a1)(a0) */ \ | |||
| paddw($19, $20, $19); /* r19 = (b3)(b2)(a3)(a2) */ \ | |||
| pcpyld($19, $17, $18); /* r18 = (a3)(a2)(a1)(a0) */ \ | |||
| pcpyud($17, $19, $20); /* r20 = (b3)(b2)(b1)(b0) */ \ | |||
| paddw($18, rnd, $18); /* r18 = (a3)(a2)(a1)(a0) */\ | |||
| paddw($18, $20, $17); /* r17 = ()()()(a0+b0) */ \ | |||
| psubw($18, $20, $20); /* r20 = ()()()(a0-b0) */ \ | |||
| psraw($17, SHIFT_INV_ROW, $17); /* r17 = (y3 y2 y1 y0) */ \ | |||
| psraw($20, SHIFT_INV_ROW, $20); /* r20 = (y4 y5 y6 y7) */ \ | |||
| ppach($20, $17, outreg);/* out = y4 y5 y6 y7 y3 y2 y1 y0 Note order */ \ | |||
| \ | |||
| prevh(outreg, $2); \ | |||
| pcpyud($2, $2, $2); \ | |||
| pcpyld($2, outreg, outreg); \ | |||
| prevh(outreg, $2); \ | |||
| pcpyud($2, $2, $2); \ | |||
| pcpyld($2, outreg, outreg); \ | |||
| } | |||
| #define DCT_8_INV_COL8() \ | |||
| \ | |||
| lq($24, TG_3_16, $2); /* r2 = tn3 */ \ | |||
| lq($24, TG_3_16, $2); /* r2 = tn3 */ \ | |||
| \ | |||
| pmulth($11, $2, $17); /* r17 = x3 * tn3 (6420) */ \ | |||
| psraw($17, 15, $17); \ | |||
| pmfhl_uw($3); /* r3 = 7531 */ \ | |||
| psraw($3, 15, $3); \ | |||
| pinteh($3, $17, $17); /* r17 = x3 * tn3 */ \ | |||
| psubh($17, $13, $17); /* r17 = tm35 */ \ | |||
| \ | |||
| pmulth($13, $2, $18); /* r18 = x5 * tn3 (6420) */ \ | |||
| psraw($18, 15, $18); \ | |||
| pmfhl_uw($3); /* r3 = 7531 */ \ | |||
| psraw($3, 15, $3); \ | |||
| pinteh($3, $18, $18); /* r18 = x5 * tn3 */ \ | |||
| paddh($18, $11, $18); /* r18 = tp35 */ \ | |||
| \ | |||
| lq($24, TG_1_16, $2); /* r2 = tn1 */ \ | |||
| \ | |||
| pmulth($15, $2, $19); /* r19 = x7 * tn1 (6420) */ \ | |||
| psraw($19, 15, $19); \ | |||
| pmfhl_uw($3); /* r3 = 7531 */ \ | |||
| psraw($3, 15, $3); \ | |||
| pinteh($3, $19, $19); /* r19 = x7 * tn1 */ \ | |||
| paddh($19, $9, $19); /* r19 = tp17 */ \ | |||
| \ | |||
| pmulth($9, $2, $20); /* r20 = x1 * tn1 (6420) */ \ | |||
| psraw($20, 15, $20); \ | |||
| pmfhl_uw($3); /* r3 = 7531 */ \ | |||
| psraw($3, 15, $3); \ | |||
| pinteh($3, $20, $20); /* r20 = x1 * tn1 */ \ | |||
| psubh($20, $15, $20); /* r20 = tm17 */ \ | |||
| \ | |||
| psubh($19, $18, $3); /* r3 = t1 */ \ | |||
| paddh($20, $17, $16); /* r16 = t2 */ \ | |||
| psubh($20, $17, $23); /* r23 = b3 */ \ | |||
| paddh($19, $18, $20); /* r20 = b0 */ \ | |||
| \ | |||
| lq($24, COS_4_16, $2); /* r2 = cs4 */ \ | |||
| \ | |||
| paddh($3, $16, $21); /* r21 = t1+t2 */ \ | |||
| psubh($3, $16, $22); /* r22 = t1-t2 */ \ | |||
| \ | |||
| pmulth($21, $2, $21); /* r21 = cs4 * (t1+t2) 6420 */ \ | |||
| psraw($21, 15, $21); \ | |||
| pmfhl_uw($3); /* r3 = 7531 */ \ | |||
| psraw($3, 15, $3); \ | |||
| pinteh($3, $21, $21); /* r21 = b1 */ \ | |||
| \ | |||
| pmulth($22, $2, $22); /* r22 = cs4 * (t1-t2) 6420 */ \ | |||
| psraw($22, 15, $22); \ | |||
| pmfhl_uw($3); /* r3 = 7531 */ \ | |||
| psraw($3, 15, $3); \ | |||
| pinteh($3, $22, $22); /* r22 = b2 */ \ | |||
| \ | |||
| lq($24, TG_2_16, $2); /* r2 = tn2 */ \ | |||
| \ | |||
| pmulth($10, $2, $17); /* r17 = x2 * tn2 (6420) */ \ | |||
| psraw($17, 15, $17); \ | |||
| pmfhl_uw($3); /* r3 = 7531 */ \ | |||
| psraw($3, 15, $3); \ | |||
| pinteh($3, $17, $17); /* r17 = x2 * tn2 */ \ | |||
| psubh($17, $14, $17); /* r17 = tm26 */ \ | |||
| \ | |||
| pmulth($14, $2, $18); /* r18 = x6 * tn2 (6420) */ \ | |||
| psraw($18, 15, $18); \ | |||
| pmfhl_uw($3); /* r3 = 7531 */ \ | |||
| psraw($3, 15, $3); \ | |||
| pinteh($3, $18, $18); /* r18 = x6 * tn2 */ \ | |||
| paddh($18, $10, $18); /* r18 = tp26 */ \ | |||
| \ | |||
| paddh($8, $12, $2); /* r2 = tp04 */ \ | |||
| psubh($8, $12, $3); /* r3 = tm04 */ \ | |||
| \ | |||
| paddh($2, $18, $16); /* r16 = a0 */ \ | |||
| psubh($2, $18, $19); /* r19 = a3 */ \ | |||
| psubh($3, $17, $18); /* r18 = a2 */ \ | |||
| paddh($3, $17, $17); /* r17 = a1 */ | |||
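Each pmulth / psraw / pmfhl.uw / psraw / pinteh group in the column pass above is a per-lane multiply by a Q15 fixed-point constant (the tangent/cosine vectors), i.e. a 16x16->32 product taken at >> 15. A scalar model of one lane, for orientation only:

    #include <stdint.h>

    /* One lane of the pmulth/pmfhl.uw/pinteh sequence: multiply by a
     * Q15 constant (e.g. a tangent scaled by 2^15) and keep bits 15..30. */
    static inline int16_t mul_q15(int16_t x, int16_t c_q15)
    {
        return (int16_t)(((int32_t)x * c_q15) >> 15);
    }

With that, the first two stanzas compute per column tm35 = mul_q15(x3, tn3) - x5 and tp35 = mul_q15(x5, tn3) + x3, matching the register comments.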
| #define DCT_8_INV_COL8_STORE(blk) \ | |||
| \ | |||
| paddh($16, $20, $2); /* y0 a0+b0 */ \ | |||
| psubh($16, $20, $16); /* y7 a0-b0 */ \ | |||
| psrah($2, SHIFT_INV_COL, $2); \ | |||
| psrah($16, SHIFT_INV_COL, $16); \ | |||
| sq($2, 0, blk); \ | |||
| sq($16, 112, blk); \ | |||
| \ | |||
| paddh($17, $21, $3); /* y1 a1+b1 */ \ | |||
| psubh($17, $21, $17); /* y6 a1-b1 */ \ | |||
| psrah($3, SHIFT_INV_COL, $3); \ | |||
| psrah($17, SHIFT_INV_COL, $17); \ | |||
| sq($3, 16, blk); \ | |||
| sq($17, 96, blk); \ | |||
| \ | |||
| paddh($18, $22, $2); /* y2 a2+b2 */ \ | |||
| psubh($18, $22, $18); /* y5 a2-b2 */ \ | |||
| psrah($2, SHIFT_INV_COL, $2); \ | |||
| psrah($18, SHIFT_INV_COL, $18); \ | |||
| sq($2, 32, blk); \ | |||
| sq($18, 80, blk); \ | |||
| \ | |||
| paddh($19, $23, $3); /* y3 a3+b3 */ \ | |||
| psubh($19, $23, $19); /* y4 a3-b3 */ \ | |||
| psrah($3, SHIFT_INV_COL, $3); \ | |||
| psrah($19, SHIFT_INV_COL, $19); \ | |||
| sq($3, 48, blk); \ | |||
| sq($19, 64, blk); | |||
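Each stanza of the store macro is one column butterfly: rows i and 7-i of the output are (a_i + b_i) and (a_i - b_i) arithmetically shifted by SHIFT_INV_COL, which is why the sq() byte offsets pair up as 0/112, 16/96, 32/80 and 48/64 (16 bytes per row of int16_t). A scalar sketch of one such pair, with the shift passed in rather than taken from the SHIFT_INV_COL define:

    #include <stdint.h>

    /* One DCT_8_INV_COL8_STORE stanza in scalar form: write rows i and 7-i
     * of column `col` of the 8x8 int16_t block. */
    static void store_pair_ref(int16_t *blk, int i, int col,
                               int32_t a, int32_t b, int shift)
    {
        blk[8 * i       + col] = (int16_t)((a + b) >> shift);
        blk[8 * (7 - i) + col] = (int16_t)((a - b) >> shift);
    }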
| #define DCT_8_INV_COL8_PMS() \ | |||
| paddh($16, $20, $2); /* y0 a0+b0 */ \ | |||
| psubh($16, $20, $20); /* y7 a0-b0 */ \ | |||
| psrah($2, SHIFT_INV_COL, $16); \ | |||
| psrah($20, SHIFT_INV_COL, $20); \ | |||
| \ | |||
| paddh($17, $21, $3); /* y1 a1+b1 */ \ | |||
| psubh($17, $21, $21); /* y6 a1-b1 */ \ | |||
| psrah($3, SHIFT_INV_COL, $17); \ | |||
| psrah($21, SHIFT_INV_COL, $21); \ | |||
| \ | |||
| paddh($18, $22, $2); /* y2 a2+b2 */ \ | |||
| psubh($18, $22, $22); /* y5 a2-b2 */ \ | |||
| psrah($2, SHIFT_INV_COL, $18); \ | |||
| psrah($22, SHIFT_INV_COL, $22); \ | |||
| \ | |||
| paddh($19, $23, $3); /* y3 a3+b3 */ \ | |||
| psubh($19, $23, $23); /* y4 a3-b3 */ \ | |||
| psrah($3, SHIFT_INV_COL, $19); \ | |||
| psrah($23, SHIFT_INV_COL, $23); | |||
| #define PUT(rs) \ | |||
| pminh(rs, $11, $2); \ | |||
| pmaxh($2, $0, $2); \ | |||
| ppacb($0, $2, $2); \ | |||
| sd3(2, 0, 4); \ | |||
| __asm__ __volatile__ ("add $4, $5, $4"); | |||
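PUT() takes one row of eight 16-bit results, clamps it with pminh against the CLIPMAX vector loaded into $11 by the callers below (presumably 255 in every lane) and pmaxh against zero, packs the lanes to bytes with ppacb, stores them with the hand-encoded sd, and advances the destination pointer ($4) by line_size ($5). A scalar equivalent, with the clamp value passed in explicitly:

    #include <stdint.h>

    /* Scalar model of PUT(): write one clamped row of 8 pixels, return the
     * advanced destination pointer. */
    static uint8_t *put_row_ref(uint8_t *dest, const int16_t *row,
                                int line_size, int clipmax)
    {
        int i;
        for (i = 0; i < 8; i++) {
            int v = row[i];
            if (v > clipmax) v = clipmax;   /* pminh against CLIPMAX */
            if (v < 0)       v = 0;         /* pmaxh against $0      */
            dest[i] = (uint8_t)v;           /* ppacb + sd            */
        }
        return dest + line_size;            /* "add $4, $5, $4"      */
    }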
| #define DCT_8_INV_COL8_PUT() \ | |||
| PUT($16); \ | |||
| PUT($17); \ | |||
| PUT($18); \ | |||
| PUT($19); \ | |||
| PUT($23); \ | |||
| PUT($22); \ | |||
| PUT($21); \ | |||
| PUT($20); | |||
| #define ADD(rs) \ | |||
| ld3(4, 0, 2); \ | |||
| pextlb($0, $2, $2); \ | |||
| paddh($2, rs, $2); \ | |||
| pminh($2, $11, $2); \ | |||
| pmaxh($2, $0, $2); \ | |||
| ppacb($0, $2, $2); \ | |||
| sd3(2, 0, 4); \ | |||
| __asm__ __volatile__ ("add $4, $5, $4"); | |||
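ADD() differs from PUT() only in that it first loads the existing eight prediction bytes (ld3), widens them to 16 bits (pextlb against $0) and adds the IDCT output before the same clamp/pack/store. A matching scalar sketch:

    #include <stdint.h>

    /* Scalar model of ADD(): add one row of residuals to the prediction,
     * clamp, store back, and advance dest by line_size. */
    static uint8_t *add_row_ref(uint8_t *dest, const int16_t *row,
                                int line_size, int clipmax)
    {
        int i;
        for (i = 0; i < 8; i++) {
            int v = dest[i] + row[i];       /* ld3 + pextlb + paddh */
            if (v > clipmax) v = clipmax;
            if (v < 0)       v = 0;
            dest[i] = (uint8_t)v;
        }
        return dest + line_size;
    }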
| /*fixme: schedule*/ | |||
| #define DCT_8_INV_COL8_ADD() \ | |||
| ADD($16); \ | |||
| ADD($17); \ | |||
| ADD($18); \ | |||
| ADD($19); \ | |||
| ADD($23); \ | |||
| ADD($22); \ | |||
| ADD($21); \ | |||
| ADD($20); | |||
| void ff_mmi_idct(int16_t * block) | |||
| { | |||
| /* $4 = block */ | |||
| __asm__ __volatile__("la $24, %0"::"m"(consttable[0])); | |||
| lq($24, ROUNDER_0, $8); | |||
| lq($24, ROUNDER_1, $7); | |||
| DCT_8_INV_ROW1($4, 0, TAB_i_04, $8, $8); | |||
| DCT_8_INV_ROW1($4, 16, TAB_i_17, $7, $9); | |||
| DCT_8_INV_ROW1($4, 32, TAB_i_26, $7, $10); | |||
| DCT_8_INV_ROW1($4, 48, TAB_i_35, $7, $11); | |||
| DCT_8_INV_ROW1($4, 64, TAB_i_04, $7, $12); | |||
| DCT_8_INV_ROW1($4, 80, TAB_i_35, $7, $13); | |||
| DCT_8_INV_ROW1($4, 96, TAB_i_26, $7, $14); | |||
| DCT_8_INV_ROW1($4, 112, TAB_i_17, $7, $15); | |||
| DCT_8_INV_COL8(); | |||
| DCT_8_INV_COL8_STORE($4); | |||
| //tell the compiler the saved temporaries $16-$23 are clobbered so it preserves them | |||
| __asm__ __volatile__(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23"); | |||
| } | |||
| void ff_mmi_idct_put(uint8_t *dest, int line_size, DCTELEM *block) | |||
| { | |||
| /* $4 = dest, $5 = line_size, $6 = block */ | |||
| __asm__ __volatile__("la $24, %0"::"m"(consttable[0])); | |||
| lq($24, ROUNDER_0, $8); | |||
| lq($24, ROUNDER_1, $7); | |||
| DCT_8_INV_ROW1($6, 0, TAB_i_04, $8, $8); | |||
| DCT_8_INV_ROW1($6, 16, TAB_i_17, $7, $9); | |||
| DCT_8_INV_ROW1($6, 32, TAB_i_26, $7, $10); | |||
| DCT_8_INV_ROW1($6, 48, TAB_i_35, $7, $11); | |||
| DCT_8_INV_ROW1($6, 64, TAB_i_04, $7, $12); | |||
| DCT_8_INV_ROW1($6, 80, TAB_i_35, $7, $13); | |||
| DCT_8_INV_ROW1($6, 96, TAB_i_26, $7, $14); | |||
| DCT_8_INV_ROW1($6, 112, TAB_i_17, $7, $15); | |||
| DCT_8_INV_COL8(); | |||
| lq($24, CLIPMAX, $11); | |||
| DCT_8_INV_COL8_PMS(); | |||
| DCT_8_INV_COL8_PUT(); | |||
| //tell the compiler the saved temporaries $16-$23 are clobbered so it preserves them | |||
| __asm__ __volatile__(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23"); | |||
| } | |||
| void ff_mmi_idct_add(uint8_t *dest, int line_size, DCTELEM *block) | |||
| { | |||
| /* $4 = dest, $5 = line_size, $6 = block */ | |||
| __asm__ __volatile__("la $24, %0"::"m"(consttable[0])); | |||
| lq($24, ROUNDER_0, $8); | |||
| lq($24, ROUNDER_1, $7); | |||
| DCT_8_INV_ROW1($6, 0, TAB_i_04, $8, $8); | |||
| DCT_8_INV_ROW1($6, 16, TAB_i_17, $7, $9); | |||
| DCT_8_INV_ROW1($6, 32, TAB_i_26, $7, $10); | |||
| DCT_8_INV_ROW1($6, 48, TAB_i_35, $7, $11); | |||
| DCT_8_INV_ROW1($6, 64, TAB_i_04, $7, $12); | |||
| DCT_8_INV_ROW1($6, 80, TAB_i_35, $7, $13); | |||
| DCT_8_INV_ROW1($6, 96, TAB_i_26, $7, $14); | |||
| DCT_8_INV_ROW1($6, 112, TAB_i_17, $7, $15); | |||
| DCT_8_INV_COL8(); | |||
| lq($24, CLIPMAX, $11); | |||
| DCT_8_INV_COL8_PMS(); | |||
| DCT_8_INV_COL8_ADD(); | |||
| //tell the compiler the saved temporaries $16-$23 are clobbered so it preserves them | |||
| __asm__ __volatile__(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23"); | |||
| } | |||
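Taken together: ff_mmi_idct() runs eight row passes and one column pass in place on the 8x8 block of 16-bit coefficients, while the _put and _add variants end with the clamped store into a picture plane instead. A purely illustrative caller (the function and variable names below are not from the source, and DCTELEM is assumed to be a 16-bit type, as ff_mmi_idct()'s prototype suggests):

    #include <stdint.h>

    void ff_mmi_idct_put(uint8_t *dest, int line_size, int16_t *block); /* DCTELEM assumed int16_t */
    void ff_mmi_idct_add(uint8_t *dest, int line_size, int16_t *block);

    /* Hypothetical caller: render one dequantized 8x8 block into a plane. */
    static void render_block(uint8_t *dst, int line_size, int16_t coeffs[64],
                             int is_intra)
    {
        if (is_intra)
            ff_mmi_idct_put(dst, line_size, coeffs);  /* overwrite the 8x8 pixels   */
        else
            ff_mmi_idct_add(dst, line_size, coeffs);  /* add residual to prediction */
    }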
| @@ -5,148 +5,148 @@ | |||
| /* | |||
| #define r0 $zero | |||
| #define r1 $at //assembler! | |||
| #define r2 $v0 //return | |||
| #define r3 $v1 //return | |||
| #define r4 $a0 //arg | |||
| #define r5 $a1 //arg | |||
| #define r6 $a2 //arg | |||
| #define r7 $a3 //arg | |||
| #define r8 $t0 //temp | |||
| #define r9 $t1 //temp | |||
| #define r10 $t2 //temp | |||
| #define r11 $t3 //temp | |||
| #define r12 $t4 //temp | |||
| #define r13 $t5 //temp | |||
| #define r14 $t6 //temp | |||
| #define r15 $t7 //temp | |||
| #define r16 $s0 //saved temp | |||
| #define r17 $s1 //saved temp | |||
| #define r18 $s2 //saved temp | |||
| #define r19 $s3 //saved temp | |||
| #define r20 $s4 //saved temp | |||
| #define r21 $s5 //saved temp | |||
| #define r22 $s6 //saved temp | |||
| #define r23 $s7 //saved temp | |||
| #define r24 $t8 //temp | |||
| #define r25 $t9 //temp | |||
| #define r26 $k0 //kernel | |||
| #define r27 $k1 //kernel | |||
| #define r28 $gp //global ptr | |||
| #define r29 $sp //stack ptr | |||
| #define r30 $fp //frame ptr | |||
| #define r31 $ra //return addr | |||
| */ | |||
| #define lq(base, off, reg) \ | |||
| __asm__ __volatile__ ("lq " #reg ", %0("#base ")" : : "i" (off) ) | |||
| #define lq2(mem, reg) \ | |||
| __asm__ __volatile__ ("lq " #reg ", %0" : : "r" (mem)) | |||
| #define sq(reg, off, base) \ | |||
| __asm__ __volatile__ ("sq " #reg ", %0("#base ")" : : "i" (off) ) | |||
| /* | |||
| #define ld(base, off, reg) \ | |||
| __asm__ __volatile__ ("ld " #reg ", " #off "("#base ")") | |||
| */ | |||
| #define ld3(base, off, reg) \ | |||
| __asm__ __volatile__ (".word %0" : : "i" ( 0xdc000000 | (base<<21) | (reg<<16) | (off))) | |||
| #define ldr3(base, off, reg) \ | |||
| __asm__ __volatile__ (".word %0" : : "i" ( 0x6c000000 | (base<<21) | (reg<<16) | (off))) | |||
| #define ldl3(base, off, reg) \ | |||
| __asm__ __volatile__ (".word %0" : : "i" ( 0x68000000 | (base<<21) | (reg<<16) | (off))) | |||
| /* | |||
| #define sd(reg, off, base) \ | |||
| __asm__ __volatile__ ("sd " #reg ", " #off "("#base ")") | |||
| */ | |||
| //the assembler seems to mis-encode the 'sd' mnemonic, so the instruction word is emitted by hand | |||
| #define sd3(reg, off, base) \ | |||
| __asm__ __volatile__ (".word %0" : : "i" ( 0xfc000000 | (base<<21) | (reg<<16) | (off))) | |||
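The ld3/ldr3/ldl3/sd3 macros emit the 64-bit load/store instructions as raw .word values because of the assembler problem noted above. The encoding is the plain MIPS I-type layout; a small helper (name illustrative) shows how the constants are assembled:

    #include <stdint.h>

    /* MIPS I-type word as used by ld3/ldr3/ldl3/sd3:
     * opcode(6) | base(5) | rt(5) | 16-bit offset. */
    static uint32_t mips_itype(uint32_t opcode_shifted, unsigned base,
                               unsigned rt, int32_t off)
    {
        return opcode_shifted | (base << 21) | (rt << 16) | ((uint32_t)off & 0xffff);
    }

    /* e.g. sd3(2, 0, 4) expands to 0xfc000000 | (4<<21) | (2<<16) | 0
     * == 0xfc820000, i.e. "sd $2, 0($4)"; 0xdc000000 is "ld", 0x6c000000 "ldr",
     * 0x68000000 "ldl".  Note that the macros themselves do not mask the offset. */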
| #define sw(reg, off, base) \ | |||
| __asm__ __volatile__ ("sw " #reg ", " #off "("#base ")") | |||
| #define sq2(reg, mem) \ | |||
| __asm__ __volatile__ ("sq " #reg ", %0" : : "m" (*(mem))) | |||
| #define pinth(rs, rt, rd) \ | |||
| __asm__ __volatile__ ("pinth " #rd ", " #rs ", " #rt ) | |||
| #define phmadh(rs, rt, rd) \ | |||
| __asm__ __volatile__ ("phmadh " #rd ", " #rs ", " #rt ) | |||
| #define pcpyud(rs, rt, rd) \ | |||
| __asm__ __volatile__ ("pcpyud " #rd ", " #rs ", " #rt ) | |||
| #define pcpyld(rs, rt, rd) \ | |||
| __asm__ __volatile__ ("pcpyld " #rd ", " #rs ", " #rt ) | |||
| #define pcpyh(rt, rd) \ | |||
| __asm__ __volatile__ ("pcpyh " #rd ", " #rt ) | |||
| #define paddw(rs, rt, rd) \ | |||
| __asm__ __volatile__ ("paddw " #rd ", " #rs ", " #rt ) | |||
| #define pextlw(rs, rt, rd) \ | |||
| __asm__ __volatile__ ("pextlw " #rd ", " #rs ", " #rt ) | |||
| #define pextuw(rs, rt, rd) \ | |||
| __asm__ __volatile__ ("pextuw " #rd ", " #rs ", " #rt ) | |||
| #define pextlh(rs, rt, rd) \ | |||
| __asm__ __volatile__ ("pextlh " #rd ", " #rs ", " #rt ) | |||
| #define pextuh(rs, rt, rd) \ | |||
| __asm__ __volatile__ ("pextuh " #rd ", " #rs ", " #rt ) | |||
| #define psubw(rs, rt, rd) \ | |||
| __asm__ __volatile__ ("psubw " #rd ", " #rs ", " #rt ) | |||
| #define psraw(rt, sa, rd) \ | |||
| __asm__ __volatile__ ("psraw " #rd ", " #rt ", %0" : : "i"(sa) ) | |||
| #define ppach(rs, rt, rd) \ | |||
| __asm__ __volatile__ ("ppach " #rd ", " #rs ", " #rt ) | |||
| #define ppacb(rs, rt, rd) \ | |||
| __asm__ __volatile__ ("ppacb " #rd ", " #rs ", " #rt ) | |||
| #define prevh(rt, rd) \ | |||
| __asm__ __volatile__ ("prevh " #rd ", " #rt ) | |||
| #define pmulth(rs, rt, rd) \ | |||
| __asm__ __volatile__ ("pmulth " #rd ", " #rs ", " #rt ) | |||
| #define pmaxh(rs, rt, rd) \ | |||
| __asm__ __volatile__ ("pmaxh " #rd ", " #rs ", " #rt ) | |||
| #define pminh(rs, rt, rd) \ | |||
| __asm__ __volatile__ ("pminh " #rd ", " #rs ", " #rt ) | |||
| #define pinteh(rs, rt, rd) \ | |||
| __asm__ __volatile__ ("pinteh " #rd ", " #rs ", " #rt ) | |||
| #define paddh(rs, rt, rd) \ | |||
| __asm__ __volatile__ ("paddh " #rd ", " #rs ", " #rt ) | |||
| #define psubh(rs, rt, rd) \ | |||
| __asm__ __volatile__ ("psubh " #rd ", " #rs ", " #rt ) | |||
| #define psrah(rt, sa, rd) \ | |||
| __asm__ __volatile__ ("psrah " #rd ", " #rt ", %0" : : "i"(sa) ) | |||
| #define pmfhl_uw(rd) \ | |||
| __asm__ __volatile__ ("pmfhl.uw " #rd) | |||
| #define pextlb(rs, rt, rd) \ | |||
| __asm__ __volatile__ ("pextlb " #rd ", " #rs ", " #rt ) | |||
| #endif | |||
| @@ -41,7 +41,7 @@ static void dct_unquantize_h263_mmi(MpegEncContext *s, | |||
| level = block[0] * s->c_dc_scale; | |||
| }else { | |||
| qadd = 0; | |||
| level = block[0]; | |||
| } | |||
| nCoeffs= 63; //does not always use the zigzag table | |||
| } else { | |||
| @@ -49,29 +49,29 @@ static void dct_unquantize_h263_mmi(MpegEncContext *s, | |||
| } | |||
| asm volatile( | |||
| "add $14, $0, %3 \n\t" | |||
| "pcpyld $8, %0, %0 \n\t" | |||
| "pcpyh $8, $8 \n\t" //r8 = qmul | |||
| "pcpyld $9, %1, %1 \n\t" | |||
| "pcpyh $9, $9 \n\t" //r9 = qadd | |||
| ".p2align 2 \n\t" | |||
| "1: \n\t" | |||
| "lq $10, 0($14) \n\t" //r10 = level | |||
| "addi $14, $14, 16 \n\t" //block+=8 | |||
| "addi %2, %2, -8 \n\t" | |||
| "pcgth $11, $0, $10 \n\t" //r11 = level < 0 ? -1 : 0 | |||
| "pcgth $12, $10, $0 \n\t" //r12 = level > 0 ? -1 : 0 | |||
| "por $12, $11, $12 \n\t" | |||
| "pmulth $10, $10, $8 \n\t" | |||
| "paddh $13, $9, $11 \n\t" | |||
| "pxor $13, $13, $11 \n\t" //r13 = level < 0 ? -qadd : qadd | |||
| "pmfhl.uw $11 \n\t" | |||
| "pinteh $10, $11, $10 \n\t" //r10 = level * qmul | |||
| "paddh $10, $10, $13 \n\t" | |||
| "pand $10, $10, $12 \n\t" | |||
| "sq $10, -16($14) \n\t" | |||
| "bgez %2, 1b \n\t" | |||
| :: "r"(qmul), "r" (qadd), "r" (nCoeffs), "r" (block) : "$8", "$9", "$10", "$11", "$12", "$13", "$14", "memory" ); | |||
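        /* Scalar meaning of the loop above (eight coefficients per lq):
         *     if      (level < 0) level = level * qmul - qadd;
         *     else if (level > 0) level = level * qmul + qadd;
         *     else                level = 0;
         * pcgth builds per-lane sign and nonzero masks, the paddh/pxor pair
         * turns qadd into +/-qadd depending on the sign, and the final pand
         * zeroes the lanes whose input level was zero. */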
| if(s->mb_intra) | |||
| block[0]= level; | |||