Browse Source

PPC: 32-bit asm for MAC64 and MLS64

GCC makes a mess of these operations, so give it a hand.

55% faster MP3 decoding on G4.

Originally committed as revision 18794 to svn://svn.ffmpeg.org/ffmpeg/trunk
tags/v0.6
Måns Rullgård 16 years ago
parent
commit
014b7ecb66
1 changed file with 30 additions and 0 deletions
  1. +30
    -0
      libavcodec/ppc/mathops.h

+ 30
- 0
libavcodec/ppc/mathops.h View File

@@ -44,4 +44,34 @@ static inline av_const int MULH(int a, int b){
return r;
}

#if !HAVE_PPC64
/**
 * 64-bit multiply-accumulate for 32-bit PowerPC: returns d + a * b, where
 * a * b is the full 64-bit signed product of the two 32-bit operands and
 * the addition is carried out on two 32-bit halves (i.e. modulo 2^64).
 *
 * @param d 64-bit accumulator value
 * @param a first signed 32-bit factor
 * @param b second signed 32-bit factor
 * @return the updated 64-bit accumulator value
 */
static inline av_const int64_t MAC64(int64_t d, int a, int b)
{
/* Expose the accumulator as two 32-bit words so each half can live in its
 * own register. hl[0] is used as the most significant word and hl[1] as
 * the least significant one, which matches a big-endian memory layout. */
union { uint64_t x; unsigned hl[2]; } x = { d };
/* Scratch outputs: h receives the high and l the low 32 bits of a * b. */
int h, l;
/* Operands: %0 = high word, %1 = low word, %2 = h, %3 = l, %4 = a, %5 = b.
 *   mullw  l    = low  32 bits of a * b
 *   mulhw  h    = high 32 bits of the signed product a * b
 *   addc   low  = low + l, recording the carry out
 *   adde   high = high + h + carry
 * h and l are early-clobber ("=&r") because l is written by the first
 * instruction while a and b are still needed by the second one. */
__asm__ ("mullw %3, %4, %5 \n\t"
"mulhw %2, %4, %5 \n\t"
"addc %1, %1, %3 \n\t"
"adde %0, %0, %2 \n\t"
: "+r"(x.hl[0]), "+r"(x.hl[1]), "=&r"(h), "=&r"(l)
: "r"(a), "r"(b));
return x.x;
}
/* Accumulate-in-place wrapper: MAC64(d, a, b) calls the inline function
 * above and assigns its result back to d. A macro name is not re-expanded
 * inside its own replacement list, so the inner MAC64 is the function.
 * Note that d appears twice in the expansion and must therefore be an
 * lvalue expression without side effects. */
#define MAC64(d, a, b) ((d) = MAC64(d, a, b))

/**
 * 64-bit multiply-subtract for 32-bit PowerPC: returns d - a * b, where
 * a * b is the full 64-bit signed product of the two 32-bit operands and
 * the subtraction is carried out on two 32-bit halves (i.e. modulo 2^64).
 *
 * @param d 64-bit accumulator value
 * @param a first signed 32-bit factor
 * @param b second signed 32-bit factor
 * @return the updated 64-bit accumulator value
 */
static inline av_const int64_t MLS64(int64_t d, int a, int b)
{
/* Expose the accumulator as two 32-bit words so each half can live in its
 * own register. hl[0] is used as the most significant word and hl[1] as
 * the least significant one, which matches a big-endian memory layout. */
union { uint64_t x; unsigned hl[2]; } x = { d };
/* Scratch outputs: h receives the high and l the low 32 bits of a * b. */
int h, l;
/* Operands: %0 = high word, %1 = low word, %2 = h, %3 = l, %4 = a, %5 = b.
 *   mullw  l    = low  32 bits of a * b
 *   mulhw  h    = high 32 bits of the signed product a * b
 *   subfc  low  = low - l, recording the borrow in the carry bit
 *   subfe  high = high - h - borrow
 * The "subtract from" instructions compute (third operand) - (second
 * operand), hence the operand order below.
 * h and l are early-clobber ("=&r") because l is written by the first
 * instruction while a and b are still needed by the second one. */
__asm__ ("mullw %3, %4, %5 \n\t"
"mulhw %2, %4, %5 \n\t"
"subfc %1, %3, %1 \n\t"
"subfe %0, %2, %0 \n\t"
: "+r"(x.hl[0]), "+r"(x.hl[1]), "=&r"(h), "=&r"(l)
: "r"(a), "r"(b));
return x.x;
}
/* Subtract-in-place wrapper: MLS64(d, a, b) calls the inline function
 * above and assigns its result back to d. A macro name is not re-expanded
 * inside its own replacement list, so the inner MLS64 is the function.
 * Note that d appears twice in the expansion and must therefore be an
 * lvalue expression without side effects. */
#define MLS64(d, a, b) ((d) = MLS64(d, a, b))
#endif

#endif /* AVCODEC_PPC_MATHOPS_H */

Loading…
Cancel
Save