PPC: 32-bit asm for MAC64 and MLS64

GCC makes a mess of these operations, so give it a hand. 55% faster MP3 decoding on G4. Originally committed as revision 18794 to svn://svn.ffmpeg.org/ffmpeg/trunk
17 years ago · 014b7ecb66
--- a/libavcodec/ppc/mathops.h
+++ b/libavcodec/ppc/mathops.h
@@ -44,4 +44,34 @@ static inline av_const int MULH(int a, int b){
    return r;
 }

 #if !HAVE_PPC64
 /**
  * 64-bit multiply-accumulate built from 32-bit PowerPC instructions.
  *
  * Forms the signed 64-bit product of a and b and adds it to d.
  *
  * @param d 64-bit accumulator value
  * @param a first signed 32-bit factor
  * @param b second signed 32-bit factor
  * @return d + a * b (the two-word add wraps modulo 2^64)
  */
 static inline av_const int64_t MAC64(int64_t d, int a, int b)
 {
    /* View the accumulator as two 32-bit words. The asm below treats hl[0]
     * as the upper word and hl[1] as the lower word, so it relies on the
     * most significant word being stored first (big-endian word order). */
    union { uint64_t x; unsigned hl[2]; } x = { d };
    int h, l;
    /* Operands: %0 = hl[0], %1 = hl[1], %2 = h, %3 = l, %4 = a, %5 = b.
     *   mullw: l = low  32 bits of a * b
     *   mulhw: h = high 32 bits of the signed product a * b
     *   addc : low word  += l, recording the carry
     *   adde : high word += h + carry
     * h and l are early-clobber ("=&r") so they are not placed in the
     * registers holding a or b, which are still read after l is written.
     * NOTE(review): addc/adde modify the carry bit and no clobber is listed;
     * presumably the compiler treats the carry as clobbered by any asm --
     * confirm for compilers other than GCC. */
    __asm__ ("mullw %3, %4, %5   \n\t"
             "mulhw %2, %4, %5   \n\t"
             "addc  %1, %1, %3   \n\t"
             "adde  %0, %0, %2   \n\t"
             : "+r"(x.hl[0]), "+r"(x.hl[1]), "=&r"(h), "=&r"(l)
             : "r"(a), "r"(b));
    return x.x;
 }
 /* Statement-style wrapper: MAC64(d, a, b) calls the function above and
  * stores the result back into d. A macro name is not re-expanded inside its
  * own replacement, so the inner MAC64 refers to the function. Note that d
  * appears twice in the expansion. */
 #define MAC64(d, a, b) ((d) = MAC64(d, a, b))

 /**
  * 64-bit multiply-subtract built from 32-bit PowerPC instructions.
  *
  * Forms the signed 64-bit product of a and b and subtracts it from d.
  *
  * @param d 64-bit accumulator value
  * @param a first signed 32-bit factor
  * @param b second signed 32-bit factor
  * @return d - a * b (the two-word subtract wraps modulo 2^64)
  */
 static inline av_const int64_t MLS64(int64_t d, int a, int b)
 {
    /* View the accumulator as two 32-bit words. The asm below treats hl[0]
     * as the upper word and hl[1] as the lower word, so it relies on the
     * most significant word being stored first (big-endian word order). */
    union { uint64_t x; unsigned hl[2]; } x = { d };
    int h, l;
    /* Operands: %0 = hl[0], %1 = hl[1], %2 = h, %3 = l, %4 = a, %5 = b.
     *   mullw: l = low  32 bits of a * b
     *   mulhw: h = high 32 bits of the signed product a * b
     *   subfc: low word  = low word  - l, recording the borrow in the carry
     *   subfe: high word = high word - h - borrow
     * ("subtract from" computes second source minus first source, hence the
     * product operand is listed first.)
     * h and l are early-clobber ("=&r") so they are not placed in the
     * registers holding a or b, which are still read after l is written.
     * NOTE(review): subfc/subfe modify the carry bit and no clobber is
     * listed; presumably the compiler treats the carry as clobbered by any
     * asm -- confirm for compilers other than GCC. */
    __asm__ ("mullw %3, %4, %5   \n\t"
             "mulhw %2, %4, %5   \n\t"
             "subfc %1, %3, %1   \n\t"
             "subfe %0, %2, %0   \n\t"
             : "+r"(x.hl[0]), "+r"(x.hl[1]), "=&r"(h), "=&r"(l)
             : "r"(a), "r"(b));
    return x.x;
 }
 /* Statement-style wrapper: MLS64(d, a, b) calls the function above and
  * stores the result back into d. A macro name is not re-expanded inside its
  * own replacement, so the inner MLS64 refers to the function. Note that d
  * appears twice in the expansion. */
 #define MLS64(d, a, b) ((d) = MLS64(d, a, b))
 #endif

 #endif /* AVCODEC_PPC_MATHOPS_H */