ppc: Restrict some Altivec implementations to Big Endian

In little-endian mode, the vec_ld/vec_st operations work as expected only for byte vectors, so these AltiVec implementations are now compiled only when HAVE_BIGENDIAN is set.
10 years ago · da60b99a88
--- a/libavcodec/ppc/apedsp_altivec.c
+++ b/libavcodec/ppc/apedsp_altivec.c
@@ -29,7 +29,7 @@
 #include "libavutil/ppc/types_altivec.h"
 #include "libavcodec/apedsp.h"

 #if HAVE_ALTIVEC
 #if HAVE_ALTIVEC && HAVE_BIGENDIAN
 static int32_t scalarproduct_and_madd_int16_altivec(int16_t *v1,
                                                    const int16_t *v2,
                                                    const int16_t *v3,
@@ -73,7 +73,7 @@ static int32_t scalarproduct_and_madd_int16_altivec(int16_t *v1,

 av_cold void ff_apedsp_init_ppc(APEDSPContext *c)
 {
 #if HAVE_ALTIVEC
 #if HAVE_ALTIVEC && HAVE_BIGENDIAN
    if (!PPC_ALTIVEC(av_get_cpu_flags()))
        return;

--- a/libavcodec/ppc/audiodsp.c
+++ b/libavcodec/ppc/audiodsp.c
@@ -35,7 +35,7 @@
 #include "libavutil/ppc/util_altivec.h"
 #include "libavcodec/audiodsp.h"

 #if HAVE_ALTIVEC
 #if HAVE_ALTIVEC && HAVE_BIGENDIAN

 static int32_t scalarproduct_int16_altivec(const int16_t *v1, const int16_t *v2,
                                           int order)
@@ -63,7 +63,7 @@ static int32_t scalarproduct_int16_altivec(const int16_t *v1, const int16_t *v2,

 av_cold void ff_audiodsp_init_ppc(AudioDSPContext *c)
 {
 #if HAVE_ALTIVEC
 #if HAVE_ALTIVEC && HAVE_BIGENDIAN
    if (!PPC_ALTIVEC(av_get_cpu_flags()))
        return;

--- a/libavcodec/ppc/dct-test.c
+++ b/libavcodec/ppc/dct-test.c
@@ -21,7 +21,7 @@
 #include "fdct.h"

 static const struct algo fdct_tab_arch[] = {
 #if HAVE_ALTIVEC
 #if HAVE_ALTIVEC && HAVE_BIGENDIAN
    { "altivecfdct", ff_fdct_altivec, FF_IDCT_PERM_NONE, AV_CPU_FLAG_ALTIVEC },
 #endif
    { 0 }
--- a/libavcodec/ppc/fdctdsp.c
+++ b/libavcodec/ppc/fdctdsp.c
@@ -29,7 +29,7 @@
 #include "libavcodec/fdctdsp.h"
 #include "fdct.h"

 #if HAVE_ALTIVEC
 #if HAVE_ALTIVEC && HAVE_BIGENDIAN

 #define vs16(v)   ((vector signed short) (v))
 #define vs32(v)     ((vector signed int) (v))
@@ -465,7 +465,7 @@ void ff_fdct_altivec(int16_t *block)
 av_cold void ff_fdctdsp_init_ppc(FDCTDSPContext *c, AVCodecContext *avctx,
                                 unsigned high_bit_depth)
 {
 #if HAVE_ALTIVEC
 #if HAVE_ALTIVEC && HAVE_BIGENDIAN
    if (!PPC_ALTIVEC(av_get_cpu_flags()))
        return;

--- a/libavcodec/ppc/fft_init.c
+++ b/libavcodec/ppc/fft_init.c
@@ -39,7 +39,7 @@
 void ff_fft_calc_altivec(FFTContext *s, FFTComplex *z);
 void ff_fft_calc_interleave_altivec(FFTContext *s, FFTComplex *z);

 #if HAVE_GNU_AS && HAVE_ALTIVEC
 #if HAVE_GNU_AS && HAVE_ALTIVEC && HAVE_BIGENDIAN
 static void imdct_half_altivec(FFTContext *s, FFTSample *output, const FFTSample *input)
 {
    int j, k;
@@ -143,7 +143,7 @@ static void imdct_calc_altivec(FFTContext *s, FFTSample *output, const FFTSample

 av_cold void ff_fft_init_ppc(FFTContext *s)
 {
 #if HAVE_GNU_AS && HAVE_ALTIVEC
 #if HAVE_GNU_AS && HAVE_ALTIVEC && HAVE_BIGENDIAN
    if (!PPC_ALTIVEC(av_get_cpu_flags()))
        return;

--- a/libavcodec/ppc/fmtconvert_altivec.c
+++ b/libavcodec/ppc/fmtconvert_altivec.c
@@ -26,7 +26,7 @@
 #include "libavutil/ppc/util_altivec.h"
 #include "libavcodec/fmtconvert.h"

 #if HAVE_ALTIVEC
 #if HAVE_ALTIVEC && HAVE_BIGENDIAN

 static void int32_to_float_fmul_scalar_altivec(float *dst, const int32_t *src,
                                               float mul, int len)
@@ -57,7 +57,7 @@ static void int32_to_float_fmul_scalar_altivec(float *dst, const int32_t *src,
 av_cold void ff_fmt_convert_init_ppc(FmtConvertContext *c,
                                     AVCodecContext *avctx)
 {
 #if HAVE_ALTIVEC
 #if HAVE_ALTIVEC && HAVE_BIGENDIAN
    if (!PPC_ALTIVEC(av_get_cpu_flags()))
        return;

--- a/libavcodec/ppc/h264chroma_init.c
+++ b/libavcodec/ppc/h264chroma_init.c
@@ -27,7 +27,7 @@
 #include "libavutil/ppc/util_altivec.h"
 #include "libavcodec/h264chroma.h"

 #if HAVE_ALTIVEC
 #if HAVE_ALTIVEC && HAVE_BIGENDIAN
 #define PUT_OP_U8_ALTIVEC(d, s, dst) d = s
 #define AVG_OP_U8_ALTIVEC(d, s, dst) d = vec_avg(dst, s)

@@ -50,7 +50,7 @@

 av_cold void ff_h264chroma_init_ppc(H264ChromaContext *c, int bit_depth)
 {
 #if HAVE_ALTIVEC
 #if HAVE_ALTIVEC && HAVE_BIGENDIAN
    const int high_bit_depth = bit_depth > 8;

    if (!PPC_ALTIVEC(av_get_cpu_flags()))
--- a/libavcodec/ppc/h264dsp.c
+++ b/libavcodec/ppc/h264dsp.c
@@ -28,7 +28,7 @@
 #include "libavcodec/h264data.h"
 #include "libavcodec/h264dsp.h"

 #if HAVE_ALTIVEC
 #if HAVE_ALTIVEC && HAVE_BIGENDIAN

 /****************************************************************************
 * IDCT transform:
@@ -745,7 +745,7 @@ H264_WEIGHT( 8)
 av_cold void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth,
                                 const int chroma_format_idc)
 {
 #if HAVE_ALTIVEC
 #if HAVE_ALTIVEC && HAVE_BIGENDIAN
    if (!PPC_ALTIVEC(av_get_cpu_flags()))
        return;

--- a/libavcodec/ppc/h264qpel.c
+++ b/libavcodec/ppc/h264qpel.c
@@ -28,7 +28,7 @@
 #include "libavcodec/h264qpel.h"
 #include "hpeldsp_altivec.h"

 #if HAVE_ALTIVEC
 #if HAVE_ALTIVEC && HAVE_BIGENDIAN

 #define PUT_OP_U8_ALTIVEC(d, s, dst) d = s
 #define AVG_OP_U8_ALTIVEC(d, s, dst) d = vec_avg(dst, s)
@@ -286,7 +286,7 @@ H264_MC(avg_, 16, altivec)

 av_cold void ff_h264qpel_init_ppc(H264QpelContext *c, int bit_depth)
 {
 #if HAVE_ALTIVEC
 #if HAVE_ALTIVEC && HAVE_BIGENDIAN
    const int high_bit_depth = bit_depth > 8;

    if (!PPC_ALTIVEC(av_get_cpu_flags()))
--- a/libavcodec/ppc/hpeldsp_altivec.c
+++ b/libavcodec/ppc/hpeldsp_altivec.c
@@ -34,7 +34,7 @@
 #include "libavcodec/hpeldsp.h"
 #include "hpeldsp_altivec.h"

 #if HAVE_ALTIVEC
 #if HAVE_ALTIVEC && HAVE_BIGENDIAN
 /* next one assumes that ((line_size % 16) == 0) */
 void ff_put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 {
@@ -449,7 +449,7 @@ static void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, ptrdi

 av_cold void ff_hpeldsp_init_ppc(HpelDSPContext *c, int flags)
 {
 #if HAVE_ALTIVEC
 #if HAVE_ALTIVEC && HAVE_BIGENDIAN
    if (!PPC_ALTIVEC(av_get_cpu_flags()))
        return;

--- a/libavcodec/ppc/huffyuvdsp_altivec.c
+++ b/libavcodec/ppc/huffyuvdsp_altivec.c
@@ -32,7 +32,7 @@
 #include "libavutil/ppc/util_altivec.h"
 #include "libavcodec/huffyuvdsp.h"

 #if HAVE_ALTIVEC
 #if HAVE_ALTIVEC && HAVE_BIGENDIAN
 static void add_bytes_altivec(uint8_t *dst, uint8_t *src, int w)
 {
    register int i;
@@ -53,7 +53,7 @@ static void add_bytes_altivec(uint8_t *dst, uint8_t *src, int w)

 av_cold void ff_huffyuvdsp_init_ppc(HuffYUVDSPContext *c)
 {
 #if HAVE_ALTIVEC
 #if HAVE_ALTIVEC && HAVE_BIGENDIAN
    if (!PPC_ALTIVEC(av_get_cpu_flags()))
        return;

--- a/libavcodec/ppc/idctdsp.c
+++ b/libavcodec/ppc/idctdsp.c
@@ -43,7 +43,7 @@
 #include "libavutil/ppc/types_altivec.h"
 #include "libavcodec/idctdsp.h"

 #if HAVE_ALTIVEC
 #if HAVE_ALTIVEC && HAVE_BIGENDIAN

 #define IDCT_HALF                                       \
    /* 1st stage */                                     \
@@ -230,7 +230,7 @@ static void idct_add_altivec(uint8_t *dest, int stride, int16_t *blk)
 av_cold void ff_idctdsp_init_ppc(IDCTDSPContext *c, AVCodecContext *avctx,
                                 unsigned high_bit_depth)
 {
 #if HAVE_ALTIVEC
 #if HAVE_ALTIVEC && HAVE_BIGENDIAN
    if (!PPC_ALTIVEC(av_get_cpu_flags()))
        return;

--- a/libavcodec/ppc/me_cmp.c
+++ b/libavcodec/ppc/me_cmp.c
@@ -34,7 +34,7 @@
 #include "libavcodec/mpegvideo.h"
 #include "libavcodec/me_cmp.h"

 #if HAVE_ALTIVEC
 #if HAVE_ALTIVEC && HAVE_BIGENDIAN
 static int sad16_x2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                            ptrdiff_t stride, int h)
 {
@@ -746,7 +746,7 @@ static int hadamard8_diff16_altivec(MpegEncContext *s, uint8_t *dst,

 av_cold void ff_me_cmp_init_ppc(MECmpContext *c, AVCodecContext *avctx)
 {
 #if HAVE_ALTIVEC
 #if HAVE_ALTIVEC && HAVE_BIGENDIAN
    if (!PPC_ALTIVEC(av_get_cpu_flags()))
        return;

--- a/libavcodec/ppc/mpegaudiodsp_altivec.c
+++ b/libavcodec/ppc/mpegaudiodsp_altivec.c
@@ -27,7 +27,7 @@
 #include "libavutil/ppc/util_altivec.h"
 #include "libavcodec/mpegaudiodsp.h"

 #if HAVE_ALTIVEC
 #if HAVE_ALTIVEC && HAVE_BIGENDIAN

 #define MACS(rt, ra, rb) rt+=(ra)*(rb)
 #define MLSS(rt, ra, rb) rt-=(ra)*(rb)
@@ -132,7 +132,7 @@ static void apply_window_mp3(float *in, float *win, int *unused, float *out,

 av_cold void ff_mpadsp_init_ppc(MPADSPContext *s)
 {
 #if HAVE_ALTIVEC
 #if HAVE_ALTIVEC && HAVE_BIGENDIAN
    if (!PPC_ALTIVEC(av_get_cpu_flags()))
        return;

--- a/libavcodec/ppc/mpegvideo_altivec.c
+++ b/libavcodec/ppc/mpegvideo_altivec.c
@@ -32,7 +32,7 @@
 #include "libavutil/ppc/util_altivec.h"
 #include "libavcodec/mpegvideo.h"

 #if HAVE_ALTIVEC
 #if HAVE_ALTIVEC && HAVE_BIGENDIAN

 /* AltiVec version of dct_unquantize_h263
   this code assumes `block' is 16 bytes-aligned */
@@ -117,7 +117,7 @@ static void dct_unquantize_h263_altivec(MpegEncContext *s,

 av_cold void ff_mpv_common_init_ppc(MpegEncContext *s)
 {
 #if HAVE_ALTIVEC
 #if HAVE_ALTIVEC && HAVE_BIGENDIAN
    if (!PPC_ALTIVEC(av_get_cpu_flags()))
        return;

--- a/libavcodec/ppc/mpegvideodsp.c
+++ b/libavcodec/ppc/mpegvideodsp.c
@@ -25,7 +25,7 @@
 #include "libavutil/ppc/util_altivec.h"
 #include "libavcodec/mpegvideodsp.h"

 #if HAVE_ALTIVEC
 #if HAVE_ALTIVEC && HAVE_BIGENDIAN
 /* AltiVec-enhanced gmc1. ATM this code assumes stride is a multiple of 8
 * to preserve proper dst alignment. */
 static void gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */,
@@ -127,7 +127,7 @@ static void gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */,

 av_cold void ff_mpegvideodsp_init_ppc(MpegVideoDSPContext *c)
 {
 #if HAVE_ALTIVEC
 #if HAVE_ALTIVEC && HAVE_BIGENDIAN
    c->gmc1 = gmc1_altivec;
 #endif /* HAVE_ALTIVEC */
 }
--- a/libavcodec/ppc/mpegvideoencdsp.c
+++ b/libavcodec/ppc/mpegvideoencdsp.c
@@ -29,7 +29,7 @@
 #include "libavutil/ppc/util_altivec.h"
 #include "libavcodec/mpegvideoencdsp.h"

 #if HAVE_ALTIVEC
 #if HAVE_ALTIVEC && HAVE_BIGENDIAN

 static int pix_norm1_altivec(uint8_t *pix, int line_size)
 {
@@ -93,7 +93,7 @@ static int pix_sum_altivec(uint8_t *pix, int line_size)
 av_cold void ff_mpegvideoencdsp_init_ppc(MpegvideoEncDSPContext *c,
                                         AVCodecContext *avctx)
 {
 #if HAVE_ALTIVEC
 #if HAVE_ALTIVEC && HAVE_BIGENDIAN
    if (!PPC_ALTIVEC(av_get_cpu_flags()))
        return;

--- a/libavcodec/ppc/pixblockdsp.c
+++ b/libavcodec/ppc/pixblockdsp.c
@@ -33,7 +33,7 @@
 #include "libavcodec/avcodec.h"
 #include "libavcodec/pixblockdsp.h"

 #if HAVE_ALTIVEC
 #if HAVE_ALTIVEC && HAVE_BIGENDIAN

 static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
                               int line_size)
@@ -137,7 +137,7 @@ av_cold void ff_pixblockdsp_init_ppc(PixblockDSPContext *c,
                                     AVCodecContext *avctx,
                                     unsigned high_bit_depth)
 {
 #if HAVE_ALTIVEC
 #if HAVE_ALTIVEC && HAVE_BIGENDIAN
    if (!PPC_ALTIVEC(av_get_cpu_flags()))
        return;

--- a/libavcodec/ppc/svq1enc_altivec.c
+++ b/libavcodec/ppc/svq1enc_altivec.c
@@ -32,7 +32,7 @@
 #include "libavutil/ppc/util_altivec.h"
 #include "libavcodec/svq1enc.h"

 #if HAVE_ALTIVEC
 #if HAVE_ALTIVEC && HAVE_BIGENDIAN
 static int ssd_int8_vs_int16_altivec(const int8_t *pix1, const int16_t *pix2,
                                     int size)
 {
@@ -76,7 +76,7 @@ static int ssd_int8_vs_int16_altivec(const int8_t *pix1, const int16_t *pix2,

 av_cold void ff_svq1enc_init_ppc(SVQ1EncContext *c)
 {
 #if HAVE_ALTIVEC
 #if HAVE_ALTIVEC && HAVE_BIGENDIAN
    if (!PPC_ALTIVEC(av_get_cpu_flags()))
        return;

--- a/libavcodec/ppc/vc1dsp_altivec.c
+++ b/libavcodec/ppc/vc1dsp_altivec.c
@@ -27,7 +27,7 @@
 #include "libavutil/ppc/util_altivec.h"
 #include "libavcodec/vc1dsp.h"

 #if HAVE_ALTIVEC
 #if HAVE_ALTIVEC && HAVE_BIGENDIAN

 // main steps of 8x8 transform
 #define STEP8(s0, s1, s2, s3, s4, s5, s6, s7, vec_rnd) \
@@ -344,7 +344,7 @@ static void vc1_inv_trans_8x4_altivec(uint8_t *dest, int stride, int16_t *block)

 av_cold void ff_vc1dsp_init_ppc(VC1DSPContext *dsp)
 {
 #if HAVE_ALTIVEC
 #if HAVE_ALTIVEC && HAVE_BIGENDIAN
    if (!PPC_ALTIVEC(av_get_cpu_flags()))
        return;

--- a/libavcodec/ppc/vorbisdsp_altivec.c
+++ b/libavcodec/ppc/vorbisdsp_altivec.c
@@ -27,7 +27,7 @@
 #include "libavutil/ppc/cpu.h"
 #include "libavcodec/vorbisdsp.h"

 #if HAVE_ALTIVEC
 #if HAVE_ALTIVEC && HAVE_BIGENDIAN
 static void vorbis_inverse_coupling_altivec(float *mag, float *ang,
                                            intptr_t blocksize)
 {
@@ -54,7 +54,7 @@ static void vorbis_inverse_coupling_altivec(float *mag, float *ang,

 av_cold void ff_vorbisdsp_init_ppc(VorbisDSPContext *c)
 {
 #if HAVE_ALTIVEC
 #if HAVE_ALTIVEC && HAVE_BIGENDIAN
    if (!PPC_ALTIVEC(av_get_cpu_flags()))
        return;

--- a/libavcodec/ppc/vp3dsp_altivec.c
+++ b/libavcodec/ppc/vp3dsp_altivec.c
@@ -28,7 +28,7 @@
 #include "libavutil/ppc/util_altivec.h"
 #include "libavcodec/vp3dsp.h"

 #if HAVE_ALTIVEC
 #if HAVE_ALTIVEC && HAVE_BIGENDIAN

 static const vec_s16 constants =
    {0, 64277, 60547, 54491, 46341, 36410, 25080, 12785};
@@ -179,7 +179,7 @@ static void vp3_idct_add_altivec(uint8_t *dst, int stride, int16_t block[64])

 av_cold void ff_vp3dsp_init_ppc(VP3DSPContext *c, int flags)
 {
 #if HAVE_ALTIVEC
 #if HAVE_ALTIVEC && HAVE_BIGENDIAN
    if (!PPC_ALTIVEC(av_get_cpu_flags()))
        return;

--- a/libavcodec/ppc/vp8dsp_altivec.c
+++ b/libavcodec/ppc/vp8dsp_altivec.c
@@ -29,7 +29,7 @@
 #include "libavcodec/vp8dsp.h"
 #include "hpeldsp_altivec.h"

 #if HAVE_ALTIVEC
 #if HAVE_ALTIVEC && HAVE_BIGENDIAN
 #define REPT4(...) { __VA_ARGS__, __VA_ARGS__, __VA_ARGS__, __VA_ARGS__ }

 // h subpel filter uses msum to multiply+add 4 pixel taps at once
@@ -315,7 +315,7 @@ static void put_vp8_pixels16_altivec(uint8_t *dst, ptrdiff_t dstride, uint8_t *s

 av_cold void ff_vp78dsp_init_ppc(VP8DSPContext *c)
 {
 #if HAVE_ALTIVEC
 #if HAVE_ALTIVEC && HAVE_BIGENDIAN
    if (!PPC_ALTIVEC(av_get_cpu_flags()))
        return;

--- a/libavutil/ppc/float_dsp_init.c
+++ b/libavutil/ppc/float_dsp_init.c
@@ -29,7 +29,7 @@ av_cold void ff_float_dsp_init_ppc(AVFloatDSPContext *fdsp, int bit_exact)
 {
    if (!PPC_ALTIVEC(av_get_cpu_flags()))
        return;

 #if HAVE_BIGENDIAN
    fdsp->vector_fmul = ff_vector_fmul_altivec;
    fdsp->vector_fmul_add = ff_vector_fmul_add_altivec;
    fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_altivec;
@@ -37,4 +37,5 @@ av_cold void ff_float_dsp_init_ppc(AVFloatDSPContext *fdsp, int bit_exact)
    if (!bit_exact) {
        fdsp->vector_fmul_window = ff_vector_fmul_window_altivec;
    }
 #endif
 }
--- a/libswscale/ppc/swscale_altivec.c
+++ b/libswscale/ppc/swscale_altivec.c
@@ -289,7 +289,7 @@ static void hScale_altivec_real(SwsContext *c, int16_t *dst, int dstW,

 av_cold void ff_sws_init_swscale_ppc(SwsContext *c)
 {
 #if HAVE_ALTIVEC
 #if HAVE_ALTIVEC && HAVE_BIGENDIAN
    enum AVPixelFormat dstFormat = c->dstFormat;

    if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC))