Originally committed as revision 18476 to svn://svn.ffmpeg.org/ffmpeg/trunk (tags/v0.6)
| @@ -20,10 +20,11 @@ | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #include "config.h" | |||
| #if HAVE_ALTIVEC_H | |||
| #include <altivec.h> | |||
| #endif | |||
| #include "libavcodec/dsputil.h" | |||
| #include "gcc_fixes.h" | |||
| #include "dsputil_ppc.h" | |||
| #include "util_altivec.h" | |||
| #include "types_altivec.h" | |||
| @@ -1124,70 +1125,70 @@ xlc goes to around 660 on the regular C code... | |||
| static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h) { | |||
| int sum; | |||
| register vector signed short | |||
| temp0 REG_v(v0), | |||
| temp1 REG_v(v1), | |||
| temp2 REG_v(v2), | |||
| temp3 REG_v(v3), | |||
| temp4 REG_v(v4), | |||
| temp5 REG_v(v5), | |||
| temp6 REG_v(v6), | |||
| temp7 REG_v(v7); | |||
| temp0 __asm__ ("v0"), | |||
| temp1 __asm__ ("v1"), | |||
| temp2 __asm__ ("v2"), | |||
| temp3 __asm__ ("v3"), | |||
| temp4 __asm__ ("v4"), | |||
| temp5 __asm__ ("v5"), | |||
| temp6 __asm__ ("v6"), | |||
| temp7 __asm__ ("v7"); | |||
| register vector signed short | |||
| temp0S REG_v(v8), | |||
| temp1S REG_v(v9), | |||
| temp2S REG_v(v10), | |||
| temp3S REG_v(v11), | |||
| temp4S REG_v(v12), | |||
| temp5S REG_v(v13), | |||
| temp6S REG_v(v14), | |||
| temp7S REG_v(v15); | |||
| register const vector unsigned char vzero REG_v(v31)= | |||
| temp0S __asm__ ("v8"), | |||
| temp1S __asm__ ("v9"), | |||
| temp2S __asm__ ("v10"), | |||
| temp3S __asm__ ("v11"), | |||
| temp4S __asm__ ("v12"), | |||
| temp5S __asm__ ("v13"), | |||
| temp6S __asm__ ("v14"), | |||
| temp7S __asm__ ("v15"); | |||
| register const vector unsigned char vzero __asm__ ("v31") = | |||
| (const vector unsigned char)vec_splat_u8(0); | |||
| { | |||
| register const vector signed short vprod1 REG_v(v16)= | |||
| register const vector signed short vprod1 __asm__ ("v16") = | |||
| (const vector signed short){ 1,-1, 1,-1, 1,-1, 1,-1 }; | |||
| register const vector signed short vprod2 REG_v(v17)= | |||
| register const vector signed short vprod2 __asm__ ("v17") = | |||
| (const vector signed short){ 1, 1,-1,-1, 1, 1,-1,-1 }; | |||
| register const vector signed short vprod3 REG_v(v18)= | |||
| register const vector signed short vprod3 __asm__ ("v18") = | |||
| (const vector signed short){ 1, 1, 1, 1,-1,-1,-1,-1 }; | |||
| register const vector unsigned char perm1 REG_v(v19)= | |||
| register const vector unsigned char perm1 __asm__ ("v19") = | |||
| (const vector unsigned char) | |||
| {0x02, 0x03, 0x00, 0x01, 0x06, 0x07, 0x04, 0x05, | |||
| 0x0A, 0x0B, 0x08, 0x09, 0x0E, 0x0F, 0x0C, 0x0D}; | |||
| register const vector unsigned char perm2 REG_v(v20)= | |||
| register const vector unsigned char perm2 __asm__ ("v20") = | |||
| (const vector unsigned char) | |||
| {0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03, | |||
| 0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B}; | |||
| register const vector unsigned char perm3 REG_v(v21)= | |||
| register const vector unsigned char perm3 __asm__ ("v21") = | |||
| (const vector unsigned char) | |||
| {0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, | |||
| 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07}; | |||
| #define ONEITERBUTTERFLY(i, res1, res2) \ | |||
| { \ | |||
| register vector unsigned char src1 REG_v(v22), \ | |||
| src2 REG_v(v23), \ | |||
| dst1 REG_v(v24), \ | |||
| dst2 REG_v(v25), \ | |||
| srcO REG_v(v22), \ | |||
| dstO REG_v(v23); \ | |||
| #define ONEITERBUTTERFLY(i, res1, res2) \ | |||
| { \ | |||
| register vector unsigned char src1 __asm__ ("v22"), \ | |||
| src2 __asm__ ("v23"), \ | |||
| dst1 __asm__ ("v24"), \ | |||
| dst2 __asm__ ("v25"), \ | |||
| srcO __asm__ ("v22"), \ | |||
| dstO __asm__ ("v23"); \ | |||
| \ | |||
| register vector signed short srcV REG_v(v24), \ | |||
| dstV REG_v(v25), \ | |||
| srcW REG_v(v26), \ | |||
| dstW REG_v(v27), \ | |||
| but0 REG_v(v28), \ | |||
| but0S REG_v(v29), \ | |||
| op1 REG_v(v30), \ | |||
| but1 REG_v(v22), \ | |||
| op1S REG_v(v23), \ | |||
| but1S REG_v(v24), \ | |||
| op2 REG_v(v25), \ | |||
| but2 REG_v(v26), \ | |||
| op2S REG_v(v27), \ | |||
| but2S REG_v(v28), \ | |||
| op3 REG_v(v29), \ | |||
| op3S REG_v(v30); \ | |||
| register vector signed short srcV __asm__ ("v24"), \ | |||
| dstV __asm__ ("v25"), \ | |||
| srcW __asm__ ("v26"), \ | |||
| dstW __asm__ ("v27"), \ | |||
| but0 __asm__ ("v28"), \ | |||
| but0S __asm__ ("v29"), \ | |||
| op1 __asm__ ("v30"), \ | |||
| but1 __asm__ ("v22"), \ | |||
| op1S __asm__ ("v23"), \ | |||
| but1S __asm__ ("v24"), \ | |||
| op2 __asm__ ("v25"), \ | |||
| but2 __asm__ ("v26"), \ | |||
| op2S __asm__ ("v27"), \ | |||
| but2S __asm__ ("v28"), \ | |||
| op3 __asm__ ("v29"), \ | |||
| op3S __asm__ ("v30"); \ | |||
| \ | |||
| src1 = vec_ld(stride * i, src); \ | |||
| src2 = vec_ld((stride * i) + 16, src); \ | |||
| @@ -18,11 +18,13 @@ | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #include "config.h" | |||
| #if HAVE_ALTIVEC_H | |||
| #include <altivec.h> | |||
| #endif | |||
| #include "libavutil/common.h" | |||
| #include "libavcodec/dsputil.h" | |||
| #include "dsputil_ppc.h" | |||
| #include "gcc_fixes.h" | |||
| #define vs16(v) ((vector signed short)(v)) | |||
| @@ -20,8 +20,6 @@ | |||
| #include "libavcodec/dsputil.h" | |||
| #include "gcc_fixes.h" | |||
| #include "dsputil_altivec.h" | |||
| #include "util_altivec.h" | |||
| @@ -1,102 +0,0 @@ | |||
| /* | |||
| * gcc fixes for altivec. | |||
| * Used to work around broken gcc (FSF gcc-3 pre gcc-3.3) | |||
| * and to stay somewhat compatible with Darwin. | |||
| * | |||
| * This file is part of FFmpeg. | |||
| * | |||
| * FFmpeg is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * FFmpeg is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with FFmpeg; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #ifndef AVCODEC_PPC_GCC_FIXES_H | |||
| #define AVCODEC_PPC_GCC_FIXES_H | |||
| #include "config.h" | |||
| #if HAVE_ALTIVEC_H | |||
| #include <altivec.h> | |||
| #endif | |||
/* REG_v(a): optional register-name annotation for a variable declaration.
 * With __GNUC__ < 4 it expands to nothing, so no register name is attached.
 * Otherwise it expands to __asm__("a"): the argument is stringified with #a,
 * e.g. REG_v(v0) becomes __asm__ ( "v0" ). */
| #if (__GNUC__ < 4) | |||
| # define REG_v(a) | |||
| #else | |||
| # define REG_v(a) __asm__ ( #a ) | |||
| #endif | |||
| #if (__GNUC__ == 3 && __GNUC_MINOR__ < 3) | |||
| /* This code was provided to me by Bartosch Pixa | |||
| * as a separate header file (broken_mergel.h). | |||
| * thanks to lu_zero for the workaround. | |||
| * | |||
| * See this mail for more information: | |||
| * http://gcc.gnu.org/ml/gcc/2003-04/msg00967.html | |||
| */ | |||
/* ff_vmrglb: byte-wise stand-in for vec_mergel, implemented with vec_perm.
 * It sits inside the (__GNUC__ == 3 && __GNUC_MINOR__ < 3) section of this header.
 *
 * A, B: the two 16-byte input vectors.
 * Returns vec_perm(A, B, lowbyte). The control vector alternates indices
 * 0x08..0x0f with 0x18..0x1f. Under AltiVec vec_perm semantics (indices
 * 0x00-0x0f select from the first operand, 0x10-0x1f from the second) this
 * yields A[8],B[8],A[9],B[9],...,A[15],B[15]. */
| static inline vector signed char ff_vmrglb (vector signed char const A, | |||
| vector signed char const B) | |||
| { | |||
/* Permute control: each pair is (byte i of A, byte i of B) for i = 8..15. */
| static const vector unsigned char lowbyte = { | |||
| 0x08, 0x18, 0x09, 0x19, 0x0a, 0x1a, 0x0b, 0x1b, | |||
| 0x0c, 0x1c, 0x0d, 0x1d, 0x0e, 0x1e, 0x0f, 0x1f | |||
| }; | |||
| return vec_perm (A, B, lowbyte); | |||
| } | |||
/* ff_vmrglh: halfword (16-bit) stand-in for vec_mergel, implemented with vec_perm.
 *
 * A, B: the two input vectors of eight signed shorts each.
 * Returns vec_perm(A, B, lowhalf). The control vector picks byte pairs
 * 0x08-0x09, 0x0a-0x0b, ... from A and 0x18-0x19, 0x1a-0x1b, ... from B
 * alternately. Under AltiVec vec_perm semantics that is
 * A[4],B[4],A[5],B[5],A[6],B[6],A[7],B[7] in halfword elements. */
| static inline vector signed short ff_vmrglh (vector signed short const A, | |||
| vector signed short const B) | |||
| { | |||
/* Permute control: two consecutive byte indices per 16-bit element. */
| static const vector unsigned char lowhalf = { | |||
| 0x08, 0x09, 0x18, 0x19, 0x0a, 0x0b, 0x1a, 0x1b, | |||
| 0x0c, 0x0d, 0x1c, 0x1d, 0x0e, 0x0f, 0x1e, 0x1f | |||
| }; | |||
| return vec_perm (A, B, lowhalf); | |||
| } | |||
/* ff_vmrglw: word (32-bit) stand-in for vec_mergel, implemented with vec_perm.
 *
 * A, B: the two input vectors of four signed ints each.
 * Returns vec_perm(A, B, lowword). The control vector selects bytes
 * 0x08-0x0b and 0x0c-0x0f from A, interleaved with bytes 0x18-0x1b and
 * 0x1c-0x1f from B. Under AltiVec vec_perm semantics that is
 * A[2],B[2],A[3],B[3] in word elements. */
| static inline vector signed int ff_vmrglw (vector signed int const A, | |||
| vector signed int const B) | |||
| { | |||
/* Permute control: four consecutive byte indices per 32-bit element. */
| static const vector unsigned char lowword = { | |||
| 0x08, 0x09, 0x0a, 0x0b, 0x18, 0x19, 0x1a, 0x1b, | |||
| 0x0c, 0x0d, 0x0e, 0x0f, 0x1c, 0x1d, 0x1e, 0x1f | |||
| }; | |||
| return vec_perm (A, B, lowword); | |||
| } | |||
| /*#define ff_vmrglb ff_vmrglb | |||
| #define ff_vmrglh ff_vmrglh | |||
| #define ff_vmrglw ff_vmrglw | |||
| */ | |||
/* Replace any existing vec_mergel with a type-dispatching macro that routes
 * to the vec_perm-based helpers defined in this header:
 *   signed/unsigned char        -> ff_vmrglb
 *   signed/unsigned short       -> ff_vmrglh
 *   float, signed/unsigned int  -> ff_vmrglw
 * Arguments are cast to the helper's signed type and the result is cast back
 * to the type of the inputs. If no type pair matches, the chain ends in
 * __altivec_link_error_invalid_argument ().
 * NOTE(review): __ch and __bin_args_eq are not defined in this header;
 * presumably they come from the gcc 3.x <altivec.h> -- confirm. */
| #undef vec_mergel | |||
| #define vec_mergel(a1, a2) \ | |||
| __ch (__bin_args_eq (vector signed char, (a1), vector signed char, (a2)), \ | |||
| ((vector signed char) ff_vmrglb ((vector signed char) (a1), (vector signed char) (a2))), \ | |||
| __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \ | |||
| ((vector unsigned char) ff_vmrglb ((vector signed char) (a1), (vector signed char) (a2))), \ | |||
| __ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \ | |||
| ((vector signed short) ff_vmrglh ((vector signed short) (a1), (vector signed short) (a2))), \ | |||
| __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \ | |||
| ((vector unsigned short) ff_vmrglh ((vector signed short) (a1), (vector signed short) (a2))), \ | |||
| __ch (__bin_args_eq (vector float, (a1), vector float, (a2)), \ | |||
| ((vector float) ff_vmrglw ((vector signed int) (a1), (vector signed int) (a2))), \ | |||
| __ch (__bin_args_eq (vector signed int, (a1), vector signed int, (a2)), \ | |||
| ((vector signed int) ff_vmrglw ((vector signed int) (a1), (vector signed int) (a2))), \ | |||
| __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \ | |||
| ((vector unsigned int) ff_vmrglw ((vector signed int) (a1), (vector signed int) (a2))), \ | |||
| __altivec_link_error_invalid_argument ()))))))) | |||
| #endif /* (__GNUC__ == 3 && __GNUC_MINOR__ < 3) */ | |||
| #endif /* AVCODEC_PPC_GCC_FIXES_H */ | |||
| @@ -21,8 +21,6 @@ | |||
| #include "libavcodec/dsputil.h" | |||
| #include "libavcodec/h264data.h" | |||
| #include "gcc_fixes.h" | |||
| #include "dsputil_ppc.h" | |||
| #include "dsputil_altivec.h" | |||
| #include "util_altivec.h" | |||
| @@ -37,9 +37,11 @@ | |||
| #include <stdlib.h> /* malloc(), free() */ | |||
| #include <string.h> | |||
| #include "config.h" | |||
| #if HAVE_ALTIVEC_H | |||
| #include <altivec.h> | |||
| #endif | |||
| #include "libavcodec/dsputil.h" | |||
| #include "gcc_fixes.h" | |||
| #include "types_altivec.h" | |||
| #include "dsputil_ppc.h" | |||
| @@ -26,8 +26,6 @@ | |||
| #include "libavcodec/dsputil.h" | |||
| #include "libavcodec/mpegvideo.h" | |||
| #include "gcc_fixes.h" | |||
| #include "dsputil_ppc.h" | |||
| #include "util_altivec.h" | |||
| // Swaps two variables (used for altivec registers) | |||