/*
 * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
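
/* AltiVec (PowerPC SIMD) versions of the H.264 motion-compensation
 * primitives: the 8-wide chroma MC routines and the 16x16 quarter-pel
 * luma MC functions (put and avg variants), plus their registration in
 * the DSPContext. */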
#include "../dsputil.h"

#include "gcc_fixes.h"

#include "dsputil_altivec.h"

#define PUT_OP_U8_ALTIVEC(d, s, dst) d = s
#define AVG_OP_U8_ALTIVEC(d, s, dst) d = vec_avg(dst, s)

#define OP_U8_ALTIVEC                          PUT_OP_U8_ALTIVEC
#define PREFIX_h264_chroma_mc8_altivec         put_h264_chroma_mc8_altivec
#define PREFIX_h264_chroma_mc8_num             altivec_put_h264_chroma_mc8_num
#define PREFIX_h264_qpel16_h_lowpass_altivec   put_h264_qpel16_h_lowpass_altivec
#define PREFIX_h264_qpel16_h_lowpass_num       altivec_put_h264_qpel16_h_lowpass_num
#define PREFIX_h264_qpel16_v_lowpass_altivec   put_h264_qpel16_v_lowpass_altivec
#define PREFIX_h264_qpel16_v_lowpass_num       altivec_put_h264_qpel16_v_lowpass_num
#define PREFIX_h264_qpel16_hv_lowpass_altivec  put_h264_qpel16_hv_lowpass_altivec
#define PREFIX_h264_qpel16_hv_lowpass_num      altivec_put_h264_qpel16_hv_lowpass_num
#include "h264_template_altivec.c"
#undef OP_U8_ALTIVEC
#undef PREFIX_h264_chroma_mc8_altivec
#undef PREFIX_h264_chroma_mc8_num
#undef PREFIX_h264_qpel16_h_lowpass_altivec
#undef PREFIX_h264_qpel16_h_lowpass_num
#undef PREFIX_h264_qpel16_v_lowpass_altivec
#undef PREFIX_h264_qpel16_v_lowpass_num
#undef PREFIX_h264_qpel16_hv_lowpass_altivec
#undef PREFIX_h264_qpel16_hv_lowpass_num

#define OP_U8_ALTIVEC                          AVG_OP_U8_ALTIVEC
#define PREFIX_h264_chroma_mc8_altivec         avg_h264_chroma_mc8_altivec
#define PREFIX_h264_chroma_mc8_num             altivec_avg_h264_chroma_mc8_num
#define PREFIX_h264_qpel16_h_lowpass_altivec   avg_h264_qpel16_h_lowpass_altivec
#define PREFIX_h264_qpel16_h_lowpass_num       altivec_avg_h264_qpel16_h_lowpass_num
#define PREFIX_h264_qpel16_v_lowpass_altivec   avg_h264_qpel16_v_lowpass_altivec
#define PREFIX_h264_qpel16_v_lowpass_num       altivec_avg_h264_qpel16_v_lowpass_num
#define PREFIX_h264_qpel16_hv_lowpass_altivec  avg_h264_qpel16_hv_lowpass_altivec
#define PREFIX_h264_qpel16_hv_lowpass_num      altivec_avg_h264_qpel16_hv_lowpass_num
#include "h264_template_altivec.c"
#undef OP_U8_ALTIVEC
#undef PREFIX_h264_chroma_mc8_altivec
#undef PREFIX_h264_chroma_mc8_num
#undef PREFIX_h264_qpel16_h_lowpass_altivec
#undef PREFIX_h264_qpel16_h_lowpass_num
#undef PREFIX_h264_qpel16_v_lowpass_altivec
#undef PREFIX_h264_qpel16_v_lowpass_num
#undef PREFIX_h264_qpel16_hv_lowpass_altivec
#undef PREFIX_h264_qpel16_hv_lowpass_num
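
/* H264_MC below expands into the 16 quarter-pel motion-compensation
 * functions mc00 .. mc33 (the two digits are the x and y quarter-pel
 * offsets), built from the lowpass primitives generated by the two
 * inclusions of h264_template_altivec.c above together with the
 * pixel-averaging helpers (put/avg_pixels16 and the _l2 variants defined
 * later in this file).  DECLARE_ALIGNED_16 gives the temporary buffers
 * the 16-byte alignment required by the AltiVec loads and stores. */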
#define H264_MC(OPNAME, SIZE, CODETYPE) \
static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## CODETYPE (uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels ## SIZE ## _ ## CODETYPE(dst, src, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){ \
    DECLARE_ALIGNED_16(uint8_t, half[SIZE*SIZE]);\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, half[SIZE*SIZE]);\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+1, half, stride, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, half[SIZE*SIZE]);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, half[SIZE*SIZE]);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+stride, half, stride, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(dst, tmp, src, stride, SIZE, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(uint8_t, halfHV[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(uint8_t, halfHV[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(uint8_t, halfHV[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(uint8_t, halfHV[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\
}
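
/* Scalar sketch of what the vector loop below computes for each of the
 * 8 chroma pixels in a row (reference only, not compiled in):
 *     dst[i] = (A*src[i] + B*src[i+1] + C*src[i+stride] + D*src[i+stride+1] + 28) >> 6;
 * with A = (8-x)*(8-y), B = x*(8-y), C = (8-x)*y, D = x*y; the 28 is the
 * "no rounding" offset held in v28ss ((1 << 5) - 4). */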
/* this code assumes that stride % 16 == 0 */
void put_no_rnd_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) {
    signed int ABCD[4] __attribute__((aligned(16))) =
                        {((8 - x) * (8 - y)),
                             ((x) * (8 - y)),
                         ((8 - x) * (y)),
                             ((x) * (y))};
    register int i;
    vector unsigned char fperm;
    const vector signed int vABCD = vec_ld(0, ABCD);
    const vector signed short vA = vec_splat((vector signed short)vABCD, 1);
    const vector signed short vB = vec_splat((vector signed short)vABCD, 3);
    const vector signed short vC = vec_splat((vector signed short)vABCD, 5);
    const vector signed short vD = vec_splat((vector signed short)vABCD, 7);
    const vector signed int vzero = vec_splat_s32(0);
    const vector signed short v28ss = vec_sub(vec_sl(vec_splat_s16(1), vec_splat_u16(5)), vec_splat_s16(4));
    const vector unsigned short v6us  = vec_splat_u16(6);
    register int loadSecond     = (((unsigned long)src) % 16) <= 7 ? 0 : 1;
    register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0;

    vector unsigned char vsrcAuc, vsrcBuc, vsrcperm0, vsrcperm1;
    vector unsigned char vsrc0uc, vsrc1uc;
    vector signed short vsrc0ssH, vsrc1ssH;
    vector unsigned char vsrcCuc, vsrc2uc, vsrc3uc;
    vector signed short vsrc2ssH, vsrc3ssH, psum;
    vector unsigned char vdst, ppsum, vfdst, fsum;

    if (((unsigned long)dst) % 16 == 0) {
        fperm = (vector unsigned char)AVV(0x10, 0x11, 0x12, 0x13,
                                          0x14, 0x15, 0x16, 0x17,
                                          0x08, 0x09, 0x0A, 0x0B,
                                          0x0C, 0x0D, 0x0E, 0x0F);
    } else {
        fperm = (vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03,
                                          0x04, 0x05, 0x06, 0x07,
                                          0x18, 0x19, 0x1A, 0x1B,
                                          0x1C, 0x1D, 0x1E, 0x1F);
    }

    vsrcAuc = vec_ld(0, src);

    if (loadSecond)
        vsrcBuc = vec_ld(16, src);
    vsrcperm0 = vec_lvsl(0, src);
    vsrcperm1 = vec_lvsl(1, src);

    vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0);
    if (reallyBadAlign)
        vsrc1uc = vsrcBuc;
    else
        vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1);

    vsrc0ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
                                               (vector unsigned char)vsrc0uc);
    vsrc1ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
                                               (vector unsigned char)vsrc1uc);

    if (!loadSecond) {// -> !reallyBadAlign
        for (i = 0 ; i < h ; i++) {
            vsrcCuc = vec_ld(stride + 0, src);

            vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
            vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);

            vsrc2ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
                                                       (vector unsigned char)vsrc2uc);
            vsrc3ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
                                                       (vector unsigned char)vsrc3uc);

            psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0));
            psum = vec_mladd(vB, vsrc1ssH, psum);
            psum = vec_mladd(vC, vsrc2ssH, psum);
            psum = vec_mladd(vD, vsrc3ssH, psum);
            psum = vec_add(v28ss, psum);
            psum = vec_sra(psum, v6us);

            vdst = vec_ld(0, dst);
            ppsum = (vector unsigned char)vec_packsu(psum, psum);
            fsum = vec_perm(vdst, ppsum, fperm);

            vec_st(fsum, 0, dst);

            vsrc0ssH = vsrc2ssH;
            vsrc1ssH = vsrc3ssH;

            dst += stride;
            src += stride;
        }
    } else {
        vector unsigned char vsrcDuc;
        for (i = 0 ; i < h ; i++) {
            vsrcCuc = vec_ld(stride + 0, src);
            vsrcDuc = vec_ld(stride + 16, src);

            vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
            if (reallyBadAlign)
                vsrc3uc = vsrcDuc;
            else
                vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1);

            vsrc2ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
                                                       (vector unsigned char)vsrc2uc);
            vsrc3ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
                                                       (vector unsigned char)vsrc3uc);

            psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0));
            psum = vec_mladd(vB, vsrc1ssH, psum);
            psum = vec_mladd(vC, vsrc2ssH, psum);
            psum = vec_mladd(vD, vsrc3ssH, psum);
            psum = vec_add(v28ss, psum);
            psum = vec_sr(psum, v6us);

            vdst = vec_ld(0, dst);
            ppsum = (vector unsigned char)vec_pack(psum, psum);
            fsum = vec_perm(vdst, ppsum, fperm);

            vec_st(fsum, 0, dst);

            vsrc0ssH = vsrc2ssH;
            vsrc1ssH = vsrc3ssH;

            dst += stride;
            src += stride;
        }
    }
}
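
/* put/avg_pixels16_l2_altivec average a potentially unaligned 16-byte row
 * from src1 (stride src_stride1) with a packed row from src2 (stride 16)
 * and write the result to dst; an unaligned dst is handled with a
 * lvsr-permuted read-modify-write of the two covering aligned vectors. */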
static inline void put_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1,
                                            const uint8_t * src2, int dst_stride,
                                            int src_stride1, int h)
{
    int i;
    vector unsigned char a, b, d, tmp1, tmp2, mask, mask_, edges, align;

    mask_ = vec_lvsl(0, src2);

    for (i = 0; i < h; i++) {
        tmp1 = vec_ld(i * src_stride1, src1);
        mask = vec_lvsl(i * src_stride1, src1);
        tmp2 = vec_ld(i * src_stride1 + 15, src1);

        a = vec_perm(tmp1, tmp2, mask);

        tmp1 = vec_ld(i * 16, src2);
        tmp2 = vec_ld(i * 16 + 15, src2);

        b = vec_perm(tmp1, tmp2, mask_);

        tmp1 = vec_ld(0, dst);
        mask = vec_lvsl(0, dst);
        tmp2 = vec_ld(15, dst);

        d = vec_avg(a, b);

        edges = vec_perm(tmp2, tmp1, mask);

        align = vec_lvsr(0, dst);

        tmp2 = vec_perm(d, edges, align);
        tmp1 = vec_perm(edges, d, align);

        vec_st(tmp2, 15, dst);
        vec_st(tmp1, 0 , dst);

        dst += dst_stride;
    }
}
static inline void avg_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1,
                                            const uint8_t * src2, int dst_stride,
                                            int src_stride1, int h)
{
    int i;
    vector unsigned char a, b, d, tmp1, tmp2, mask, mask_, edges, align;

    mask_ = vec_lvsl(0, src2);

    for (i = 0; i < h; i++) {
        tmp1 = vec_ld(i * src_stride1, src1);
        mask = vec_lvsl(i * src_stride1, src1);
        tmp2 = vec_ld(i * src_stride1 + 15, src1);

        a = vec_perm(tmp1, tmp2, mask);

        tmp1 = vec_ld(i * 16, src2);
        tmp2 = vec_ld(i * 16 + 15, src2);

        b = vec_perm(tmp1, tmp2, mask_);

        tmp1 = vec_ld(0, dst);
        mask = vec_lvsl(0, dst);
        tmp2 = vec_ld(15, dst);

        d = vec_avg(vec_perm(tmp1, tmp2, mask), vec_avg(a, b));

        edges = vec_perm(tmp2, tmp1, mask);

        align = vec_lvsr(0, dst);

        tmp2 = vec_perm(d, edges, align);
        tmp1 = vec_perm(edges, d, align);

        vec_st(tmp2, 15, dst);
        vec_st(tmp1, 0 , dst);

        dst += dst_stride;
    }
}
/* Implemented but could be faster
#define put_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) put_pixels16_l2(d,s1,s2,ds,s1s,16,h)
#define avg_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) avg_pixels16_l2(d,s1,s2,ds,s1s,16,h)
*/
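
/* Instantiate the full set of put_ and avg_ quarter-pel functions for
 * SIZE == 16; dsputil_h264_init_ppc() below hooks them (and the chroma MC
 * routines) into the DSPContext when AltiVec is detected at runtime. */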
H264_MC(put_, 16, altivec)
H264_MC(avg_, 16, altivec)
void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) {

#ifdef HAVE_ALTIVEC
    if (has_altivec()) {
        c->put_h264_chroma_pixels_tab[0] = put_h264_chroma_mc8_altivec;
        c->put_no_rnd_h264_chroma_pixels_tab[0] = put_no_rnd_h264_chroma_mc8_altivec;
        c->avg_h264_chroma_pixels_tab[0] = avg_h264_chroma_mc8_altivec;

#define dspfunc(PFX, IDX, NUM) \
        c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_altivec; \
        c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_altivec; \
        c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_altivec; \
        c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_altivec; \
        c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_altivec; \
        c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_altivec; \
        c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_altivec; \
        c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_altivec; \
        c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_altivec; \
        c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_altivec; \
        c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_altivec; \
        c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_altivec; \
        c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_altivec; \
        c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_altivec; \
        c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_altivec; \
        c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_altivec

        dspfunc(put_h264_qpel, 0, 16);
        dspfunc(avg_h264_qpel, 0, 16);
#undef dspfunc

    } else
#endif /* HAVE_ALTIVEC */
    {
        // Non-AltiVec PPC optimisations
        // ... pending ...
    }
}