/*
 * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include "../dsputil.h"
#include "gcc_fixes.h"
#include "dsputil_altivec.h"
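
/* The template below is included twice: first with the "put" operation
 * (plain store) and then with the "avg" operation (average with the bytes
 * already in dst). OP_U8_ALTIVEC selects which store the template emits,
 * and the PREFIX_* macros give each instantiation distinct names. */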
#define PUT_OP_U8_ALTIVEC(d, s, dst) d = s
#define AVG_OP_U8_ALTIVEC(d, s, dst) d = vec_avg(dst, s)

#define OP_U8_ALTIVEC PUT_OP_U8_ALTIVEC
#define PREFIX_h264_chroma_mc8_altivec put_h264_chroma_mc8_altivec
#define PREFIX_h264_chroma_mc8_num altivec_put_h264_chroma_mc8_num
#define PREFIX_h264_qpel16_h_lowpass_altivec put_h264_qpel16_h_lowpass_altivec
#define PREFIX_h264_qpel16_h_lowpass_num altivec_put_h264_qpel16_h_lowpass_num
#define PREFIX_h264_qpel16_v_lowpass_altivec put_h264_qpel16_v_lowpass_altivec
#define PREFIX_h264_qpel16_v_lowpass_num altivec_put_h264_qpel16_v_lowpass_num
#define PREFIX_h264_qpel16_hv_lowpass_altivec put_h264_qpel16_hv_lowpass_altivec
#define PREFIX_h264_qpel16_hv_lowpass_num altivec_put_h264_qpel16_hv_lowpass_num
#include "h264_template_altivec.c"
#undef OP_U8_ALTIVEC
#undef PREFIX_h264_chroma_mc8_altivec
#undef PREFIX_h264_chroma_mc8_num
#undef PREFIX_h264_qpel16_h_lowpass_altivec
#undef PREFIX_h264_qpel16_h_lowpass_num
#undef PREFIX_h264_qpel16_v_lowpass_altivec
#undef PREFIX_h264_qpel16_v_lowpass_num
#undef PREFIX_h264_qpel16_hv_lowpass_altivec
#undef PREFIX_h264_qpel16_hv_lowpass_num

#define OP_U8_ALTIVEC AVG_OP_U8_ALTIVEC
#define PREFIX_h264_chroma_mc8_altivec avg_h264_chroma_mc8_altivec
#define PREFIX_h264_chroma_mc8_num altivec_avg_h264_chroma_mc8_num
#define PREFIX_h264_qpel16_h_lowpass_altivec avg_h264_qpel16_h_lowpass_altivec
#define PREFIX_h264_qpel16_h_lowpass_num altivec_avg_h264_qpel16_h_lowpass_num
#define PREFIX_h264_qpel16_v_lowpass_altivec avg_h264_qpel16_v_lowpass_altivec
#define PREFIX_h264_qpel16_v_lowpass_num altivec_avg_h264_qpel16_v_lowpass_num
#define PREFIX_h264_qpel16_hv_lowpass_altivec avg_h264_qpel16_hv_lowpass_altivec
#define PREFIX_h264_qpel16_hv_lowpass_num altivec_avg_h264_qpel16_hv_lowpass_num
#include "h264_template_altivec.c"
#undef OP_U8_ALTIVEC
#undef PREFIX_h264_chroma_mc8_altivec
#undef PREFIX_h264_chroma_mc8_num
#undef PREFIX_h264_qpel16_h_lowpass_altivec
#undef PREFIX_h264_qpel16_h_lowpass_num
#undef PREFIX_h264_qpel16_v_lowpass_altivec
#undef PREFIX_h264_qpel16_v_lowpass_num
#undef PREFIX_h264_qpel16_hv_lowpass_altivec
#undef PREFIX_h264_qpel16_hv_lowpass_num
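
/* H264_MC generates the 16 quarter-pel motion compensation functions for one
 * block size: mcXY handles the fractional position (X/4, Y/4) pel. mc00 is a
 * plain copy, mc20/mc02/mc22 are the pure half-pel filters, and the remaining
 * positions are built by averaging two intermediate planes with *_l2. */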
#define H264_MC(OPNAME, SIZE, CODETYPE) \
static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## CODETYPE (uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels ## SIZE ## _ ## CODETYPE(dst, src, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){ \
    DECLARE_ALIGNED_16(uint8_t, half[SIZE*SIZE]);\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, half[SIZE*SIZE]);\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+1, half, stride, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, half[SIZE*SIZE]);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, half[SIZE*SIZE]);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+stride, half, stride, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(dst, tmp, src, stride, SIZE, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(uint8_t, halfHV[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(uint8_t, halfHV[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(uint8_t, halfHV[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(uint8_t, halfHV[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\
}
/* this code assumes that stride % 16 == 0 */
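/* 2x2 bilinear chroma filter: each output pixel is
 * (A*src[0] + B*src[1] + C*src[stride] + D*src[stride+1] + 28) >> 6,
 * with A=(8-x)(8-y), B=x(8-y), C=(8-x)y, D=xy. The no-round variant adds
 * 28 (built below as (1<<5) - 4) instead of the usual 32 before shifting. */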
void put_no_rnd_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) {
    signed int ABCD[4] __attribute__((aligned(16))) =
                        {((8 - x) * (8 - y)),
                         ((x) * (8 - y)),
                         ((8 - x) * (y)),
                         ((x) * (y))};
    register int i;
    vector unsigned char fperm;
    const vector signed int vABCD = vec_ld(0, ABCD);
    const vector signed short vA = vec_splat((vector signed short)vABCD, 1);
    const vector signed short vB = vec_splat((vector signed short)vABCD, 3);
    const vector signed short vC = vec_splat((vector signed short)vABCD, 5);
    const vector signed short vD = vec_splat((vector signed short)vABCD, 7);
    const vector signed int vzero = vec_splat_s32(0);
    const vector signed short v28ss = vec_sub(vec_sl(vec_splat_s16(1), vec_splat_u16(5)), vec_splat_s16(4));
    const vector unsigned short v6us = vec_splat_u16(6);
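    /* Each row needs 9 source bytes (8 pixels plus one to the right). If src
     * starts at offset <= 7 within its 16-byte line, a single vec_ld covers
     * them; otherwise a second load is needed. At offset 15 the row shifted
     * by one starts exactly on the second vector, so it needs no permute. */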
    register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1;
    register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0;
    vector unsigned char vsrcAuc, vsrcBuc, vsrcperm0, vsrcperm1;
    vector unsigned char vsrc0uc, vsrc1uc;
    vector signed short vsrc0ssH, vsrc1ssH;
    vector unsigned char vsrcCuc, vsrc2uc, vsrc3uc;
    vector signed short vsrc2ssH, vsrc3ssH, psum;
    vector unsigned char vdst, ppsum, vfdst, fsum;
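    /* Only 8 of the 16 stored bytes belong to this block: fperm merges the
     * computed half with the untouched half of the existing destination
     * vector, depending on which 8-byte half of the line dst points into. */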
    if (((unsigned long)dst) % 16 == 0) {
        fperm = (vector unsigned char)AVV(0x10, 0x11, 0x12, 0x13,
                                          0x14, 0x15, 0x16, 0x17,
                                          0x08, 0x09, 0x0A, 0x0B,
                                          0x0C, 0x0D, 0x0E, 0x0F);
    } else {
        fperm = (vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03,
                                          0x04, 0x05, 0x06, 0x07,
                                          0x18, 0x19, 0x1A, 0x1B,
                                          0x1C, 0x1D, 0x1E, 0x1F);
    }

    vsrcAuc = vec_ld(0, src);
    if (loadSecond)
        vsrcBuc = vec_ld(16, src);
    vsrcperm0 = vec_lvsl(0, src);
    vsrcperm1 = vec_lvsl(1, src);

    vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0);
    if (reallyBadAlign)
        vsrc1uc = vsrcBuc;
    else
        vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1);

    vsrc0ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
                                               (vector unsigned char)vsrc0uc);
    vsrc1ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
                                               (vector unsigned char)vsrc1uc);
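    /* Per row: load the next source line, zero-extend to 16 bits, accumulate
     * the four weighted taps, add the rounding constant and shift down by 6.
     * The sums stay within 0..255, so pack/packsu and sr/sra coincide here. */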
    if (!loadSecond) { // -> !reallyBadAlign
        for (i = 0 ; i < h ; i++) {
            vsrcCuc = vec_ld(stride + 0, src);
            vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
            vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);

            vsrc2ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
                                                       (vector unsigned char)vsrc2uc);
            vsrc3ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
                                                       (vector unsigned char)vsrc3uc);

            psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0));
            psum = vec_mladd(vB, vsrc1ssH, psum);
            psum = vec_mladd(vC, vsrc2ssH, psum);
            psum = vec_mladd(vD, vsrc3ssH, psum);
            psum = vec_add(v28ss, psum);
            psum = vec_sra(psum, v6us);

            vdst = vec_ld(0, dst);
            ppsum = (vector unsigned char)vec_packsu(psum, psum);
            fsum = vec_perm(vdst, ppsum, fperm);

            vec_st(fsum, 0, dst);

            vsrc0ssH = vsrc2ssH;
            vsrc1ssH = vsrc3ssH;

            dst += stride;
            src += stride;
        }
    } else {
        vector unsigned char vsrcDuc;
        for (i = 0 ; i < h ; i++) {
            vsrcCuc = vec_ld(stride + 0, src);
            vsrcDuc = vec_ld(stride + 16, src);

            vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
            if (reallyBadAlign)
                vsrc3uc = vsrcDuc;
            else
                vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1);

            vsrc2ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
                                                       (vector unsigned char)vsrc2uc);
            vsrc3ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
                                                       (vector unsigned char)vsrc3uc);

            psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0));
            psum = vec_mladd(vB, vsrc1ssH, psum);
            psum = vec_mladd(vC, vsrc2ssH, psum);
            psum = vec_mladd(vD, vsrc3ssH, psum);
            psum = vec_add(v28ss, psum);
            psum = vec_sr(psum, v6us);

            vdst = vec_ld(0, dst);
            ppsum = (vector unsigned char)vec_pack(psum, psum);
            fsum = vec_perm(vdst, ppsum, fperm);

            vec_st(fsum, 0, dst);

            vsrc0ssH = vsrc2ssH;
            vsrc1ssH = vsrc3ssH;

            dst += stride;
            src += stride;
        }
    }
}
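
/* Averages each row of src1 with the corresponding row of src2 (src2 is a
 * contiguous 16-byte-wide temporary buffer, hence the fixed row stride of 16)
 * and stores the result to a possibly unaligned dst using the classic
 * lvsl/lvsr edge-merging trick. */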
static inline void put_pixels16_l2_altivec(uint8_t *dst, const uint8_t *src1,
                                           const uint8_t *src2, int dst_stride,
                                           int src_stride1, int h)
{
    int i;
    vector unsigned char a, b, d, tmp1, tmp2, mask, mask_, edges, align;

    mask_ = vec_lvsl(0, src2);

    for (i = 0; i < h; i++) {
        tmp1 = vec_ld(i * src_stride1, src1);
        mask = vec_lvsl(i * src_stride1, src1);
        tmp2 = vec_ld(i * src_stride1 + 15, src1);
        a = vec_perm(tmp1, tmp2, mask);

        tmp1 = vec_ld(i * 16, src2);
        tmp2 = vec_ld(i * 16 + 15, src2);
        b = vec_perm(tmp1, tmp2, mask_);

        tmp1 = vec_ld(0, dst);
        mask = vec_lvsl(0, dst);
        tmp2 = vec_ld(15, dst);

        d = vec_avg(a, b);

        edges = vec_perm(tmp2, tmp1, mask);
        align = vec_lvsr(0, dst);
        tmp2 = vec_perm(d, edges, align);
        tmp1 = vec_perm(edges, d, align);

        vec_st(tmp2, 15, dst);
        vec_st(tmp1, 0, dst);

        dst += dst_stride;
    }
}
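
/* Same as put_pixels16_l2_altivec, except that the src1/src2 average is
 * further averaged with the bytes already present in dst. */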
static inline void avg_pixels16_l2_altivec(uint8_t *dst, const uint8_t *src1,
                                           const uint8_t *src2, int dst_stride,
                                           int src_stride1, int h)
{
    int i;
    vector unsigned char a, b, d, tmp1, tmp2, mask, mask_, edges, align;

    mask_ = vec_lvsl(0, src2);

    for (i = 0; i < h; i++) {
        tmp1 = vec_ld(i * src_stride1, src1);
        mask = vec_lvsl(i * src_stride1, src1);
        tmp2 = vec_ld(i * src_stride1 + 15, src1);
        a = vec_perm(tmp1, tmp2, mask);

        tmp1 = vec_ld(i * 16, src2);
        tmp2 = vec_ld(i * 16 + 15, src2);
        b = vec_perm(tmp1, tmp2, mask_);

        tmp1 = vec_ld(0, dst);
        mask = vec_lvsl(0, dst);
        tmp2 = vec_ld(15, dst);

        d = vec_avg(vec_perm(tmp1, tmp2, mask), vec_avg(a, b));

        edges = vec_perm(tmp2, tmp1, mask);
        align = vec_lvsr(0, dst);
        tmp2 = vec_perm(d, edges, align);
        tmp1 = vec_perm(edges, d, align);

        vec_st(tmp2, 15, dst);
        vec_st(tmp1, 0, dst);

        dst += dst_stride;
    }
}
/* Implemented but could be faster
#define put_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) put_pixels16_l2(d,s1,s2,ds,s1s,16,h)
#define avg_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) avg_pixels16_l2(d,s1,s2,ds,s1s,16,h)
*/
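
/* Instantiate the full set of quarter-pel functions for 16x16 blocks,
 * once for put and once for avg. */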
H264_MC(put_, 16, altivec)
H264_MC(avg_, 16, altivec)
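
/* Install the AltiVec implementations into the DSPContext function tables
 * when runtime detection reports AltiVec support. */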
void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) {

#ifdef HAVE_ALTIVEC
    if (has_altivec()) {
        c->put_h264_chroma_pixels_tab[0] = put_h264_chroma_mc8_altivec;
        c->put_no_rnd_h264_chroma_pixels_tab[0] = put_no_rnd_h264_chroma_mc8_altivec;
        c->avg_h264_chroma_pixels_tab[0] = avg_h264_chroma_mc8_altivec;

#define dspfunc(PFX, IDX, NUM) \
        c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_altivec; \
        c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_altivec; \
        c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_altivec; \
        c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_altivec; \
        c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_altivec; \
        c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_altivec; \
        c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_altivec; \
        c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_altivec; \
        c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_altivec; \
        c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_altivec; \
        c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_altivec; \
        c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_altivec; \
        c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_altivec; \
        c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_altivec; \
        c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_altivec; \
        c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_altivec

        dspfunc(put_h264_qpel, 0, 16);
        dspfunc(avg_h264_qpel, 0, 16);
#undef dspfunc

    } else
#endif /* HAVE_ALTIVEC */
    {
        // Non-AltiVec PPC optimisations
        // ... pending ...
    }
}