You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

319 lines
13KB

  1. /*
  2. * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org>
  3. *
  4. * This file is part of Libav.
  5. *
  6. * Libav is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * Libav is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with Libav; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. #include "config.h"
  21. #include "libavutil/attributes.h"
  22. #include "libavutil/cpu.h"
  23. #include "libavutil/intreadwrite.h"
  24. #include "libavutil/ppc/types_altivec.h"
  25. #include "libavutil/ppc/util_altivec.h"
  26. #include "libavcodec/h264qpel.h"
  27. #include "dsputil_altivec.h"
  28. #if HAVE_ALTIVEC
  29. #define PUT_OP_U8_ALTIVEC(d, s, dst) d = s
  30. #define AVG_OP_U8_ALTIVEC(d, s, dst) d = vec_avg(dst, s)
  31. #define OP_U8_ALTIVEC PUT_OP_U8_ALTIVEC
  32. #define PREFIX_h264_qpel16_h_lowpass_altivec put_h264_qpel16_h_lowpass_altivec
  33. #define PREFIX_h264_qpel16_h_lowpass_num altivec_put_h264_qpel16_h_lowpass_num
  34. #define PREFIX_h264_qpel16_v_lowpass_altivec put_h264_qpel16_v_lowpass_altivec
  35. #define PREFIX_h264_qpel16_v_lowpass_num altivec_put_h264_qpel16_v_lowpass_num
  36. #define PREFIX_h264_qpel16_hv_lowpass_altivec put_h264_qpel16_hv_lowpass_altivec
  37. #define PREFIX_h264_qpel16_hv_lowpass_num altivec_put_h264_qpel16_hv_lowpass_num
  38. #include "h264qpel_template.c"
  39. #undef OP_U8_ALTIVEC
  40. #undef PREFIX_h264_qpel16_h_lowpass_altivec
  41. #undef PREFIX_h264_qpel16_h_lowpass_num
  42. #undef PREFIX_h264_qpel16_v_lowpass_altivec
  43. #undef PREFIX_h264_qpel16_v_lowpass_num
  44. #undef PREFIX_h264_qpel16_hv_lowpass_altivec
  45. #undef PREFIX_h264_qpel16_hv_lowpass_num
  46. #define OP_U8_ALTIVEC AVG_OP_U8_ALTIVEC
  47. #define PREFIX_h264_qpel16_h_lowpass_altivec avg_h264_qpel16_h_lowpass_altivec
  48. #define PREFIX_h264_qpel16_h_lowpass_num altivec_avg_h264_qpel16_h_lowpass_num
  49. #define PREFIX_h264_qpel16_v_lowpass_altivec avg_h264_qpel16_v_lowpass_altivec
  50. #define PREFIX_h264_qpel16_v_lowpass_num altivec_avg_h264_qpel16_v_lowpass_num
  51. #define PREFIX_h264_qpel16_hv_lowpass_altivec avg_h264_qpel16_hv_lowpass_altivec
  52. #define PREFIX_h264_qpel16_hv_lowpass_num altivec_avg_h264_qpel16_hv_lowpass_num
  53. #include "h264qpel_template.c"
  54. #undef OP_U8_ALTIVEC
  55. #undef PREFIX_h264_qpel16_h_lowpass_altivec
  56. #undef PREFIX_h264_qpel16_h_lowpass_num
  57. #undef PREFIX_h264_qpel16_v_lowpass_altivec
  58. #undef PREFIX_h264_qpel16_v_lowpass_num
  59. #undef PREFIX_h264_qpel16_hv_lowpass_altivec
  60. #undef PREFIX_h264_qpel16_hv_lowpass_num
/*
 * H264_MC(OPNAME, SIZE, CODETYPE) generates the 16 quarter-pel motion
 * compensation entry points OPNAME##h264_qpel##SIZE##_mcXY_##CODETYPE,
 * where X is the horizontal and Y the vertical quarter-pel offset (0-3,
 * only even/odd combinations H.264 defines).  It is expanded twice below,
 * once with OPNAME=put_ and once with OPNAME=avg_.
 *
 * Building blocks (defined via h264qpel_template.c / dsputil_altivec.h):
 *  - put_h264_qpel##SIZE##_h_lowpass_*  : 6-tap horizontal half-pel filter
 *  - put_h264_qpel##SIZE##_v_lowpass_*  : 6-tap vertical half-pel filter
 *  - *_hv_lowpass_*                     : combined filter through an int16_t
 *                                         scratch buffer (SIZE*(SIZE+8))
 *  - *pixels##SIZE##_l2_*               : average of two planes into dst
 *
 * Quarter-pel positions are produced by averaging a half-pel filtered
 * plane with either the source (shifted as needed) or a second filtered
 * plane, per the H.264 interpolation rules.  Intermediate half-pel planes
 * are SIZE*SIZE byte buffers with a 16-byte-aligned, SIZE-byte stride.
 */
#define H264_MC(OPNAME, SIZE, CODETYPE) \
static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## CODETYPE (uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
ff_ ## OPNAME ## pixels ## SIZE ## _ ## CODETYPE(dst, src, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{ \
DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\
put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\
put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+1, half, stride, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\
put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\
put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+stride, half, stride, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(dst, tmp, src, stride, SIZE, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\
DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\
DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\
DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\
DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\
}\
/*
 * Write the rounded byte-wise average of two 16xh pixel planes to dst.
 * src1 has stride src_stride1 and may be unaligned; src2 is read with a
 * fixed 16-byte stride (it is the 16-byte-aligned half-pel scratch buffer
 * produced by the lowpass filters).  dst may also be unaligned: each row
 * is stored with the classic AltiVec read-modify-write sequence so the
 * bytes of the two aligned quadwords that fall outside the 16-byte row
 * are preserved.
 */
static inline void put_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1,
const uint8_t * src2, int dst_stride,
int src_stride1, int h)
{
int i;
vec_u8 a, b, d, tmp1, tmp2, mask, mask_, edges, align;
/* src2 rows are 16 bytes apart, so one permute mask covers all rows. */
mask_ = vec_lvsl(0, src2);
for (i = 0; i < h; i++) {
/* Unaligned load of row i of src1: two aligned loads + permute. */
tmp1 = vec_ld(i * src_stride1, src1);
mask = vec_lvsl(i * src_stride1, src1);
tmp2 = vec_ld(i * src_stride1 + 15, src1);
a = vec_perm(tmp1, tmp2, mask);
/* Unaligned load of row i of src2 (stride hard-wired to 16). */
tmp1 = vec_ld(i * 16, src2);
tmp2 = vec_ld(i * 16 + 15, src2);
b = vec_perm(tmp1, tmp2, mask_);
/* Load the two aligned quadwords covering the dst row so the bytes
 * outside [dst, dst+16) can be written back unchanged. */
tmp1 = vec_ld(0, dst);
mask = vec_lvsl(0, dst);
tmp2 = vec_ld(15, dst);
/* vec_avg rounds up, matching the (a + b + 1) >> 1 scalar average. */
d = vec_avg(a, b);
edges = vec_perm(tmp2, tmp1, mask);
align = vec_lvsr(0, dst);
/* Merge result with the preserved edge bytes and store both
 * aligned quadwords (vec_st truncates the address to 16 bytes). */
tmp2 = vec_perm(d, edges, align);
tmp1 = vec_perm(edges, d, align);
vec_st(tmp2, 15, dst);
vec_st(tmp1, 0 , dst);
dst += dst_stride;
}
}
/*
 * Like put_pixels16_l2_altivec(), but instead of overwriting dst the
 * result is averaged once more with the pixels already in dst:
 *   dst = avg(dst, avg(src1, src2))
 * Same operand conventions: src1 has stride src_stride1 and may be
 * unaligned, src2 is read with a fixed 16-byte stride, and dst rows are
 * stored with the unaligned read-modify-write sequence that preserves
 * bytes outside the 16-byte row.
 */
static inline void avg_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1,
const uint8_t * src2, int dst_stride,
int src_stride1, int h)
{
int i;
vec_u8 a, b, d, tmp1, tmp2, mask, mask_, edges, align;
/* src2 rows are 16 bytes apart, so one permute mask covers all rows. */
mask_ = vec_lvsl(0, src2);
for (i = 0; i < h; i++) {
/* Unaligned load of row i of src1: two aligned loads + permute. */
tmp1 = vec_ld(i * src_stride1, src1);
mask = vec_lvsl(i * src_stride1, src1);
tmp2 = vec_ld(i * src_stride1 + 15, src1);
a = vec_perm(tmp1, tmp2, mask);
/* Unaligned load of row i of src2 (stride hard-wired to 16). */
tmp1 = vec_ld(i * 16, src2);
tmp2 = vec_ld(i * 16 + 15, src2);
b = vec_perm(tmp1, tmp2, mask_);
/* Load the two aligned quadwords covering the dst row: the permuted
 * combination is both the "old dst" operand of the outer average and
 * the source of the edge bytes preserved on store. */
tmp1 = vec_ld(0, dst);
mask = vec_lvsl(0, dst);
tmp2 = vec_ld(15, dst);
d = vec_avg(vec_perm(tmp1, tmp2, mask), vec_avg(a, b));
edges = vec_perm(tmp2, tmp1, mask);
align = vec_lvsr(0, dst);
/* Merge result with the preserved edge bytes and store both
 * aligned quadwords (vec_st truncates the address to 16 bytes). */
tmp2 = vec_perm(d, edges, align);
tmp1 = vec_perm(edges, d, align);
vec_st(tmp2, 15, dst);
vec_st(tmp1, 0 , dst);
dst += dst_stride;
}
}
/* Alternative generic implementations, kept for reference; the AltiVec
 * versions above are used instead because they are faster:
#define put_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) put_pixels16_l2(d,s1,s2,ds,s1s,16,h)
#define avg_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) avg_pixels16_l2(d,s1,s2,ds,s1s,16,h)
*/
/* Instantiate the 16 put_ and 16 avg_ qpel16 functions. */
H264_MC(put_, 16, altivec)
H264_MC(avg_, 16, altivec)
  248. #endif /* HAVE_ALTIVEC */
/**
 * Install the AltiVec H.264 quarter-pel functions into @p c.
 *
 * No-op unless the build has AltiVec support, the CPU reports the
 * AltiVec flag at runtime, and @p bit_depth is 8 (only 8-bit functions
 * are implemented here).  Only the 16x16 table (index 0) is filled;
 * smaller block sizes keep whatever was installed before this call.
 */
av_cold void ff_h264qpel_init_ppc(H264QpelContext *c, int bit_depth)
{
#if HAVE_ALTIVEC
    const int high_bit_depth = bit_depth > 8;
    if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC))
        return;
    if (!high_bit_depth) {
/* Table slot layout: index = X + 4*Y for the _mcXY_ functions, i.e.
 * X is the horizontal and Y the vertical quarter-pel offset. */
#define dspfunc(PFX, IDX, NUM) \
    c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_altivec; \
    c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_altivec; \
    c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_altivec; \
    c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_altivec; \
    c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_altivec; \
    c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_altivec; \
    c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_altivec; \
    c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_altivec; \
    c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_altivec; \
    c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_altivec; \
    c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_altivec; \
    c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_altivec; \
    c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_altivec; \
    c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_altivec; \
    c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_altivec; \
    c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_altivec
        dspfunc(put_h264_qpel, 0, 16);
        dspfunc(avg_h264_qpel, 0, 16);
#undef dspfunc
    }
#endif /* HAVE_ALTIVEC */
}