You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

302 lines
13KB

  1. /*
  2. * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org>
  3. *
  4. * This file is part of FFmpeg.
  5. *
  6. * FFmpeg is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * FFmpeg is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with FFmpeg; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
#include "config.h"
#include "libavutil/attributes.h"
#include "libavcodec/h264qpel.h"

#if HAVE_ALTIVEC
#include "libavutil/cpu.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/ppc/types_altivec.h"
#include "libavutil/ppc/util_altivec.h"
#include "dsputil_altivec.h"

/*
 * Store macros used by h264_qpel_template.c:
 * PUT writes the filtered value straight to the destination, AVG combines
 * it with the value already in the destination via vec_avg().
 */
#define PUT_OP_U8_ALTIVEC(d, s, dst) d = s
#define AVG_OP_U8_ALTIVEC(d, s, dst) d = vec_avg(dst, s)

/*
 * First instantiation of the template: "put_" variants (plain store).
 * The PREFIX_* macros select the names the template's functions get.
 */
#define OP_U8_ALTIVEC PUT_OP_U8_ALTIVEC
#define PREFIX_h264_qpel16_h_lowpass_altivec put_h264_qpel16_h_lowpass_altivec
#define PREFIX_h264_qpel16_h_lowpass_num altivec_put_h264_qpel16_h_lowpass_num
#define PREFIX_h264_qpel16_v_lowpass_altivec put_h264_qpel16_v_lowpass_altivec
#define PREFIX_h264_qpel16_v_lowpass_num altivec_put_h264_qpel16_v_lowpass_num
#define PREFIX_h264_qpel16_hv_lowpass_altivec put_h264_qpel16_hv_lowpass_altivec
#define PREFIX_h264_qpel16_hv_lowpass_num altivec_put_h264_qpel16_hv_lowpass_num
#include "h264_qpel_template.c"
#undef OP_U8_ALTIVEC
#undef PREFIX_h264_qpel16_h_lowpass_altivec
#undef PREFIX_h264_qpel16_h_lowpass_num
#undef PREFIX_h264_qpel16_v_lowpass_altivec
#undef PREFIX_h264_qpel16_v_lowpass_num
#undef PREFIX_h264_qpel16_hv_lowpass_altivec
#undef PREFIX_h264_qpel16_hv_lowpass_num

/* Second instantiation: "avg_" variants (average with existing pixels). */
#define OP_U8_ALTIVEC AVG_OP_U8_ALTIVEC
#define PREFIX_h264_qpel16_h_lowpass_altivec avg_h264_qpel16_h_lowpass_altivec
#define PREFIX_h264_qpel16_h_lowpass_num altivec_avg_h264_qpel16_h_lowpass_num
#define PREFIX_h264_qpel16_v_lowpass_altivec avg_h264_qpel16_v_lowpass_altivec
#define PREFIX_h264_qpel16_v_lowpass_num altivec_avg_h264_qpel16_v_lowpass_num
#define PREFIX_h264_qpel16_hv_lowpass_altivec avg_h264_qpel16_hv_lowpass_altivec
#define PREFIX_h264_qpel16_hv_lowpass_num altivec_avg_h264_qpel16_hv_lowpass_num
#include "h264_qpel_template.c"
#undef OP_U8_ALTIVEC
#undef PREFIX_h264_qpel16_h_lowpass_altivec
#undef PREFIX_h264_qpel16_h_lowpass_num
#undef PREFIX_h264_qpel16_v_lowpass_altivec
#undef PREFIX_h264_qpel16_v_lowpass_num
#undef PREFIX_h264_qpel16_hv_lowpass_altivec
#undef PREFIX_h264_qpel16_hv_lowpass_num
/*
 * H264_MC() expands into the 16 quarter-pel motion-compensation functions
 * (one per fractional position mc00 ... mc33, named mc<x><y>) for one block
 * size and one store op (OPNAME = put_/avg_).  Half-pel intermediates come
 * from the h/v/hv lowpass helpers instantiated above (h264_qpel_template.c);
 * quarter-pel positions are built by averaging two of those intermediates
 * with OPNAME##pixels##SIZE##_l2_.
 * Note: the macro's final line intentionally has no trailing backslash so
 * the macro ends there regardless of what follows.
 */
#define H264_MC(OPNAME, SIZE, CODETYPE) \
/* mc00: integer-pel position, plain block copy. */ \
static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## CODETYPE (uint8_t *dst, uint8_t *src, int stride){\
    ff_ ## OPNAME ## pixels ## SIZE ## _ ## CODETYPE(dst, src, stride, SIZE);\
}\
\
/* mc10/mc30: horizontal quarter-pel = avg(src or src+1, h lowpass). */ \
static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\
}\
\
/* mc20: horizontal half-pel, h lowpass only. */ \
static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+1, half, stride, stride, SIZE);\
}\
\
/* mc01/mc03: vertical quarter-pel = avg(src or src+stride, v lowpass). */ \
static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\
}\
\
/* mc02: vertical half-pel, v lowpass only. */ \
static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+stride, half, stride, stride, SIZE);\
}\
\
/* mc11/mc31/mc13/mc33: diagonal quarter-pel = avg(h lowpass, v lowpass), */ \
/* with the h filter applied at row y and the v filter at column x.       */ \
static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
    DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
    DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
    DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
    DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
}\
\
/* mc22: centre half-pel, full 2-D (hv) lowpass; tmp holds the 16-bit */ \
/* intermediate rows (SIZE+8 of them for the filter's vertical reach). */ \
static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(dst, tmp, src, stride, SIZE, stride);\
}\
\
/* mc21/mc23: avg(h lowpass at row y, hv lowpass). */ \
static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
    DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\
    DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
    DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\
    DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\
}\
\
/* mc12/mc32: avg(v lowpass at column x, hv lowpass). */ \
static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
    DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\
    DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
    DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\
    DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\
}
/*
 * dst[x] = rounded_avg(src1[x], src2[x]) over h rows of 16 bytes.
 * src1 may be arbitrarily aligned with row stride src_stride1; src2 is read
 * as consecutive 16-byte rows (implicit stride 16 — this matches the packed
 * "half" buffers produced by H264_MC); dst may also be unaligned.
 * All unaligned accesses use the classic AltiVec lvsl/lvsr + vec_perm idiom.
 */
static inline void put_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1,
                                            const uint8_t * src2, int dst_stride,
                                            int src_stride1, int h)
{
    int i;
    vec_u8 a, b, d, tmp1, tmp2, mask, mask_, edges, align;

    /* All src2 rows share the same alignment (stride 16), so its permute
     * vector can be computed once, outside the loop. */
    mask_ = vec_lvsl(0, src2);

    for (i = 0; i < h; i++) {
        /* Unaligned load of one 16-byte row from src1. */
        tmp1 = vec_ld(i * src_stride1, src1);
        mask = vec_lvsl(i * src_stride1, src1);
        tmp2 = vec_ld(i * src_stride1 + 15, src1);
        a = vec_perm(tmp1, tmp2, mask);
        /* Unaligned load of one 16-byte row from src2. */
        tmp1 = vec_ld(i * 16, src2);
        tmp2 = vec_ld(i * 16 + 15, src2);
        b = vec_perm(tmp1, tmp2, mask_);
        /* Load the two aligned vectors covering dst so the bytes outside
         * the 16 we write can be preserved across the unaligned store. */
        tmp1 = vec_ld(0, dst);
        mask = vec_lvsl(0, dst);
        tmp2 = vec_ld(15, dst);
        /* Rounded byte-wise average of the two sources. */
        d = vec_avg(a, b);
        edges = vec_perm(tmp2, tmp1, mask);
        align = vec_lvsr(0, dst);
        /* Merge the result with the preserved edge bytes, then store both
         * aligned vectors that span the (possibly) misaligned destination. */
        tmp2 = vec_perm(d, edges, align);
        tmp1 = vec_perm(edges, d, align);
        vec_st(tmp2, 15, dst);
        vec_st(tmp1, 0 , dst);
        dst += dst_stride;
    }
}
/*
 * Same access pattern as put_pixels16_l2_altivec(), but the combined
 * prediction is additionally averaged with the bytes already in dst:
 * dst[x] = rounded_avg(dst[x], rounded_avg(src1[x], src2[x])).
 * Used for the avg_ (bi-prediction style) store op.
 */
static inline void avg_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1,
                                            const uint8_t * src2, int dst_stride,
                                            int src_stride1, int h)
{
    int i;
    vec_u8 a, b, d, tmp1, tmp2, mask, mask_, edges, align;

    /* src2 rows are packed with stride 16, so one permute vector suffices. */
    mask_ = vec_lvsl(0, src2);

    for (i = 0; i < h; i++) {
        /* Unaligned load of one 16-byte row from src1. */
        tmp1 = vec_ld(i * src_stride1, src1);
        mask = vec_lvsl(i * src_stride1, src1);
        tmp2 = vec_ld(i * src_stride1 + 15, src1);
        a = vec_perm(tmp1, tmp2, mask);
        /* Unaligned load of one 16-byte row from src2. */
        tmp1 = vec_ld(i * 16, src2);
        tmp2 = vec_ld(i * 16 + 15, src2);
        b = vec_perm(tmp1, tmp2, mask_);
        /* Load the two aligned vectors covering dst: needed both for the
         * extra average against dst and to preserve the edge bytes. */
        tmp1 = vec_ld(0, dst);
        mask = vec_lvsl(0, dst);
        tmp2 = vec_ld(15, dst);
        /* avg(current dst contents, avg(src1, src2)) — the only line that
         * differs from the put_ variant. */
        d = vec_avg(vec_perm(tmp1, tmp2, mask), vec_avg(a, b));
        edges = vec_perm(tmp2, tmp1, mask);
        align = vec_lvsr(0, dst);
        /* Merge with preserved edge bytes and store both aligned halves. */
        tmp2 = vec_perm(d, edges, align);
        tmp1 = vec_perm(edges, d, align);
        vec_st(tmp2, 15, dst);
        vec_st(tmp1, 0 , dst);
        dst += dst_stride;
    }
}
  226. /* Implemented but could be faster
  227. #define put_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) put_pixels16_l2(d,s1,s2,ds,s1s,16,h)
  228. #define avg_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) avg_pixels16_l2(d,s1,s2,ds,s1s,16,h)
  229. */
/* Instantiate the full 16x16 mc00..mc33 function sets for both store ops. */
H264_MC(put_, 16, altivec)
H264_MC(avg_, 16, altivec)
  232. #endif /* HAVE_ALTIVEC */
  233. av_cold void ff_h264qpel_init_ppc(H264QpelContext *c, int bit_depth)
  234. {
  235. #if HAVE_ALTIVEC
  236. const int high_bit_depth = bit_depth > 8;
  237. if (av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) {
  238. if (!high_bit_depth) {
  239. #define dspfunc(PFX, IDX, NUM) \
  240. c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_altivec; \
  241. c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_altivec; \
  242. c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_altivec; \
  243. c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_altivec; \
  244. c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_altivec; \
  245. c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_altivec; \
  246. c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_altivec; \
  247. c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_altivec; \
  248. c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_altivec; \
  249. c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_altivec; \
  250. c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_altivec; \
  251. c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_altivec; \
  252. c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_altivec; \
  253. c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_altivec; \
  254. c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_altivec; \
  255. c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_altivec
  256. dspfunc(put_h264_qpel, 0, 16);
  257. dspfunc(avg_h264_qpel, 0, 16);
  258. #undef dspfunc
  259. }
  260. }
  261. #endif /* HAVE_ALTIVEC */
  262. }