You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

267 lines
14KB

  1. /*
  2. * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org>
  3. *
  4. * This library is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU Lesser General Public
  6. * License as published by the Free Software Foundation; either
  7. * version 2 of the License, or (at your option) any later version.
  8. *
  9. * This library is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. * Lesser General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU Lesser General Public
  15. * License along with this library; if not, write to the Free Software
  16. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  17. */
  18. #include "../dsputil.h"
  19. #include "gcc_fixes.h"
  20. #include "dsputil_altivec.h"
  /* Store operators selected through OP_U8_ALTIVEC before each inclusion of
   * dsputil_h264_template_altivec.c further down in this file.
   *   PUT: assigns s to d; the dst argument is ignored.
   *   AVG: assigns vec_avg(dst, s) to d.
   * Both expand to a bare assignment expression without a trailing semicolon. */
  21. #define PUT_OP_U8_ALTIVEC(d, s, dst) d = s
  22. #define AVG_OP_U8_ALTIVEC(d, s, dst) d = vec_avg(dst, s)
  /* First inclusion of the shared template: OP_U8_ALTIVEC is the plain store
   * (PUT_OP_U8_ALTIVEC) and every PREFIX_* macro is set to a put_-flavoured
   * identifier.
   * NOTE(review): the template is not visible here; presumably it uses the
   * PREFIX_* macros as the names of the functions and counters it defines -
   * confirm against dsputil_h264_template_altivec.c.
   * All of these macros are #undef'd right after the include so that they can
   * be defined again with different values. */
  23. #define OP_U8_ALTIVEC PUT_OP_U8_ALTIVEC
  24. #define PREFIX_h264_chroma_mc8_altivec put_h264_chroma_mc8_altivec
  25. #define PREFIX_h264_chroma_mc8_num altivec_put_h264_chroma_mc8_num
  26. #define PREFIX_h264_qpel16_h_lowpass_altivec put_h264_qpel16_h_lowpass_altivec
  27. #define PREFIX_h264_qpel16_h_lowpass_num altivec_put_h264_qpel16_h_lowpass_num
  28. #define PREFIX_h264_qpel16_v_lowpass_altivec put_h264_qpel16_v_lowpass_altivec
  29. #define PREFIX_h264_qpel16_v_lowpass_num altivec_put_h264_qpel16_v_lowpass_num
  30. #define PREFIX_h264_qpel16_hv_lowpass_altivec put_h264_qpel16_hv_lowpass_altivec
  31. #define PREFIX_h264_qpel16_hv_lowpass_num altivec_put_h264_qpel16_hv_lowpass_num
  32. #include "dsputil_h264_template_altivec.c"
  33. #undef OP_U8_ALTIVEC
  34. #undef PREFIX_h264_chroma_mc8_altivec
  35. #undef PREFIX_h264_chroma_mc8_num
  36. #undef PREFIX_h264_qpel16_h_lowpass_altivec
  37. #undef PREFIX_h264_qpel16_h_lowpass_num
  38. #undef PREFIX_h264_qpel16_v_lowpass_altivec
  39. #undef PREFIX_h264_qpel16_v_lowpass_num
  40. #undef PREFIX_h264_qpel16_hv_lowpass_altivec
  41. #undef PREFIX_h264_qpel16_hv_lowpass_num
  /* Inclusion of the shared template with OP_U8_ALTIVEC set to the averaging
   * store (AVG_OP_U8_ALTIVEC) and every PREFIX_* macro set to an avg_-flavoured
   * identifier.
   * NOTE(review): presumably the template names its functions and counters via
   * the PREFIX_* macros - confirm against dsputil_h264_template_altivec.c.
   * The macros are #undef'd again after the include so they do not leak into
   * the rest of the file. */
  42. #define OP_U8_ALTIVEC AVG_OP_U8_ALTIVEC
  43. #define PREFIX_h264_chroma_mc8_altivec avg_h264_chroma_mc8_altivec
  44. #define PREFIX_h264_chroma_mc8_num altivec_avg_h264_chroma_mc8_num
  45. #define PREFIX_h264_qpel16_h_lowpass_altivec avg_h264_qpel16_h_lowpass_altivec
  46. #define PREFIX_h264_qpel16_h_lowpass_num altivec_avg_h264_qpel16_h_lowpass_num
  47. #define PREFIX_h264_qpel16_v_lowpass_altivec avg_h264_qpel16_v_lowpass_altivec
  48. #define PREFIX_h264_qpel16_v_lowpass_num altivec_avg_h264_qpel16_v_lowpass_num
  49. #define PREFIX_h264_qpel16_hv_lowpass_altivec avg_h264_qpel16_hv_lowpass_altivec
  50. #define PREFIX_h264_qpel16_hv_lowpass_num altivec_avg_h264_qpel16_hv_lowpass_num
  51. #include "dsputil_h264_template_altivec.c"
  52. #undef OP_U8_ALTIVEC
  53. #undef PREFIX_h264_chroma_mc8_altivec
  54. #undef PREFIX_h264_chroma_mc8_num
  55. #undef PREFIX_h264_qpel16_h_lowpass_altivec
  56. #undef PREFIX_h264_qpel16_h_lowpass_num
  57. #undef PREFIX_h264_qpel16_v_lowpass_altivec
  58. #undef PREFIX_h264_qpel16_v_lowpass_num
  59. #undef PREFIX_h264_qpel16_hv_lowpass_altivec
  60. #undef PREFIX_h264_qpel16_hv_lowpass_num
  /*
   * H264_MC(OPNAME, SIZE, CODETYPE)
   *
   * Expands to sixteen static functions named
   *     OPNAME h264_qpel SIZE _mcXY_ CODETYPE      (X and Y each in 0..3)
   * all with the signature (uint8_t *dst, uint8_t *src, int stride).
   *
   * What each generated function does, as far as this macro shows:
   *   mc00              calls OPNAME pixels SIZE _ CODETYPE (dst, src, stride, SIZE).
   *   mc20, mc02        call the OPNAME h (resp. v) lowpass helper straight
   *                     into dst.
   *   mc22              calls the OPNAME hv lowpass helper straight into dst,
   *                     handing it an int16_t scratch area.
   *   mc10, mc30        put_ h lowpass into a SIZE*SIZE byte plane, then
   *                     OPNAME pixels SIZE _l2_ of that plane with src (mc10)
   *                     or src+1 (mc30).
   *   mc01, mc03        same with the v lowpass and src (mc01) or src+stride
   *                     (mc03).
   *   mc11/31/13/33     put_ h lowpass (from src or src+stride) and put_ v
   *                     lowpass (from src or src+1) into two planes, then
   *                     OPNAME pixels SIZE _l2_ of the two planes.
   *   mc21/23/12/32     put_ h or v lowpass plus put_ hv lowpass into two
   *                     planes, then OPNAME pixels SIZE _l2_ of the two planes.
   *
   * Intermediate planes are always produced with the put_ helpers; OPNAME is
   * only applied to the final write into dst.
   *
   * Scratch layout: every function that needs scratch declares one uint64_t
   * array tagged __align16 (presumably 16-byte alignment; the macro is defined
   * elsewhere) and carves it up by pointer arithmetic. Byte planes sit at byte
   * offsets 0 and SIZE*SIZE. In mc21/mc23/mc12/mc32 the int16_t pointer tmp is
   * advanced by SIZE*SIZE elements, i.e. 2*SIZE*SIZE bytes, so it starts right
   * after the two byte planes.
   *
   * The expansion requires these names to be declared at the point of use:
   * put_ and OPNAME h264_qpel SIZE _{h,v,hv}_lowpass_ CODETYPE,
   * OPNAME pixels SIZE _ CODETYPE and OPNAME pixels SIZE _l2_ CODETYPE.
   */
  61. #define H264_MC(OPNAME, SIZE, CODETYPE) \
  62. static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## CODETYPE (uint8_t *dst, uint8_t *src, int stride){\
  63. OPNAME ## pixels ## SIZE ## _ ## CODETYPE(dst, src, stride, SIZE);\
  64. }\
  65. \
  66. static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){ \
  67. uint64_t temp[SIZE*SIZE/8] __align16;\
  68. uint8_t * const half= (uint8_t*)temp;\
  69. put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
  70. OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\
  71. }\
  72. \
  73. static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
  74. OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(dst, src, stride, stride);\
  75. }\
  76. \
  77. static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
  78. uint64_t temp[SIZE*SIZE/8] __align16;\
  79. uint8_t * const half= (uint8_t*)temp;\
  80. put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
  81. OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+1, half, stride, stride, SIZE);\
  82. }\
  83. \
  84. static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
  85. uint64_t temp[SIZE*SIZE/8] __align16;\
  86. uint8_t * const half= (uint8_t*)temp;\
  87. put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
  88. OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\
  89. }\
  90. \
  91. static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
  92. OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(dst, src, stride, stride);\
  93. }\
  94. \
  95. static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
  96. uint64_t temp[SIZE*SIZE/8] __align16;\
  97. uint8_t * const half= (uint8_t*)temp;\
  98. put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
  99. OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+stride, half, stride, stride, SIZE);\
  100. }\
  101. \
  102. static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
  103. uint64_t temp[SIZE*SIZE/4] __align16;\
  104. uint8_t * const halfH= (uint8_t*)temp;\
  105. uint8_t * const halfV= ((uint8_t*)temp) + SIZE*SIZE;\
  106. put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
  107. put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
  108. OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
  109. }\
  110. \
  111. static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
  112. uint64_t temp[SIZE*SIZE/4] __align16;\
  113. uint8_t * const halfH= (uint8_t*)temp;\
  114. uint8_t * const halfV= ((uint8_t*)temp) + SIZE*SIZE;\
  115. put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
  116. put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
  117. OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
  118. }\
  119. \
  120. static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
  121. uint64_t temp[SIZE*SIZE/4] __align16;\
  122. uint8_t * const halfH= (uint8_t*)temp;\
  123. uint8_t * const halfV= ((uint8_t*)temp) + SIZE*SIZE;\
  124. put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
  125. put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
  126. OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
  127. }\
  128. \
  129. static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
  130. uint64_t temp[SIZE*SIZE/4] __align16;\
  131. uint8_t * const halfH= (uint8_t*)temp;\
  132. uint8_t * const halfV= ((uint8_t*)temp) + SIZE*SIZE;\
  133. put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
  134. put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
  135. OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
  136. }\
  137. \
  138. static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
  139. uint64_t temp[SIZE*(SIZE+8)/4] __align16;\
  140. int16_t * const tmp= (int16_t*)temp;\
  141. OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(dst, tmp, src, stride, SIZE, stride);\
  142. }\
  143. \
  144. static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
  145. uint64_t temp[SIZE*(SIZE+8)/4 + SIZE*SIZE/4] __align16;\
  146. uint8_t * const halfH= (uint8_t*)temp;\
  147. uint8_t * const halfHV= ((uint8_t*)temp) + SIZE*SIZE;\
  148. int16_t * const tmp= ((int16_t*)temp) + SIZE*SIZE;\
  149. put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
  150. put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
  151. OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\
  152. }\
  153. \
  154. static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
  155. uint64_t temp[SIZE*(SIZE+8)/4 + SIZE*SIZE/4] __align16;\
  156. uint8_t * const halfH= (uint8_t*)temp;\
  157. uint8_t * const halfHV= ((uint8_t*)temp) + SIZE*SIZE;\
  158. int16_t * const tmp= ((int16_t*)temp) + SIZE*SIZE;\
  159. put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
  160. put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
  161. OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\
  162. }\
  163. \
  164. static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
  165. uint64_t temp[SIZE*(SIZE+8)/4 + SIZE*SIZE/4] __align16;\
  166. uint8_t * const halfV= (uint8_t*)temp;\
  167. uint8_t * const halfHV= ((uint8_t*)temp) + SIZE*SIZE;\
  168. int16_t * const tmp= ((int16_t*)temp) + SIZE*SIZE;\
  169. put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
  170. put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
  171. OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\
  172. }\
  173. \
  174. static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
  175. uint64_t temp[SIZE*(SIZE+8)/4 + SIZE*SIZE/4] __align16;\
  176. uint8_t * const halfV= (uint8_t*)temp;\
  177. uint8_t * const halfHV= ((uint8_t*)temp) + SIZE*SIZE;\
  178. int16_t * const tmp= ((int16_t*)temp) + SIZE*SIZE;\
  179. put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
  180. put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
  181. OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\
  182. }\
  183. /* from dsputil.c */
  184. static inline void put_pixels8_l2(uint8_t * dst, const uint8_t * src1, const uint8_t * src2, int dst_stride, int src_stride1, int src_stride2, int h) {
  185. int i;
  186. for (i = 0; i < h; i++) {
  187. uint32_t a, b;
  188. a = (((const struct unaligned_32 *) (&src1[i * src_stride1]))->l);
  189. b = (((const struct unaligned_32 *) (&src2[i * src_stride2]))->l);
  190. *((uint32_t *) & dst[i * dst_stride]) = rnd_avg32(a, b);
  191. a = (((const struct unaligned_32 *) (&src1[i * src_stride1 + 4]))->l);
  192. b = (((const struct unaligned_32 *) (&src2[i * src_stride2 + 4]))->l);
  193. *((uint32_t *) & dst[i * dst_stride + 4]) = rnd_avg32(a, b);
  194. }
  195. } static inline void avg_pixels8_l2(uint8_t * dst, const uint8_t * src1, const uint8_t * src2, int dst_stride, int src_stride1, int src_stride2, int h) {
  196. int i;
  197. for (i = 0; i < h; i++) {
  198. uint32_t a, b;
  199. a = (((const struct unaligned_32 *) (&src1[i * src_stride1]))->l);
  200. b = (((const struct unaligned_32 *) (&src2[i * src_stride2]))->l);
  201. *((uint32_t *) & dst[i * dst_stride]) = rnd_avg32(*((uint32_t *) & dst[i * dst_stride]), rnd_avg32(a, b));
  202. a = (((const struct unaligned_32 *) (&src1[i * src_stride1 + 4]))->l);
  203. b = (((const struct unaligned_32 *) (&src2[i * src_stride2 + 4]))->l);
  204. *((uint32_t *) & dst[i * dst_stride + 4]) = rnd_avg32(*((uint32_t *) & dst[i * dst_stride + 4]), rnd_avg32(a, b));
  205. }
  206. } static inline void put_pixels16_l2(uint8_t * dst, const uint8_t * src1, const uint8_t * src2, int dst_stride, int src_stride1, int src_stride2, int h) {
  207. put_pixels8_l2(dst, src1, src2, dst_stride, src_stride1, src_stride2, h);
  208. put_pixels8_l2(dst + 8, src1 + 8, src2 + 8, dst_stride, src_stride1, src_stride2, h);
  209. } static inline void avg_pixels16_l2(uint8_t * dst, const uint8_t * src1, const uint8_t * src2, int dst_stride, int src_stride1, int src_stride2, int h) {
  210. avg_pixels8_l2(dst, src1, src2, dst_stride, src_stride1, src_stride2, h);
  211. avg_pixels8_l2(dst + 8, src1 + 8, src2 + 8, dst_stride, src_stride1, src_stride2, h);
  212. }
  213. /* UNIMPLEMENTED YET !!
   * There is no AltiVec version of the two-source averaging helpers yet, so
   * the *_l2_altivec names used by H264_MC are mapped onto the scalar
   * put_pixels16_l2 and avg_pixels16_l2 functions of this file. The macros take
   * six arguments (d, s1, s2, ds, s1s, h) and pass a fixed 16 as src_stride2,
   * i.e. rows of the second source are always read 16 bytes apart. */
  214. #define put_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) put_pixels16_l2(d,s1,s2,ds,s1s,16,h)
  215. #define avg_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) avg_pixels16_l2(d,s1,s2,ds,s1s,16,h)
  /* Generate the put_ and avg_ h264_qpel16_mcXY_altivec functions (SIZE = 16). */
  216. H264_MC(put_, 16, altivec)
  217. H264_MC(avg_, 16, altivec)
  218. void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) {
  219. #ifdef HAVE_ALTIVEC
  220. if (has_altivec()) {
  221. c->put_h264_chroma_pixels_tab[0] = put_h264_chroma_mc8_altivec;
  222. c->avg_h264_chroma_pixels_tab[0] = avg_h264_chroma_mc8_altivec;
  223. #define dspfunc(PFX, IDX, NUM) \
  224. c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_altivec; \
  225. c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_altivec; \
  226. c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_altivec; \
  227. c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_altivec; \
  228. c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_altivec; \
  229. c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_altivec; \
  230. c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_altivec; \
  231. c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_altivec; \
  232. c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_altivec; \
  233. c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_altivec; \
  234. c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_altivec; \
  235. c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_altivec; \
  236. c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_altivec; \
  237. c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_altivec; \
  238. c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_altivec; \
  239. c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_altivec
  240. dspfunc(put_h264_qpel, 0, 16);
  241. dspfunc(avg_h264_qpel, 0, 16);
  242. #undef dspfunc
  243. } else
  244. #endif /* HAVE_ALTIVEC */
  245. {
  246. // Non-AltiVec PPC optimisations
  247. // ... pending ...
  248. }
  249. }