You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

258 lines
14KB

  1. /*
  2. * ARM NEON optimised DSP functions
  3. * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. #include <stdint.h>
  22. #include "libavcodec/avcodec.h"
  23. #include "libavcodec/dsputil.h"
  /*
   * put_pixels routines: 16- and 8-wide variants (plain, _x2, _y2, _xy2),
   * their _no_rnd counterparts, and the 16-wide avg_pixels routine.
   * Declarations only; the definitions are not in this file
   * (presumably NEON assembly -- confirm).
   * NOTE(review): parameter names are assumed to mirror the op_pixels_func
   * typedef in libavcodec/dsputil.h -- confirm against that header.
   */
  void ff_put_pixels16_neon(uint8_t *block, const uint8_t *pixels,
                            int line_size, int h);
  void ff_put_pixels16_x2_neon(uint8_t *block, const uint8_t *pixels,
                               int line_size, int h);
  void ff_put_pixels16_y2_neon(uint8_t *block, const uint8_t *pixels,
                               int line_size, int h);
  void ff_put_pixels16_xy2_neon(uint8_t *block, const uint8_t *pixels,
                                int line_size, int h);

  void ff_put_pixels8_neon(uint8_t *block, const uint8_t *pixels,
                           int line_size, int h);
  void ff_put_pixels8_x2_neon(uint8_t *block, const uint8_t *pixels,
                              int line_size, int h);
  void ff_put_pixels8_y2_neon(uint8_t *block, const uint8_t *pixels,
                              int line_size, int h);
  void ff_put_pixels8_xy2_neon(uint8_t *block, const uint8_t *pixels,
                               int line_size, int h);

  void ff_put_pixels16_x2_no_rnd_neon(uint8_t *block, const uint8_t *pixels,
                                      int line_size, int h);
  void ff_put_pixels16_y2_no_rnd_neon(uint8_t *block, const uint8_t *pixels,
                                      int line_size, int h);
  void ff_put_pixels16_xy2_no_rnd_neon(uint8_t *block, const uint8_t *pixels,
                                       int line_size, int h);

  void ff_put_pixels8_x2_no_rnd_neon(uint8_t *block, const uint8_t *pixels,
                                     int line_size, int h);
  void ff_put_pixels8_y2_no_rnd_neon(uint8_t *block, const uint8_t *pixels,
                                     int line_size, int h);
  void ff_put_pixels8_xy2_no_rnd_neon(uint8_t *block, const uint8_t *pixels,
                                      int line_size, int h);

  void ff_avg_pixels16_neon(uint8_t *block, const uint8_t *pixels,
                            int line_size, int h);
  /*
   * H.264 qpel routines, one per mcXY suffix (X and Y each 0..3), for the
   * qpel16 and qpel8 families, plus the single avg qpel16 mc00 routine.
   * Declarations only; the definitions are not in this file.
   * NOTE(review): parameter names (dst, src, stride) are assumed to mirror
   * the qpel_mc_func typedef in libavcodec/dsputil.h -- confirm.
   */

  /* put, qpel16: mc00..mc30 */
  void ff_put_h264_qpel16_mc00_neon(uint8_t *dst, uint8_t *src, int stride);
  void ff_put_h264_qpel16_mc10_neon(uint8_t *dst, uint8_t *src, int stride);
  void ff_put_h264_qpel16_mc20_neon(uint8_t *dst, uint8_t *src, int stride);
  void ff_put_h264_qpel16_mc30_neon(uint8_t *dst, uint8_t *src, int stride);
  /* put, qpel16: mc01..mc31 */
  void ff_put_h264_qpel16_mc01_neon(uint8_t *dst, uint8_t *src, int stride);
  void ff_put_h264_qpel16_mc11_neon(uint8_t *dst, uint8_t *src, int stride);
  void ff_put_h264_qpel16_mc21_neon(uint8_t *dst, uint8_t *src, int stride);
  void ff_put_h264_qpel16_mc31_neon(uint8_t *dst, uint8_t *src, int stride);
  /* put, qpel16: mc02..mc32 */
  void ff_put_h264_qpel16_mc02_neon(uint8_t *dst, uint8_t *src, int stride);
  void ff_put_h264_qpel16_mc12_neon(uint8_t *dst, uint8_t *src, int stride);
  void ff_put_h264_qpel16_mc22_neon(uint8_t *dst, uint8_t *src, int stride);
  void ff_put_h264_qpel16_mc32_neon(uint8_t *dst, uint8_t *src, int stride);
  /* put, qpel16: mc03..mc33 */
  void ff_put_h264_qpel16_mc03_neon(uint8_t *dst, uint8_t *src, int stride);
  void ff_put_h264_qpel16_mc13_neon(uint8_t *dst, uint8_t *src, int stride);
  void ff_put_h264_qpel16_mc23_neon(uint8_t *dst, uint8_t *src, int stride);
  void ff_put_h264_qpel16_mc33_neon(uint8_t *dst, uint8_t *src, int stride);

  /* put, qpel8: mc00..mc30 */
  void ff_put_h264_qpel8_mc00_neon(uint8_t *dst, uint8_t *src, int stride);
  void ff_put_h264_qpel8_mc10_neon(uint8_t *dst, uint8_t *src, int stride);
  void ff_put_h264_qpel8_mc20_neon(uint8_t *dst, uint8_t *src, int stride);
  void ff_put_h264_qpel8_mc30_neon(uint8_t *dst, uint8_t *src, int stride);
  /* put, qpel8: mc01..mc31 */
  void ff_put_h264_qpel8_mc01_neon(uint8_t *dst, uint8_t *src, int stride);
  void ff_put_h264_qpel8_mc11_neon(uint8_t *dst, uint8_t *src, int stride);
  void ff_put_h264_qpel8_mc21_neon(uint8_t *dst, uint8_t *src, int stride);
  void ff_put_h264_qpel8_mc31_neon(uint8_t *dst, uint8_t *src, int stride);
  /* put, qpel8: mc02..mc32 */
  void ff_put_h264_qpel8_mc02_neon(uint8_t *dst, uint8_t *src, int stride);
  void ff_put_h264_qpel8_mc12_neon(uint8_t *dst, uint8_t *src, int stride);
  void ff_put_h264_qpel8_mc22_neon(uint8_t *dst, uint8_t *src, int stride);
  void ff_put_h264_qpel8_mc32_neon(uint8_t *dst, uint8_t *src, int stride);
  /* put, qpel8: mc03..mc33 */
  void ff_put_h264_qpel8_mc03_neon(uint8_t *dst, uint8_t *src, int stride);
  void ff_put_h264_qpel8_mc13_neon(uint8_t *dst, uint8_t *src, int stride);
  void ff_put_h264_qpel8_mc23_neon(uint8_t *dst, uint8_t *src, int stride);
  void ff_put_h264_qpel8_mc33_neon(uint8_t *dst, uint8_t *src, int stride);

  /* avg, qpel16: only mc00 is declared here */
  void ff_avg_h264_qpel16_mc00_neon(uint8_t *dst, uint8_t *src, int stride);
  /*
   * H.264 chroma mc routines: put and avg, mc8 and mc4 variants.
   * Declarations only; the definitions are not in this file.
   * NOTE(review): parameter names are assumed to mirror the
   * h264_chroma_mc_func typedef in libavcodec/dsputil.h -- confirm.
   */
  void ff_put_h264_chroma_mc8_neon(uint8_t *dst, uint8_t *src,
                                   int srcStride, int h, int x, int y);
  void ff_put_h264_chroma_mc4_neon(uint8_t *dst, uint8_t *src,
                                   int srcStride, int h, int x, int y);

  void ff_avg_h264_chroma_mc8_neon(uint8_t *dst, uint8_t *src,
                                   int srcStride, int h, int x, int y);
  void ff_avg_h264_chroma_mc4_neon(uint8_t *dst, uint8_t *src,
                                   int srcStride, int h, int x, int y);
  /*
   * H.264 loop-filter routines: v_ and h_ variants for luma and chroma.
   * All four share one signature.
   * Declarations only; the definitions are not in this file.
   */
  void ff_h264_v_loop_filter_luma_neon(uint8_t *pix, int stride,
                                       int alpha, int beta, int8_t *tc0);
  void ff_h264_h_loop_filter_luma_neon(uint8_t *pix, int stride,
                                       int alpha, int beta, int8_t *tc0);

  void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, int stride,
                                         int alpha, int beta, int8_t *tc0);
  void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride,
                                         int alpha, int beta, int8_t *tc0);
  /*
   * H.264 weight routines, one per size suffix
   * (16x16, 16x8, 8x16, 8x8, 8x4, 4x8, 4x4, 4x2).
   * All share one signature and take a single pixel buffer (ds).
   * Declarations only; the definitions are not in this file.
   */
  void ff_weight_h264_pixels_16x16_neon(uint8_t *ds, int stride,
                                        int log2_den, int weight, int offset);
  void ff_weight_h264_pixels_16x8_neon(uint8_t *ds, int stride,
                                       int log2_den, int weight, int offset);
  void ff_weight_h264_pixels_8x16_neon(uint8_t *ds, int stride,
                                       int log2_den, int weight, int offset);
  void ff_weight_h264_pixels_8x8_neon(uint8_t *ds, int stride,
                                      int log2_den, int weight, int offset);
  void ff_weight_h264_pixels_8x4_neon(uint8_t *ds, int stride,
                                      int log2_den, int weight, int offset);
  void ff_weight_h264_pixels_4x8_neon(uint8_t *ds, int stride,
                                      int log2_den, int weight, int offset);
  void ff_weight_h264_pixels_4x4_neon(uint8_t *ds, int stride,
                                      int log2_den, int weight, int offset);
  void ff_weight_h264_pixels_4x2_neon(uint8_t *ds, int stride,
                                      int log2_den, int weight, int offset);
  /*
   * H.264 biweight routines, one per size suffix
   * (16x16, 16x8, 8x16, 8x8, 8x4, 4x8, 4x4, 4x2).
   * All share one signature: two pixel buffers (dst, src) and a separate
   * weight for each (weightd, weights).
   * Declarations only; the definitions are not in this file.
   */
  void ff_biweight_h264_pixels_16x16_neon(uint8_t *dst, uint8_t *src,
                                          int stride, int log2_den,
                                          int weightd, int weights,
                                          int offset);
  void ff_biweight_h264_pixels_16x8_neon(uint8_t *dst, uint8_t *src,
                                         int stride, int log2_den,
                                         int weightd, int weights,
                                         int offset);
  void ff_biweight_h264_pixels_8x16_neon(uint8_t *dst, uint8_t *src,
                                         int stride, int log2_den,
                                         int weightd, int weights,
                                         int offset);
  void ff_biweight_h264_pixels_8x8_neon(uint8_t *dst, uint8_t *src,
                                        int stride, int log2_den,
                                        int weightd, int weights,
                                        int offset);
  void ff_biweight_h264_pixels_8x4_neon(uint8_t *dst, uint8_t *src,
                                        int stride, int log2_den,
                                        int weightd, int weights,
                                        int offset);
  void ff_biweight_h264_pixels_4x8_neon(uint8_t *dst, uint8_t *src,
                                        int stride, int log2_den,
                                        int weightd, int weights,
                                        int offset);
  void ff_biweight_h264_pixels_4x4_neon(uint8_t *dst, uint8_t *src,
                                        int stride, int log2_den,
                                        int weightd, int weights,
                                        int offset);
  void ff_biweight_h264_pixels_4x2_neon(uint8_t *dst, uint8_t *src,
                                        int stride, int log2_den,
                                        int weightd, int weights,
                                        int offset);
  124. void ff_h264_idct_add_neon(uint8_t *dst, DCTELEM *block, int stride);
  125. void ff_h264_idct_dc_add_neon(uint8_t *dst, DCTELEM *block, int stride);
  126. void ff_h264_idct_add16_neon(uint8_t *dst, const int *block_offset,
  127. DCTELEM *block, int stride,
  128. const uint8_t nnzc[6*8]);
  129. void ff_h264_idct_add16intra_neon(uint8_t *dst, const int *block_offset,
  130. DCTELEM *block, int stride,
  131. const uint8_t nnzc[6*8]);
  132. void ff_h264_idct_add8_neon(uint8_t **dest, const int *block_offset,
  133. DCTELEM *block, int stride,
  134. const uint8_t nnzc[6*8]);
  /*
   * Float vector routines and float -> int16 converters.
   * Declarations only; the definitions are not in this file.
   * NOTE(review): the parameter names on the two float_to_int16 prototypes
   * are assumed to mirror the matching DSPContext members in
   * libavcodec/dsputil.h -- confirm.
   */
  void ff_vector_fmul_neon(float *dst, const float *src, int len);
  void ff_vector_fmul_window_neon(float *dst,
                                  const float *src0, const float *src1,
                                  const float *win,
                                  float add_bias, int len);

  void ff_float_to_int16_neon(int16_t *dst, const float *src, long len);
  void ff_float_to_int16_interleave_neon(int16_t *dst, const float **src,
                                         long len, int channels);
  141. void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
  142. {
  143. c->put_pixels_tab[0][0] = ff_put_pixels16_neon;
  144. c->put_pixels_tab[0][1] = ff_put_pixels16_x2_neon;
  145. c->put_pixels_tab[0][2] = ff_put_pixels16_y2_neon;
  146. c->put_pixels_tab[0][3] = ff_put_pixels16_xy2_neon;
  147. c->put_pixels_tab[1][0] = ff_put_pixels8_neon;
  148. c->put_pixels_tab[1][1] = ff_put_pixels8_x2_neon;
  149. c->put_pixels_tab[1][2] = ff_put_pixels8_y2_neon;
  150. c->put_pixels_tab[1][3] = ff_put_pixels8_xy2_neon;
  151. c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_neon;
  152. c->put_no_rnd_pixels_tab[0][1] = ff_put_pixels16_x2_no_rnd_neon;
  153. c->put_no_rnd_pixels_tab[0][2] = ff_put_pixels16_y2_no_rnd_neon;
  154. c->put_no_rnd_pixels_tab[0][3] = ff_put_pixels16_xy2_no_rnd_neon;
  155. c->put_no_rnd_pixels_tab[1][0] = ff_put_pixels8_neon;
  156. c->put_no_rnd_pixels_tab[1][1] = ff_put_pixels8_x2_no_rnd_neon;
  157. c->put_no_rnd_pixels_tab[1][2] = ff_put_pixels8_y2_no_rnd_neon;
  158. c->put_no_rnd_pixels_tab[1][3] = ff_put_pixels8_xy2_no_rnd_neon;
  159. c->avg_pixels_tab[0][0] = ff_avg_pixels16_neon;
  160. c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_neon;
  161. c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_neon;
  162. c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_neon;
  163. c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_neon;
  164. c->put_h264_qpel_pixels_tab[0][ 0] = ff_put_h264_qpel16_mc00_neon;
  165. c->put_h264_qpel_pixels_tab[0][ 1] = ff_put_h264_qpel16_mc10_neon;
  166. c->put_h264_qpel_pixels_tab[0][ 2] = ff_put_h264_qpel16_mc20_neon;
  167. c->put_h264_qpel_pixels_tab[0][ 3] = ff_put_h264_qpel16_mc30_neon;
  168. c->put_h264_qpel_pixels_tab[0][ 4] = ff_put_h264_qpel16_mc01_neon;
  169. c->put_h264_qpel_pixels_tab[0][ 5] = ff_put_h264_qpel16_mc11_neon;
  170. c->put_h264_qpel_pixels_tab[0][ 6] = ff_put_h264_qpel16_mc21_neon;
  171. c->put_h264_qpel_pixels_tab[0][ 7] = ff_put_h264_qpel16_mc31_neon;
  172. c->put_h264_qpel_pixels_tab[0][ 8] = ff_put_h264_qpel16_mc02_neon;
  173. c->put_h264_qpel_pixels_tab[0][ 9] = ff_put_h264_qpel16_mc12_neon;
  174. c->put_h264_qpel_pixels_tab[0][10] = ff_put_h264_qpel16_mc22_neon;
  175. c->put_h264_qpel_pixels_tab[0][11] = ff_put_h264_qpel16_mc32_neon;
  176. c->put_h264_qpel_pixels_tab[0][12] = ff_put_h264_qpel16_mc03_neon;
  177. c->put_h264_qpel_pixels_tab[0][13] = ff_put_h264_qpel16_mc13_neon;
  178. c->put_h264_qpel_pixels_tab[0][14] = ff_put_h264_qpel16_mc23_neon;
  179. c->put_h264_qpel_pixels_tab[0][15] = ff_put_h264_qpel16_mc33_neon;
  180. c->put_h264_qpel_pixels_tab[1][ 0] = ff_put_h264_qpel8_mc00_neon;
  181. c->put_h264_qpel_pixels_tab[1][ 1] = ff_put_h264_qpel8_mc10_neon;
  182. c->put_h264_qpel_pixels_tab[1][ 2] = ff_put_h264_qpel8_mc20_neon;
  183. c->put_h264_qpel_pixels_tab[1][ 3] = ff_put_h264_qpel8_mc30_neon;
  184. c->put_h264_qpel_pixels_tab[1][ 4] = ff_put_h264_qpel8_mc01_neon;
  185. c->put_h264_qpel_pixels_tab[1][ 5] = ff_put_h264_qpel8_mc11_neon;
  186. c->put_h264_qpel_pixels_tab[1][ 6] = ff_put_h264_qpel8_mc21_neon;
  187. c->put_h264_qpel_pixels_tab[1][ 7] = ff_put_h264_qpel8_mc31_neon;
  188. c->put_h264_qpel_pixels_tab[1][ 8] = ff_put_h264_qpel8_mc02_neon;
  189. c->put_h264_qpel_pixels_tab[1][ 9] = ff_put_h264_qpel8_mc12_neon;
  190. c->put_h264_qpel_pixels_tab[1][10] = ff_put_h264_qpel8_mc22_neon;
  191. c->put_h264_qpel_pixels_tab[1][11] = ff_put_h264_qpel8_mc32_neon;
  192. c->put_h264_qpel_pixels_tab[1][12] = ff_put_h264_qpel8_mc03_neon;
  193. c->put_h264_qpel_pixels_tab[1][13] = ff_put_h264_qpel8_mc13_neon;
  194. c->put_h264_qpel_pixels_tab[1][14] = ff_put_h264_qpel8_mc23_neon;
  195. c->put_h264_qpel_pixels_tab[1][15] = ff_put_h264_qpel8_mc33_neon;
  196. c->avg_h264_qpel_pixels_tab[0][ 0] = ff_avg_h264_qpel16_mc00_neon;
  197. c->h264_v_loop_filter_luma = ff_h264_v_loop_filter_luma_neon;
  198. c->h264_h_loop_filter_luma = ff_h264_h_loop_filter_luma_neon;
  199. c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon;
  200. c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon;
  201. c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels_16x16_neon;
  202. c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels_16x8_neon;
  203. c->weight_h264_pixels_tab[2] = ff_weight_h264_pixels_8x16_neon;
  204. c->weight_h264_pixels_tab[3] = ff_weight_h264_pixels_8x8_neon;
  205. c->weight_h264_pixels_tab[4] = ff_weight_h264_pixels_8x4_neon;
  206. c->weight_h264_pixels_tab[5] = ff_weight_h264_pixels_4x8_neon;
  207. c->weight_h264_pixels_tab[6] = ff_weight_h264_pixels_4x4_neon;
  208. c->weight_h264_pixels_tab[7] = ff_weight_h264_pixels_4x2_neon;
  209. c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels_16x16_neon;
  210. c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels_16x8_neon;
  211. c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels_8x16_neon;
  212. c->biweight_h264_pixels_tab[3] = ff_biweight_h264_pixels_8x8_neon;
  213. c->biweight_h264_pixels_tab[4] = ff_biweight_h264_pixels_8x4_neon;
  214. c->biweight_h264_pixels_tab[5] = ff_biweight_h264_pixels_4x8_neon;
  215. c->biweight_h264_pixels_tab[6] = ff_biweight_h264_pixels_4x4_neon;
  216. c->biweight_h264_pixels_tab[7] = ff_biweight_h264_pixels_4x2_neon;
  217. c->h264_idct_add = ff_h264_idct_add_neon;
  218. c->h264_idct_dc_add = ff_h264_idct_dc_add_neon;
  219. c->h264_idct_add16 = ff_h264_idct_add16_neon;
  220. c->h264_idct_add16intra = ff_h264_idct_add16intra_neon;
  221. c->h264_idct_add8 = ff_h264_idct_add8_neon;
  222. c->vector_fmul = ff_vector_fmul_neon;
  223. c->vector_fmul_window = ff_vector_fmul_window_neon;
  224. if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
  225. c->float_to_int16 = ff_float_to_int16_neon;
  226. c->float_to_int16_interleave = ff_float_to_int16_interleave_neon;
  227. }
  228. }