You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

228 lines
8.2KB

  1. /*
  2. * ARM optimized DSP utils
  3. * Copyright (c) 2001 Lionel Ulmer
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. #include "libavcodec/dsputil.h"
  22. #if HAVE_IPP
  23. #include <ipp.h>
  24. #endif
  25. void dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx);
  26. void ff_float_init_arm_vfp(DSPContext* c, AVCodecContext *avctx);
  27. void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx);
  28. void j_rev_dct_ARM(DCTELEM *data);
  29. void simple_idct_ARM(DCTELEM *data);
  30. void simple_idct_armv5te(DCTELEM *data);
  31. void simple_idct_put_armv5te(uint8_t *dest, int line_size, DCTELEM *data);
  32. void simple_idct_add_armv5te(uint8_t *dest, int line_size, DCTELEM *data);
  33. void ff_simple_idct_armv6(DCTELEM *data);
  34. void ff_simple_idct_put_armv6(uint8_t *dest, int line_size, DCTELEM *data);
  35. void ff_simple_idct_add_armv6(uint8_t *dest, int line_size, DCTELEM *data);
  36. void ff_simple_idct_neon(DCTELEM *data);
  37. void ff_simple_idct_put_neon(uint8_t *dest, int line_size, DCTELEM *data);
  38. void ff_simple_idct_add_neon(uint8_t *dest, int line_size, DCTELEM *data);
  39. void ff_vp3_idct_neon(DCTELEM *data);
  40. void ff_vp3_idct_put_neon(uint8_t *dest, int line_size, DCTELEM *data);
  41. void ff_vp3_idct_add_neon(uint8_t *dest, int line_size, DCTELEM *data);
  42. /* XXX: local hack */
  43. static void (*ff_put_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size);
  44. static void (*ff_add_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size);
  45. void put_pixels8_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
  46. void put_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
  47. void put_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
  48. void put_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
  49. void put_no_rnd_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
  50. void put_no_rnd_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
  51. void put_no_rnd_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
  52. void put_pixels16_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
  53. void ff_prefetch_arm(void *mem, int stride, int h);
  54. CALL_2X_PIXELS(put_pixels16_x2_arm , put_pixels8_x2_arm , 8)
  55. CALL_2X_PIXELS(put_pixels16_y2_arm , put_pixels8_y2_arm , 8)
  56. CALL_2X_PIXELS(put_pixels16_xy2_arm, put_pixels8_xy2_arm, 8)
  57. CALL_2X_PIXELS(put_no_rnd_pixels16_x2_arm , put_no_rnd_pixels8_x2_arm , 8)
  58. CALL_2X_PIXELS(put_no_rnd_pixels16_y2_arm , put_no_rnd_pixels8_y2_arm , 8)
  59. CALL_2X_PIXELS(put_no_rnd_pixels16_xy2_arm, put_no_rnd_pixels8_xy2_arm, 8)
  60. void ff_add_pixels_clamped_ARM(short *block, unsigned char *dest,
  61. int line_size);
/* XXX: these wrapper functions should be removed as soon as all IDCTs have
 * been converted. */
  64. static void j_rev_dct_ARM_put(uint8_t *dest, int line_size, DCTELEM *block)
  65. {
  66. j_rev_dct_ARM (block);
  67. ff_put_pixels_clamped(block, dest, line_size);
  68. }
  69. static void j_rev_dct_ARM_add(uint8_t *dest, int line_size, DCTELEM *block)
  70. {
  71. j_rev_dct_ARM (block);
  72. ff_add_pixels_clamped(block, dest, line_size);
  73. }
  74. static void simple_idct_ARM_put(uint8_t *dest, int line_size, DCTELEM *block)
  75. {
  76. simple_idct_ARM (block);
  77. ff_put_pixels_clamped(block, dest, line_size);
  78. }
  79. static void simple_idct_ARM_add(uint8_t *dest, int line_size, DCTELEM *block)
  80. {
  81. simple_idct_ARM (block);
  82. ff_add_pixels_clamped(block, dest, line_size);
  83. }
  84. #if HAVE_IPP
  85. static void simple_idct_ipp(DCTELEM *block)
  86. {
  87. ippiDCT8x8Inv_Video_16s_C1I(block);
  88. }
  89. static void simple_idct_ipp_put(uint8_t *dest, int line_size, DCTELEM *block)
  90. {
  91. ippiDCT8x8Inv_Video_16s8u_C1R(block, dest, line_size);
  92. }
  93. void add_pixels_clamped_iwmmxt(const DCTELEM *block, uint8_t *pixels, int line_size);
  94. static void simple_idct_ipp_add(uint8_t *dest, int line_size, DCTELEM *block)
  95. {
  96. ippiDCT8x8Inv_Video_16s_C1I(block);
  97. #if HAVE_IWMMXT
  98. add_pixels_clamped_iwmmxt(block, dest, line_size);
  99. #else
  100. ff_add_pixels_clamped_ARM(block, dest, line_size);
  101. #endif
  102. }
  103. #endif
  104. int mm_support(void)
  105. {
  106. return HAVE_IWMMXT * FF_MM_IWMMXT;
  107. }
  108. void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx)
  109. {
  110. int idct_algo= avctx->idct_algo;
  111. ff_put_pixels_clamped = c->put_pixels_clamped;
  112. ff_add_pixels_clamped = c->add_pixels_clamped;
  113. if (avctx->lowres == 0) {
  114. if(idct_algo == FF_IDCT_AUTO){
  115. #if HAVE_IPP
  116. idct_algo = FF_IDCT_IPP;
  117. #elif HAVE_NEON
  118. idct_algo = FF_IDCT_SIMPLENEON;
  119. #elif HAVE_ARMV6
  120. idct_algo = FF_IDCT_SIMPLEARMV6;
  121. #elif HAVE_ARMV5TE
  122. idct_algo = FF_IDCT_SIMPLEARMV5TE;
  123. #else
  124. idct_algo = FF_IDCT_ARM;
  125. #endif
  126. }
  127. if(idct_algo==FF_IDCT_ARM){
  128. c->idct_put= j_rev_dct_ARM_put;
  129. c->idct_add= j_rev_dct_ARM_add;
  130. c->idct = j_rev_dct_ARM;
  131. c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
  132. } else if (idct_algo==FF_IDCT_SIMPLEARM){
  133. c->idct_put= simple_idct_ARM_put;
  134. c->idct_add= simple_idct_ARM_add;
  135. c->idct = simple_idct_ARM;
  136. c->idct_permutation_type= FF_NO_IDCT_PERM;
  137. #if HAVE_ARMV6
  138. } else if (idct_algo==FF_IDCT_SIMPLEARMV6){
  139. c->idct_put= ff_simple_idct_put_armv6;
  140. c->idct_add= ff_simple_idct_add_armv6;
  141. c->idct = ff_simple_idct_armv6;
  142. c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
  143. #endif
  144. #if HAVE_ARMV5TE
  145. } else if (idct_algo==FF_IDCT_SIMPLEARMV5TE){
  146. c->idct_put= simple_idct_put_armv5te;
  147. c->idct_add= simple_idct_add_armv5te;
  148. c->idct = simple_idct_armv5te;
  149. c->idct_permutation_type = FF_NO_IDCT_PERM;
  150. #endif
  151. #if HAVE_IPP
  152. } else if (idct_algo==FF_IDCT_IPP){
  153. c->idct_put= simple_idct_ipp_put;
  154. c->idct_add= simple_idct_ipp_add;
  155. c->idct = simple_idct_ipp;
  156. c->idct_permutation_type= FF_NO_IDCT_PERM;
  157. #endif
  158. #if HAVE_NEON
  159. } else if (idct_algo==FF_IDCT_SIMPLENEON){
  160. c->idct_put= ff_simple_idct_put_neon;
  161. c->idct_add= ff_simple_idct_add_neon;
  162. c->idct = ff_simple_idct_neon;
  163. c->idct_permutation_type = FF_PARTTRANS_IDCT_PERM;
  164. } else if ((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER) &&
  165. idct_algo==FF_IDCT_VP3){
  166. c->idct_put= ff_vp3_idct_put_neon;
  167. c->idct_add= ff_vp3_idct_add_neon;
  168. c->idct = ff_vp3_idct_neon;
  169. c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
  170. #endif
  171. }
  172. }
  173. c->put_pixels_tab[0][0] = put_pixels16_arm;
  174. c->put_pixels_tab[0][1] = put_pixels16_x2_arm;
  175. c->put_pixels_tab[0][2] = put_pixels16_y2_arm;
  176. c->put_pixels_tab[0][3] = put_pixels16_xy2_arm;
  177. c->put_no_rnd_pixels_tab[0][0] = put_pixels16_arm;
  178. c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_arm;
  179. c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_arm;
  180. c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_arm;
  181. c->put_pixels_tab[1][0] = put_pixels8_arm;
  182. c->put_pixels_tab[1][1] = put_pixels8_x2_arm;
  183. c->put_pixels_tab[1][2] = put_pixels8_y2_arm;
  184. c->put_pixels_tab[1][3] = put_pixels8_xy2_arm;
  185. c->put_no_rnd_pixels_tab[1][0] = put_pixels8_arm;
  186. c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_arm;
  187. c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_arm;
  188. c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_arm;
  189. #if HAVE_ARMV5TE
  190. c->prefetch = ff_prefetch_arm;
  191. #endif
  192. #if HAVE_IWMMXT
  193. dsputil_init_iwmmxt(c, avctx);
  194. #endif
  195. #if HAVE_ARMVFP
  196. ff_float_init_arm_vfp(c, avctx);
  197. #endif
  198. #if HAVE_NEON
  199. ff_dsputil_init_neon(c, avctx);
  200. #endif
  201. }