You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

193 lines
7.3KB

  1. /*
  2. * ARM optimized DSP utils
  3. * Copyright (c) 2001 Lionel Ulmer
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. #include "libavcodec/dsputil.h"
/* Initializers for the optional CPU-specific back ends. dsputil_init_arm()
 * calls each of them only when the matching HAVE_* build option is set. */
void dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx);
void ff_float_init_arm_vfp(DSPContext* c, AVCodecContext *avctx);
void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx);
/* IDCT routines that take only the coefficient block. They are not defined in
 * the visible part of this file; the *_put / *_add wrappers below pair them
 * with the clamped pixel helpers captured from the DSPContext. */
void j_rev_dct_ARM(DCTELEM *data);
void simple_idct_ARM(DCTELEM *data);
/* ARMv5TE IDCT: block-only, put and add entry points (installed by
 * dsputil_init_arm() when HAVE_ARMV5TE is set). */
void simple_idct_armv5te(DCTELEM *data);
void simple_idct_put_armv5te(uint8_t *dest, int line_size, DCTELEM *data);
void simple_idct_add_armv5te(uint8_t *dest, int line_size, DCTELEM *data);
/* ARMv6 IDCT: block-only, put and add entry points (installed when HAVE_ARMV6
 * is set). */
void ff_simple_idct_armv6(DCTELEM *data);
void ff_simple_idct_put_armv6(uint8_t *dest, int line_size, DCTELEM *data);
void ff_simple_idct_add_armv6(uint8_t *dest, int line_size, DCTELEM *data);
/* NEON IDCT: block-only, put and add entry points (installed when HAVE_NEON
 * is set). */
void ff_simple_idct_neon(DCTELEM *data);
void ff_simple_idct_put_neon(uint8_t *dest, int line_size, DCTELEM *data);
void ff_simple_idct_add_neon(uint8_t *dest, int line_size, DCTELEM *data);
/* NEON IDCT used for FF_IDCT_VP3; only installed when HAVE_NEON is set and at
 * least one of the VP3/VP5/VP6 decoders is configured in. */
void ff_vp3_idct_neon(DCTELEM *data);
void ff_vp3_idct_put_neon(uint8_t *dest, int line_size, DCTELEM *data);
void ff_vp3_idct_add_neon(uint8_t *dest, int line_size, DCTELEM *data);
/* XXX: local hack
 * File-scope copies of the context's clamped put/add helpers. They are
 * refreshed from the DSPContext on every dsputil_init_arm() call and used by
 * the j_rev_dct_ARM_* and simple_idct_ARM_* wrappers in this file. Because they
 * are static variables, every context shares them: the most recent
 * dsputil_init_arm() call determines which helpers the wrappers use. */
static void (*ff_put_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size);
static void (*ff_add_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size);
/* Pixel routines installed into put_pixels_tab / put_no_rnd_pixels_tab by
 * dsputil_init_arm(). They are not defined in the visible part of this file.
 * NOTE(review): the _x2 / _y2 / _xy2 suffixes presumably denote the
 * horizontal / vertical / diagonal half-pel variants - confirm against the
 * implementation. */
void put_pixels8_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
void put_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
void put_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
void put_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
void put_no_rnd_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
void put_no_rnd_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
void put_no_rnd_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
void put_pixels16_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
/* Installed as c->prefetch by dsputil_init_arm() when HAVE_ARMV5TE is set. */
void ff_prefetch_arm(void *mem, int stride, int h);
/* Provide the 16-wide x2 / y2 / xy2 routines that dsputil_init_arm() installs,
 * each built from its 8-wide counterpart via CALL_2X_PIXELS.
 * NOTE(review): CALL_2X_PIXELS is defined outside this file; presumably it
 * emits a function that calls the 8-wide routine twice, the second time at an
 * offset of 8 (the third argument) - confirm in dsputil.h. */
CALL_2X_PIXELS(put_pixels16_x2_arm , put_pixels8_x2_arm , 8)
CALL_2X_PIXELS(put_pixels16_y2_arm , put_pixels8_y2_arm , 8)
CALL_2X_PIXELS(put_pixels16_xy2_arm, put_pixels8_xy2_arm, 8)
CALL_2X_PIXELS(put_no_rnd_pixels16_x2_arm , put_no_rnd_pixels8_x2_arm , 8)
CALL_2X_PIXELS(put_no_rnd_pixels16_y2_arm , put_no_rnd_pixels8_y2_arm , 8)
CALL_2X_PIXELS(put_no_rnd_pixels16_xy2_arm, put_no_rnd_pixels8_xy2_arm, 8)
/* Declared here but not referenced anywhere in the visible part of this file.
 * Note that it takes short / unsigned char pointers rather than the
 * DCTELEM / uint8_t types used by the other prototypes. */
void ff_add_pixels_clamped_ARM(short *block, unsigned char *dest,
                               int line_size);
/* XXX: these wrapper functions should be removed as soon as all IDCTs
   have been converted */
  61. static void j_rev_dct_ARM_put(uint8_t *dest, int line_size, DCTELEM *block)
  62. {
  63. j_rev_dct_ARM (block);
  64. ff_put_pixels_clamped(block, dest, line_size);
  65. }
  66. static void j_rev_dct_ARM_add(uint8_t *dest, int line_size, DCTELEM *block)
  67. {
  68. j_rev_dct_ARM (block);
  69. ff_add_pixels_clamped(block, dest, line_size);
  70. }
  71. static void simple_idct_ARM_put(uint8_t *dest, int line_size, DCTELEM *block)
  72. {
  73. simple_idct_ARM (block);
  74. ff_put_pixels_clamped(block, dest, line_size);
  75. }
  76. static void simple_idct_ARM_add(uint8_t *dest, int line_size, DCTELEM *block)
  77. {
  78. simple_idct_ARM (block);
  79. ff_add_pixels_clamped(block, dest, line_size);
  80. }
  81. int mm_support(void)
  82. {
  83. return HAVE_IWMMXT * FF_MM_IWMMXT;
  84. }
  85. void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx)
  86. {
  87. int idct_algo= avctx->idct_algo;
  88. ff_put_pixels_clamped = c->put_pixels_clamped;
  89. ff_add_pixels_clamped = c->add_pixels_clamped;
  90. if (avctx->lowres == 0) {
  91. if(idct_algo == FF_IDCT_AUTO){
  92. #if HAVE_NEON
  93. idct_algo = FF_IDCT_SIMPLENEON;
  94. #elif HAVE_ARMV6
  95. idct_algo = FF_IDCT_SIMPLEARMV6;
  96. #elif HAVE_ARMV5TE
  97. idct_algo = FF_IDCT_SIMPLEARMV5TE;
  98. #else
  99. idct_algo = FF_IDCT_ARM;
  100. #endif
  101. }
  102. if(idct_algo==FF_IDCT_ARM){
  103. c->idct_put= j_rev_dct_ARM_put;
  104. c->idct_add= j_rev_dct_ARM_add;
  105. c->idct = j_rev_dct_ARM;
  106. c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
  107. } else if (idct_algo==FF_IDCT_SIMPLEARM){
  108. c->idct_put= simple_idct_ARM_put;
  109. c->idct_add= simple_idct_ARM_add;
  110. c->idct = simple_idct_ARM;
  111. c->idct_permutation_type= FF_NO_IDCT_PERM;
  112. #if HAVE_ARMV6
  113. } else if (idct_algo==FF_IDCT_SIMPLEARMV6){
  114. c->idct_put= ff_simple_idct_put_armv6;
  115. c->idct_add= ff_simple_idct_add_armv6;
  116. c->idct = ff_simple_idct_armv6;
  117. c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
  118. #endif
  119. #if HAVE_ARMV5TE
  120. } else if (idct_algo==FF_IDCT_SIMPLEARMV5TE){
  121. c->idct_put= simple_idct_put_armv5te;
  122. c->idct_add= simple_idct_add_armv5te;
  123. c->idct = simple_idct_armv5te;
  124. c->idct_permutation_type = FF_NO_IDCT_PERM;
  125. #endif
  126. #if HAVE_NEON
  127. } else if (idct_algo==FF_IDCT_SIMPLENEON){
  128. c->idct_put= ff_simple_idct_put_neon;
  129. c->idct_add= ff_simple_idct_add_neon;
  130. c->idct = ff_simple_idct_neon;
  131. c->idct_permutation_type = FF_PARTTRANS_IDCT_PERM;
  132. } else if ((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER) &&
  133. idct_algo==FF_IDCT_VP3){
  134. c->idct_put= ff_vp3_idct_put_neon;
  135. c->idct_add= ff_vp3_idct_add_neon;
  136. c->idct = ff_vp3_idct_neon;
  137. c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
  138. #endif
  139. }
  140. }
  141. c->put_pixels_tab[0][0] = put_pixels16_arm;
  142. c->put_pixels_tab[0][1] = put_pixels16_x2_arm;
  143. c->put_pixels_tab[0][2] = put_pixels16_y2_arm;
  144. c->put_pixels_tab[0][3] = put_pixels16_xy2_arm;
  145. c->put_no_rnd_pixels_tab[0][0] = put_pixels16_arm;
  146. c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_arm;
  147. c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_arm;
  148. c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_arm;
  149. c->put_pixels_tab[1][0] = put_pixels8_arm;
  150. c->put_pixels_tab[1][1] = put_pixels8_x2_arm;
  151. c->put_pixels_tab[1][2] = put_pixels8_y2_arm;
  152. c->put_pixels_tab[1][3] = put_pixels8_xy2_arm;
  153. c->put_no_rnd_pixels_tab[1][0] = put_pixels8_arm;
  154. c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_arm;
  155. c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_arm;
  156. c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_arm;
  157. #if HAVE_ARMV5TE
  158. c->prefetch = ff_prefetch_arm;
  159. #endif
  160. #if HAVE_IWMMXT
  161. dsputil_init_iwmmxt(c, avctx);
  162. #endif
  163. #if HAVE_ARMVFP
  164. ff_float_init_arm_vfp(c, avctx);
  165. #endif
  166. #if HAVE_NEON
  167. ff_dsputil_init_neon(c, avctx);
  168. #endif
  169. }