You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

201 lines
7.2KB

  1. /*
  2. * ARMv4L optimized DSP utils
  3. * Copyright (c) 2001 Lionel Ulmer.
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. #include "libavcodec/dsputil.h"
  22. #ifdef HAVE_IPP
  23. #include <ipp.h>
  24. #endif
  25. void dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx);
  26. void ff_float_init_arm_vfp(DSPContext* c, AVCodecContext *avctx);
  27. void j_rev_dct_ARM(DCTELEM *data);
  28. void simple_idct_ARM(DCTELEM *data);
  29. void simple_idct_armv5te(DCTELEM *data);
  30. void simple_idct_put_armv5te(uint8_t *dest, int line_size, DCTELEM *data);
  31. void simple_idct_add_armv5te(uint8_t *dest, int line_size, DCTELEM *data);
  32. void ff_simple_idct_armv6(DCTELEM *data);
  33. void ff_simple_idct_put_armv6(uint8_t *dest, int line_size, DCTELEM *data);
  34. void ff_simple_idct_add_armv6(uint8_t *dest, int line_size, DCTELEM *data);
  35. /* XXX: local hack */
  36. static void (*ff_put_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size);
  37. static void (*ff_add_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size);
  38. void put_pixels8_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
  39. void put_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
  40. void put_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
  41. void put_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
  42. void put_no_rnd_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
  43. void put_no_rnd_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
  44. void put_no_rnd_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
  45. void put_pixels16_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
  46. void ff_prefetch_arm(void *mem, int stride, int h);
  47. CALL_2X_PIXELS(put_pixels16_x2_arm , put_pixels8_x2_arm , 8)
  48. CALL_2X_PIXELS(put_pixels16_y2_arm , put_pixels8_y2_arm , 8)
  49. CALL_2X_PIXELS(put_pixels16_xy2_arm, put_pixels8_xy2_arm, 8)
  50. CALL_2X_PIXELS(put_no_rnd_pixels16_x2_arm , put_no_rnd_pixels8_x2_arm , 8)
  51. CALL_2X_PIXELS(put_no_rnd_pixels16_y2_arm , put_no_rnd_pixels8_y2_arm , 8)
  52. CALL_2X_PIXELS(put_no_rnd_pixels16_xy2_arm, put_no_rnd_pixels8_xy2_arm, 8)
  53. void ff_add_pixels_clamped_ARM(short *block, unsigned char *dest,
  54. int line_size);
/* XXX: these functions should be removed as soon as all IDCTs are
   converted */
  57. static void j_rev_dct_ARM_put(uint8_t *dest, int line_size, DCTELEM *block)
  58. {
  59. j_rev_dct_ARM (block);
  60. ff_put_pixels_clamped(block, dest, line_size);
  61. }
  62. static void j_rev_dct_ARM_add(uint8_t *dest, int line_size, DCTELEM *block)
  63. {
  64. j_rev_dct_ARM (block);
  65. ff_add_pixels_clamped(block, dest, line_size);
  66. }
  67. static void simple_idct_ARM_put(uint8_t *dest, int line_size, DCTELEM *block)
  68. {
  69. simple_idct_ARM (block);
  70. ff_put_pixels_clamped(block, dest, line_size);
  71. }
  72. static void simple_idct_ARM_add(uint8_t *dest, int line_size, DCTELEM *block)
  73. {
  74. simple_idct_ARM (block);
  75. ff_add_pixels_clamped(block, dest, line_size);
  76. }
  77. #ifdef HAVE_IPP
  78. static void simple_idct_ipp(DCTELEM *block)
  79. {
  80. ippiDCT8x8Inv_Video_16s_C1I(block);
  81. }
  82. static void simple_idct_ipp_put(uint8_t *dest, int line_size, DCTELEM *block)
  83. {
  84. ippiDCT8x8Inv_Video_16s8u_C1R(block, dest, line_size);
  85. }
  86. void add_pixels_clamped_iwmmxt(const DCTELEM *block, uint8_t *pixels, int line_size);
  87. static void simple_idct_ipp_add(uint8_t *dest, int line_size, DCTELEM *block)
  88. {
  89. ippiDCT8x8Inv_Video_16s_C1I(block);
  90. #ifdef HAVE_IWMMXT
  91. add_pixels_clamped_iwmmxt(block, dest, line_size);
  92. #else
  93. ff_add_pixels_clamped_ARM(block, dest, line_size);
  94. #endif
  95. }
  96. #endif
  97. int mm_support(void)
  98. {
  99. return ENABLE_IWMMXT * FF_MM_IWMMXT;
  100. }
  101. void dsputil_init_armv4l(DSPContext* c, AVCodecContext *avctx)
  102. {
  103. int idct_algo= avctx->idct_algo;
  104. ff_put_pixels_clamped = c->put_pixels_clamped;
  105. ff_add_pixels_clamped = c->add_pixels_clamped;
  106. if (avctx->lowres == 0) {
  107. if(idct_algo == FF_IDCT_AUTO){
  108. #if defined(HAVE_IPP)
  109. idct_algo = FF_IDCT_IPP;
  110. #elif defined(HAVE_ARMV6)
  111. idct_algo = FF_IDCT_SIMPLEARMV6;
  112. #elif defined(HAVE_ARMV5TE)
  113. idct_algo = FF_IDCT_SIMPLEARMV5TE;
  114. #else
  115. idct_algo = FF_IDCT_ARM;
  116. #endif
  117. }
  118. if(idct_algo==FF_IDCT_ARM){
  119. c->idct_put= j_rev_dct_ARM_put;
  120. c->idct_add= j_rev_dct_ARM_add;
  121. c->idct = j_rev_dct_ARM;
  122. c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
  123. } else if (idct_algo==FF_IDCT_SIMPLEARM){
  124. c->idct_put= simple_idct_ARM_put;
  125. c->idct_add= simple_idct_ARM_add;
  126. c->idct = simple_idct_ARM;
  127. c->idct_permutation_type= FF_NO_IDCT_PERM;
  128. #ifdef HAVE_ARMV6
  129. } else if (idct_algo==FF_IDCT_SIMPLEARMV6){
  130. c->idct_put= ff_simple_idct_put_armv6;
  131. c->idct_add= ff_simple_idct_add_armv6;
  132. c->idct = ff_simple_idct_armv6;
  133. c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
  134. #endif
  135. #ifdef HAVE_ARMV5TE
  136. } else if (idct_algo==FF_IDCT_SIMPLEARMV5TE){
  137. c->idct_put= simple_idct_put_armv5te;
  138. c->idct_add= simple_idct_add_armv5te;
  139. c->idct = simple_idct_armv5te;
  140. c->idct_permutation_type = FF_NO_IDCT_PERM;
  141. #endif
  142. #ifdef HAVE_IPP
  143. } else if (idct_algo==FF_IDCT_IPP){
  144. c->idct_put= simple_idct_ipp_put;
  145. c->idct_add= simple_idct_ipp_add;
  146. c->idct = simple_idct_ipp;
  147. c->idct_permutation_type= FF_NO_IDCT_PERM;
  148. #endif
  149. }
  150. }
  151. c->put_pixels_tab[0][0] = put_pixels16_arm;
  152. c->put_pixels_tab[0][1] = put_pixels16_x2_arm;
  153. c->put_pixels_tab[0][2] = put_pixels16_y2_arm;
  154. c->put_pixels_tab[0][3] = put_pixels16_xy2_arm;
  155. c->put_no_rnd_pixels_tab[0][0] = put_pixels16_arm;
  156. c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_arm;
  157. c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_arm;
  158. c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_arm;
  159. c->put_pixels_tab[1][0] = put_pixels8_arm;
  160. c->put_pixels_tab[1][1] = put_pixels8_x2_arm;
  161. c->put_pixels_tab[1][2] = put_pixels8_y2_arm;
  162. c->put_pixels_tab[1][3] = put_pixels8_xy2_arm;
  163. c->put_no_rnd_pixels_tab[1][0] = put_pixels8_arm;
  164. c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_arm;
  165. c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_arm;
  166. c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_arm;
  167. #ifdef HAVE_ARMV5TE
  168. c->prefetch = ff_prefetch_arm;
  169. #endif
  170. #ifdef HAVE_IWMMXT
  171. dsputil_init_iwmmxt(c, avctx);
  172. #endif
  173. #ifdef HAVE_ARMVFP
  174. ff_float_init_arm_vfp(c, avctx);
  175. #endif
  176. }