You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

278 lines
11KB

  1. /*
  2. * ARMv4L optimized DSP utils
  3. * Copyright (c) 2001 Lionel Ulmer.
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. #include "../dsputil.h"
  22. #ifdef HAVE_IPP
  23. #include "ipp.h"
  24. #endif
  /*
   * Routines used by this file but defined elsewhere (no bodies are visible
   * here).
   */
  /* Called at the end of dsputil_init_armv4l() when HAVE_IWMMXT is defined. */
  25. extern void dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx);
  /* IDCTs taking only the coefficient block; installed as c->idct and wrapped
     by the *_put / *_add helpers in this file. */
  26. extern void j_rev_dct_ARM(DCTELEM *data);
  27. extern void simple_idct_ARM(DCTELEM *data);
  /* ARMv5TE IDCT plus put/add forms that also receive the destination pointer
     and line size; installed only when HAVE_ARMV5TE is defined. */
  28. extern void simple_idct_armv5te(DCTELEM *data);
  29. extern void simple_idct_put_armv5te(uint8_t *dest, int line_size,
  30. DCTELEM *data);
  31. extern void simple_idct_add_armv5te(uint8_t *dest, int line_size,
  32. DCTELEM *data);
  /* ARMv6 counterparts with the same three signatures; installed only when
     HAVE_ARMV6 is defined. */
  33. extern void ff_simple_idct_armv6(DCTELEM *data);
  34. extern void ff_simple_idct_put_armv6(uint8_t *dest, int line_size,
  35. DCTELEM *data);
  36. extern void ff_simple_idct_add_armv6(uint8_t *dest, int line_size,
  37. DCTELEM *data);
  38. /* XXX: local hack */
  /*
   * File-local copies of the clamped put/add routines. dsputil_init_armv4l()
   * fills both from the DSPContext it is given, and the *_put / *_add IDCT
   * wrappers in this file call through them. Because they are file-scope
   * statics, every call to dsputil_init_armv4l() overwrites them.
   */
  39. static void (*ff_put_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size);
  40. static void (*ff_add_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size);
  /*
   * Pixel routines declared here without bodies (defined outside this file).
   * All share the signature (block, pixels, line_size, h). Several of them
   * (put_pixels16_arm, put_pixels8_y2_arm, put_pixels8_xy2_arm and the xy2
   * variants) are only referenced from commented-out assignments in
   * dsputil_init_armv4l().
   */
  41. void put_pixels8_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
  42. void put_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
  43. void put_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
  44. void put_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
  45. void put_no_rnd_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
  46. void put_no_rnd_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
  47. void put_no_rnd_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
  48. void put_pixels16_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
  /*
   * CALL_2X_PIXELS is not defined in this file. NOTE(review): presumably each
   * use defines the function named by the first argument by calling the
   * second argument twice, the second time offset by the third argument (8)
   * -- confirm against the macro definition in dsputil.h.
   */
  49. CALL_2X_PIXELS(put_pixels16_x2_arm , put_pixels8_x2_arm , 8)
  50. CALL_2X_PIXELS(put_pixels16_y2_arm , put_pixels8_y2_arm , 8)
  51. CALL_2X_PIXELS(put_pixels16_xy2_arm, put_pixels8_xy2_arm, 8)
  52. CALL_2X_PIXELS(put_no_rnd_pixels16_x2_arm , put_no_rnd_pixels8_x2_arm , 8)
  53. CALL_2X_PIXELS(put_no_rnd_pixels16_y2_arm , put_no_rnd_pixels8_y2_arm , 8)
  54. CALL_2X_PIXELS(put_no_rnd_pixels16_xy2_arm, put_no_rnd_pixels8_xy2_arm, 8)
  /*
   * Add an 8x8 block of 16-bit coefficients to 8-bit destination pixels,
   * clamping each result to 0x00 or 0xFF when it leaves the byte range.
   *
   * block     - coefficients, read 8 per row with sign extension (ldrsh);
   *             the pointer is advanced by 16 bytes after each row.
   * dest      - destination pixels; each row reads and rewrites 8 bytes
   *             using two 32-bit loads and two 32-bit stores.
   * line_size - byte offset added to dest after each row.
   *
   * The loop body runs 8 times (counter in r10). For every pixel the sum
   * coefficient + pixel is formed; when bit 8 of that sum is set, the sum is
   * replaced by (~coefficient) >> 24, which is 0x00 for a negative
   * coefficient and 0xFF otherwise. Only bit 8 is tested, so the clamp is
   * exact only while the sum stays within -256..511.
   *
   * block and dest are bound as read-write operands, so the asm advances
   * the function's own copies of the two pointers. r4-r10, the condition
   * flags and memory are declared as clobbered.
   *
   * NOTE(review): the least significant byte of each loaded word is paired
   * with the first coefficient of the group, which matches a little-endian
   * load -- confirm for the target configuration.
   * NOTE(review): dest is accessed with word-sized ldr/str; presumably the
   * caller guarantees suitable alignment -- confirm.
   * NOTE(review): within the code visible here this function is only called
   * from simple_idct_ipp_add() when HAVE_IWMMXT is not defined.
   *
   * The "[A]".."[F]" markers show where an instruction sat before it was
   * moved earlier in the instruction stream.
   */
  55. static void add_pixels_clamped_ARM(short *block, unsigned char *dest, int line_size)
  56. {
  57. asm volatile (
  /* r10 = number of rows still to process */
  58. "mov r10, #8 \n\t"
  59. "1: \n\t"
  60. /* load dest */
  61. "ldr r4, [%1] \n\t"
  62. /* block[0] and block[1]*/
  63. "ldrsh r5, [%0] \n\t"
  64. "ldrsh r7, [%0, #2] \n\t"
  /* isolate bytes 0 and 1 of the loaded word and add the coefficients */
  65. "and r6, r4, #0xFF \n\t"
  66. "and r8, r4, #0xFF00 \n\t"
  67. "add r6, r5, r6 \n\t"
  68. "add r8, r7, r8, lsr #8 \n\t"
  /* ~coefficient: its top byte is the clamp value used when bit 8 is set */
  69. "mvn r5, r5 \n\t"
  70. "mvn r7, r7 \n\t"
  71. "tst r6, #0x100 \n\t"
  72. "movne r6, r5, lsr #24 \n\t"
  73. "tst r8, #0x100 \n\t"
  74. "movne r8, r7, lsr #24 \n\t"
  /* r9 accumulates the four result bytes of the output word */
  75. "mov r9, r6 \n\t"
  76. "ldrsh r5, [%0, #4] \n\t" /* moved from [A] */
  77. "orr r9, r9, r8, lsl #8 \n\t"
  78. /* block[2] and block[3] */
  79. /* [A] */
  80. "ldrsh r7, [%0, #6] \n\t"
  81. "and r6, r4, #0xFF0000 \n\t"
  82. "and r8, r4, #0xFF000000 \n\t"
  83. "add r6, r5, r6, lsr #16 \n\t"
  84. "add r8, r7, r8, lsr #24 \n\t"
  85. "mvn r5, r5 \n\t"
  86. "mvn r7, r7 \n\t"
  87. "tst r6, #0x100 \n\t"
  88. "movne r6, r5, lsr #24 \n\t"
  89. "tst r8, #0x100 \n\t"
  90. "movne r8, r7, lsr #24 \n\t"
  91. "orr r9, r9, r6, lsl #16 \n\t"
  92. "ldr r4, [%1, #4] \n\t" /* moved from [B] */
  93. "orr r9, r9, r8, lsl #24 \n\t"
  94. /* store dest */
  95. "ldrsh r5, [%0, #8] \n\t" /* moved from [C] */
  96. "str r9, [%1] \n\t"
  97. /* load dest */
  98. /* [B] */
  99. /* block[4] and block[5] */
  100. /* [C] */
  101. "ldrsh r7, [%0, #10] \n\t"
  102. "and r6, r4, #0xFF \n\t"
  103. "and r8, r4, #0xFF00 \n\t"
  104. "add r6, r5, r6 \n\t"
  105. "add r8, r7, r8, lsr #8 \n\t"
  106. "mvn r5, r5 \n\t"
  107. "mvn r7, r7 \n\t"
  108. "tst r6, #0x100 \n\t"
  109. "movne r6, r5, lsr #24 \n\t"
  110. "tst r8, #0x100 \n\t"
  111. "movne r8, r7, lsr #24 \n\t"
  112. "mov r9, r6 \n\t"
  113. "ldrsh r5, [%0, #12] \n\t" /* moved from [D] */
  114. "orr r9, r9, r8, lsl #8 \n\t"
  115. /* block[6] and block[7] */
  116. /* [D] */
  117. "ldrsh r7, [%0, #14] \n\t"
  118. "and r6, r4, #0xFF0000 \n\t"
  119. "and r8, r4, #0xFF000000 \n\t"
  120. "add r6, r5, r6, lsr #16 \n\t"
  121. "add r8, r7, r8, lsr #24 \n\t"
  122. "mvn r5, r5 \n\t"
  123. "mvn r7, r7 \n\t"
  124. "tst r6, #0x100 \n\t"
  125. "movne r6, r5, lsr #24 \n\t"
  126. "tst r8, #0x100 \n\t"
  127. "movne r8, r7, lsr #24 \n\t"
  128. "orr r9, r9, r6, lsl #16 \n\t"
  129. "add %0, %0, #16 \n\t" /* moved from [E] */
  130. "orr r9, r9, r8, lsl #24 \n\t"
  /* subs sets the flags that the final bne tests; the str and add in between
     do not update them */
  131. "subs r10, r10, #1 \n\t" /* moved from [F] */
  132. /* store dest */
  133. "str r9, [%1, #4] \n\t"
  134. /* [E] */
  135. /* [F] */
  /* step dest to the next row, then loop while the row counter is non-zero */
  136. "add %1, %1, %2 \n\t"
  137. "bne 1b \n\t"
  138. : "+r"(block),
  139. "+r"(dest)
  140. : "r"(line_size)
  141. : "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc", "memory" );
  142. }
  143. /* XXX: those functions should be suppressed ASAP when all IDCTs are
  144. converted */
  145. static void j_rev_dct_ARM_put(uint8_t *dest, int line_size, DCTELEM *block)
  146. {
  147. j_rev_dct_ARM (block);
  148. ff_put_pixels_clamped(block, dest, line_size);
  149. }
  150. static void j_rev_dct_ARM_add(uint8_t *dest, int line_size, DCTELEM *block)
  151. {
  152. j_rev_dct_ARM (block);
  153. ff_add_pixels_clamped(block, dest, line_size);
  154. }
  155. static void simple_idct_ARM_put(uint8_t *dest, int line_size, DCTELEM *block)
  156. {
  157. simple_idct_ARM (block);
  158. ff_put_pixels_clamped(block, dest, line_size);
  159. }
  160. static void simple_idct_ARM_add(uint8_t *dest, int line_size, DCTELEM *block)
  161. {
  162. simple_idct_ARM (block);
  163. ff_add_pixels_clamped(block, dest, line_size);
  164. }
  165. #ifdef HAVE_IPP
  166. static void simple_idct_ipp(DCTELEM *block)
  167. {
  168. ippiDCT8x8Inv_Video_16s_C1I(block);
  169. }
  170. static void simple_idct_ipp_put(uint8_t *dest, int line_size, DCTELEM *block)
  171. {
  172. ippiDCT8x8Inv_Video_16s8u_C1R(block, dest, line_size);
  173. }
  174. void add_pixels_clamped_iwmmxt(const DCTELEM *block, uint8_t *pixels, int line_size);
  175. static void simple_idct_ipp_add(uint8_t *dest, int line_size, DCTELEM *block)
  176. {
  177. ippiDCT8x8Inv_Video_16s_C1I(block);
  178. #ifdef HAVE_IWMMXT
  179. add_pixels_clamped_iwmmxt(block, dest, line_size);
  180. #else
  181. add_pixels_clamped_ARM(block, dest, line_size);
  182. #endif
  183. }
  184. #endif
  185. void dsputil_init_armv4l(DSPContext* c, AVCodecContext *avctx)
  186. {
  187. int idct_algo= avctx->idct_algo;
  188. ff_put_pixels_clamped = c->put_pixels_clamped;
  189. ff_add_pixels_clamped = c->add_pixels_clamped;
  190. if(idct_algo == FF_IDCT_AUTO){
  191. #if defined(HAVE_IPP)
  192. idct_algo = FF_IDCT_IPP;
  193. #elif defined(HAVE_ARMV6)
  194. idct_algo = FF_IDCT_SIMPLEARMV6;
  195. #elif defined(HAVE_ARMV5TE)
  196. idct_algo = FF_IDCT_SIMPLEARMV5TE;
  197. #else
  198. idct_algo = FF_IDCT_ARM;
  199. #endif
  200. }
  201. if(idct_algo==FF_IDCT_ARM){
  202. c->idct_put= j_rev_dct_ARM_put;
  203. c->idct_add= j_rev_dct_ARM_add;
  204. c->idct = j_rev_dct_ARM;
  205. c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;/* FF_NO_IDCT_PERM */
  206. } else if (idct_algo==FF_IDCT_SIMPLEARM){
  207. c->idct_put= simple_idct_ARM_put;
  208. c->idct_add= simple_idct_ARM_add;
  209. c->idct = simple_idct_ARM;
  210. c->idct_permutation_type= FF_NO_IDCT_PERM;
  211. #ifdef HAVE_ARMV6
  212. } else if (idct_algo==FF_IDCT_SIMPLEARMV6){
  213. c->idct_put= ff_simple_idct_put_armv6;
  214. c->idct_add= ff_simple_idct_add_armv6;
  215. c->idct = ff_simple_idct_armv6;
  216. c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
  217. #endif
  218. #ifdef HAVE_ARMV5TE
  219. } else if (idct_algo==FF_IDCT_SIMPLEARMV5TE){
  220. c->idct_put= simple_idct_put_armv5te;
  221. c->idct_add= simple_idct_add_armv5te;
  222. c->idct = simple_idct_armv5te;
  223. c->idct_permutation_type = FF_NO_IDCT_PERM;
  224. #endif
  225. #ifdef HAVE_IPP
  226. } else if (idct_algo==FF_IDCT_IPP){
  227. c->idct_put= simple_idct_ipp_put;
  228. c->idct_add= simple_idct_ipp_add;
  229. c->idct = simple_idct_ipp;
  230. c->idct_permutation_type= FF_NO_IDCT_PERM;
  231. #endif
  232. }
  233. /* c->put_pixels_tab[0][0] = put_pixels16_arm; */ // NG!
  234. c->put_pixels_tab[0][1] = put_pixels16_x2_arm; //OK!
  235. c->put_pixels_tab[0][2] = put_pixels16_y2_arm; //OK!
  236. /* c->put_pixels_tab[0][3] = put_pixels16_xy2_arm; /\* NG *\/ */
  237. /* c->put_no_rnd_pixels_tab[0][0] = put_pixels16_arm; */
  238. c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_arm; // OK
  239. c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_arm; //OK
  240. /* c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_arm; //NG */
  241. c->put_pixels_tab[1][0] = put_pixels8_arm; //OK
  242. c->put_pixels_tab[1][1] = put_pixels8_x2_arm; //OK
  243. /* c->put_pixels_tab[1][2] = put_pixels8_y2_arm; //NG */
  244. /* c->put_pixels_tab[1][3] = put_pixels8_xy2_arm; //NG */
  245. c->put_no_rnd_pixels_tab[1][0] = put_pixels8_arm;//OK
  246. c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_arm; //OK
  247. c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_arm; //OK
  248. /* c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_arm;//NG */
  249. #ifdef HAVE_IWMMXT
  250. dsputil_init_iwmmxt(c, avctx);
  251. #endif
  252. }