You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

162 lines
5.3KB

  1. /*
  2. * MMI optimized DSP utils
  3. * Copyright (c) 2000, 2001 Fabrice Bellard.
  4. *
  5. * This library is free software; you can redistribute it and/or
  6. * modify it under the terms of the GNU Lesser General Public
  7. * License as published by the Free Software Foundation; either
  8. * version 2 of the License, or (at your option) any later version.
  9. *
  10. * This library is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. * Lesser General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU Lesser General Public
  16. * License along with this library; if not, write to the Free Software
  17. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  18. *
  19. * MMI optimization by Leon van Stuivenberg
  20. * clear_blocks_mmi() by BroadQ
  21. */
  22. #include "../dsputil.h"
  23. #include "mmi.h"
  24. void ff_mmi_idct_put(uint8_t *dest, int line_size, DCTELEM *block);
  25. void ff_mmi_idct_add(uint8_t *dest, int line_size, DCTELEM *block);
  26. void ff_mmi_idct(DCTELEM *block);
  27. static void clear_blocks_mmi(DCTELEM * blocks)
  28. {
  29. asm volatile(
  30. ".set noreorder \n"
  31. "addiu $9, %0, 768 \n"
  32. "nop \n"
  33. "1: \n"
  34. "sq $0, 0(%0) \n"
  35. "move $8, %0 \n"
  36. "addi %0, %0, 64 \n"
  37. "sq $0, 16($8) \n"
  38. "slt $10, %0, $9 \n"
  39. "sq $0, 32($8) \n"
  40. "bnez $10, 1b \n"
  41. "sq $0, 48($8) \n"
  42. ".set reorder \n"
  43. : "+r" (blocks) :: "$8", "$9", "memory" );
  44. }
  45. static void get_pixels_mmi(DCTELEM *block, const uint8_t *pixels, int line_size)
  46. {
  47. asm volatile(
  48. ".set push \n\t"
  49. ".set mips3 \n\t"
  50. "ld $8, 0(%0) \n\t"
  51. "add %0, %0, %2 \n\t"
  52. "ld $9, 0(%0) \n\t"
  53. "add %0, %0, %2 \n\t"
  54. "ld $10, 0(%0) \n\t"
  55. "pextlb $8, $0, $8 \n\t"
  56. "sq $8, 0(%1) \n\t"
  57. "add %0, %0, %2 \n\t"
  58. "ld $8, 0(%0) \n\t"
  59. "pextlb $9, $0, $9 \n\t"
  60. "sq $9, 16(%1) \n\t"
  61. "add %0, %0, %2 \n\t"
  62. "ld $9, 0(%0) \n\t"
  63. "pextlb $10, $0, $10 \n\t"
  64. "sq $10, 32(%1) \n\t"
  65. "add %0, %0, %2 \n\t"
  66. "ld $10, 0(%0) \n\t"
  67. "pextlb $8, $0, $8 \n\t"
  68. "sq $8, 48(%1) \n\t"
  69. "add %0, %0, %2 \n\t"
  70. "ld $8, 0(%0) \n\t"
  71. "pextlb $9, $0, $9 \n\t"
  72. "sq $9, 64(%1) \n\t"
  73. "add %0, %0, %2 \n\t"
  74. "ld $9, 0(%0) \n\t"
  75. "pextlb $10, $0, $10 \n\t"
  76. "sq $10, 80(%1) \n\t"
  77. "pextlb $8, $0, $8 \n\t"
  78. "sq $8, 96(%1) \n\t"
  79. "pextlb $9, $0, $9 \n\t"
  80. "sq $9, 112(%1) \n\t"
  81. ".set pop \n\t"
  82. : "+r" (pixels) : "r" (block), "r" (line_size) : "$8", "$9", "$10", "memory" );
  83. }
  84. static void put_pixels8_mmi(uint8_t *block, const uint8_t *pixels, int line_size, int h)
  85. {
  86. asm volatile(
  87. ".set push \n\t"
  88. ".set mips3 \n\t"
  89. "1: \n\t"
  90. "ldr $8, 0(%1) \n\t"
  91. "addiu %2, %2, -1 \n\t"
  92. "ldl $8, 7(%1) \n\t"
  93. "add %1, %1, %3 \n\t"
  94. "sd $8, 0(%0) \n\t"
  95. "add %0, %0, %3 \n\t"
  96. "bgtz %2, 1b \n\t"
  97. ".set pop \n\t"
  98. : "+r" (block), "+r" (pixels), "+r" (h) : "r" (line_size)
  99. : "$8", "memory" );
  100. }
  101. static void put_pixels16_mmi(uint8_t *block, const uint8_t *pixels, int line_size, int h)
  102. {
  103. asm volatile (
  104. ".set push \n\t"
  105. ".set mips3 \n\t"
  106. "1: \n\t"
  107. "ldr $8, 0(%1) \n\t"
  108. "add $11, %1, %3 \n\t"
  109. "ldl $8, 7(%1) \n\t"
  110. "add $10, %0, %3 \n\t"
  111. "ldr $9, 8(%1) \n\t"
  112. "ldl $9, 15(%1) \n\t"
  113. "ldr $12, 0($11) \n\t"
  114. "add %1, $11, %3 \n\t"
  115. "ldl $12, 7($11) \n\t"
  116. "pcpyld $8, $9, $8 \n\t"
  117. "sq $8, 0(%0) \n\t"
  118. "ldr $13, 8($11) \n\t"
  119. "addiu %2, %2, -2 \n\t"
  120. "ldl $13, 15($11) \n\t"
  121. "add %0, $10, %3 \n\t"
  122. "pcpyld $12, $13, $12 \n\t"
  123. "sq $12, 0($10) \n\t"
  124. "bgtz %2, 1b \n\t"
  125. ".set pop \n\t"
  126. : "+r" (block), "+r" (pixels), "+r" (h) : "r" (line_size)
  127. : "$8", "$9", "$10", "$11", "$12", "$13", "memory" );
  128. }
  129. void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx)
  130. {
  131. const int idct_algo= avctx->idct_algo;
  132. c->clear_blocks = clear_blocks_mmi;
  133. c->put_pixels_tab[1][0] = put_pixels8_mmi;
  134. c->put_no_rnd_pixels_tab[1][0] = put_pixels8_mmi;
  135. c->put_pixels_tab[0][0] = put_pixels16_mmi;
  136. c->put_no_rnd_pixels_tab[0][0] = put_pixels16_mmi;
  137. c->get_pixels = get_pixels_mmi;
  138. if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_PS2){
  139. c->idct_put= ff_mmi_idct_put;
  140. c->idct_add= ff_mmi_idct_add;
  141. c->idct = ff_mmi_idct;
  142. c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
  143. }
  144. }