You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

225 lines
10KB

  1. /*
  2. * Loongson SIMD optimized idctdsp
  3. *
  4. * Copyright (c) 2015 Loongson Technology Corporation Limited
  5. * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
  6. *
  7. * This file is part of FFmpeg.
  8. *
  9. * FFmpeg is free software; you can redistribute it and/or
  10. * modify it under the terms of the GNU Lesser General Public
  11. * License as published by the Free Software Foundation; either
  12. * version 2.1 of the License, or (at your option) any later version.
  13. *
  14. * FFmpeg is distributed in the hope that it will be useful,
  15. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  17. * Lesser General Public License for more details.
  18. *
  19. * You should have received a copy of the GNU Lesser General Public
  20. * License along with FFmpeg; if not, write to the Free Software
  21. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  22. */
  23. #include "idctdsp_mips.h"
  24. #include "constants.h"
  25. #include "libavutil/mips/mmiutils.h"
  /*
   * ff_put_pixels_clamped_mmi: convert 64 int16_t coefficients from block to
   * bytes and store them as eight lines starting at pixels, with consecutive
   * lines line_size bytes apart.
   *
   * block     - source coefficients; 0x40 bytes (32 values) are read by each
   *             asm statement, and the pointer is advanced by 32 in between.
   * pixels    - destination; lines are written at pixels + k * line_size.
   * line_size - byte distance between destination lines.
   *
   * The work is done by two identical asm statements, each handling four
   * lines. The MMI_*, DECLARE_VAR_* and RESTRICT_ASM_* macros are defined
   * outside this file (presumably libavutil/mips/mmiutils.h). Judging by
   * their names and use, MMI_LDC1/MMI_SDC1 are assumed to be a 64-bit load/
   * store at base + offset and MMI_SDXC1 a store at base + index + offset;
   * confirm against the macro definitions.
   */
  26. void ff_put_pixels_clamped_mmi(const int16_t *block,
  27. uint8_t *av_restrict pixels, ptrdiff_t line_size)
  28. {
  29. double ftmp[8];
  30. mips_reg addr[1];
  31. DECLARE_VAR_ALL64;
  32. DECLARE_VAR_ADDRT;
  /* First pass: lines 0..3, coefficients 0..31. */
  33. __asm__ volatile (
  /* Fetch block offsets 0x00..0x38 into ftmp0..ftmp7. */
  34. MMI_LDC1(%[ftmp0], %[block], 0x00)
  35. MMI_LDC1(%[ftmp1], %[block], 0x08)
  36. MMI_LDC1(%[ftmp2], %[block], 0x10)
  37. MMI_LDC1(%[ftmp3], %[block], 0x18)
  38. MMI_LDC1(%[ftmp4], %[block], 0x20)
  39. MMI_LDC1(%[ftmp5], %[block], 0x28)
  40. MMI_LDC1(%[ftmp6], %[block], 0x30)
  41. MMI_LDC1(%[ftmp7], %[block], 0x38)
  /* addr0 = pixels + line_size, i.e. the address of the second line. */
  42. PTR_ADDU "%[addr0], %[pixels], %[line_size] \n\t"
  /*
   * Merge each register pair into one register. packushb is presumably the
   * unsigned-saturating halfword-to-byte pack that gives the function its
   * "clamped" behaviour; confirm against the Loongson MMI reference.
   */
  43. "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
  44. "packushb %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
  45. "packushb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
  46. "packushb %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
  /* Store at pixels, addr0, addr0 + line_size and pixels + line_sizex3. */
  47. MMI_SDC1(%[ftmp0], %[pixels], 0x00)
  48. MMI_SDC1(%[ftmp2], %[addr0], 0x00)
  49. MMI_SDXC1(%[ftmp4], %[addr0], %[line_size], 0x00)
  50. MMI_SDXC1(%[ftmp6], %[pixels], %[line_sizex3], 0x00)
  51. : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
  52. [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
  53. [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
  54. [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
  55. RESTRICT_ASM_ALL64
  56. RESTRICT_ASM_ADDRT
  57. [addr0]"=&r"(addr[0]),
  /*
   * NOTE(review): pixels is bound read-write ("+&r") although no instruction
   * written out in this template uses it as a destination; check whether the
   * macros require that before relaxing the constraint.
   */
  58. [pixels]"+&r"(pixels)
  59. : [line_size]"r"((mips_reg)line_size),
  60. [line_sizex3]"r"((mips_reg)(line_size*3)),
  61. [block]"r"(block)
  62. : "memory"
  63. );
  /* Step to the next four lines and the next 32 coefficients. */
  64. pixels += line_size*4;
  65. block += 32;
  /* Second pass: same instruction sequence as above, for lines 4..7. */
  66. __asm__ volatile (
  67. MMI_LDC1(%[ftmp0], %[block], 0x00)
  68. MMI_LDC1(%[ftmp1], %[block], 0x08)
  69. MMI_LDC1(%[ftmp2], %[block], 0x10)
  70. MMI_LDC1(%[ftmp3], %[block], 0x18)
  71. MMI_LDC1(%[ftmp4], %[block], 0x20)
  72. MMI_LDC1(%[ftmp5], %[block], 0x28)
  73. MMI_LDC1(%[ftmp6], %[block], 0x30)
  74. MMI_LDC1(%[ftmp7], %[block], 0x38)
  75. PTR_ADDU "%[addr0], %[pixels], %[line_size] \n\t"
  76. "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
  77. "packushb %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
  78. "packushb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
  79. "packushb %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
  80. MMI_SDC1(%[ftmp0], %[pixels], 0x00)
  81. MMI_SDC1(%[ftmp2], %[addr0], 0x00)
  82. MMI_SDXC1(%[ftmp4], %[addr0], %[line_size], 0x00)
  83. MMI_SDXC1(%[ftmp6], %[pixels], %[line_sizex3], 0x00)
  84. : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
  85. [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
  86. [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
  87. [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
  88. RESTRICT_ASM_ALL64
  89. RESTRICT_ASM_ADDRT
  90. [addr0]"=&r"(addr[0]),
  91. [pixels]"+&r"(pixels)
  92. : [line_size]"r"((mips_reg)line_size),
  93. [line_sizex3]"r"((mips_reg)(line_size*3)),
  94. [block]"r"(block)
  95. : "memory"
  96. );
  97. }
  /*
   * ff_put_signed_pixels_clamped_mmi: pack 64 int16_t coefficients from
   * block into bytes, add the ff_pb_80 constant to every packed register, and
   * store the results as eight lines starting at pixels, line_size bytes
   * apart.
   *
   * block     - source coefficients; offsets 0x00..0x78 are read.
   * pixels    - destination; advanced inside the asm by four lines after the
   *             first half.
   * line_size - byte distance between destination lines.
   *
   * Everything happens in a single asm statement that processes four lines,
   * moves pixels forward, then processes four more. ff_pb_80 is defined
   * outside this file (presumably constants.h); its name suggests 0x80 in
   * every byte, which would map signed bytes to the unsigned range. Confirm
   * against its definition. The MMI_* macros are likewise external: LDC1/SDC1
   * are assumed to be a 64-bit load/store at base + offset, and SDXC1 a store
   * at base + index + offset.
   */
  98. void ff_put_signed_pixels_clamped_mmi(const int16_t *block,
  99. uint8_t *av_restrict pixels, ptrdiff_t line_size)
  100. {
  101. int64_t line_skip = line_size;
  /* The initial 0 is overwritten by the first instruction of the asm below. */
  102. int64_t line_skip3 = 0;
  103. double ftmp[5];
  104. mips_reg addr[1];
  105. DECLARE_VAR_ALL64;
  106. DECLARE_VAR_ADDRT;
  107. __asm__ volatile (
  /* line_skip3 = 2 * line_skip for now; it becomes 3 * line_skip further down. */
  108. PTR_ADDU "%[line_skip3], %[line_skip], %[line_skip] \n\t"
  /*
   * First half: load register pairs from offsets 0x00..0x38 (ftmp0 is the
   * scratch second operand each time) and pack them into ftmp1..ftmp4.
   * packsshb is presumably the signed-saturating halfword-to-byte pack;
   * confirm against the Loongson MMI reference.
   */
  109. MMI_LDC1(%[ftmp1], %[block], 0x00)
  110. MMI_LDC1(%[ftmp0], %[block], 0x08)
  111. "packsshb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
  112. MMI_LDC1(%[ftmp2], %[block], 0x10)
  113. MMI_LDC1(%[ftmp0], %[block], 0x18)
  114. "packsshb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
  115. MMI_LDC1(%[ftmp3], %[block], 0x20)
  116. MMI_LDC1(%[ftmp0], %[block], 0x28)
  117. "packsshb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
  118. MMI_LDC1(%[ftmp4], %[block], 0x30)
  119. MMI_LDC1(%[ftmp0], %[block], 0x38)
  120. "packsshb %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
  /* Add the ff_pb_80 bias to each packed register. */
  121. "paddb %[ftmp1], %[ftmp1], %[ff_pb_80] \n\t"
  122. "paddb %[ftmp2], %[ftmp2], %[ff_pb_80] \n\t"
  123. "paddb %[ftmp3], %[ftmp3], %[ff_pb_80] \n\t"
  124. "paddb %[ftmp4], %[ftmp4], %[ff_pb_80] \n\t"
  /* Lines 0..2 at pixels + 0, + line_skip, + 2 * line_skip. */
  125. MMI_SDC1(%[ftmp1], %[pixels], 0x00)
  126. MMI_SDXC1(%[ftmp2], %[pixels], %[line_skip], 0x00)
  127. MMI_SDXC1(%[ftmp3], %[pixels], %[line_skip3], 0x00)
  /* line_skip3 now really is 3 * line_skip; line 3 goes there. */
  128. PTR_ADDU "%[line_skip3], %[line_skip3], %[line_skip] \n\t"
  129. MMI_SDXC1(%[ftmp4], %[pixels], %[line_skip3], 0x00)
  /* addr0 = 4 * line_skip; advance pixels to line 4. */
  130. PTR_ADDU "%[addr0], %[line_skip3], %[line_skip] \n\t"
  131. PTR_ADDU "%[pixels], %[pixels], %[addr0] \n\t"
  /* Second half: same pack-and-bias sequence for offsets 0x40..0x78. */
  132. MMI_LDC1(%[ftmp1], %[block], 0x40)
  133. MMI_LDC1(%[ftmp0], %[block], 0x48)
  134. "packsshb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
  135. MMI_LDC1(%[ftmp2], %[block], 0x50)
  136. MMI_LDC1(%[ftmp0], %[block], 0x58)
  137. "packsshb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
  138. MMI_LDC1(%[ftmp3], %[block], 0x60)
  139. MMI_LDC1(%[ftmp0], %[block], 0x68)
  140. "packsshb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
  141. MMI_LDC1(%[ftmp4], %[block], 0x70)
  142. MMI_LDC1(%[ftmp0], %[block], 0x78)
  143. "packsshb %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
  144. "paddb %[ftmp1], %[ftmp1], %[ff_pb_80] \n\t"
  145. "paddb %[ftmp2], %[ftmp2], %[ff_pb_80] \n\t"
  146. "paddb %[ftmp3], %[ftmp3], %[ff_pb_80] \n\t"
  147. "paddb %[ftmp4], %[ftmp4], %[ff_pb_80] \n\t"
  /*
   * Lines 4..7 relative to the advanced pixels: + 0, + line_skip,
   * + addr0 (recomputed as 2 * line_skip) and + line_skip3 (3 * line_skip).
   */
  148. MMI_SDC1(%[ftmp1], %[pixels], 0x00)
  149. MMI_SDXC1(%[ftmp2], %[pixels], %[line_skip], 0x00)
  150. PTR_ADDU "%[addr0], %[line_skip], %[line_skip] \n\t"
  151. MMI_SDXC1(%[ftmp3], %[pixels], %[addr0], 0x00)
  152. MMI_SDXC1(%[ftmp4], %[pixels], %[line_skip3], 0x00)
  153. : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
  154. [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
  155. [ftmp4]"=&f"(ftmp[4]),
  156. RESTRICT_ASM_ALL64
  157. RESTRICT_ASM_ADDRT
  158. [addr0]"=&r"(addr[0]),
  /*
   * NOTE(review): line_skip3 is bound as an int64_t, whereas line_skip is
   * cast to mips_reg below and both feed PTR_ADDU. If mips_reg can be
   * narrower than 64 bits on some target, the operand widths differ; confirm
   * this is intended.
   */
  159. [pixels]"+&r"(pixels), [line_skip3]"+&r"(line_skip3)
  160. : [block]"r"(block),
  161. [line_skip]"r"((mips_reg)line_skip),
  162. [ff_pb_80]"f"(ff_pb_80)
  163. : "memory"
  164. );
  165. }
  /*
   * ff_add_pixels_clamped_mmi: add int16_t coefficients from block to the
   * bytes already stored at pixels and write the packed result back in place.
   *
   * block     - source coefficients; 0x20 bytes are consumed per loop
   *             iteration and the pointer is advanced inside the asm.
   * pixels    - destination, read and rewritten; advanced by two lines per
   *             iteration inside the asm.
   * line_size - byte distance between destination lines.
   *
   * The asm loop runs four times (counter tmp0 starts at 4) and handles two
   * lines per iteration, so eight lines are updated in total. The MMI_*
   * macros are defined outside this file: LDC1/SDC1 are assumed to be a
   * 64-bit load/store at base + offset, and LDXC1/SDXC1 the indexed forms at
   * base + index + offset. Confirm against the macro definitions.
   */
  166. void ff_add_pixels_clamped_mmi(const int16_t *block,
  167. uint8_t *av_restrict pixels, ptrdiff_t line_size)
  168. {
  169. double ftmp[8];
  170. uint64_t tmp[1];
  171. mips_reg addr[1];
  172. DECLARE_VAR_ALL64;
  173. DECLARE_VAR_ADDRT;
  174. __asm__ volatile (
  /* tmp0 is the loop counter; ftmp0 is cleared to zero for the unpacks below. */
  175. "li %[tmp0], 0x04 \n\t"
  176. "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
  177. "1: \n\t"
  /* Coefficients for two lines: block offsets 0x00..0x18. */
  178. MMI_LDC1(%[ftmp1], %[block], 0x00)
  179. MMI_LDC1(%[ftmp2], %[block], 0x08)
  180. MMI_LDC1(%[ftmp3], %[block], 0x10)
  181. MMI_LDC1(%[ftmp4], %[block], 0x18)
  /* Existing bytes of the two lines: pixels and pixels + line_size. */
  182. MMI_LDC1(%[ftmp5], %[pixels], 0x00)
  183. MMI_LDXC1(%[ftmp6], %[pixels], %[line_size], 0x00)
  /*
   * First line: copy the bytes, interleave the low and high halves with the
   * zero register (presumably widening bytes to halfwords) and add them to
   * the coefficients.
   */
  184. "mov.d %[ftmp7], %[ftmp5] \n\t"
  185. "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
  186. "punpckhbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
  187. "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
  188. "paddh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
  /* Second line: same steps, reusing ftmp7 as scratch. */
  189. "mov.d %[ftmp7], %[ftmp6] \n\t"
  190. "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
  191. "punpckhbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
  192. "paddh %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
  193. "paddh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
  /*
   * Pack the sums back into one register per line (packushb is presumably
   * the unsigned-saturating pack that provides the clamping) and store them
   * over the bytes that were read.
   */
  194. "packushb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
  195. "packushb %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
  196. MMI_SDC1(%[ftmp1], %[pixels], 0x00)
  197. MMI_SDXC1(%[ftmp3], %[pixels], %[line_size], 0x00)
  /* Decrement the counter, step block by 0x20 bytes and pixels by two lines. */
  198. "addi %[tmp0], %[tmp0], -0x01 \n\t"
  199. PTR_ADDIU "%[block], %[block], 0x20 \n\t"
  200. PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
  201. PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
  /* Loop back to label 1 until the counter reaches zero. */
  202. "bnez %[tmp0], 1b"
  203. : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
  204. [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
  205. [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
  206. [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
  207. [tmp0]"=&r"(tmp[0]),
  208. RESTRICT_ASM_ALL64
  209. RESTRICT_ASM_ADDRT
  /*
   * NOTE(review): addr0 is reserved as an output here but is not named by
   * any instruction written out in this template; it looks unused unless one
   * of the macros refers to it. Verify before removing.
   */
  210. [addr0]"=&r"(addr[0]),
  211. [pixels]"+&r"(pixels), [block]"+&r"(block)
  212. : [line_size]"r"((mips_reg)line_size)
  213. : "memory"
  214. );
  215. }