You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

136 lines
6.2KB

  /*
   * Loongson SIMD optimized pixblockdsp
   *
   * Copyright (c) 2015 Loongson Technology Corporation Limited
   * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
   *
   * This file is part of FFmpeg.
   *
   * FFmpeg is free software; you can redistribute it and/or
   * modify it under the terms of the GNU Lesser General Public
   * License as published by the Free Software Foundation; either
   * version 2.1 of the License, or (at your option) any later version.
   *
   * FFmpeg is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
   * Lesser General Public License for more details.
   *
   * You should have received a copy of the GNU Lesser General Public
   * License along with FFmpeg; if not, write to the Free Software
   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
   */
  23. #include "pixblockdsp_mips.h"
  24. #include "libavutil/mips/asmdefs.h"
  25. #include "libavutil/mips/mmiutils.h"
  26. void ff_get_pixels_8_mmi(int16_t *av_restrict block, const uint8_t *pixels,
  27. ptrdiff_t line_size)
  28. {
  29. double ftmp[7];
  30. DECLARE_VAR_ALL64;
  31. DECLARE_VAR_ADDRT;
  32. __asm__ volatile (
  33. "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
  34. MMI_LDC1(%[ftmp1], %[pixels], 0x00)
  35. MMI_LDXC1(%[ftmp2], %[pixels], %[line_size], 0x00)
  36. "punpcklbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t"
  37. "punpckhbh %[ftmp4], %[ftmp1], %[ftmp0] \n\t"
  38. "punpcklbh %[ftmp5], %[ftmp2], %[ftmp0] \n\t"
  39. "punpckhbh %[ftmp6], %[ftmp2], %[ftmp0] \n\t"
  40. MMI_SDC1(%[ftmp3], %[block], 0x00)
  41. MMI_SDC1(%[ftmp4], %[block], 0x08)
  42. MMI_SDC1(%[ftmp5], %[block], 0x10)
  43. MMI_SDC1(%[ftmp6], %[block], 0x18)
  44. PTR_ADDU "%[pixels], %[pixels], %[line_size_x2] \n\t"
  45. MMI_LDC1(%[ftmp1], %[pixels], 0x00)
  46. MMI_LDXC1(%[ftmp2], %[pixels], %[line_size], 0x00)
  47. "punpcklbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t"
  48. "punpckhbh %[ftmp4], %[ftmp1], %[ftmp0] \n\t"
  49. "punpcklbh %[ftmp5], %[ftmp2], %[ftmp0] \n\t"
  50. "punpckhbh %[ftmp6], %[ftmp2], %[ftmp0] \n\t"
  51. MMI_SDC1(%[ftmp3], %[block], 0x20)
  52. MMI_SDC1(%[ftmp4], %[block], 0x28)
  53. MMI_SDC1(%[ftmp5], %[block], 0x30)
  54. MMI_SDC1(%[ftmp6], %[block], 0x38)
  55. PTR_ADDU "%[pixels], %[pixels], %[line_size_x2] \n\t"
  56. MMI_LDC1(%[ftmp1], %[pixels], 0x00)
  57. MMI_LDXC1(%[ftmp2], %[pixels], %[line_size], 0x00)
  58. "punpcklbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t"
  59. "punpckhbh %[ftmp4], %[ftmp1], %[ftmp0] \n\t"
  60. "punpcklbh %[ftmp5], %[ftmp2], %[ftmp0] \n\t"
  61. "punpckhbh %[ftmp6], %[ftmp2], %[ftmp0] \n\t"
  62. MMI_SDC1(%[ftmp3], %[block], 0x40)
  63. MMI_SDC1(%[ftmp4], %[block], 0x48)
  64. MMI_SDC1(%[ftmp5], %[block], 0x50)
  65. MMI_SDC1(%[ftmp6], %[block], 0x58)
  66. PTR_ADDU "%[pixels], %[pixels], %[line_size_x2] \n\t"
  67. MMI_LDC1(%[ftmp1], %[pixels], 0x00)
  68. MMI_LDXC1(%[ftmp2], %[pixels], %[line_size], 0x00)
  69. "punpcklbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t"
  70. "punpckhbh %[ftmp4], %[ftmp1], %[ftmp0] \n\t"
  71. "punpcklbh %[ftmp5], %[ftmp2], %[ftmp0] \n\t"
  72. "punpckhbh %[ftmp6], %[ftmp2], %[ftmp0] \n\t"
  73. MMI_SDC1(%[ftmp3], %[block], 0x60)
  74. MMI_SDC1(%[ftmp4], %[block], 0x68)
  75. MMI_SDC1(%[ftmp5], %[block], 0x70)
  76. MMI_SDC1(%[ftmp6], %[block], 0x78)
  77. : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
  78. [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
  79. [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
  80. [ftmp6]"=&f"(ftmp[6]),
  81. RESTRICT_ASM_ALL64
  82. RESTRICT_ASM_ADDRT
  83. [pixels]"+&r"(pixels)
  84. : [block]"r"((mips_reg)block), [line_size]"r"((mips_reg)line_size),
  85. [line_size_x2]"r"((mips_reg)(line_size<<1))
  86. : "memory"
  87. );
  88. }
  89. void ff_diff_pixels_mmi(int16_t *av_restrict block, const uint8_t *src1,
  90. const uint8_t *src2, int stride)
  91. {
  92. double ftmp[5];
  93. mips_reg tmp[1];
  94. DECLARE_VAR_ALL64;
  95. __asm__ volatile (
  96. "li %[tmp0], 0x08 \n\t"
  97. "xor %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
  98. "1: \n\t"
  99. MMI_LDC1(%[ftmp0], %[src1], 0x00)
  100. "or %[ftmp1], %[ftmp0], %[ftmp0] \n\t"
  101. MMI_LDC1(%[ftmp2], %[src2], 0x00)
  102. "or %[ftmp3], %[ftmp2], %[ftmp2] \n\t"
  103. "punpcklbh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
  104. "punpckhbh %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
  105. "punpcklbh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
  106. "punpckhbh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
  107. "psubh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
  108. "psubh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
  109. MMI_SDC1(%[ftmp0], %[block], 0x00)
  110. MMI_SDC1(%[ftmp1], %[block], 0x08)
  111. PTR_ADDI "%[tmp0], %[tmp0], -0x01 \n\t"
  112. PTR_ADDIU "%[block], %[block], 0x10 \n\t"
  113. PTR_ADDU "%[src1], %[src1], %[stride] \n\t"
  114. PTR_ADDU "%[src2], %[src2], %[stride] \n\t"
  115. "bgtz %[tmp0], 1b \n\t"
  116. : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
  117. [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
  118. [ftmp4]"=&f"(ftmp[4]),
  119. [tmp0]"=&r"(tmp[0]),
  120. RESTRICT_ASM_ALL64
  121. [block]"+&r"(block), [src1]"+&r"(src1),
  122. [src2]"+&r"(src2)
  123. : [stride]"r"((mips_reg)stride)
  124. : "memory"
  125. );
  126. }