You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

120 lines
3.5KB

  1. ;******************************************************************************
  2. ;* SIMD-optimized UTVideo functions
  3. ;* Copyright (c) 2017 Paul B Mahol
  4. ;*
  5. ;* This file is part of FFmpeg.
  6. ;*
  7. ;* FFmpeg is free software; you can redistribute it and/or
  8. ;* modify it under the terms of the GNU Lesser General Public
  9. ;* License as published by the Free Software Foundation; either
  10. ;* version 2.1 of the License, or (at your option) any later version.
  11. ;*
  12. ;* FFmpeg is distributed in the hope that it will be useful,
  13. ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. ;* Lesser General Public License for more details.
  16. ;*
  17. ;* You should have received a copy of the GNU Lesser General Public
  18. ;* License along with FFmpeg; if not, write to the Free Software
  19. ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. ;******************************************************************************
  21. %include "libavutil/x86/x86util.asm"
  SECTION_RODATA

  ; Broadcast constants: each entry fills one 16-byte XMM register and is
  ; loaded once, before the row loop, by the routines in the text section.
  pb_128:  times 16 db  128     ; 16 bytes = 128,  subtracted from G (8-bit planes)
  pw_512:  times  8 dw  512     ;  8 words = 512,  subtracted from G (10-bit planes)
  pw_1023: times  8 dw 1023     ;  8 words = 1023, AND mask applied to R and B (10-bit planes)

  SECTION .text
  ;------------------------------------------------------------------------------
  ; void restore_rgb_planes(uint8_t *src_r, uint8_t *src_g, uint8_t *src_b,
  ;                         ptrdiff_t linesize_r, ptrdiff_t linesize_g,
  ;                         ptrdiff_t linesize_b, int width, int height)
  ;
  ; In-place update of the R and B planes from the G plane, byte by byte with
  ; wrapping 8-bit arithmetic:
  ;       r = r + (g - 128)
  ;       b = b + (g - 128)
  ; The G plane is only read.
  ;
  ; Loop structure:
  ;   * the three plane pointers are moved to the END of the row and a negative
  ;     offset x runs from -width up towards zero, so the "add x / jl" pair
  ;     both advances and terminates the inner loop;
  ;   * each iteration handles mmsize bytes, so a row is processed up to the
  ;     next multiple of mmsize past width;
  ;   * both loops are bottom-tested: one vector / one row is always processed.
  ;
  ; NOTE(review): mova is the aligned move of x86inc, so the row starts and the
  ; linesizes are presumably mmsize-aligned - confirm against the callers.
  ;------------------------------------------------------------------------------
  %macro RESTORE_RGB_PLANES 0
  cglobal restore_rgb_planes, 7 + ARCH_X86_64, 7 + ARCH_X86_64 * 2, 4, src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, w, h, x
      mova            m3, [pb_128]            ; m3 = bias removed from every G byte
      movsxdifnidn    wq, wd                  ; widen the int width to pointer size
      add         src_gq, wq                  ; point all three planes at row end
      add         src_rq, wq
      add         src_bq, wq
      neg             wq                      ; wq = -width, start value of x
  %if ARCH_X86_64 == 0
      ; x86-32 has no spare register for x: write -width back to its argument
      ; slot, hand that register to x, and keep reading w and h from the stack.
      mov             wm, wq
      DEFINE_ARGS src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, x
      %define wq r6m
      %define hd r7mp
  %endif

  .next_row:
      mov             xq, wq                  ; x = -width

  .next_vec:
      mova            m1, [src_gq + xq]       ; g
      mova            m0, [src_rq + xq]       ; r
      mova            m2, [src_bq + xq]       ; b
      psubb           m1, m3                  ; g - 128
      paddb           m0, m1                  ; r + g - 128
      paddb           m2, m1                  ; b + g - 128
      mova [src_rq + xq], m0
      mova [src_bq + xq], m2
      add             xq, mmsize
      jl .next_vec                            ; continue while x < 0

      add         src_gq, linesize_gq         ; step every plane to the next row
      add         src_rq, linesize_rq
      add         src_bq, linesize_bq
      sub             hd, 1
      jg .next_row                            ; continue while rows remain
      REP_RET
  %endmacro

  INIT_XMM sse2
  RESTORE_RGB_PLANES
  ;------------------------------------------------------------------------------
  ; restore_rgb_planes10(src_r, src_g, src_b,
  ;                      linesize_r, linesize_g, linesize_b, width, height)
  ;
  ; 16-bit-element counterpart of restore_rgb_planes. For every word:
  ;       r = (r + (g - 512)) & 1023
  ;       b = (b + (g - 512)) & 1023
  ; The G plane is only read.
  ;
  ; width and the three linesizes are doubled on entry, i.e. they arrive as
  ; counts of 16-bit elements and are converted to byte quantities here.
  ;
  ; Loop structure matches the 8-bit routine: plane pointers sit at the end of
  ; the row, x runs from -(width * 2) up to zero in mmsize-byte steps, and both
  ; loops are bottom-tested (one vector / one row is always processed).
  ;
  ; NOTE(review): mova is the aligned move of x86inc, so the row starts and the
  ; byte linesizes are presumably mmsize-aligned - confirm against the callers.
  ;------------------------------------------------------------------------------
  %macro RESTORE_RGB_PLANES10 0
  cglobal restore_rgb_planes10, 7 + ARCH_X86_64, 7 + ARCH_X86_64 * 2, 5, src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, w, h, x
      mova            m3, [pw_512]            ; m3 = bias removed from every G word
      mova            m4, [pw_1023]           ; m4 = mask keeping the low 10 bits
      shl             wd, 1                   ; width in bytes (32-bit write, so
                                              ; the upper half of wq is cleared)
      add         src_gq, wq                  ; point all three planes at row end
      add         src_rq, wq
      add         src_bq, wq
      shl    linesize_gq, 1                   ; linesizes in bytes
      shl    linesize_rq, 1
      shl    linesize_bq, 1
      neg             wq                      ; wq = -(byte width), start value of x
  %if ARCH_X86_64 == 0
      ; x86-32 has no spare register for x: write the negated width back to its
      ; argument slot, hand that register to x, and read w and h from the stack.
      mov             wm, wq
      DEFINE_ARGS src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, x
      %define wq r6m
      %define hd r7mp
  %endif

  .next_row:
      mov             xq, wq                  ; x = -(byte width)

  .next_vec:
      mova            m1, [src_gq + xq]       ; g
      mova            m0, [src_rq + xq]       ; r
      mova            m2, [src_bq + xq]       ; b
      psubw           m1, m3                  ; g - 512
      paddw           m0, m1                  ; r + g - 512
      paddw           m2, m1                  ; b + g - 512
      pand            m0, m4                  ; keep the low 10 bits of r
      pand            m2, m4                  ; keep the low 10 bits of b
      mova [src_rq + xq], m0
      mova [src_bq + xq], m2
      add             xq, mmsize
      jl .next_vec                            ; continue while x < 0

      add         src_gq, linesize_gq         ; step every plane to the next row
      add         src_rq, linesize_rq
      add         src_bq, linesize_bq
      sub             hd, 1
      jg .next_row                            ; continue while rows remain
      REP_RET
  %endmacro

  INIT_XMM sse2
  RESTORE_RGB_PLANES10