You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

107 lines
3.0KB

  1. ;******************************************************************************
  2. ;* Copyright (c) 2012 Loren Merritt
  3. ;*
  4. ;* This file is part of Libav.
  5. ;*
  6. ;* Libav is free software; you can redistribute it and/or
  7. ;* modify it under the terms of the GNU Lesser General Public
  8. ;* License as published by the Free Software Foundation; either
  9. ;* version 2.1 of the License, or (at your option) any later version.
  10. ;*
  11. ;* Libav is distributed in the hope that it will be useful,
  12. ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. ;* Lesser General Public License for more details.
  15. ;*
  16. ;* You should have received a copy of the GNU Lesser General Public
  17. ;* License along with Libav; if not, write to the Free Software
  18. ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. ;******************************************************************************
  20. %include "x86inc.asm"
  21. SECTION .text
  ; LOWPASS prevsample, cursample, lut
  ; Computes  %1d = %2d + lut[(%1q - %2q) >> (8 - lut_bits)].
  ;   %1 - register base name (no size suffix) holding the previous sample;
  ;        overwritten with the result
  ;   %2 - register base name holding the current sample; not modified
  ;   %3 - register base name holding a pointer to a table of 16-bit entries
  ; The assemble-time symbol lut_bits must be defined before this macro is
  ; expanded. The arithmetic flags are clobbered.
  22. %macro LOWPASS 3 ; prevsample, cursample, lut
  23. sub %1q, %2q ; signed difference: prev - cur
  24. %if lut_bits != 8
  25. sar %1q, 8-lut_bits ; arithmetic shift: shrink the index range but keep the sign
  26. %endif
  ; The index can be negative, so %3 presumably points into the middle of the
  ; table rather than at its first entry -- TODO confirm against the caller.
  27. movsx %1d, word [%3q+%1q*2] ; sign-extended 16-bit table entry
  28. add %1d, %2d ; result = cur + table entry
  29. %endmacro
  ; LOAD dstreg, x, bitdepth
  ; Loads the source sample at index x, zero-extended, and scales it up to a
  ; 16-bit range.
  ;   %1 - destination register, given with its size suffix (e.g. t0d)
  ;   %2 - sample index expression, relative to srcq
  ;   %3 - bit depth of the source samples (assemble-time constant)
  ; Implicitly reads through srcq. Flags are clobbered when %3 != 16 (shl).
  30. %macro LOAD 3 ; dstreg, x, bitdepth
  31. %if %3 == 8
  32. movzx %1, byte [srcq+%2] ; 8-bit samples: one byte per sample
  33. %else
  34. movzx %1, word [srcq+(%2)*2] ; every other depth: two bytes per sample
  35. %endif
  36. %if %3 != 16
  37. shl %1, 16-%3 ; left-align the sample in 16 bits
  38. %endif
  39. %endmacro
  ; HQDN3D_ROW bitdepth
  ; Emits the function hqdn3d_row_<bitdepth>_x86, which filters one row of
  ; samples. Arguments, in the order named by cglobal:
  ;   src      - input samples (bytes for 8-bit, 16-bit words otherwise)
  ;   dst      - output samples, same element size as src
  ;   lineant  - 16-bit per-column state, read and rewritten by the spatial pass
  ;   frameant - 16-bit per-column state, read and rewritten by the temporal pass
  ;   width    - number of samples in the row
  ;   spatial  - table pointer handed to LOWPASS for the spatial passes
  ;   temporal - table pointer handed to LOWPASS for the temporal pass
  ; All filtering is done on samples scaled to a 16-bit range (see LOAD); the
  ; result is rounded back to <bitdepth> bits only when it is stored to dst.
  ; NOTE(review): when width == 1 the index x is already 0 on entry, so the
  ; x+1 load at .loop still runs once. Confirm that callers guarantee
  ; width >= 2, or that one sample past the row is readable.
  40. %macro HQDN3D_ROW 1 ; bitdepth
  ; x86inc cglobal: 7 arguments, no vector registers. x86_64 uses 10 general
  ; registers, so pixelant/t0/t1 get registers of their own; x86_32 has only 7.
  41. %if ARCH_X86_64
  42. cglobal hqdn3d_row_%1_x86, 7,10,0, src, dst, lineant, frameant, width, spatial, temporal, pixelant, t0, t1
  43. %else
  44. cglobal hqdn3d_row_%1_x86, 7,7,0, src, dst, lineant, frameant, width, spatial, temporal
  45. %endif
  ; bytedepth: bytes per src/dst sample (1 for 8-bit, 2 for 9/10/16-bit).
  ; lut_bits:  4 for depths below 16, 8 for 16-bit; consumed by LOWPASS.
  46. %assign bytedepth (%1+7)>>3
  47. %assign lut_bits 4+4*(%1/16)
  ; Point every array at its last element and run a negative index up to 0:
  ; x starts at -(width-1) and the loop ends once it has been incremented past 0.
  48. dec widthq
  49. lea srcq, [srcq+widthq*bytedepth]
  50. lea dstq, [dstq+widthq*bytedepth]
  51. lea frameantq, [frameantq+widthq*2]
  52. lea lineantq, [lineantq+widthq*2]
  53. neg widthq
  54. %define xq widthq
  55. %if ARCH_X86_32
  ; Not enough registers on x86_32: write the adjusted pointers back to the
  ; arguments' memory locations (the *mp names from x86inc), then let dst,
  ; frameant and lineant all alias r0, reloading it before each use via
  ; movifnidn. r1 becomes pixelant and r2/r3 become the temporaries t0/t1.
  56. mov dstmp, dstq
  57. mov srcmp, srcq
  58. mov frameantmp, frameantq
  59. mov lineantmp, lineantq
  60. %define dstq r0
  61. %define frameantq r0
  62. %define lineantq r0
  63. %define pixelantq r1
  64. %define pixelantd r1d
  65. DECLARE_REG_TMP 2,3
  66. %endif
  67. LOAD pixelantd, xq, %1 ; prime pixelant with the first sample of the row
  68. ALIGN 16
  69. .loop:
  ; movifnidn (x86inc) emits a mov only when its two operands differ, so these
  ; reloads are expected to vanish where the pointer still lives in its own
  ; register.
  70. movifnidn srcq, srcmp
  71. LOAD t0d, xq+1, %1 ; skip on the last iteration to avoid overread
  72. .loop2:
  ; Spatial pass against the line state: lineant[x] = LOWPASS(lineant[x], pixelant)
  73. movifnidn lineantq, lineantmp
  74. movzx t1d, word [lineantq+xq*2]
  75. LOWPASS t1, pixelant, spatial
  76. mov [lineantq+xq*2], t1w
  ; Spatial pass along the row: pixelant for the next column = LOWPASS(pixelant, sample x+1)
  77. LOWPASS pixelant, t0, spatial
  ; Temporal pass: frameant[x] = LOWPASS(frameant[x], t1), stored at 16-bit scale
  78. movifnidn frameantq, frameantmp
  79. movzx t0d, word [frameantq+xq*2]
  80. LOWPASS t0, t1, temporal
  81. mov [frameantq+xq*2], t0w
  82. movifnidn dstq, dstmp
  83. %if %1 != 16
  ; Scale back from 16 bits to the output depth; the bias is one less than half
  ; of the discarded range.
  84. add t0d, (1<<(15-%1))-1
  85. shr t0d, 16-%1 ; could eliminate this by storing from t0h, but only with some constraints on register allocation
  86. %endif
  87. %if %1 == 8
  88. mov [dstq+xq], t0b
  89. %else
  90. mov [dstq+xq*2], t0w
  91. %endif
  ; Both jumps test the flags left by inc:
  ;   x < 0  -> more samples follow, so take the path that loads sample x+1
  ;   x == 0 -> last sample: enter at .loop2 to skip the x+1 load
  ;   x > 0  -> done
  92. inc xq
  93. jl .loop
  94. je .loop2
  95. REP_RET ; x86inc return macro
  96. %endmacro ; HQDN3D_ROW
  ; Instantiate the row filter once per bit depth; each expansion emits a
  ; function named hqdn3d_row_<bitdepth>_x86.
  97. HQDN3D_ROW 8
  98. HQDN3D_ROW 9
  99. HQDN3D_ROW 10
  100. HQDN3D_ROW 16