You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

122 lines
4.8KB

  /*
   * Copyright (c) 2010 Mans Rullgard <mans@mansr.com>
   *
   * This file is part of Libav.
   *
   * Libav is free software; you can redistribute it and/or
   * modify it under the terms of the GNU Lesser General Public
   * License as published by the Free Software Foundation; either
   * version 2.1 of the License, or (at your option) any later version.
   *
   * Libav is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   * Lesser General Public License for more details.
   *
   * You should have received a copy of the GNU Lesser General Public
   * License along with Libav; if not, write to the Free Software
   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
   */
  20. #include "libavutil/arm/asm.S"
  @ ----------------------------------------------------------------------
  @ vp6_edge_filter
  @
  @ Filters 12 pixel positions across an edge.  Lanes 0-7 are carried in
  @ Q registers; lanes 8-11 are carried in the low D half of a second set
  @ of registers.  The two paths are interleaved instruction by
  @ instruction and share registers, so the instruction order below must
  @ be kept as is.
  @
  @ In:    r2        = t (threshold), broadcast to every 16-bit lane
  @        d16, d17  = p[-2*s]   (u8; lanes 0-7, lanes 8-11 in low d17)
  @        d18, d19  = p[-s]
  @        d20, d21  = p[0]
  @        d22, d23  = p[s]
  @ Out:   d18, d19  = p[-s] + v, saturated to u8
  @        d20, d21  = p[0]  - v, saturated to u8
  @        Only the low four bytes of d19 and d21 hold filter results.
  @ Clobb: q0-q3, q8, q12-q15 (q11 is only read)
  @
  @ Per lane:
  @        v = (3*(p[0]-p[-s]) + p[-2*s]-p[s] + 4) >> 3
  @        V = |v|,  s = v >> 15  (all ones when v is negative, else 0)
  @        if (unsigned)(V-t-1) >= (unsigned)(t-1)   keep v
  @        else                                      v = ((2*t - V) + s) ^ s
  @ ----------------------------------------------------------------------
  .macro  vp6_edge_filter
          @ constants
          vdup.16         q3,  r2                 @ q3  = t in all lanes
          vmov.i16        q13, #1                 @ q13 = 1 in all lanes

          @ widen the pixel differences to 16 bit
          vsubl.u8        q0,  d20, d18           @ a = p[0] - p[-s]       (lanes 0-7)
          vsubl.u8        q1,  d16, d22           @ b = p[-2*s] - p[s]     (lanes 0-7)
          vsubl.u8        q14, d21, d19           @ a, lanes 8-11 in d28
          vsubl.u8        q15, d17, d23           @ b, lanes 8-11 in d30

          @ 3*a + b, built as (a + b) + 2*a
          vadd.i16        q2,  q0,  q0            @ 2*a
          vadd.i16        d29, d28, d28
          vadd.i16        q0,  q0,  q1            @ a + b
          vadd.i16        d28, d28, d30
          vadd.i16        q0,  q0,  q2            @ 3*a + b
          vadd.i16        d28, d28, d29

          @ rounding shift: v = (3*a + b + 4) >> 3
          vrshr.s16       q0,  q0,  #3            @ v (lanes 0-7)
          vrshr.s16       d28, d28, #3            @ v (lanes 8-11)

          @ magnitude, sign mask and the unsigned range test
          vsub.i16        q8,  q3,  q13           @ t - 1 (d16 doubles as the 4-lane copy)
          vabs.s16        q1,  q0                 @ V = |v|
          vshr.s16        q2,  q0,  #15           @ s = v >> 15
          vabs.s16        d30, d28
          vshr.s16        d29, d28, #15
          vsub.i16        q12, q1,  q3            @ V - t
          vsub.i16        d31, d30, d6
          vsub.i16        q12, q12, q13           @ V - t - 1
          vsub.i16        d31, d31, d26
          vcge.u16        q12, q12, q8            @ mask = (V-t-1 >= t-1), unsigned
          vcge.u16        d31, d31, d16           @ last read of t-1 in d16

          @ alternative value ((2*t - V) + s) ^ s; adding s and then
          @ xoring with s negates when s is all ones, no-op when s is 0
          vadd.i16        q13, q3,  q3            @ 2*t
          vadd.i16        d16, d6,  d6
          vsub.i16        q13, q13, q1            @ 2*t - V
          vsub.i16        d16, d16, d30
          vadd.i16        q13, q13, q2            @ + s
          vadd.i16        d16, d16, d29
          veor            q13, q13, q2            @ ^ s
          veor            d16, d16, d29

          @ where the mask is clear take the alternative, else keep v
          vbif            q0,  q13, q12
          vbif            d28, d16, d31

          @ apply: p[-s] += v, p[0] -= v, in 16 bit
          vmovl.u8        q1,  d20                @ p[0] widened (lanes 0-7)
          vmovl.u8        q15, d21                @ p[0] widened (lanes 8-11 in d30)
          vaddw.u8        q2,  q0,  d18           @ p[-s] + v
          vaddw.u8        q3,  q14, d19
          vsub.i16        q1,  q1,  q0            @ p[0] - v
          vsub.i16        d30, d30, d28

          @ saturate back to unsigned 8 bit
          vqmovun.s16     d18, q2
          vqmovun.s16     d19, q3
          vqmovun.s16     d20, q1
          vqmovun.s16     d21, q15
  .endm
  @ ----------------------------------------------------------------------
  @ ff_vp6_edge_filter_ver_neon
  @
  @ Filters 12 horizontally adjacent pixels whose neighbours lie in the
  @ rows above and below (rows p[-2*s], p[-s], p[0], p[s]).
  @ Presumably called from C as (uint8_t *yuv, int stride, int t);
  @ confirm against the C prototype.
  @
  @ In:    r0 = address of p[0] for the first column
  @        r1 = s, byte distance between consecutive rows
  @        r2 = t, consumed by vp6_edge_filter
  @ Out:   rows p[-s] and p[0] are rewritten, 12 bytes each
  @ Clobb: r0, r1, plus the NEON registers listed for vp6_edge_filter
  @        and q11
  @ Note:  each row load reads 16 bytes although only 12 are filtered.
  @ ----------------------------------------------------------------------
  function ff_vp6_edge_filter_ver_neon, export=1
          @ fetch the four rows, top to bottom
          sub             r0,  r0,  r1,  lsl #1   @ r0 -> row p[-2*s]
          vld1.8          {q8},     [r0], r1      @ q8  = p[-2*s]
          vld1.8          {q9},     [r0], r1      @ q9  = p[-s]
          vld1.8          {q10},    [r0], r1      @ q10 = p[0]
          vld1.8          {q11},    [r0]          @ q11 = p[s], r0 stays on this row

          vp6_edge_filter

          @ write back rows p[-s] and p[0], as 8 + 4 bytes each
          sub             r0,  r0,  r1,  lsl #1   @ r0 -> row p[-s]
          sub             r1,  r1,  #8            @ row step minus the 8 bytes stored with writeback
          vst1.8          {d18},    [r0]!         @ p[-s], columns 0-7
          vst1.32         {d19[0]}, [r0], r1      @ p[-s], columns 8-11; r0 -> row p[0]
          vst1.8          {d20},    [r0]!         @ p[0],  columns 0-7
          vst1.32         {d21[0]}, [r0]          @ p[0],  columns 8-11
          bx              lr
  endfunc
  @ ----------------------------------------------------------------------
  @ ff_vp6_edge_filter_hor_neon
  @
  @ Filters 12 vertically adjacent pixels whose neighbours lie to the
  @ left and right in the same row (columns p[-2], p[-1], p[0], p[1]).
  @ Presumably called from C as (uint8_t *yuv, int stride, int t);
  @ confirm against the C prototype.
  @
  @ In:    r0 = address of p[0] in the first row
  @        r1 = byte distance between consecutive rows
  @        r2 = t, consumed by vp6_edge_filter
  @ Out:   bytes p[-1] and p[0] of each of the 12 rows are rewritten
  @ Clobb: r0, r3, plus the NEON registers listed for vp6_edge_filter
  @        and q11
  @ ----------------------------------------------------------------------
  function ff_vp6_edge_filter_hor_neon, export=1
          sub             r3,  r0,  #1            @ r3 = store cursor, at p[-1]
          sub             r0,  r0,  #2            @ r0 = load cursor,  at p[-2]

          @ Load 4 bytes from each of 12 rows.  Row n goes to register
          @ d16/d18/d20/d22 chosen by n mod 4, in 32-bit lane n / 4.
  .irp    lane, 0, 1
          vld1.32         {d16[\lane]}, [r0], r1
          vld1.32         {d18[\lane]}, [r0], r1
          vld1.32         {d20[\lane]}, [r0], r1
          vld1.32         {d22[\lane]}, [r0], r1
  .endr
          @ Rows 8-11 go to the low halves of d17/d19/d21/d23; the high
          @ halves are never loaded and their lanes are never stored.
          vld1.32         {d17[0]}, [r0], r1
          vld1.32         {d19[0]}, [r0], r1
          vld1.32         {d21[0]}, [r0], r1
          vld1.32         {d23[0]}, [r0], r1

          @ 4x4 byte transpose: afterwards q8 = p[-2], q9 = p[-1],
          @ q10 = p[0], q11 = p[1], with one row per byte lane
          vtrn.8          q8,  q9
          vtrn.8          q10, q11
          vtrn.16         q8,  q10
          vtrn.16         q9,  q11

          vp6_edge_filter

          @ Interleave the two result columns so that each 16-bit lane
          @ holds the (p[-1], p[0]) pair of a single row: even rows land
          @ in q9, odd rows in q10.
          vtrn.8          q9,  q10

          @ store 2 bytes per row, rows 0-7
  .irp    lane, 0, 1, 2, 3
          vst1.16         {d18[\lane]}, [r3], r1
          vst1.16         {d20[\lane]}, [r3], r1
  .endr
          @ rows 8-11
  .irp    lane, 0, 1
          vst1.16         {d19[\lane]}, [r3], r1
          vst1.16         {d21[\lane]}, [r3], r1
  .endr
          bx              lr
  endfunc