You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

224 lines
15KB

  1. /*
  2. * DSP utils
  3. * Copyright (c) 2000, 2001 Fabrice Bellard
  4. * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  5. *
  6. * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
  7. *
  8. * This file is part of Libav.
  9. *
  10. * Libav is free software; you can redistribute it and/or
  11. * modify it under the terms of the GNU Lesser General Public
  12. * License as published by the Free Software Foundation; either
  13. * version 2.1 of the License, or (at your option) any later version.
  14. *
  15. * Libav is distributed in the hope that it will be useful,
  16. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  18. * Lesser General Public License for more details.
  19. *
  20. * You should have received a copy of the GNU Lesser General Public
  21. * License along with Libav; if not, write to the Free Software
  22. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23. */
  24. /**
  25. * @file
  26. * DSP utils
  27. */
  28. #define PIXOP2(OPNAME, OP) \
  29. static inline void OPNAME ## _no_rnd_pixels8_l2_8(uint8_t *dst, \
  30. const uint8_t *src1, \
  31. const uint8_t *src2, \
  32. int dst_stride, \
  33. int src_stride1, \
  34. int src_stride2, \
  35. int h) \
  36. { \
  37. int i; \
  38. \
  39. for (i = 0; i < h; i++) { \
  40. uint32_t a, b; \
  41. a = AV_RN32(&src1[i * src_stride1]); \
  42. b = AV_RN32(&src2[i * src_stride2]); \
  43. OP(*((uint32_t *) &dst[i * dst_stride]), \
  44. no_rnd_avg32(a, b)); \
  45. a = AV_RN32(&src1[i * src_stride1 + 4]); \
  46. b = AV_RN32(&src2[i * src_stride2 + 4]); \
  47. OP(*((uint32_t *) &dst[i * dst_stride + 4]), \
  48. no_rnd_avg32(a, b)); \
  49. } \
  50. } \
  51. \
  52. static inline void OPNAME ## _no_rnd_pixels16_l2_8(uint8_t *dst, \
  53. const uint8_t *src1, \
  54. const uint8_t *src2, \
  55. int dst_stride, \
  56. int src_stride1, \
  57. int src_stride2, \
  58. int h) \
  59. { \
  60. OPNAME ## _no_rnd_pixels8_l2_8(dst, src1, src2, dst_stride, \
  61. src_stride1, src_stride2, h); \
  62. OPNAME ## _no_rnd_pixels8_l2_8(dst + 8, \
  63. src1 + 8, \
  64. src2 + 8, \
  65. dst_stride, src_stride1, \
  66. src_stride2, h); \
  67. } \
  68. \
  69. static inline void OPNAME ## _pixels8_l4_8(uint8_t *dst, \
  70. const uint8_t *src1, \
  71. const uint8_t *src2, \
  72. const uint8_t *src3, \
  73. const uint8_t *src4, \
  74. int dst_stride, \
  75. int src_stride1, \
  76. int src_stride2, \
  77. int src_stride3, \
  78. int src_stride4, \
  79. int h) \
  80. { \
  81. /* FIXME HIGH BIT DEPTH */ \
  82. int i; \
  83. \
  84. for (i = 0; i < h; i++) { \
  85. uint32_t a, b, c, d, l0, l1, h0, h1; \
  86. a = AV_RN32(&src1[i * src_stride1]); \
  87. b = AV_RN32(&src2[i * src_stride2]); \
  88. c = AV_RN32(&src3[i * src_stride3]); \
  89. d = AV_RN32(&src4[i * src_stride4]); \
  90. l0 = (a & 0x03030303UL) + \
  91. (b & 0x03030303UL) + \
  92. 0x02020202UL; \
  93. h0 = ((a & 0xFCFCFCFCUL) >> 2) + \
  94. ((b & 0xFCFCFCFCUL) >> 2); \
  95. l1 = (c & 0x03030303UL) + \
  96. (d & 0x03030303UL); \
  97. h1 = ((c & 0xFCFCFCFCUL) >> 2) + \
  98. ((d & 0xFCFCFCFCUL) >> 2); \
  99. OP(*((uint32_t *) &dst[i * dst_stride]), \
  100. h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); \
  101. a = AV_RN32(&src1[i * src_stride1 + 4]); \
  102. b = AV_RN32(&src2[i * src_stride2 + 4]); \
  103. c = AV_RN32(&src3[i * src_stride3 + 4]); \
  104. d = AV_RN32(&src4[i * src_stride4 + 4]); \
  105. l0 = (a & 0x03030303UL) + \
  106. (b & 0x03030303UL) + \
  107. 0x02020202UL; \
  108. h0 = ((a & 0xFCFCFCFCUL) >> 2) + \
  109. ((b & 0xFCFCFCFCUL) >> 2); \
  110. l1 = (c & 0x03030303UL) + \
  111. (d & 0x03030303UL); \
  112. h1 = ((c & 0xFCFCFCFCUL) >> 2) + \
  113. ((d & 0xFCFCFCFCUL) >> 2); \
  114. OP(*((uint32_t *) &dst[i * dst_stride + 4]), \
  115. h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); \
  116. } \
  117. } \
  118. \
  119. static inline void OPNAME ## _no_rnd_pixels8_l4_8(uint8_t *dst, \
  120. const uint8_t *src1, \
  121. const uint8_t *src2, \
  122. const uint8_t *src3, \
  123. const uint8_t *src4, \
  124. int dst_stride, \
  125. int src_stride1, \
  126. int src_stride2, \
  127. int src_stride3, \
  128. int src_stride4, \
  129. int h) \
  130. { \
  131. /* FIXME HIGH BIT DEPTH */ \
  132. int i; \
  133. \
  134. for (i = 0; i < h; i++) { \
  135. uint32_t a, b, c, d, l0, l1, h0, h1; \
  136. a = AV_RN32(&src1[i * src_stride1]); \
  137. b = AV_RN32(&src2[i * src_stride2]); \
  138. c = AV_RN32(&src3[i * src_stride3]); \
  139. d = AV_RN32(&src4[i * src_stride4]); \
  140. l0 = (a & 0x03030303UL) + \
  141. (b & 0x03030303UL) + \
  142. 0x01010101UL; \
  143. h0 = ((a & 0xFCFCFCFCUL) >> 2) + \
  144. ((b & 0xFCFCFCFCUL) >> 2); \
  145. l1 = (c & 0x03030303UL) + \
  146. (d & 0x03030303UL); \
  147. h1 = ((c & 0xFCFCFCFCUL) >> 2) + \
  148. ((d & 0xFCFCFCFCUL) >> 2); \
  149. OP(*((uint32_t *) &dst[i * dst_stride]), \
  150. h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); \
  151. a = AV_RN32(&src1[i * src_stride1 + 4]); \
  152. b = AV_RN32(&src2[i * src_stride2 + 4]); \
  153. c = AV_RN32(&src3[i * src_stride3 + 4]); \
  154. d = AV_RN32(&src4[i * src_stride4 + 4]); \
  155. l0 = (a & 0x03030303UL) + \
  156. (b & 0x03030303UL) + \
  157. 0x01010101UL; \
  158. h0 = ((a & 0xFCFCFCFCUL) >> 2) + \
  159. ((b & 0xFCFCFCFCUL) >> 2); \
  160. l1 = (c & 0x03030303UL) + \
  161. (d & 0x03030303UL); \
  162. h1 = ((c & 0xFCFCFCFCUL) >> 2) + \
  163. ((d & 0xFCFCFCFCUL) >> 2); \
  164. OP(*((uint32_t *) &dst[i * dst_stride + 4]), \
  165. h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); \
  166. } \
  167. } \
  168. \
  169. static inline void OPNAME ## _pixels16_l4_8(uint8_t *dst, \
  170. const uint8_t *src1, \
  171. const uint8_t *src2, \
  172. const uint8_t *src3, \
  173. const uint8_t *src4, \
  174. int dst_stride, \
  175. int src_stride1, \
  176. int src_stride2, \
  177. int src_stride3, \
  178. int src_stride4, \
  179. int h) \
  180. { \
  181. OPNAME ## _pixels8_l4_8(dst, src1, src2, src3, src4, dst_stride, \
  182. src_stride1, src_stride2, src_stride3, \
  183. src_stride4, h); \
  184. OPNAME ## _pixels8_l4_8(dst + 8, \
  185. src1 + 8, src2 + 8, \
  186. src3 + 8, src4 + 8, \
  187. dst_stride, src_stride1, src_stride2, \
  188. src_stride3, src_stride4, h); \
  189. } \
  190. \
  191. static inline void OPNAME ## _no_rnd_pixels16_l4_8(uint8_t *dst, \
  192. const uint8_t *src1, \
  193. const uint8_t *src2, \
  194. const uint8_t *src3, \
  195. const uint8_t *src4, \
  196. int dst_stride, \
  197. int src_stride1, \
  198. int src_stride2, \
  199. int src_stride3, \
  200. int src_stride4, \
  201. int h) \
  202. { \
  203. OPNAME ## _no_rnd_pixels8_l4_8(dst, src1, src2, src3, src4, \
  204. dst_stride, src_stride1, \
  205. src_stride2, src_stride3, \
  206. src_stride4, h); \
  207. OPNAME ## _no_rnd_pixels8_l4_8(dst + 8, \
  208. src1 + 8, src2 + 8, \
  209. src3 + 8, src4 + 8, \
  210. dst_stride, src_stride1, \
  211. src_stride2, src_stride3, \
  212. src_stride4, h); \
  213. } \
  214. #define op_avg(a, b) a = rnd_avg32(a, b)
  215. #define op_put(a, b) a = b
  216. #define put_no_rnd_pixels8_8_c put_pixels8_8_c
  217. PIXOP2(avg, op_avg)
  218. PIXOP2(put, op_put)
  219. #undef op_avg
  220. #undef op_put