You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

220 lines
14KB

  1. /*
  2. * quarterpel DSP function templates
  3. *
  4. * This file is part of Libav.
  5. *
  6. * Libav is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * Libav is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with Libav; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. /**
  21. * @file
  22. * quarterpel DSP function templates
  23. */
  /**
   * Generate the 8-bit two-source (l2) and four-source (l4) averaging helpers
   * for one store operation.
   *
   * OPNAME is pasted in front of every generated function name. OP is a
   * two-argument macro, OP(dst_word, value), that stores a packed 32-bit value
   * into a destination word.
   *
   * Every pixels8 function processes 8 bytes per row, as two 32-bit words, for
   * h rows; row i of each buffer starts at i * (that buffer's stride). Every
   * pixels16 function runs the matching pixels8 function on the left and on
   * the right 8-byte half.
   *
   * Sources are read with AV_RN32() and, in the l2 functions, combined with
   * no_rnd_avg32(). Both are defined outside this file (presumably an
   * unaligned 32-bit load and a per-byte average that rounds down; confirm
   * against their definitions). The destination is accessed through a
   * uint32_t lvalue, so dst rows must be valid for 32-bit access.
   *
   * The l4 functions average four sources per byte without unpacking:
   *   - l0/l1 sum the low two bits of each byte plus a bias
   *     (2 in the rounding variant, 1 in the no_rnd variant);
   *   - h0/h1 sum the high six bits of each byte, already shifted down by two;
   *   - (l0 + l1) >> 2 is masked with 0x0F0F0F0F to drop the bits shifted in
   *     from the neighbouring byte.
   * Per byte this yields (a + b + c + d + bias) >> 2.
   *
   * The final line of the macro carries no line continuation, so the
   * definition ends there no matter which line comes next.
   */
  #define PIXOP2(OPNAME, OP) \
  /* OP(dst, no_rnd_avg32(src1, src2)) on an 8-byte-wide block of h rows. */ \
  static inline void OPNAME ## _no_rnd_pixels8_l2_8(uint8_t *dst, \
          const uint8_t *src1, const uint8_t *src2, \
          int dst_stride, int src_stride1, int src_stride2, int h) \
  { \
      int i; \
  \
      for (i = 0; i < h; i++) { \
          uint32_t a, b; \
  \
          /* bytes 0..3 of row i */ \
          a = AV_RN32(&src1[i * src_stride1]); \
          b = AV_RN32(&src2[i * src_stride2]); \
          OP(*((uint32_t *) &dst[i * dst_stride]), \
             no_rnd_avg32(a, b)); \
          /* bytes 4..7 of row i */ \
          a = AV_RN32(&src1[i * src_stride1 + 4]); \
          b = AV_RN32(&src2[i * src_stride2 + 4]); \
          OP(*((uint32_t *) &dst[i * dst_stride + 4]), \
             no_rnd_avg32(a, b)); \
      } \
  } \
  \
  /* 16-byte-wide variant: two 8-byte-wide passes, 8 bytes apart. */ \
  static inline void OPNAME ## _no_rnd_pixels16_l2_8(uint8_t *dst, \
          const uint8_t *src1, const uint8_t *src2, \
          int dst_stride, int src_stride1, int src_stride2, int h) \
  { \
      OPNAME ## _no_rnd_pixels8_l2_8(dst, src1, src2, dst_stride, \
                                     src_stride1, src_stride2, h); \
      OPNAME ## _no_rnd_pixels8_l2_8(dst + 8, src1 + 8, src2 + 8, \
                                     dst_stride, src_stride1, \
                                     src_stride2, h); \
  } \
  \
  /* Per byte: OP(dst, (src1 + src2 + src3 + src4 + 2) >> 2). */ \
  static inline void OPNAME ## _pixels8_l4_8(uint8_t *dst, \
          const uint8_t *src1, const uint8_t *src2, \
          const uint8_t *src3, const uint8_t *src4, \
          int dst_stride, int src_stride1, int src_stride2, \
          int src_stride3, int src_stride4, int h) \
  { \
      /* FIXME HIGH BIT DEPTH */ \
      int i; \
  \
      for (i = 0; i < h; i++) { \
          uint32_t a, b, c, d, l0, l1, h0, h1; \
  \
          /* bytes 0..3 of row i */ \
          a = AV_RN32(&src1[i * src_stride1]); \
          b = AV_RN32(&src2[i * src_stride2]); \
          c = AV_RN32(&src3[i * src_stride3]); \
          d = AV_RN32(&src4[i * src_stride4]); \
          /* low two bits of a and b, plus the rounding bias of 2 */ \
          l0 = (a & 0x03030303UL) + \
               (b & 0x03030303UL) + \
               0x02020202UL; \
          /* high six bits of a and b, pre-divided by four */ \
          h0 = ((a & 0xFCFCFCFCUL) >> 2) + \
               ((b & 0xFCFCFCFCUL) >> 2); \
          l1 = (c & 0x03030303UL) + \
               (d & 0x03030303UL); \
          h1 = ((c & 0xFCFCFCFCUL) >> 2) + \
               ((d & 0xFCFCFCFCUL) >> 2); \
          /* the mask removes bits shifted in from the neighbouring byte */ \
          OP(*((uint32_t *) &dst[i * dst_stride]), \
             h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); \
          /* bytes 4..7 of row i */ \
          a = AV_RN32(&src1[i * src_stride1 + 4]); \
          b = AV_RN32(&src2[i * src_stride2 + 4]); \
          c = AV_RN32(&src3[i * src_stride3 + 4]); \
          d = AV_RN32(&src4[i * src_stride4 + 4]); \
          l0 = (a & 0x03030303UL) + \
               (b & 0x03030303UL) + \
               0x02020202UL; \
          h0 = ((a & 0xFCFCFCFCUL) >> 2) + \
               ((b & 0xFCFCFCFCUL) >> 2); \
          l1 = (c & 0x03030303UL) + \
               (d & 0x03030303UL); \
          h1 = ((c & 0xFCFCFCFCUL) >> 2) + \
               ((d & 0xFCFCFCFCUL) >> 2); \
          OP(*((uint32_t *) &dst[i * dst_stride + 4]), \
             h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); \
      } \
  } \
  \
  /* Per byte: OP(dst, (src1 + src2 + src3 + src4 + 1) >> 2). */ \
  static inline void OPNAME ## _no_rnd_pixels8_l4_8(uint8_t *dst, \
          const uint8_t *src1, const uint8_t *src2, \
          const uint8_t *src3, const uint8_t *src4, \
          int dst_stride, int src_stride1, int src_stride2, \
          int src_stride3, int src_stride4, int h) \
  { \
      /* FIXME HIGH BIT DEPTH */ \
      int i; \
  \
      for (i = 0; i < h; i++) { \
          uint32_t a, b, c, d, l0, l1, h0, h1; \
  \
          /* bytes 0..3 of row i */ \
          a = AV_RN32(&src1[i * src_stride1]); \
          b = AV_RN32(&src2[i * src_stride2]); \
          c = AV_RN32(&src3[i * src_stride3]); \
          d = AV_RN32(&src4[i * src_stride4]); \
          /* low two bits of a and b, plus the smaller bias of 1 */ \
          l0 = (a & 0x03030303UL) + \
               (b & 0x03030303UL) + \
               0x01010101UL; \
          /* high six bits of a and b, pre-divided by four */ \
          h0 = ((a & 0xFCFCFCFCUL) >> 2) + \
               ((b & 0xFCFCFCFCUL) >> 2); \
          l1 = (c & 0x03030303UL) + \
               (d & 0x03030303UL); \
          h1 = ((c & 0xFCFCFCFCUL) >> 2) + \
               ((d & 0xFCFCFCFCUL) >> 2); \
          /* the mask removes bits shifted in from the neighbouring byte */ \
          OP(*((uint32_t *) &dst[i * dst_stride]), \
             h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); \
          /* bytes 4..7 of row i */ \
          a = AV_RN32(&src1[i * src_stride1 + 4]); \
          b = AV_RN32(&src2[i * src_stride2 + 4]); \
          c = AV_RN32(&src3[i * src_stride3 + 4]); \
          d = AV_RN32(&src4[i * src_stride4 + 4]); \
          l0 = (a & 0x03030303UL) + \
               (b & 0x03030303UL) + \
               0x01010101UL; \
          h0 = ((a & 0xFCFCFCFCUL) >> 2) + \
               ((b & 0xFCFCFCFCUL) >> 2); \
          l1 = (c & 0x03030303UL) + \
               (d & 0x03030303UL); \
          h1 = ((c & 0xFCFCFCFCUL) >> 2) + \
               ((d & 0xFCFCFCFCUL) >> 2); \
          OP(*((uint32_t *) &dst[i * dst_stride + 4]), \
             h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); \
      } \
  } \
  \
  /* 16-byte-wide variant: two 8-byte-wide passes, 8 bytes apart. */ \
  static inline void OPNAME ## _pixels16_l4_8(uint8_t *dst, \
          const uint8_t *src1, const uint8_t *src2, \
          const uint8_t *src3, const uint8_t *src4, \
          int dst_stride, int src_stride1, int src_stride2, \
          int src_stride3, int src_stride4, int h) \
  { \
      OPNAME ## _pixels8_l4_8(dst, src1, src2, src3, src4, dst_stride, \
                              src_stride1, src_stride2, src_stride3, \
                              src_stride4, h); \
      OPNAME ## _pixels8_l4_8(dst + 8, \
                              src1 + 8, src2 + 8, \
                              src3 + 8, src4 + 8, \
                              dst_stride, src_stride1, src_stride2, \
                              src_stride3, src_stride4, h); \
  } \
  \
  /* 16-byte-wide variant: two 8-byte-wide passes, 8 bytes apart. */ \
  static inline void OPNAME ## _no_rnd_pixels16_l4_8(uint8_t *dst, \
          const uint8_t *src1, const uint8_t *src2, \
          const uint8_t *src3, const uint8_t *src4, \
          int dst_stride, int src_stride1, int src_stride2, \
          int src_stride3, int src_stride4, int h) \
  { \
      OPNAME ## _no_rnd_pixels8_l4_8(dst, src1, src2, src3, src4, \
                                     dst_stride, src_stride1, \
                                     src_stride2, src_stride3, \
                                     src_stride4, h); \
      OPNAME ## _no_rnd_pixels8_l4_8(dst + 8, \
                                     src1 + 8, src2 + 8, \
                                     src3 + 8, src4 + 8, \
                                     dst_stride, src_stride1, \
                                     src_stride2, src_stride3, \
                                     src_stride4, h); \
  }
  210. #define op_avg(a, b) a = rnd_avg32(a, b)
  211. #define op_put(a, b) a = b
  212. #define put_no_rnd_pixels8_8_c put_pixels8_8_c
  213. PIXOP2(avg, op_avg)
  214. PIXOP2(put, op_put)
  215. #undef op_avg
  216. #undef op_put