You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

286 lines
12KB

  1. /*
  2. * Copyright (c) 2015 Manojkumar Bhosale (Manojkumar.Bhosale@imgtec.com)
  3. *
  4. * This file is part of FFmpeg.
  5. *
  6. * FFmpeg is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * FFmpeg is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with FFmpeg; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. #ifndef AVUTIL_MIPS_GENERIC_MACROS_MSA_H
  21. #define AVUTIL_MIPS_GENERIC_MACROS_MSA_H
  22. #include <stdint.h>
  23. #include <msa.h>
  /**
   * Read one v16u8 vector (type from <msa.h>) from memory.
   *
   * @param psrc  address to read from; it is cast to (v16u8 *) and
   *              dereferenced. The argument is expanded exactly once.
   * @return      the v16u8 value found at psrc.
   */
  #define LOAD_UB(psrc) (*((v16u8 *) (psrc)))
  /**
   * Read one v16i8 vector (type from <msa.h>) from memory.
   *
   * @param psrc  address to read from; it is cast to (v16i8 *) and
   *              dereferenced. The argument is expanded exactly once.
   * @return      the v16i8 value found at psrc.
   */
  #define LOAD_SB(psrc) (*((v16i8 *) (psrc)))
  /**
   * Read one v8i16 vector (type from <msa.h>) from memory.
   *
   * @param psrc  address to read from; it is cast to (v8i16 *) and
   *              dereferenced. The argument is expanded exactly once.
   * @return      the v8i16 value found at psrc.
   */
  #define LOAD_SH(psrc) (*((v8i16 *) (psrc)))
  /**
   * Write one v8i16 vector (type from <msa.h>) to memory.
   *
   * @param vec    value to store.
   * @param pdest  destination address; it is cast to (v8i16 *).
   *
   * Expands to a brace-enclosed block (a statement, not an expression).
   */
  #define STORE_SH(vec, pdest)                \
  {                                           \
      ((v8i16 *) (pdest))[0] = (vec);         \
  }
  /**
   * Store a 64-bit value to memory at pdst using MIPS inline assembly.
   *
   * One of two implementations is selected at preprocessing time:
   *  - __mips_isa_rev >= 6: a single `sd` instruction.
   *  - otherwise: two `usw` stores, the low 32 bits of val at pdst and the
   *    high 32 bits at pdst + 4.
   *
   * @param pdst  destination address; cast to (uint8_t *), expanded once.
   * @param val   value to store; converted to uint64_t, expanded once, so
   *              arguments narrower than 64 bits are widened before the
   *              high word is extracted.
   *
   * Expands to a brace-enclosed block (a statement, not an expression).
   *
   * NOTE(review): `sd` is a 64-bit store instruction; confirm the first
   * branch is only ever compiled for targets that provide it.
   * NOTE(review): each "=m" operand names a single uint8_t object although
   * the instruction writes more bytes; confirm that is an adequate
   * description of the written memory for the compiler.
   */
  #if (__mips_isa_rev >= 6)
  #define STORE_DWORD(pdst, val)                              \
  {                                                           \
      uint8_t *dst_ptr_m = (uint8_t *) (pdst);                \
      uint64_t val_m = (val);                                 \
                                                              \
      __asm__ __volatile__ (                                  \
          "sd %[val_m], %[dst_ptr_m] \n\t"                    \
                                                              \
          : [dst_ptr_m] "=m" (*dst_ptr_m)                     \
          : [val_m] "r" (val_m)                               \
      );                                                      \
  }
  #else
  #define STORE_DWORD(pdst, val)                              \
  {                                                           \
      uint8_t *dst1_m = (uint8_t *) (pdst);                   \
      uint8_t *dst2_m = dst1_m + 4;                           \
      /* Capture val once and widen it before splitting. */   \
      uint64_t val_m = (val);                                 \
      uint32_t val0_m = (uint32_t) (val_m & 0xFFFFFFFF);      \
      uint32_t val1_m = (uint32_t) (val_m >> 32);             \
                                                              \
      __asm__ __volatile__ (                                  \
          "usw %[val0_m], %[dst1_m] \n\t"                     \
          "usw %[val1_m], %[dst2_m] \n\t"                     \
                                                              \
          : [dst1_m] "=m" (*dst1_m), [dst2_m] "=m" (*dst2_m)  \
          : [val0_m] "r" (val0_m), [val1_m] "r" (val1_m)      \
      );                                                      \
  }
  #endif
  /**
   * Load four v16i8 vectors from four consecutive rows.
   *
   * Row k (k = 0..3) is read with LOAD_SB from (psrc) + k * (stride) and
   * assigned to valk, in increasing k order.
   *
   * @param psrc    address of row 0.
   * @param stride  distance between rows, in units of psrc's pointee type.
   * @param val0..val3  lvalues receiving the loaded vectors.
   *
   * psrc and stride are parenthesized in the expansion so that compound
   * expressions such as `a + b` may be passed safely. Both are expanded
   * several times, so they must not have side effects.
   * Expands to a brace-enclosed block (a statement, not an expression).
   */
  #define LOAD_4VECS_SB(psrc, stride,                 \
                        val0, val1, val2, val3)       \
  {                                                   \
      val0 = LOAD_SB((psrc) + 0 * (stride));          \
      val1 = LOAD_SB((psrc) + 1 * (stride));          \
      val2 = LOAD_SB((psrc) + 2 * (stride));          \
      val3 = LOAD_SB((psrc) + 3 * (stride));          \
  }
  /**
   * Load seven v16i8 vectors from seven consecutive rows.
   *
   * Row k (k = 0..6) is read with LOAD_SB from (psrc) + k * (stride) and
   * assigned to valk, in increasing k order.
   *
   * @param psrc    address of row 0.
   * @param stride  distance between rows, in units of psrc's pointee type.
   * @param val0..val6  lvalues receiving the loaded vectors.
   *
   * psrc and stride are expanded several times, so they must not have
   * side effects.
   * Expands to a brace-enclosed block (a statement, not an expression).
   */
  #define LOAD_7VECS_SB(psrc, stride,                 \
                        val0, val1, val2, val3,       \
                        val4, val5, val6)             \
  {                                                   \
      val0 = LOAD_SB((psrc) + (stride) * 0);          \
      val1 = LOAD_SB((psrc) + (stride) * 1);          \
      val2 = LOAD_SB((psrc) + (stride) * 2);          \
      val3 = LOAD_SB((psrc) + (stride) * 3);          \
      val4 = LOAD_SB((psrc) + (stride) * 4);          \
      val5 = LOAD_SB((psrc) + (stride) * 5);          \
      val6 = LOAD_SB((psrc) + (stride) * 6);          \
  }
  /**
   * Load eight v16i8 vectors from eight consecutive rows.
   *
   * Implemented as two LOAD_4VECS_SB calls: rows 0..3 starting at psrc go
   * to out0..out3, then rows 4..7 starting at (psrc) + 4 * (stride) go to
   * out4..out7.
   *
   * @param psrc    address of row 0.
   * @param stride  distance between rows, in units of psrc's pointee type.
   * @param out0..out7  lvalues receiving the loaded vectors.
   *
   * psrc and stride are parenthesized before being combined so that
   * compound expressions such as `a + b` may be passed safely. Both are
   * expanded several times, so they must not have side effects.
   * Expands to a brace-enclosed block (a statement, not an expression).
   */
  #define LOAD_8VECS_SB(psrc, stride,                             \
                        out0, out1, out2, out3,                   \
                        out4, out5, out6, out7)                   \
  {                                                               \
      LOAD_4VECS_SB((psrc), (stride),                             \
                    (out0), (out1), (out2), (out3));              \
      LOAD_4VECS_SB(((psrc) + 4 * (stride)), (stride),            \
                    (out4), (out5), (out6), (out7));              \
  }
  /**
   * Apply the MSA intrinsic __msa_ilvr_b to two pairs of vectors.
   *
   * Performs, in this order:
   *   out0 = __msa_ilvr_b(in0_l, in0_r);
   *   out1 = __msa_ilvr_b(in1_l, in1_r);
   * with every input cast to v16i8 first.
   *
   * @param in0_r, in1_r  second argument of each intrinsic call.
   * @param in0_l, in1_l  first argument of each intrinsic call.
   * @param out0, out1    lvalues receiving the results.
   *
   * Expands to a brace-enclosed block (a statement, not an expression).
   */
  #define ILVR_B_2VECS_SB(in0_r, in1_r, in0_l, in1_l, out0, out1)     \
  {                                                                   \
      out0 = __msa_ilvr_b((v16i8) (in0_l), (v16i8) (in0_r));          \
      out1 = __msa_ilvr_b((v16i8) (in1_l), (v16i8) (in1_r));          \
  }
  /**
   * Apply the MSA intrinsic __msa_ilvr_b to four pairs of vectors.
   *
   * For k = 0..3, in increasing order:
   *   outk = __msa_ilvr_b(ink_l, ink_r);
   * with every input cast to v16i8 first.
   *
   * @param in0_r..in3_r  second argument of each intrinsic call.
   * @param in0_l..in3_l  first argument of each intrinsic call.
   * @param out0..out3    lvalues receiving the results.
   *
   * Expands to a brace-enclosed block (a statement, not an expression).
   */
  #define ILVR_B_4VECS_SB(in0_r, in1_r, in2_r, in3_r,                 \
                          in0_l, in1_l, in2_l, in3_l,                 \
                          out0, out1, out2, out3)                     \
  {                                                                   \
      out0 = __msa_ilvr_b((v16i8) (in0_l), (v16i8) (in0_r));          \
      out1 = __msa_ilvr_b((v16i8) (in1_l), (v16i8) (in1_r));          \
      out2 = __msa_ilvr_b((v16i8) (in2_l), (v16i8) (in2_r));          \
      out3 = __msa_ilvr_b((v16i8) (in3_l), (v16i8) (in3_r));          \
  }
  /**
   * Apply the MSA intrinsic __msa_ilvr_b to six pairs of vectors.
   *
   * For k = 0..5, in increasing order:
   *   outk = __msa_ilvr_b(ink_l, ink_r);
   * with every input cast to v16i8 first.
   *
   * @param in0_r..in5_r  second argument of each intrinsic call.
   * @param in0_l..in5_l  first argument of each intrinsic call.
   * @param out0..out5    lvalues receiving the results.
   *
   * Expands to a brace-enclosed block (a statement, not an expression).
   */
  #define ILVR_B_6VECS_SB(in0_r, in1_r, in2_r,                        \
                          in3_r, in4_r, in5_r,                        \
                          in0_l, in1_l, in2_l,                        \
                          in3_l, in4_l, in5_l,                        \
                          out0, out1, out2,                           \
                          out3, out4, out5)                           \
  {                                                                   \
      out0 = __msa_ilvr_b((v16i8) (in0_l), (v16i8) (in0_r));          \
      out1 = __msa_ilvr_b((v16i8) (in1_l), (v16i8) (in1_r));          \
      out2 = __msa_ilvr_b((v16i8) (in2_l), (v16i8) (in2_r));          \
      out3 = __msa_ilvr_b((v16i8) (in3_l), (v16i8) (in3_r));          \
      out4 = __msa_ilvr_b((v16i8) (in4_l), (v16i8) (in4_r));          \
      out5 = __msa_ilvr_b((v16i8) (in5_l), (v16i8) (in5_r));          \
  }
  /**
   * Apply the MSA intrinsic __msa_ilvr_b to eight pairs of vectors.
   *
   * For k = 0..7, in increasing order:
   *   outk = __msa_ilvr_b(ink_l, ink_r);
   * with every input cast to v16i8 first.
   *
   * @param in0_r..in7_r  second argument of each intrinsic call.
   * @param in0_l..in7_l  first argument of each intrinsic call.
   * @param out0..out7    lvalues receiving the results.
   *
   * Expands to a brace-enclosed block (a statement, not an expression).
   */
  #define ILVR_B_8VECS_SB(in0_r, in1_r, in2_r, in3_r,                 \
                          in4_r, in5_r, in6_r, in7_r,                 \
                          in0_l, in1_l, in2_l, in3_l,                 \
                          in4_l, in5_l, in6_l, in7_l,                 \
                          out0, out1, out2, out3,                     \
                          out4, out5, out6, out7)                     \
  {                                                                   \
      out0 = __msa_ilvr_b((v16i8) (in0_l), (v16i8) (in0_r));          \
      out1 = __msa_ilvr_b((v16i8) (in1_l), (v16i8) (in1_r));          \
      out2 = __msa_ilvr_b((v16i8) (in2_l), (v16i8) (in2_r));          \
      out3 = __msa_ilvr_b((v16i8) (in3_l), (v16i8) (in3_r));          \
      out4 = __msa_ilvr_b((v16i8) (in4_l), (v16i8) (in4_r));          \
      out5 = __msa_ilvr_b((v16i8) (in5_l), (v16i8) (in5_r));          \
      out6 = __msa_ilvr_b((v16i8) (in6_l), (v16i8) (in6_r));          \
      out7 = __msa_ilvr_b((v16i8) (in7_l), (v16i8) (in7_r));          \
  }
  /**
   * Apply the MSA intrinsic __msa_ilvl_b to two pairs of vectors.
   *
   * Performs, in this order:
   *   out0 = __msa_ilvl_b(in0_l, in0_r);
   *   out1 = __msa_ilvl_b(in1_l, in1_r);
   * with every input cast to v16i8 first.
   *
   * @param in0_r, in1_r  second argument of each intrinsic call.
   * @param in0_l, in1_l  first argument of each intrinsic call.
   * @param out0, out1    lvalues receiving the results.
   *
   * Expands to a brace-enclosed block (a statement, not an expression).
   */
  #define ILVL_B_2VECS_SB(in0_r, in1_r, in0_l, in1_l, out0, out1)     \
  {                                                                   \
      out0 = __msa_ilvl_b((v16i8) (in0_l), (v16i8) (in0_r));          \
      out1 = __msa_ilvl_b((v16i8) (in1_l), (v16i8) (in1_r));          \
  }
  /**
   * Apply the MSA intrinsic __msa_ilvl_b to four pairs of vectors.
   *
   * For k = 0..3, in increasing order:
   *   outk = __msa_ilvl_b(ink_l, ink_r);
   * with every input cast to v16i8 first.
   *
   * @param in0_r..in3_r  second argument of each intrinsic call.
   * @param in0_l..in3_l  first argument of each intrinsic call.
   * @param out0..out3    lvalues receiving the results.
   *
   * Expands to a brace-enclosed block (a statement, not an expression).
   */
  #define ILVL_B_4VECS_SB(in0_r, in1_r, in2_r, in3_r,                 \
                          in0_l, in1_l, in2_l, in3_l,                 \
                          out0, out1, out2, out3)                     \
  {                                                                   \
      out0 = __msa_ilvl_b((v16i8) (in0_l), (v16i8) (in0_r));          \
      out1 = __msa_ilvl_b((v16i8) (in1_l), (v16i8) (in1_r));          \
      out2 = __msa_ilvl_b((v16i8) (in2_l), (v16i8) (in2_r));          \
      out3 = __msa_ilvl_b((v16i8) (in3_l), (v16i8) (in3_r));          \
  }
  /**
   * Apply the MSA intrinsic __msa_ilvl_b to six pairs of vectors.
   *
   * For k = 0..5, in increasing order:
   *   outk = __msa_ilvl_b(ink_l, ink_r);
   * with every input cast to v16i8 first.
   *
   * @param in0_r..in5_r  second argument of each intrinsic call.
   * @param in0_l..in5_l  first argument of each intrinsic call.
   * @param out0..out5    lvalues receiving the results.
   *
   * Expands to a brace-enclosed block (a statement, not an expression).
   */
  #define ILVL_B_6VECS_SB(in0_r, in1_r, in2_r,                        \
                          in3_r, in4_r, in5_r,                        \
                          in0_l, in1_l, in2_l,                        \
                          in3_l, in4_l, in5_l,                        \
                          out0, out1, out2,                           \
                          out3, out4, out5)                           \
  {                                                                   \
      out0 = __msa_ilvl_b((v16i8) (in0_l), (v16i8) (in0_r));          \
      out1 = __msa_ilvl_b((v16i8) (in1_l), (v16i8) (in1_r));          \
      out2 = __msa_ilvl_b((v16i8) (in2_l), (v16i8) (in2_r));          \
      out3 = __msa_ilvl_b((v16i8) (in3_l), (v16i8) (in3_r));          \
      out4 = __msa_ilvl_b((v16i8) (in4_l), (v16i8) (in4_r));          \
      out5 = __msa_ilvl_b((v16i8) (in5_l), (v16i8) (in5_r));          \
  }
  /**
   * Apply the MSA intrinsic __msa_ilvr_d to two pairs of vectors.
   *
   * Performs, in this order:
   *   out0 = (v16i8) __msa_ilvr_d(in0_l, in0_r);
   *   out1 = (v16i8) __msa_ilvr_d(in1_l, in1_r);
   * with every input cast to v2i64 first.
   *
   * Note the parameter order: each output precedes its two inputs.
   *
   * @param out0, out1    lvalues receiving the results (cast to v16i8).
   * @param in0_l, in1_l  first argument of each intrinsic call.
   * @param in0_r, in1_r  second argument of each intrinsic call.
   *
   * Expands to a brace-enclosed block (a statement, not an expression).
   */
  #define ILVR_D_2VECS_SB(out0, in0_l, in0_r, out1, in1_l, in1_r)             \
  {                                                                           \
      out0 = (v16i8) __msa_ilvr_d((v2i64) (in0_l), (v2i64) (in0_r));          \
      out1 = (v16i8) __msa_ilvr_d((v2i64) (in1_l), (v2i64) (in1_r));          \
  }
  /**
   * Apply the MSA intrinsic __msa_ilvr_d to three pairs of vectors.
   *
   * For k = 0..2, in increasing order:
   *   outk = (v16i8) __msa_ilvr_d(ink_l, ink_r);
   * with every input cast to v2i64 first.
   *
   * Note the parameter order: each output precedes its two inputs.
   *
   * @param out0..out2    lvalues receiving the results (cast to v16i8).
   * @param in0_l..in2_l  first argument of each intrinsic call.
   * @param in0_r..in2_r  second argument of each intrinsic call.
   *
   * Expands to a brace-enclosed block (a statement, not an expression).
   */
  #define ILVR_D_3VECS_SB(out0, in0_l, in0_r,                                 \
                          out1, in1_l, in1_r,                                 \
                          out2, in2_l, in2_r)                                 \
  {                                                                           \
      out0 = (v16i8) __msa_ilvr_d((v2i64) (in0_l), (v2i64) (in0_r));          \
      out1 = (v16i8) __msa_ilvr_d((v2i64) (in1_l), (v2i64) (in1_r));          \
      out2 = (v16i8) __msa_ilvr_d((v2i64) (in2_l), (v2i64) (in2_r));          \
  }
  /**
   * Apply the MSA intrinsic __msa_ilvr_d to four pairs of vectors.
   *
   * For k = 0..3, in increasing order:
   *   outk = (v16i8) __msa_ilvr_d(ink_l, ink_r);
   * with every input cast to v2i64 first.
   *
   * Note the parameter order: each output precedes its two inputs.
   *
   * @param out0..out3    lvalues receiving the results (cast to v16i8).
   * @param in0_l..in3_l  first argument of each intrinsic call.
   * @param in0_r..in3_r  second argument of each intrinsic call.
   *
   * Expands to a brace-enclosed block (a statement, not an expression).
   */
  #define ILVR_D_4VECS_SB(out0, in0_l, in0_r,                                 \
                          out1, in1_l, in1_r,                                 \
                          out2, in2_l, in2_r,                                 \
                          out3, in3_l, in3_r)                                 \
  {                                                                           \
      out0 = (v16i8) __msa_ilvr_d((v2i64) (in0_l), (v2i64) (in0_r));          \
      out1 = (v16i8) __msa_ilvr_d((v2i64) (in1_l), (v2i64) (in1_r));          \
      out2 = (v16i8) __msa_ilvr_d((v2i64) (in2_l), (v2i64) (in2_r));          \
      out3 = (v16i8) __msa_ilvr_d((v2i64) (in3_l), (v2i64) (in3_r));          \
  }
  /**
   * Apply the MSA intrinsic __msa_xori_b to two vectors.
   *
   * Performs, in this order:
   *   out0 = (v16i8) __msa_xori_b((v16u8) val0, xor_val);
   *   out1 = (v16i8) __msa_xori_b((v16u8) val1, xor_val);
   *
   * @param val0, val1  input vectors (cast to v16u8).
   * @param out0, out1  lvalues receiving the results (cast to v16i8).
   * @param xor_val     second argument passed to every intrinsic call.
   *                    NOTE(review): presumably must be a compile-time
   *                    constant, as for an immediate operand - confirm
   *                    against the intrinsic's documentation.
   *
   * Expands to a brace-enclosed block (a statement, not an expression).
   */
  #define XORI_B_2VECS_SB(val0, val1, out0, out1, xor_val)            \
  {                                                                   \
      out0 = (v16i8) __msa_xori_b((v16u8) (val0), (xor_val));         \
      out1 = (v16i8) __msa_xori_b((v16u8) (val1), (xor_val));         \
  }
  /**
   * Apply the MSA intrinsic __msa_xori_b to three vectors.
   *
   * For k = 0..2, in increasing order:
   *   outk = (v16i8) __msa_xori_b((v16u8) valk, xor_val);
   *
   * @param val0..val2  input vectors (cast to v16u8).
   * @param out0..out2  lvalues receiving the results (cast to v16i8).
   * @param xor_val     second argument passed to every intrinsic call.
   *
   * Expands to a brace-enclosed block (a statement, not an expression).
   */
  #define XORI_B_3VECS_SB(val0, val1, val2,                           \
                          out0, out1, out2,                           \
                          xor_val)                                    \
  {                                                                   \
      out0 = (v16i8) __msa_xori_b((v16u8) (val0), (xor_val));         \
      out1 = (v16i8) __msa_xori_b((v16u8) (val1), (xor_val));         \
      out2 = (v16i8) __msa_xori_b((v16u8) (val2), (xor_val));         \
  }
  /**
   * Apply the MSA intrinsic __msa_xori_b to four vectors.
   *
   * For k = 0..3, in increasing order:
   *   outk = (v16i8) __msa_xori_b((v16u8) valk, xor_val);
   *
   * @param val0..val3  input vectors (cast to v16u8).
   * @param out0..out3  lvalues receiving the results (cast to v16i8).
   * @param xor_val     second argument passed to every intrinsic call.
   *
   * Expands to a brace-enclosed block (a statement, not an expression).
   */
  #define XORI_B_4VECS_SB(val0, val1, val2, val3,                     \
                          out0, out1, out2, out3,                     \
                          xor_val)                                    \
  {                                                                   \
      out0 = (v16i8) __msa_xori_b((v16u8) (val0), (xor_val));         \
      out1 = (v16i8) __msa_xori_b((v16u8) (val1), (xor_val));         \
      out2 = (v16i8) __msa_xori_b((v16u8) (val2), (xor_val));         \
      out3 = (v16i8) __msa_xori_b((v16u8) (val3), (xor_val));         \
  }
  /**
   * Apply the MSA intrinsic __msa_xori_b to five vectors.
   *
   * For k = 0..4, in increasing order:
   *   outk = (v16i8) __msa_xori_b((v16u8) valk, xor_val);
   *
   * @param val0..val4  input vectors (cast to v16u8).
   * @param out0..out4  lvalues receiving the results (cast to v16i8).
   * @param xor_val     second argument passed to every intrinsic call.
   *
   * Expands to a brace-enclosed block (a statement, not an expression).
   */
  #define XORI_B_5VECS_SB(val0, val1, val2, val3, val4,               \
                          out0, out1, out2, out3, out4,               \
                          xor_val)                                    \
  {                                                                   \
      out0 = (v16i8) __msa_xori_b((v16u8) (val0), (xor_val));         \
      out1 = (v16i8) __msa_xori_b((v16u8) (val1), (xor_val));         \
      out2 = (v16i8) __msa_xori_b((v16u8) (val2), (xor_val));         \
      out3 = (v16i8) __msa_xori_b((v16u8) (val3), (xor_val));         \
      out4 = (v16i8) __msa_xori_b((v16u8) (val4), (xor_val));         \
  }
  /**
   * Apply the MSA intrinsic __msa_xori_b to seven vectors.
   *
   * For k = 0..6, in increasing order:
   *   outk = (v16i8) __msa_xori_b((v16u8) valk, xor_val);
   *
   * @param val0..val6  input vectors (cast to v16u8).
   * @param out0..out6  lvalues receiving the results (cast to v16i8).
   * @param xor_val     second argument passed to every intrinsic call.
   *
   * Expands to a brace-enclosed block (a statement, not an expression).
   */
  #define XORI_B_7VECS_SB(val0, val1, val2, val3,                     \
                          val4, val5, val6,                           \
                          out0, out1, out2, out3,                     \
                          out4, out5, out6,                           \
                          xor_val)                                    \
  {                                                                   \
      out0 = (v16i8) __msa_xori_b((v16u8) (val0), (xor_val));         \
      out1 = (v16i8) __msa_xori_b((v16u8) (val1), (xor_val));         \
      out2 = (v16i8) __msa_xori_b((v16u8) (val2), (xor_val));         \
      out3 = (v16i8) __msa_xori_b((v16u8) (val3), (xor_val));         \
      out4 = (v16i8) __msa_xori_b((v16u8) (val4), (xor_val));         \
      out5 = (v16i8) __msa_xori_b((v16u8) (val5), (xor_val));         \
      out6 = (v16i8) __msa_xori_b((v16u8) (val6), (xor_val));         \
  }
  /**
   * Apply the MSA intrinsic __msa_xori_b to eight vectors.
   *
   * For k = 0..7, in increasing order:
   *   outk = (v16i8) __msa_xori_b((v16u8) valk, xor_val);
   *
   * @param val0..val7  input vectors (cast to v16u8).
   * @param out0..out7  lvalues receiving the results (cast to v16i8).
   * @param xor_val     second argument passed to every intrinsic call.
   *
   * Expands to a brace-enclosed block (a statement, not an expression).
   */
  #define XORI_B_8VECS_SB(val0, val1, val2, val3,                     \
                          val4, val5, val6, val7,                     \
                          out0, out1, out2, out3,                     \
                          out4, out5, out6, out7, xor_val)            \
  {                                                                   \
      out0 = (v16i8) __msa_xori_b((v16u8) (val0), (xor_val));         \
      out1 = (v16i8) __msa_xori_b((v16u8) (val1), (xor_val));         \
      out2 = (v16i8) __msa_xori_b((v16u8) (val2), (xor_val));         \
      out3 = (v16i8) __msa_xori_b((v16u8) (val3), (xor_val));         \
      out4 = (v16i8) __msa_xori_b((v16u8) (val4), (xor_val));         \
      out5 = (v16i8) __msa_xori_b((v16u8) (val5), (xor_val));         \
      out6 = (v16i8) __msa_xori_b((v16u8) (val6), (xor_val));         \
      out7 = (v16i8) __msa_xori_b((v16u8) (val7), (xor_val));         \
  }
  258. #endif /* AVUTIL_MIPS_GENERIC_MACROS_MSA_H */