You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

634 lines
17KB

  1. ;******************************************************************************
  2. ;* Copyright (c) 2012 Michael Niedermayer
  3. ;*
  4. ;* This file is part of FFmpeg.
  5. ;*
  6. ;* FFmpeg is free software; you can redistribute it and/or
  7. ;* modify it under the terms of the GNU Lesser General Public
  8. ;* License as published by the Free Software Foundation; either
  9. ;* version 2.1 of the License, or (at your option) any later version.
  10. ;*
  11. ;* FFmpeg is distributed in the hope that it will be useful,
  12. ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. ;* Lesser General Public License for more details.
  15. ;*
  16. ;* You should have received a copy of the GNU Lesser General Public
  17. ;* License along with FFmpeg; if not, write to the Free Software
  18. ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. ;******************************************************************************
  20. %include "libavutil/x86/x86util.asm"
  SECTION_RODATA 32

  ; Float scale factors. Each is replicated 8 times (32 bytes) so it can be
  ; loaded into a full vector register of any width used in this file.
  flt2pm31:         times 8 dd 4.6566129e-10    ; 2^-31
  flt2p31:          times 8 dd 2147483648.0     ; 2^31
  flt2p15:          times 8 dd 32768.0          ; 2^15

  ; pshufb control mask: moves the even-indexed 16-bit words into the low
  ; 8 bytes and the odd-indexed words into the high 8 bytes.
  word_unpack_shuf: db  0,  1,  4,  5,  8,  9, 12, 13
                    db  2,  3,  6,  7, 10, 11, 14, 15
  26. SECTION .text
  ;------------------------------------------------------------------------------
  ; PACK_2CH to, from, a/u, log2_outsize, log2_insize, conv, init
  ;
  ; Emits pack_2ch_<from>_to_<to>_<a|u>: interleaves two planar channels into
  ; one packed buffer, converting the sample format on the way.
  ;   %1  output format name (used in the symbol name)
  ;   %2  input format name  (used in the symbol name)
  ;   %3  a = aligned vector loads/stores, u = unaligned
  ;   %4  log2 of the output sample size in bytes
  ;   %5  log2 of the input sample size in bytes
  ;   %6  conversion macro, run on every batch inside the loop
  ;   %7  setup macro, run once before the loop
  ;
  ; Arguments: dst and src point to arrays of channel pointers (dst[0], src[0]
  ; and src[1] are used); len is the number of samples per channel.
  ; The "a" variant jumps into the body of the matching "u" variant when any
  ; pointer is not mmsize-aligned, so the "u" variant must also be instantiated.
  ; The loop body always runs at least once and has no tail handling --
  ; presumably callers pass a positive len that is a multiple of the batch
  ; size; verify against the caller.
  ;------------------------------------------------------------------------------
  %macro PACK_2CH 5-7
  cglobal pack_2ch_%2_to_%1_%3, 3, 4, 6, dst, src, len, src2
      mov     src2q, [srcq + gprsize]         ; src[1] (read before srcq is replaced)
      mov     srcq,  [srcq]                   ; src[0]
      mov     dstq,  [dstq]                   ; dst[0]
  %ifidn %3, a
      ; Any misaligned pointer -> continue in the unaligned implementation.
      test    dstq,  mmsize-1
      jne     pack_2ch_%2_to_%1_u_int %+ SUFFIX
      test    srcq,  mmsize-1
      jne     pack_2ch_%2_to_%1_u_int %+ SUFFIX
      test    src2q, mmsize-1
      jne     pack_2ch_%2_to_%1_u_int %+ SUFFIX
  %else
  pack_2ch_%2_to_%1_u_int %+ SUFFIX:
  %endif
      ; Advance every pointer to the end of its buffer and index with a
      ; negative sample counter that runs up to zero.
      lea     srcq,  [srcq  + (1<<%5)*lenq]
      lea     src2q, [src2q + (1<<%5)*lenq]
      lea     dstq,  [dstq  + (2<<%4)*lenq]
      neg     lenq
      %7      m0,m1,m2,m3,m4,m5
  .next:
  %if %4 >= %5
      ; Output samples are at least as wide as the input: interleave first,
      ; then convert.
      mov%3   m0, [srcq  + (1<<%5)*lenq]
      mova    m1, m0
      mov%3   m2, [src2q + (1<<%5)*lenq]
  %if %5 == 1
      punpcklwd m0, m2
      punpckhwd m1, m2
  %else
      punpckldq m0, m2
      punpckhdq m1, m2
  %endif
      %6      m0,m1,m2,m3,m4,m5
  %else
      ; Output samples are narrower: convert both channels first, then
      ; interleave the resulting 16-bit words.
      mov%3   m0, [         srcq  + (1<<%5)*lenq]
      mov%3   m1, [mmsize + srcq  + (1<<%5)*lenq]
      mov%3   m2, [         src2q + (1<<%5)*lenq]
      mov%3   m3, [mmsize + src2q + (1<<%5)*lenq]
      %6      m0,m1,m2,m3,m4,m5
      mova    m2, m0
      punpcklwd m0, m1
      punpckhwd m2, m1
      SWAP    1,2
  %endif
      mov%3   [           dstq + (2<<%4)*lenq], m0
      mov%3   [  mmsize + dstq + (2<<%4)*lenq], m1
  %if %4 > %5
      mov%3   [2*mmsize + dstq + (2<<%4)*lenq], m2
      mov%3   [3*mmsize + dstq + (2<<%4)*lenq], m3
      add     lenq, 4*mmsize/(2<<%4)
  %else
      add     lenq, 2*mmsize/(2<<%4)
  %endif
      jl      .next
      REP_RET
  %endmacro
  ;------------------------------------------------------------------------------
  ; UNPACK_2CH to, from, a/u, log2_outsize, log2_insize, conv, init
  ;
  ; Emits unpack_2ch_<from>_to_<to>_<a|u>: splits one packed 2-channel buffer
  ; into two planar channels, converting the sample format on the way.
  ;   %1  output format name (used in the symbol name)
  ;   %2  input format name  (used in the symbol name)
  ;   %3  a = aligned vector loads/stores, u = unaligned
  ;   %4  log2 of the output sample size in bytes
  ;   %5  log2 of the input sample size in bytes
  ;   %6  conversion macro, run on every batch inside the loop
  ;   %7  setup macro, run once before the loop
  ;
  ; Arguments: dst and src point to arrays of channel pointers (dst[0], dst[1]
  ; and src[0] are used); len is the number of samples per channel.
  ; The "a" variant jumps into the body of the matching "u" variant when any
  ; pointer is not mmsize-aligned, so the "u" variant must also be instantiated.
  ; The loop body always runs at least once and has no tail handling --
  ; presumably callers pass a positive len that is a multiple of the batch
  ; size; verify against the caller.
  ;------------------------------------------------------------------------------
  %macro UNPACK_2CH 5-7
  cglobal unpack_2ch_%2_to_%1_%3, 3, 4, 7, dst, src, len, dst2
      mov     dst2q, [dstq + gprsize]         ; dst[1] (read before dstq is replaced)
      mov     srcq,  [srcq]                   ; src[0]
      mov     dstq,  [dstq]                   ; dst[0]
  %ifidn %3, a
      ; Any misaligned pointer -> continue in the unaligned implementation.
      test    dstq,  mmsize-1
      jne     unpack_2ch_%2_to_%1_u_int %+ SUFFIX
      test    srcq,  mmsize-1
      jne     unpack_2ch_%2_to_%1_u_int %+ SUFFIX
      test    dst2q, mmsize-1
      jne     unpack_2ch_%2_to_%1_u_int %+ SUFFIX
  %else
  unpack_2ch_%2_to_%1_u_int %+ SUFFIX:
  %endif
      ; Advance every pointer to the end of its buffer and index with a
      ; negative sample counter that runs up to zero.
      lea     srcq,  [srcq  + (2<<%5)*lenq]
      lea     dstq,  [dstq  + (1<<%4)*lenq]
      lea     dst2q, [dst2q + (1<<%4)*lenq]
      neg     lenq
      %7      m0,m1,m2,m3,m4,m5
      ; The shuffle mask is only read by the SSSE3 16-bit-input path below,
      ; so it is only loaded for that configuration.
  %if %5 == 1
  %ifidn SUFFIX, _ssse3
      mova    m6, [word_unpack_shuf]
  %endif
  %endif
  .next:
      mov%3   m0, [         srcq + (2<<%5)*lenq]
      mov%3   m2, [mmsize + srcq + (2<<%5)*lenq]
  %if %5 == 1
  %ifidn SUFFIX, _ssse3
      ; Sort each register into even|odd words, then merge the halves.
      pshufb  m0, m6
      mova    m1, m0
      pshufb  m2, m6
      punpcklqdq m0, m2
      punpckhqdq m1, m2
  %else
      ; Three rounds of word interleaving separate even and odd words.
      mova    m1, m0
      punpcklwd m0, m2
      punpckhwd m1, m2
      mova    m2, m0
      punpcklwd m0, m1
      punpckhwd m2, m1
      mova    m1, m0
      punpcklwd m0, m2
      punpckhwd m1, m2
  %endif
  %else
      ; 32-bit samples: pick even dwords into m0 and odd dwords into m1.
      mova    m1, m0
      shufps  m0, m2, 10001000b
      shufps  m1, m2, 11011101b
  %endif
  %if %4 < %5
      ; Narrowing conversion consumes twice as much input per batch.
      mov%3   m2, [2*mmsize + srcq + (2<<%5)*lenq]
      mova    m3, m2
      mov%3   m4, [3*mmsize + srcq + (2<<%5)*lenq]
      shufps  m2, m4, 10001000b
      shufps  m3, m4, 11011101b
      SWAP    1,2
  %endif
      %6      m0,m1,m2,m3,m4,m5
      mov%3   [         dstq  + (1<<%4)*lenq], m0
  %if %4 > %5
      mov%3   [         dst2q + (1<<%4)*lenq], m2
      mov%3   [mmsize + dstq  + (1<<%4)*lenq], m1
      mov%3   [mmsize + dst2q + (1<<%4)*lenq], m3
      add     lenq, 2*mmsize/(1<<%4)
  %else
      mov%3   [         dst2q + (1<<%4)*lenq], m1
      add     lenq, mmsize/(1<<%4)
  %endif
      jl      .next
      REP_RET
  %endmacro
  ;------------------------------------------------------------------------------
  ; CONV to, from, a/u, log2_outsize, log2_insize, conv, init
  ;
  ; Emits <from>_to_<to>_<a|u>: converts a single channel from one sample
  ; format to another.
  ;   %1  output format name (used in the symbol name)
  ;   %2  input format name  (used in the symbol name)
  ;   %3  a = aligned vector loads/stores, u = unaligned
  ;   %4  log2 of the output sample size in bytes
  ;   %5  log2 of the input sample size in bytes
  ;   %6  conversion macro, run on every batch inside the loop
  ;   %7  setup macro, run once before the loop
  ;
  ; Arguments: dst and src point to arrays of channel pointers (only dst[0]
  ; and src[0] are used); len is the number of samples.
  ; The "a" variant jumps into the body of the matching "u" variant when a
  ; pointer is not mmsize-aligned, so the "u" variant must also be instantiated.
  ; The loop body always runs at least once and has no tail handling --
  ; presumably callers pass a positive len that is a multiple of the batch
  ; size; verify against the caller.
  ;------------------------------------------------------------------------------
  %macro CONV 5-7
  cglobal %2_to_%1_%3, 3, 3, 6, dst, src, len
      mov     srcq, [srcq]                    ; src[0]
      mov     dstq, [dstq]                    ; dst[0]
  %ifidn %3, a
      ; Any misaligned pointer -> continue in the unaligned implementation.
      test    dstq, mmsize-1
      jne     %2_to_%1_u_int %+ SUFFIX
      test    srcq, mmsize-1
      jne     %2_to_%1_u_int %+ SUFFIX
  %else
  %2_to_%1_u_int %+ SUFFIX:
  %endif
      ; Advance both pointers to the end of their buffers and index with a
      ; negative sample counter that runs up to zero.
      lea     srcq, [srcq + (1<<%5)*lenq]
      lea     dstq, [dstq + (1<<%4)*lenq]
      neg     lenq
      %7      m0,m1,m2,m3,m4,m5
  .next:
      mov%3   m0, [           srcq + (1<<%5)*lenq]
      mov%3   m1, [  mmsize + srcq + (1<<%5)*lenq]
  %if %4 < %5
      ; Narrowing conversion: four input registers yield two output registers.
      mov%3   m2, [2*mmsize + srcq + (1<<%5)*lenq]
      mov%3   m3, [3*mmsize + srcq + (1<<%5)*lenq]
  %endif
      %6      m0,m1,m2,m3,m4,m5
      mov%3   [           dstq + (1<<%4)*lenq], m0
      mov%3   [  mmsize + dstq + (1<<%4)*lenq], m1
  %if %4 > %5
      ; Widening conversion: two input registers yield four output registers.
      mov%3   [2*mmsize + dstq + (1<<%4)*lenq], m2
      mov%3   [3*mmsize + dstq + (1<<%4)*lenq], m3
      add     lenq, 4*mmsize/(1<<%4)
  %else
      add     lenq, 2*mmsize/(1<<%4)
  %endif
      jl      .next
  %if mmsize == 8
      emms                                    ; leave MMX state before returning
      RET
  %else
      REP_RET
  %endif
  %endmacro
  ;------------------------------------------------------------------------------
  ; PACK_6CH to, from, a/u, log2_outsize, log2_insize, conv, init
  ;
  ; Emits pack_6ch_<from>_to_<to>_<a|u>: interleaves six planar channels of
  ; 32-bit samples into one packed buffer, converting on the way.
  ;   %1  output format name (used in the symbol name)
  ;   %2  input format name  (used in the symbol name)
  ;   %3  a = aligned vector loads/stores, u = unaligned
  ;   %4, %5  unused by this macro body
  ;   %6  conversion macro, applied to each pair of output registers
  ;   %7  setup macro, loads the conversion constant into m7
  ;
  ; Arguments: dst and src point to arrays of channel pointers (dst[0] and
  ; src[0..5] are used); the third argument is the per-channel sample count.
  ; The "a" variant jumps into the body of the matching "u" variant when any
  ; pointer is not mmsize-aligned, so the "u" variant must also be instantiated.
  ; The loop body always runs at least once and consumes mmsize/4 samples per
  ; channel per iteration.
  ;
  ; Eight vector registers are declared because the converting variants keep
  ; their constant in m7.
  ;------------------------------------------------------------------------------
  %macro PACK_6CH 5-7
  cglobal pack_6ch_%2_to_%1_%3, 2,8,8, dst, src, src1, src2, src3, src4, src5, len
  %if ARCH_X86_64
      mov     lend, r2d                       ; copy len before r2 is reused as src1
  %else
      %define lend dword r2m                  ; keep len in its stack slot
  %endif
      mov     src1q, [srcq + 1*gprsize]
      mov     src2q, [srcq + 2*gprsize]
      mov     src3q, [srcq + 3*gprsize]
      mov     src4q, [srcq + 4*gprsize]
      mov     src5q, [srcq + 5*gprsize]
      mov     srcq,  [srcq]
      mov     dstq,  [dstq]
  %ifidn %3, a
      ; Any misaligned pointer -> continue in the unaligned implementation.
      test    dstq,  mmsize-1
      jne     pack_6ch_%2_to_%1_u_int %+ SUFFIX
      test    srcq,  mmsize-1
      jne     pack_6ch_%2_to_%1_u_int %+ SUFFIX
      test    src1q, mmsize-1
      jne     pack_6ch_%2_to_%1_u_int %+ SUFFIX
      test    src2q, mmsize-1
      jne     pack_6ch_%2_to_%1_u_int %+ SUFFIX
      test    src3q, mmsize-1
      jne     pack_6ch_%2_to_%1_u_int %+ SUFFIX
      test    src4q, mmsize-1
      jne     pack_6ch_%2_to_%1_u_int %+ SUFFIX
      test    src5q, mmsize-1
      jne     pack_6ch_%2_to_%1_u_int %+ SUFFIX
  %else
  pack_6ch_%2_to_%1_u_int %+ SUFFIX:
  %endif
      ; Turn src1..src5 into offsets relative to src so that a single
      ; pointer increment advances all six channels.
      sub     src1q, srcq
      sub     src2q, srcq
      sub     src3q, srcq
      sub     src4q, srcq
      sub     src5q, srcq
      ; m7 is not written inside the loop, so the constant is loaded once.
      %7      x,x,x,x,m7,x
  .loop:
      mov%3   m0, [srcq        ]
      mov%3   m1, [srcq + src1q]
      mov%3   m2, [srcq + src2q]
      mov%3   m3, [srcq + src3q]
      mov%3   m4, [srcq + src4q]
      mov%3   m5, [srcq + src5q]
  %if cpuflag(sse)
      SBUTTERFLYPS 0, 1, 6
      SBUTTERFLYPS 2, 3, 6
      SBUTTERFLYPS 4, 5, 6

  %if cpuflag(avx)
      blendps m6, m4, m0, 1100b
  %else
      movaps  m6, m4
      shufps  m4, m0, q3210
      SWAP    4,6
  %endif
      movlhps m0, m2
      movhlps m4, m2
  %if cpuflag(avx)
      blendps m2, m5, m1, 1100b
  %else
      movaps  m2, m5
      shufps  m5, m1, q3210
      SWAP    2,5
  %endif
      movlhps m1, m3
      movhlps m5, m3

      %6      m0,m6,x,x,m7,m3
      %6      m4,m1,x,x,m7,m3
      %6      m2,m5,x,x,m7,m3

      mov %+ %3 %+ ps [dstq   ], m0
      mov %+ %3 %+ ps [dstq+16], m6
      mov %+ %3 %+ ps [dstq+32], m4
      mov %+ %3 %+ ps [dstq+48], m1
      mov %+ %3 %+ ps [dstq+64], m2
      mov %+ %3 %+ ps [dstq+80], m5
  %else ; mmx
      SBUTTERFLY dq, 0, 1, 6
      SBUTTERFLY dq, 2, 3, 6
      SBUTTERFLY dq, 4, 5, 6

      movq    [dstq   ], m0
      movq    [dstq+ 8], m2
      movq    [dstq+16], m4
      movq    [dstq+24], m1
      movq    [dstq+32], m3
      movq    [dstq+40], m5
  %endif
      add     srcq, mmsize
      add     dstq, mmsize*6
      sub     lend, mmsize/4
      jg      .loop
  %if mmsize == 8
      emms                                    ; leave MMX state before returning
      RET
  %else
      REP_RET
  %endif
  %endmacro
  ;------------------------------------------------------------------------------
  ; PACK_8CH to, from, a/u, log2_outsize, log2_insize, conv, init
  ;
  ; Emits pack_8ch_<from>_to_<to>_<a|u>: interleaves eight planar channels of
  ; 32-bit samples into one packed buffer, converting on the way.
  ;   %1  output format name (used in the symbol name; on x86-32 it also
  ;       selects which constant m9 aliases)
  ;   %2  input format name  (used in the symbol name)
  ;   %3  a = aligned vector loads/stores, u = unaligned
  ;   %4, %5  unused by this macro body
  ;   %6  conversion macro, applied to each pair of output registers
  ;   %7  setup macro, loads the conversion constant into m9 (x86-64 only)
  ;
  ; Arguments: dst and src point to arrays of channel pointers (dst[0] and
  ; src[0..7] are used); the third argument is the per-channel sample count.
  ; The "a" variant jumps into the body of the matching "u" variant when any
  ; pointer is not mmsize-aligned, so the "u" variant must also be instantiated.
  ; The loop body always runs at least once and consumes mmsize/4 samples per
  ; channel per iteration.
  ;
  ; x86-32 does not have enough registers for this, so there:
  ;   * dst, src1 and src7 all share r0 and are reloaded from the stack slots
  ;     dstm / src1m / src7m when needed;
  ;   * len stays in its argument slot (r2m);
  ;   * m9 is a memory operand instead of a register;
  ;   * 32 bytes of stack are reserved as scratch for TRANSPOSE8x4D.
  ; NOTE(review): src7m is r3m, which lies past the three named arguments --
  ; confirm the caller provides that slot.
  ;------------------------------------------------------------------------------
  %macro PACK_8CH 5-7
  cglobal pack_8ch_%2_to_%1_%3, 2,10,10, ARCH_X86_32*32, dst, src, len, src1, src2, src3, src4, src5, src6, src7
      mov     dstq, [dstq]
  %if ARCH_X86_32
      DEFINE_ARGS dst, src, src2, src3, src4, src5, src6
      %define lend dword r2m
      %define src1q r0q
      %define src1m r1mp
      %define src7q r0q
      %define src7m r3mp
      mov     dstm, dstq                      ; park dst[0]; r0 is reused below
  %endif
      mov     src7q, [srcq + 7*gprsize]
      mov     src6q, [srcq + 6*gprsize]
      mov     src5q, [srcq + 5*gprsize]
      mov     src4q, [srcq + 4*gprsize]
  %if ARCH_X86_32
      mov     src7m, src7q                    ; park src[7] before r0 receives src[1]
  %endif
      mov     src3q, [srcq + 3*gprsize]
      mov     src2q, [srcq + 2*gprsize]
      mov     src1q, [srcq + 1*gprsize]
      mov     srcq,  [srcq]
  %ifidn %3, a
      ; Any misaligned pointer -> continue in the unaligned implementation.
  %if ARCH_X86_32
      test    dstmp, mmsize-1
  %else
      test    dstq,  mmsize-1
  %endif
      jne     pack_8ch_%2_to_%1_u_int %+ SUFFIX
      test    srcq,  mmsize-1
      jne     pack_8ch_%2_to_%1_u_int %+ SUFFIX
      test    src1q, mmsize-1
      jne     pack_8ch_%2_to_%1_u_int %+ SUFFIX
      test    src2q, mmsize-1
      jne     pack_8ch_%2_to_%1_u_int %+ SUFFIX
      test    src3q, mmsize-1
      jne     pack_8ch_%2_to_%1_u_int %+ SUFFIX
      test    src4q, mmsize-1
      jne     pack_8ch_%2_to_%1_u_int %+ SUFFIX
      test    src5q, mmsize-1
      jne     pack_8ch_%2_to_%1_u_int %+ SUFFIX
      test    src6q, mmsize-1
      jne     pack_8ch_%2_to_%1_u_int %+ SUFFIX
  %if ARCH_X86_32
      test    src7m, mmsize-1
  %else
      test    src7q, mmsize-1
  %endif
      jne     pack_8ch_%2_to_%1_u_int %+ SUFFIX
  %else
  pack_8ch_%2_to_%1_u_int %+ SUFFIX:
  %endif
      ; Turn src1..src7 into offsets relative to src so that a single
      ; pointer increment advances all eight channels.
      sub     src1q, srcq
      sub     src2q, srcq
      sub     src3q, srcq
      sub     src4q, srcq
      sub     src5q, srcq
      sub     src6q, srcq
  %if ARCH_X86_64
      sub     src7q, srcq
  %else
      mov     src1m, src1q
      sub     src7m, srcq
  %endif

      ; Conversion constant: a register on x86-64, a memory operand on x86-32.
  %if ARCH_X86_64
      %7      x,x,x,x,m9,x
  %elifidn %1, int32
      %define m9 [flt2p31]
  %else
      %define m9 [flt2pm31]
  %endif

  .loop:
      mov%3   m0, [srcq        ]
      mov%3   m1, [srcq + src1q]
      mov%3   m2, [srcq + src2q]
      mov%3   m3, [srcq + src3q]
  %if ARCH_X86_32
      mov     src7q, src7m                    ; r0 now holds the src7 offset
  %endif
      mov%3   m4, [srcq + src4q]
      mov%3   m5, [srcq + src5q]
      mov%3   m6, [srcq + src6q]
      mov%3   m7, [srcq + src7q]

  %if ARCH_X86_64
      TRANSPOSE8x4D 0, 1, 2, 3, 4, 5, 6, 7, 8

      %6      m0,m1,x,x,m9,m8
      %6      m2,m3,x,x,m9,m8
      %6      m4,m5,x,x,m9,m8
      %6      m6,m7,x,x,m9,m8

      mov%3   [dstq], m0
  %else
      mov     dstq, dstm                      ; r0 holds dst again

      TRANSPOSE8x4D 0, 1, 2, 3, 4, 5, 6, 7, [rsp], [rsp+16], 1

      ; m2 serves as the scratch register for the first pair and is then
      ; reloaded from [rsp]; m0 is stored before it becomes the scratch
      ; register for the remaining pairs.
      %6      m0,m1,x,x,m9,m2
      mova    m2, [rsp]
      mov%3   [dstq], m0
      %6      m2,m3,x,x,m9,m0
      %6      m4,m5,x,x,m9,m0
      %6      m6,m7,x,x,m9,m0
  %endif

      mov%3   [dstq+16],  m1
      mov%3   [dstq+32],  m2
      mov%3   [dstq+48],  m3
      mov%3   [dstq+64],  m4
      mov%3   [dstq+80],  m5
      mov%3   [dstq+96],  m6
      mov%3   [dstq+112], m7

      add     srcq, mmsize
      add     dstq, mmsize*8
  %if ARCH_X86_32
      mov     dstm, dstq                      ; park the advanced dst pointer
      mov     src1q, src1m                    ; r0 holds the src1 offset for the next pass
  %endif
      sub     lend, mmsize/4
      jg      .loop
      REP_RET
  %endmacro
  ; INT16_TO_INT32_N: widens the 16-bit samples in m0 and m1 to 32 bits.
  ; Each word lands in the upper half of a zeroed dword, i.e. the sample is
  ; shifted left by 16.
  ;   in : m0, m1
  ;   out: m0/m1 = low/high half of the original m0,
  ;        m2/m3 = low/high half of the original m1
  ;   m4 is used as scratch. The six macro parameters are ignored.
  %macro INT16_TO_INT32_N 6
      ; Expand the second input register first, while m1 still holds it.
      pxor      m2, m2
      punpcklwd m2, m1
      pxor      m3, m3
      punpckhwd m3, m1
      ; Rename the first input to m4 so that m0/m1 can receive its expansion.
      SWAP      4,0
      pxor      m0, m0
      pxor      m1, m1
      punpcklwd m0, m4
      punpckhwd m1, m4
  %endmacro
  ; INT32_TO_INT16_N: narrows the 32-bit samples in m0..m3 to 16 bits by
  ; keeping the upper 16 bits of every dword.
  ;   in : m0, m1, m2, m3
  ;   out: m0 = packed m0:m1, m1 = packed m2:m3
  ; The six macro parameters are ignored.
  %macro INT32_TO_INT16_N 6
      psrad    m2, 16
      psrad    m3, 16
      packssdw m2, m3
      psrad    m0, 16
      psrad    m1, 16
      packssdw m0, m1
      SWAP     1,2                ; second result is now addressed as m1
  %endmacro
  ; INT32_TO_FLOAT_INIT: loads the 2^-31 scale factor into the register
  ; passed as %5, for use by INT32_TO_FLOAT_N.
  %macro INT32_TO_FLOAT_INIT 6
      mova    %5, [flt2pm31]
  %endmacro
  ; INT32_TO_FLOAT_N: converts the 32-bit integers in %1 and %2 to float and
  ; multiplies them by the scale factor held in %5.
  ;   %1, %2  registers converted in place
  ;   %5      scale factor (set up by INT32_TO_FLOAT_INIT)
  ;   %3, %4, %6 are ignored.
  %macro INT32_TO_FLOAT_N 6
      cvtdq2ps %1, %1
      cvtdq2ps %2, %2
      mulps    %1, %5
      mulps    %2, %5
  %endmacro
  ; FLOAT_TO_INT32_INIT: loads the 2^31 scale factor into the register
  ; passed as %5, for use by FLOAT_TO_INT32_N.
  %macro FLOAT_TO_INT32_INIT 6
      mova    %5, [flt2p31]
  %endmacro
  ; FLOAT_TO_INT32_N: scales the floats in %1 and %2 by %5 and converts them
  ; to 32-bit integers with positive saturation.
  ;   %1, %2  registers converted in place
  ;   %5      scale factor (set up by FLOAT_TO_INT32_INIT)
  ;   %6      scratch register
  ;   %3, %4 are ignored.
  ; cvtps2dq returns 0x80000000 for values that do not fit. The compare
  ; (predicate 5 = not-less-than) yields all-ones, i.e. -1, wherever the scaled
  ; value is >= %5; adding that mask turns 0x80000000 into 0x7FFFFFFF for
  ; positive overflow.
  %macro FLOAT_TO_INT32_N 6
      mulps    %1, %1, %5
      mulps    %2, %2, %5
      cvtps2dq %6, %1
      cmpps    %1, %1, %5, 5
      paddd    %1, %6
      cvtps2dq %6, %2
      cmpps    %2, %2, %5, 5
      paddd    %2, %6
  %endmacro
  ; INT16_TO_FLOAT_INIT: loads the 2^-31 scale factor for INT16_TO_FLOAT_N.
  ; The destination is hard-coded to m5 (the macro parameters are ignored);
  ; INT16_TO_FLOAT_N reads the constant from m5.
  %macro INT16_TO_FLOAT_INIT 6
      mova    m5, [flt2pm31]
  %endmacro
  ; INT16_TO_FLOAT_N: converts the 16-bit samples in m0 and m1 to floats in
  ; m0..m3. The samples are first widened with INT16_TO_INT32_N, then
  ; converted to float and multiplied by the constant in m5 (loaded by
  ; INT16_TO_FLOAT_INIT).
  ; The parameters are only forwarded to INT16_TO_INT32_N.
  %macro INT16_TO_FLOAT_N 6
      INT16_TO_INT32_N %1,%2,%3,%4,%5,%6
      cvtdq2ps m0, m0
      mulps    m0, m5
      cvtdq2ps m1, m1
      mulps    m1, m5
      cvtdq2ps m2, m2
      mulps    m2, m5
      cvtdq2ps m3, m3
      mulps    m3, m5
  %endmacro
  ; FLOAT_TO_INT16_INIT: loads the 2^15 scale factor for FLOAT_TO_INT16_N.
  ; The destination is hard-coded to m5; the macro parameters are ignored.
  %macro FLOAT_TO_INT16_INIT 6
      mova    m5, [flt2p15]
  %endmacro
  ; FLOAT_TO_INT16_N: scales the floats in m0..m3 by the constant in m5,
  ; converts them to 32-bit integers and packs them with signed saturation
  ; into 16-bit samples.
  ;   in : m0, m1, m2, m3 (float), m5 (scale factor)
  ;   out: m0 = packed m0:m1, m1 = packed m2:m3
  ; The six macro parameters are ignored.
  %macro FLOAT_TO_INT16_N 6
      ; First output register.
      mulps    m0, m5
      cvtps2dq m0, m0
      mulps    m1, m5
      cvtps2dq m1, m1
      packssdw m0, m1
      ; Second output register; m1 is free again at this point.
      mulps    m2, m5
      cvtps2dq m1, m2
      mulps    m3, m5
      cvtps2dq m3, m3
      packssdw m1, m3
  %endmacro
  ; NOP_N: expands to nothing. It is passed as the conversion and/or setup
  ; macro when no work is needed; it accepts zero to six arguments.
  %macro NOP_N 0-6
  %endmacro
  ; ---- MMX --------------------------------------------------------------------
  ; Every "a" variant needs its "u" twin, because its misalignment fallback
  ; jumps into the "u" body.
  INIT_MMX mmx
  CONV     int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
  CONV     int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
  CONV     int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
  CONV     int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N

  PACK_6CH float, float, u, 2, 2, NOP_N, NOP_N
  PACK_6CH float, float, a, 2, 2, NOP_N, NOP_N
  ; ---- SSE --------------------------------------------------------------------
  INIT_XMM sse
  PACK_6CH float, float, u, 2, 2, NOP_N, NOP_N
  PACK_6CH float, float, a, 2, 2, NOP_N, NOP_N
  ; ---- SSE2 -------------------------------------------------------------------
  ; Every "a" variant needs its "u" twin, because its misalignment fallback
  ; jumps into the "u" body.
  INIT_XMM sse2

  ; Integer <-> integer, single channel
  CONV       int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
  CONV       int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
  CONV       int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
  CONV       int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N

  ; Integer <-> integer, planar -> packed stereo
  PACK_2CH   int16, int16, u, 1, 1, NOP_N, NOP_N
  PACK_2CH   int16, int16, a, 1, 1, NOP_N, NOP_N
  PACK_2CH   int32, int32, u, 2, 2, NOP_N, NOP_N
  PACK_2CH   int32, int32, a, 2, 2, NOP_N, NOP_N
  PACK_2CH   int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
  PACK_2CH   int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
  PACK_2CH   int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
  PACK_2CH   int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N

  ; Integer <-> integer, packed stereo -> planar
  UNPACK_2CH int16, int16, u, 1, 1, NOP_N, NOP_N
  UNPACK_2CH int16, int16, a, 1, 1, NOP_N, NOP_N
  UNPACK_2CH int32, int32, u, 2, 2, NOP_N, NOP_N
  UNPACK_2CH int32, int32, a, 2, 2, NOP_N, NOP_N
  UNPACK_2CH int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
  UNPACK_2CH int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
  UNPACK_2CH int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
  UNPACK_2CH int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N

  ; Float <-> integer, single channel
  CONV       float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
  CONV       float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
  CONV       int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
  CONV       int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
  CONV       float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
  CONV       float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
  CONV       int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
  CONV       int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT

  ; Float <-> integer, planar -> packed stereo
  PACK_2CH   float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
  PACK_2CH   float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
  PACK_2CH   int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
  PACK_2CH   int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
  PACK_2CH   float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
  PACK_2CH   float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
  PACK_2CH   int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
  PACK_2CH   int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT

  ; Float <-> integer, packed stereo -> planar
  UNPACK_2CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
  UNPACK_2CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
  UNPACK_2CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
  UNPACK_2CH int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
  UNPACK_2CH float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
  UNPACK_2CH float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
  UNPACK_2CH int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
  UNPACK_2CH int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT

  ; Planar -> packed 6 channels
  PACK_6CH   float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
  PACK_6CH   float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
  PACK_6CH   int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
  PACK_6CH   int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT

  ; Planar -> packed 8 channels; only built when HAVE_ALIGNED_STACK is set
  ; (the x86-32 path uses mova on [rsp]).
  %if HAVE_ALIGNED_STACK
  PACK_8CH   float, float, u, 2, 2, NOP_N, NOP_N
  PACK_8CH   float, float, a, 2, 2, NOP_N, NOP_N
  PACK_8CH   float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
  PACK_8CH   float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
  PACK_8CH   int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
  PACK_8CH   int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
  %endif
  ; ---- SSSE3 ------------------------------------------------------------------
  ; Only the 16-bit-input unpackers are built here; they are the variants
  ; that take the pshufb path in UNPACK_2CH.
  INIT_XMM ssse3
  UNPACK_2CH int16, int16, u, 1, 1, NOP_N, NOP_N
  UNPACK_2CH int16, int16, a, 1, 1, NOP_N, NOP_N
  UNPACK_2CH int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
  UNPACK_2CH int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
  UNPACK_2CH float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
  UNPACK_2CH float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
  ; ---- AVX (built only when HAVE_AVX_EXTERNAL is set) -------------------------
  %if HAVE_AVX_EXTERNAL
  ; 128-bit AVX versions of the multi-channel packers
  INIT_XMM avx
  PACK_6CH float, float, u, 2, 2, NOP_N, NOP_N
  PACK_6CH float, float, a, 2, 2, NOP_N, NOP_N
  PACK_6CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
  PACK_6CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
  PACK_6CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
  PACK_6CH int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT

  %if HAVE_ALIGNED_STACK
  PACK_8CH float, float, u, 2, 2, NOP_N, NOP_N
  PACK_8CH float, float, a, 2, 2, NOP_N, NOP_N
  PACK_8CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
  PACK_8CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
  PACK_8CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
  PACK_8CH int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
  %endif

  ; 256-bit int32 -> float conversion
  INIT_YMM avx
  CONV     float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
  CONV     float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
  %endif
  ; ---- AVX2 (built only when HAVE_AVX2_EXTERNAL is set) -----------------------
  ; 256-bit float -> int32 conversion; FLOAT_TO_INT32_N uses the integer
  ; instruction paddd.
  %if HAVE_AVX2_EXTERNAL
  INIT_YMM avx2
  CONV     int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
  CONV     int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
  %endif