;******************************************************************************
;* MMX/SSSE3-optimized functions for H264 chroma MC
;* Copyright (c) 2005 Zoltan Hidvegi <hzoli -a- hzoli -d- com>,
;*               2005-2008 Loren Merritt
;*
;* This file is part of Libav.
;*
;* Libav is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* Libav is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with Libav; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
%include "x86inc.asm"
%include "x86util.asm"

SECTION_RODATA

rnd_rv40_2d_tbl: times 4 dw  0
                 times 4 dw 16
                 times 4 dw 32
                 times 4 dw 16
                 times 4 dw 32
                 times 4 dw 28
                 times 4 dw 32
                 times 4 dw 28
                 times 4 dw  0
                 times 4 dw 32
                 times 4 dw 16
                 times 4 dw 32
                 times 4 dw 32
                 times 4 dw 28
                 times 4 dw 32
                 times 4 dw 28
rnd_rv40_1d_tbl: times 4 dw  0
                 times 4 dw  2
                 times 4 dw  4
                 times 4 dw  2
                 times 4 dw  4
                 times 4 dw  3
                 times 4 dw  4
                 times 4 dw  3
                 times 4 dw  0
                 times 4 dw  4
                 times 4 dw  2
                 times 4 dw  4
                 times 4 dw  4
                 times 4 dw  3
                 times 4 dw  4
                 times 4 dw  3
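
; Each table entry is one rounding constant replicated into 4 words (8 bytes);
; the rv40 functions index these tables as [tbl + rnd_bias*8], where
; rnd_bias = ((my & ~1) * 4 + mx) >> 1 is computed in the macros below.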

cextern pw_3
cextern pw_4
cextern pw_8
cextern pw_28
cextern pw_32
cextern pw_64

SECTION .text
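
; mv0_pixels_mc8: fast path for mx == my == 0 - copy (or, for the avg_*
; functions, average via CHROMAMC_AVG) four 8-byte rows per loop iteration.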
%macro mv0_pixels_mc8 0
    lea           r4, [r2*2]
.next4rows
    movq         mm0, [r1]
    movq         mm1, [r1+r2]
    add           r1, r4
    CHROMAMC_AVG mm0, [r0]
    CHROMAMC_AVG mm1, [r0+r2]
    movq        [r0], mm0
    movq     [r0+r2], mm1
    add           r0, r4
    movq         mm0, [r1]
    movq         mm1, [r1+r2]
    add           r1, r4
    CHROMAMC_AVG mm0, [r0]
    CHROMAMC_AVG mm1, [r0+r2]
    movq        [r0], mm0
    movq     [r0+r2], mm1
    add           r0, r4
    sub          r3d, 4
    jne .next4rows
%endmacro

%macro chroma_mc8_mmx_func 3
%ifidn %2, rv40
%ifdef PIC
%define rnd_1d_rv40 r8
%define rnd_2d_rv40 r8
%define extra_regs 2
%else ; no-PIC
%define rnd_1d_rv40 rnd_rv40_1d_tbl
%define rnd_2d_rv40 rnd_rv40_2d_tbl
%define extra_regs 1
%endif ; PIC
%else
%define extra_regs 0
%endif ; rv40

; put/avg_h264_chroma_mc8_mmx_*(uint8_t *dst /*align 8*/, uint8_t *src /*align 1*/,
;                               int stride, int h, int mx, int my)
cglobal %1_%2_chroma_mc8_%3, 6, 7 + extra_regs, 0
%if ARCH_X86_64
    movsxd        r2, r2d
%endif
    mov          r6d, r5d
    or           r6d, r4d
    jne .at_least_one_non_zero
    ; mx == 0 AND my == 0 - no filter needed
    mv0_pixels_mc8
    REP_RET

.at_least_one_non_zero
%ifidn %2, rv40
%if ARCH_X86_64
    mov           r7, r5
    and           r7, 6          ; &~1 for mx/my=[0,7]
    lea           r7, [r7*4+r4]
    sar          r7d, 1
%define rnd_bias r7
%define dest_reg r0
%else ; x86-32
    mov           r0, r5
    and           r0, 6          ; &~1 for mx/my=[0,7]
    lea           r0, [r0*4+r4]
    sar          r0d, 1
%define rnd_bias r0
%define dest_reg r5
%endif
%else ; vc1, h264
%define rnd_bias 0
%define dest_reg r0
%endif

    test         r5d, r5d
    mov           r6, 1
    je .my_is_zero
    test         r4d, r4d
    mov           r6, r2         ; dxy = x ? 1 : stride
    jne .both_non_zero
.my_is_zero
    ; mx == 0 XOR my == 0 - 1 dimensional filter only
    or           r4d, r5d        ; x + y
%ifidn %2, rv40
%ifdef PIC
    lea           r8, [rnd_rv40_1d_tbl]
%endif
%if ARCH_X86_64 == 0
    mov           r5, r0m
%endif
%endif
    movd          m5, r4d
    movq          m4, [pw_8]
    movq          m6, [rnd_1d_%2+rnd_bias*8] ; mm6 = rnd >> 3
    punpcklwd     m5, m5
    punpckldq     m5, m5         ; mm5 = B = x
    pxor          m7, m7
    psubw         m4, m5         ; mm4 = A = 8-x
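
    ; 1-D case: for k = mx+my (one of them is zero) and r6 = 1 (horizontal)
    ; or stride (vertical), dst[i] = ((8-k)*src[i] + k*src[i+r6] + rnd) >> 3.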
.next1drow
    movq          m0, [r1]       ; mm0 = src[0..7]
    movq          m2, [r1+r6]    ; mm2 = src[1..8]
    movq          m1, m0
    movq          m3, m2
    punpcklbw     m0, m7
    punpckhbw     m1, m7
    punpcklbw     m2, m7
    punpckhbw     m3, m7
    pmullw        m0, m4         ; [mm0,mm1] = A * src[0..7]
    pmullw        m1, m4
    pmullw        m2, m5         ; [mm2,mm3] = B * src[1..8]
    pmullw        m3, m5
    paddw         m0, m6
    paddw         m1, m6
    paddw         m0, m2
    paddw         m1, m3
    psrlw         m0, 3
    psrlw         m1, 3
    packuswb      m0, m1
    CHROMAMC_AVG  m0, [dest_reg]
    movq  [dest_reg], m0         ; dst[0..7] = (A * src[0..7] + B * src[1..8] + (rnd >> 3)) >> 3
    add     dest_reg, r2
    add           r1, r2
    dec          r3d
    jne .next1drow
    REP_RET

.both_non_zero ; general case, bilinear
    movd          m4, r4d        ; x
    movd          m6, r5d        ; y
%ifidn %2, rv40
%ifdef PIC
    lea           r8, [rnd_rv40_2d_tbl]
%endif
%if ARCH_X86_64 == 0
    mov           r5, r0m
%endif
%endif
    mov           r6, rsp        ; backup stack pointer
    and          rsp, ~(mmsize-1) ; align stack
    sub          rsp, 16         ; AA and DD

    punpcklwd     m4, m4
    punpcklwd     m6, m6
    punpckldq     m4, m4         ; mm4 = x words
    punpckldq     m6, m6         ; mm6 = y words
    movq          m5, m4
    pmullw        m4, m6         ; mm4 = x * y
    psllw         m5, 3
    psllw         m6, 3
    movq          m7, m5
    paddw         m7, m6
    movq     [rsp+8], m4         ; DD = x * y
    psubw         m5, m4         ; mm5 = B = 8x - xy
    psubw         m6, m4         ; mm6 = C = 8y - xy
    paddw         m4, [pw_64]
    psubw         m4, m7         ; mm4 = A = xy - (8x+8y) + 64
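    ; A = xy - 8x - 8y + 64 = (8-x)*(8-y), so A, B, C and D = x*y are the usual
    ; bilinear weights; below, dst[i] = (A*src[i] + B*src[i+1] + C*src[i+stride]
    ; + D*src[i+stride+1] + rnd) >> 6, with A and D kept at [rsp] and [rsp+8].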
    pxor          m7, m7
    movq       [rsp], m4

    movq          m0, [r1]       ; mm0 = src[0..7]
    movq          m1, [r1+1]     ; mm1 = src[1..8]
.next2drow
    add           r1, r2
    movq          m2, m0
    movq          m3, m1
    punpckhbw     m0, m7
    punpcklbw     m1, m7
    punpcklbw     m2, m7
    punpckhbw     m3, m7
    pmullw        m0, [rsp]
    pmullw        m2, [rsp]
    pmullw        m1, m5
    pmullw        m3, m5
    paddw         m2, m1         ; mm2 = A * src[0..3] + B * src[1..4]
    paddw         m3, m0         ; mm3 = A * src[4..7] + B * src[5..8]

    movq          m0, [r1]
    movq          m1, m0
    punpcklbw     m0, m7
    punpckhbw     m1, m7
    pmullw        m0, m6
    pmullw        m1, m6
    paddw         m2, m0
    paddw         m3, m1         ; [mm2,mm3] += C * src[0..7]

    movq          m1, [r1+1]
    movq          m0, m1
    movq          m4, m1
    punpcklbw     m0, m7
    punpckhbw     m4, m7
    pmullw        m0, [rsp+8]
    pmullw        m4, [rsp+8]
    paddw         m2, m0
    paddw         m3, m4         ; [mm2,mm3] += D * src[1..8]

    movq          m0, [r1]
    paddw         m2, [rnd_2d_%2+rnd_bias*8]
    paddw         m3, [rnd_2d_%2+rnd_bias*8]
    psrlw         m2, 6
    psrlw         m3, 6
    packuswb      m2, m3
    CHROMAMC_AVG  m2, [dest_reg]
    movq  [dest_reg], m2         ; dst[0..7] = ([mm2,mm3] + rnd) >> 6
    add     dest_reg, r2
    dec          r3d
    jne .next2drow
    mov          rsp, r6         ; restore stack pointer
    RET
%endmacro

%macro chroma_mc4_mmx_func 3
%define extra_regs 0
%ifidn %2, rv40
%ifdef PIC
%define extra_regs 1
%endif ; PIC
%endif ; rv40

cglobal %1_%2_chroma_mc4_%3, 6, 6 + extra_regs, 0
%if ARCH_X86_64
    movsxd        r2, r2d
%endif
    pxor          m7, m7
    movd          m2, r4d        ; x
    movd          m3, r5d        ; y
    movq          m4, [pw_8]
    movq          m5, [pw_8]
    punpcklwd     m2, m2
    punpcklwd     m3, m3
    punpcklwd     m2, m2
    punpcklwd     m3, m3
    psubw         m4, m2
    psubw         m5, m3

%ifidn %2, rv40
%ifdef PIC
    lea           r6, [rnd_rv40_2d_tbl]
%define rnd_2d_rv40 r6
%else
%define rnd_2d_rv40 rnd_rv40_2d_tbl
%endif
    and           r5, 6          ; &~1 for mx/my=[0,7]
    lea           r5, [r5*4+r4]
    sar          r5d, 1
%define rnd_bias r5
%else ; vc1, h264
%define rnd_bias 0
%endif

    movd          m0, [r1]
    movd          m6, [r1+1]
    add           r1, r2
    punpcklbw     m0, m7
    punpcklbw     m6, m7
    pmullw        m0, m4
    pmullw        m6, m2
    paddw         m6, m0
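    ; m6 now holds the horizontally filtered previous row, (8-x)*src[i] + x*src[i+1].
    ; Each iteration below filters two new rows once horizontally and combines
    ; consecutive rows vertically: ((8-y)*prev + y*cur + rnd) >> 6.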
.next2rows
    movd          m0, [r1]
    movd          m1, [r1+1]
    add           r1, r2
    punpcklbw     m0, m7
    punpcklbw     m1, m7
    pmullw        m0, m4
    pmullw        m1, m2
    paddw         m1, m0
    movq          m0, m1

    pmullw        m6, m5
    pmullw        m1, m3
    paddw         m6, [rnd_2d_%2+rnd_bias*8]
    paddw         m1, m6
    psrlw         m1, 6
    packuswb      m1, m1
    CHROMAMC_AVG4 m1, m6, [r0]
    movd        [r0], m1
    add           r0, r2

    movd          m6, [r1]
    movd          m1, [r1+1]
    add           r1, r2
    punpcklbw     m6, m7
    punpcklbw     m1, m7
    pmullw        m6, m4
    pmullw        m1, m2
    paddw         m1, m6
    movq          m6, m1

    pmullw        m0, m5
    pmullw        m1, m3
    paddw         m0, [rnd_2d_%2+rnd_bias*8]
    paddw         m1, m0
    psrlw         m1, 6
    packuswb      m1, m1
    CHROMAMC_AVG4 m1, m0, [r0]
    movd        [r0], m1
    add           r0, r2
    sub          r3d, 2
    jnz .next2rows
    REP_RET
%endmacro

%macro chroma_mc2_mmx_func 3
cglobal %1_%2_chroma_mc2_%3, 6, 7, 0
%if ARCH_X86_64
    movsxd        r2, r2d
%endif
    mov          r6d, r4d
    shl          r4d, 16
    sub          r4d, r6d
    add          r4d, 8
    imul         r5d, r4d        ; x*y<<16 | y*(8-x)
    shl          r4d, 3
    sub          r4d, r5d        ; x*(8-y)<<16 | (8-x)*(8-y)

    movd          m5, r4d
    movd          m6, r5d
    punpckldq     m5, m5         ; mm5 = {A,B,A,B}
    punpckldq     m6, m6         ; mm6 = {C,D,C,D}
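    ; With the weights packed as word pairs and src pre-shuffled to [0,1,1,2],
    ; one pmaddwd per row computes A*src[i] + B*src[i+1] (or C/D for the next
    ; row) for both output pixels at once.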
    pxor          m7, m7
    movd          m2, [r1]
    punpcklbw     m2, m7
    pshufw        m2, m2, 0x94   ; mm2 = src[0,1,1,2]
.nextrow
    add           r1, r2
    movq          m1, m2
    pmaddwd       m1, m5         ; mm1 = A * src[0,1] + B * src[1,2]
    movd          m0, [r1]
    punpcklbw     m0, m7
    pshufw        m0, m0, 0x94   ; mm0 = src[0,1,1,2]
    movq          m2, m0
    pmaddwd       m0, m6
    paddw         m1, [rnd_2d_%2]
    paddw         m1, m0         ; mm1 += C * src[0,1] + D * src[1,2]
    psrlw         m1, 6
    packssdw      m1, m7
    packuswb      m1, m7
    CHROMAMC_AVG4 m1, m3, [r0]
    movd         r5d, m1
    mov         [r0], r5w
    add           r0, r2
    sub          r3d, 1
    jnz .nextrow
    REP_RET
%endmacro
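
; Rounding constants: the 1-D filters add rnd before a >> 3, the 2-D filters
; before a >> 6. H.264 uses 4 and 32, the VC-1 no-rounding variant uses 3 and
; 28, and RV40 picks per-position values from the tables above.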
%define rnd_1d_h264 pw_4
%define rnd_2d_h264 pw_32
%define rnd_1d_vc1  pw_3
%define rnd_2d_vc1  pw_28

%macro NOTHING 2-3
%endmacro
%macro DIRECT_AVG 2
    PAVG          %1, %2
%endmacro
%macro COPY_AVG 3
    movd          %2, %3
    PAVG          %1, %2
%endmacro
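
; For the avg_* functions CHROMAMC_AVG / CHROMAMC_AVG4 blend the result with
; the existing destination through PAVG (pavgb for MMXEXT/SSSE3, pavgusb for
; 3DNow!); for the put_* functions they are defined to NOTHING and the result
; is stored as-is.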

INIT_MMX
%define CHROMAMC_AVG  NOTHING
%define CHROMAMC_AVG4 NOTHING
chroma_mc8_mmx_func put, h264, mmx_rnd
chroma_mc8_mmx_func put, vc1,  mmx_nornd
chroma_mc8_mmx_func put, rv40, mmx
chroma_mc4_mmx_func put, h264, mmx
chroma_mc4_mmx_func put, rv40, mmx
chroma_mc2_mmx_func put, h264, mmx2

%define CHROMAMC_AVG  DIRECT_AVG
%define CHROMAMC_AVG4 COPY_AVG
%define PAVG          pavgb
chroma_mc8_mmx_func avg, h264, mmx2_rnd
chroma_mc8_mmx_func avg, vc1,  mmx2_nornd
chroma_mc8_mmx_func avg, rv40, mmx2
chroma_mc4_mmx_func avg, h264, mmx2
chroma_mc4_mmx_func avg, rv40, mmx2
chroma_mc2_mmx_func avg, h264, mmx2

%define PAVG          pavgusb
chroma_mc8_mmx_func avg, h264, 3dnow_rnd
chroma_mc8_mmx_func avg, vc1,  3dnow_nornd
chroma_mc8_mmx_func avg, rv40, 3dnow
chroma_mc4_mmx_func avg, h264, 3dnow
chroma_mc4_mmx_func avg, rv40, 3dnow

%macro chroma_mc8_ssse3_func 3
cglobal %1_%2_chroma_mc8_%3, 6, 7, 8
%if ARCH_X86_64
    movsxd        r2, r2d
%endif
    mov          r6d, r5d
    or           r6d, r4d
    jne .at_least_one_non_zero
    ; mx == 0 AND my == 0 - no filter needed
    mv0_pixels_mc8
    REP_RET

.at_least_one_non_zero
    test         r5d, r5d
    je .my_is_zero
    test         r4d, r4d
    je .mx_is_zero

    ; general case, bilinear
    mov          r6d, r4d
    shl          r4d, 8
    sub           r4, r6
    mov           r6, 8
    add           r4, 8          ; x*255+8 = x<<8 | (8-x)
    sub          r6d, r5d
    imul          r6, r4         ; (8-y)*(x*255+8) = (8-y)*x<<8 | (8-y)*(8-x)
    imul         r4d, r5d        ;    y *(x*255+8) =    y *x<<8 |    y *(8-x)

    movd          m7, r6d
    movd          m6, r4d
    movdqa        m5, [rnd_2d_%2]
    movq          m0, [r1]
    movq          m1, [r1+1]
    pshuflw       m7, m7, 0
    pshuflw       m6, m6, 0
    punpcklbw     m0, m1
    movlhps       m7, m7
    movlhps       m6, m6
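    ; Adjacent source pixels are interleaved with punpcklbw and multiplied by
    ; packed byte weight pairs via pmaddubsw: m7 = {8-x, x}*(8-y) for the
    ; current row and m6 = {8-x, x}*y for the row below, so each pmaddubsw
    ; yields one weighted pair sum per output word.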
.next2rows
    movq          m1, [r1+r2*1]
    movq          m2, [r1+r2*1+1]
    movq          m3, [r1+r2*2]
    movq          m4, [r1+r2*2+1]
    lea           r1, [r1+r2*2]
    punpcklbw     m1, m2
    movdqa        m2, m1
    punpcklbw     m3, m4
    movdqa        m4, m3
    pmaddubsw     m0, m7
    pmaddubsw     m1, m6
    pmaddubsw     m2, m7
    pmaddubsw     m3, m6
    paddw         m0, m5
    paddw         m2, m5
    paddw         m1, m0
    paddw         m3, m2
    psrlw         m1, 6
    movdqa        m0, m4
    psrlw         m3, 6
%ifidn %1, avg
    movq          m2, [r0]
    movhps        m2, [r0+r2]
%endif
    packuswb      m1, m3
    CHROMAMC_AVG  m1, m2
    movq        [r0], m1
    movhps   [r0+r2], m1
    sub          r3d, 2
    lea           r0, [r0+r2*2]
    jg .next2rows
    REP_RET
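
    ; The two 1-D paths below use the same pmaddubsw scheme with a single byte
    ; weight pair {8-k, k}, add rnd_1d and shift right by 3, two rows at a time.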
.my_is_zero
    mov          r5d, r4d
    shl          r4d, 8
    add           r4, 8
    sub           r4, r5         ; 255*x+8 = x<<8 | (8-x)
    movd          m7, r4d
    movdqa        m6, [rnd_1d_%2]
    pshuflw       m7, m7, 0
    movlhps       m7, m7
.next2xrows
    movq          m0, [r1]
    movq          m1, [r1+1]
    movq          m2, [r1+r2]
    movq          m3, [r1+r2+1]
    punpcklbw     m0, m1
    punpcklbw     m2, m3
    pmaddubsw     m0, m7
    pmaddubsw     m2, m7
%ifidn %1, avg
    movq          m4, [r0]
    movhps        m4, [r0+r2]
%endif
    paddw         m0, m6
    paddw         m2, m6
    psrlw         m0, 3
    psrlw         m2, 3
    packuswb      m0, m2
    CHROMAMC_AVG  m0, m4
    movq        [r0], m0
    movhps   [r0+r2], m0
    sub          r3d, 2
    lea           r0, [r0+r2*2]
    lea           r1, [r1+r2*2]
    jg .next2xrows
    REP_RET
.mx_is_zero
    mov          r4d, r5d
    shl          r5d, 8
    add           r5, 8
    sub           r5, r4         ; 255*y+8 = y<<8 | (8-y)
    movd          m7, r5d
    movdqa        m6, [rnd_1d_%2]
    pshuflw       m7, m7, 0
    movlhps       m7, m7
.next2yrows
    movq          m0, [r1]
    movq          m1, [r1+r2]
    movdqa        m2, m1
    movq          m3, [r1+r2*2]
    lea           r1, [r1+r2*2]
    punpcklbw     m0, m1
    punpcklbw     m2, m3
    pmaddubsw     m0, m7
    pmaddubsw     m2, m7
%ifidn %1, avg
    movq          m4, [r0]
    movhps        m4, [r0+r2]
%endif
    paddw         m0, m6
    paddw         m2, m6
    psrlw         m0, 3
    psrlw         m2, 3
    packuswb      m0, m2
    CHROMAMC_AVG  m0, m4
    movq        [r0], m0
    movhps   [r0+r2], m0
    sub          r3d, 2
    lea           r0, [r0+r2*2]
    jg .next2yrows
    REP_RET
%endmacro

%macro chroma_mc4_ssse3_func 3
cglobal %1_%2_chroma_mc4_%3, 6, 7, 0
%if ARCH_X86_64
    movsxd        r2, r2d
%endif
    mov           r6, r4
    shl          r4d, 8
    sub          r4d, r6d
    mov           r6, 8
    add          r4d, 8          ; x*255+8 = x<<8 | (8-x)
    sub          r6d, r5d
    imul         r6d, r4d        ; (8-y)*(x*255+8) = (8-y)*x<<8 | (8-y)*(8-x)
    imul         r4d, r5d        ;    y *(x*255+8) =    y *x<<8 |    y *(8-x)

    movd          m7, r6d
    movd          m6, r4d
    movq          m5, [pw_32]
    movd          m0, [r1]
    pshufw        m7, m7, 0
    punpcklbw     m0, [r1+1]
    pshufw        m6, m6, 0
.next2rows
    movd          m1, [r1+r2*1]
    movd          m3, [r1+r2*2]
    punpcklbw     m1, [r1+r2*1+1]
    punpcklbw     m3, [r1+r2*2+1]
    lea           r1, [r1+r2*2]
    movq          m2, m1
    movq          m4, m3
    pmaddubsw     m0, m7
    pmaddubsw     m1, m6
    pmaddubsw     m2, m7
    pmaddubsw     m3, m6
    paddw         m0, m5
    paddw         m2, m5
    paddw         m1, m0
    paddw         m3, m2
    psrlw         m1, 6
    movq          m0, m4
    psrlw         m3, 6
    packuswb      m1, m1
    packuswb      m3, m3
    CHROMAMC_AVG  m1, [r0]
    CHROMAMC_AVG  m3, [r0+r2]
    movd        [r0], m1
    movd     [r0+r2], m3
    sub          r3d, 2
    lea           r0, [r0+r2*2]
    jg .next2rows
    REP_RET
%endmacro

%define CHROMAMC_AVG NOTHING
INIT_XMM
chroma_mc8_ssse3_func put, h264, ssse3_rnd
chroma_mc8_ssse3_func put, vc1,  ssse3_nornd
INIT_MMX
chroma_mc4_ssse3_func put, h264, ssse3

%define CHROMAMC_AVG DIRECT_AVG
%define PAVG         pavgb
INIT_XMM
chroma_mc8_ssse3_func avg, h264, ssse3_rnd
chroma_mc8_ssse3_func avg, vc1,  ssse3_nornd
INIT_MMX
chroma_mc4_ssse3_func avg, h264, ssse3