;******************************************************************************
;* MMX/SSSE3-optimized functions for H264 chroma MC
;* Copyright (c) 2005 Zoltan Hidvegi <hzoli -a- hzoli -d- com>,
;*               2005-2008 Loren Merritt
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"

SECTION_RODATA

rnd_rv40_2d_tbl: times 4 dw  0
                 times 4 dw 16
                 times 4 dw 32
                 times 4 dw 16
                 times 4 dw 32
                 times 4 dw 28
                 times 4 dw 32
                 times 4 dw 28
                 times 4 dw  0
                 times 4 dw 32
                 times 4 dw 16
                 times 4 dw 32
                 times 4 dw 32
                 times 4 dw 28
                 times 4 dw 32
                 times 4 dw 28
rnd_rv40_1d_tbl: times 4 dw  0
                 times 4 dw  2
                 times 4 dw  4
                 times 4 dw  2
                 times 4 dw  4
                 times 4 dw  3
                 times 4 dw  4
                 times 4 dw  3
                 times 4 dw  0
                 times 4 dw  4
                 times 4 dw  2
                 times 4 dw  4
                 times 4 dw  4
                 times 4 dw  3
                 times 4 dw  4
                 times 4 dw  3
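
; Each RV40 table above holds 16 four-word entries; the rv40 code paths below
; index them with rnd_bias = ((my & 6) * 4 + mx) >> 1, so
; [rnd_rv40_*_tbl + rnd_bias*8] selects the rounding constant that matches the
; current (mx, my) subpel position.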

cextern pw_3
cextern pw_4
cextern pw_8
cextern pw_28
cextern pw_32
cextern pw_64

SECTION .text
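
; mv0_pixels_mc8 is the fast path for mx == my == 0: no interpolation is
; needed, so each iteration simply copies (or, in the avg variants, averages
; in) four 8-pixel rows from src to dst.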
%macro mv0_pixels_mc8 0
    lea           r4, [r2*2 ]
.next4rows:
    movq         mm0, [r1   ]
    movq         mm1, [r1+r2]
    add           r1, r4
    CHROMAMC_AVG mm0, [r0   ]
    CHROMAMC_AVG mm1, [r0+r2]
    movq    [r0   ], mm0
    movq    [r0+r2], mm1
    add           r0, r4
    movq         mm0, [r1   ]
    movq         mm1, [r1+r2]
    add           r1, r4
    CHROMAMC_AVG mm0, [r0   ]
    CHROMAMC_AVG mm1, [r0+r2]
    movq    [r0   ], mm0
    movq    [r0+r2], mm1
    add           r0, r4
    sub          r3d, 4
    jne .next4rows
%endmacro
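
; chroma_mc8_mmx_func: 8-pixel-wide chroma MC.  In the 2-D case the per-pixel
; weights are the usual bilinear ones,
;     A = (8-x)*(8-y),  B = x*(8-y),  C = (8-x)*y,  D = x*y
; and each output pixel is
;     dst[i] = (A*src[i] + B*src[i+1] + C*src[i+stride] + D*src[i+stride+1]
;               + rnd) >> 6
; which is what the A/B/C/D setup below computes (A as xy - 8x - 8y + 64, etc.).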
%macro chroma_mc8_mmx_func 2-3
%ifidn %2, rv40
%ifdef PIC
%define rnd_1d_rv40 r8
%define rnd_2d_rv40 r8
%define extra_regs 2
%else ; no-PIC
%define rnd_1d_rv40 rnd_rv40_1d_tbl
%define rnd_2d_rv40 rnd_rv40_2d_tbl
%define extra_regs 1
%endif ; PIC
%else
%define extra_regs 0
%endif ; rv40

; put/avg_h264_chroma_mc8_*(uint8_t *dst /*align 8*/, uint8_t *src /*align 1*/,
;                           int stride, int h, int mx, int my)
cglobal %1_%2_chroma_mc8%3, 6, 7 + extra_regs, 0
%if ARCH_X86_64
    movsxd        r2, r2d
%endif
    mov          r6d, r5d
    or           r6d, r4d
    jne .at_least_one_non_zero
    ; mx == 0 AND my == 0 - no filter needed
    mv0_pixels_mc8
    REP_RET

.at_least_one_non_zero:
%ifidn %2, rv40
%if ARCH_X86_64
    mov           r7, r5
    and           r7, 6         ; &~1 for mx/my=[0,7]
    lea           r7, [r7*4+r4]
    sar          r7d, 1
%define rnd_bias r7
%define dest_reg r0
%else ; x86-32
    mov           r0, r5
    and           r0, 6         ; &~1 for mx/my=[0,7]
    lea           r0, [r0*4+r4]
    sar          r0d, 1
%define rnd_bias r0
%define dest_reg r5
%endif
%else ; vc1, h264
%define rnd_bias 0
%define dest_reg r0
%endif

    test         r5d, r5d
    mov           r6, 1
    je .my_is_zero
    test         r4d, r4d
    mov           r6, r2        ; dxy = x ? 1 : stride
    jne .both_non_zero
.my_is_zero:
    ; mx == 0 XOR my == 0 - 1 dimensional filter only
    or           r4d, r5d       ; x + y
%ifidn %2, rv40
%ifdef PIC
    lea           r8, [rnd_rv40_1d_tbl]
%endif
%if ARCH_X86_64 == 0
    mov           r5, r0m
%endif
%endif
    movd          m5, r4d
    movq          m4, [pw_8]
    movq          m6, [rnd_1d_%2+rnd_bias*8] ; mm6 = rnd >> 3
    punpcklwd     m5, m5
    punpckldq     m5, m5        ; mm5 = B = x
    pxor          m7, m7
    psubw         m4, m5        ; mm4 = A = 8-x

.next1drow:
    movq          m0, [r1   ]   ; mm0 = src[0..7]
    movq          m2, [r1+r6]   ; mm2 = src[1..8]
    movq          m1, m0
    movq          m3, m2
    punpcklbw     m0, m7
    punpckhbw     m1, m7
    punpcklbw     m2, m7
    punpckhbw     m3, m7
    pmullw        m0, m4        ; [mm0,mm1] = A * src[0..7]
    pmullw        m1, m4
    pmullw        m2, m5        ; [mm2,mm3] = B * src[1..8]
    pmullw        m3, m5
    paddw         m0, m6
    paddw         m1, m6
    paddw         m0, m2
    paddw         m1, m3
    psrlw         m0, 3
    psrlw         m1, 3
    packuswb      m0, m1
    CHROMAMC_AVG  m0, [dest_reg]
    movq  [dest_reg], m0        ; dst[0..7] = (A * src[0..7] + B * src[1..8] + (rnd >> 3)) >> 3
    add     dest_reg, r2
    add           r1, r2
    dec          r3d
    jne .next1drow
    REP_RET

.both_non_zero: ; general case, bilinear
    movd          m4, r4d       ; x
    movd          m6, r5d       ; y
%ifidn %2, rv40
%ifdef PIC
    lea           r8, [rnd_rv40_2d_tbl]
%endif
%if ARCH_X86_64 == 0
    mov           r5, r0m
%endif
%endif
    mov           r6, rsp       ; backup stack pointer
    and          rsp, ~(mmsize-1) ; align stack
    sub          rsp, 16        ; AA and DD

    punpcklwd     m4, m4
    punpcklwd     m6, m6
    punpckldq     m4, m4        ; mm4 = x words
    punpckldq     m6, m6        ; mm6 = y words
    movq          m5, m4
    pmullw        m4, m6        ; mm4 = x * y
    psllw         m5, 3
    psllw         m6, 3
    movq          m7, m5
    paddw         m7, m6
    movq    [rsp+8], m4         ; DD = x * y
    psubw         m5, m4        ; mm5 = B = 8x - xy
    psubw         m6, m4        ; mm6 = C = 8y - xy
    paddw         m4, [pw_64]
    psubw         m4, m7        ; mm4 = A = xy - (8x+8y) + 64
    pxor          m7, m7
    movq    [rsp  ], m4

    movq          m0, [r1  ]    ; mm0 = src[0..7]
    movq          m1, [r1+1]    ; mm1 = src[1..8]
.next2drow:
    add           r1, r2

    movq          m2, m0
    movq          m3, m1
    punpckhbw     m0, m7
    punpcklbw     m1, m7
    punpcklbw     m2, m7
    punpckhbw     m3, m7
    pmullw        m0, [rsp]
    pmullw        m2, [rsp]
    pmullw        m1, m5
    pmullw        m3, m5
    paddw         m2, m1        ; mm2 = A * src[0..3] + B * src[1..4]
    paddw         m3, m0        ; mm3 = A * src[4..7] + B * src[5..8]

    movq          m0, [r1]
    movq          m1, m0
    punpcklbw     m0, m7
    punpckhbw     m1, m7
    pmullw        m0, m6
    pmullw        m1, m6
    paddw         m2, m0
    paddw         m3, m1        ; [mm2,mm3] += C * src[0..7]

    movq          m1, [r1+1]
    movq          m0, m1
    movq          m4, m1
    punpcklbw     m0, m7
    punpckhbw     m4, m7
    pmullw        m0, [rsp+8]
    pmullw        m4, [rsp+8]
    paddw         m2, m0
    paddw         m3, m4        ; [mm2,mm3] += D * src[1..8]

    movq          m0, [r1]
    paddw         m2, [rnd_2d_%2+rnd_bias*8]
    paddw         m3, [rnd_2d_%2+rnd_bias*8]
    psrlw         m2, 6
    psrlw         m3, 6
    packuswb      m2, m3
    CHROMAMC_AVG  m2, [dest_reg]
    movq  [dest_reg], m2        ; dst[0..7] = ([mm2,mm3] + rnd) >> 6
    add     dest_reg, r2
    dec          r3d
    jne .next2drow
    mov          rsp, r6        ; restore stack pointer
    RET
%endmacro
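
; chroma_mc4_mmx_func: 4-pixel-wide variant.  It produces two output rows per
; loop iteration and keeps the horizontally filtered result of the previous
; source row (in m6 / m0), so each source row is loaded and multiplied only
; once before being combined vertically with the y weights.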
%macro chroma_mc4_mmx_func 2
%define extra_regs 0
%ifidn %2, rv40
%ifdef PIC
%define extra_regs 1
%endif ; PIC
%endif ; rv40
cglobal %1_%2_chroma_mc4, 6, 6 + extra_regs, 0
%if ARCH_X86_64
    movsxd        r2, r2d
%endif
    pxor          m7, m7
    movd          m2, r4d       ; x
    movd          m3, r5d       ; y
    movq          m4, [pw_8]
    movq          m5, [pw_8]
    punpcklwd     m2, m2
    punpcklwd     m3, m3
    punpcklwd     m2, m2
    punpcklwd     m3, m3
    psubw         m4, m2
    psubw         m5, m3

%ifidn %2, rv40
%ifdef PIC
    lea           r6, [rnd_rv40_2d_tbl]
%define rnd_2d_rv40 r6
%else
%define rnd_2d_rv40 rnd_rv40_2d_tbl
%endif
    and           r5, 6         ; &~1 for mx/my=[0,7]
    lea           r5, [r5*4+r4]
    sar          r5d, 1
%define rnd_bias r5
%else ; vc1, h264
%define rnd_bias 0
%endif

    movd          m0, [r1  ]
    movd          m6, [r1+1]
    add           r1, r2
    punpcklbw     m0, m7
    punpcklbw     m6, m7
    pmullw        m0, m4
    pmullw        m6, m2
    paddw         m6, m0

.next2rows:
    movd          m0, [r1  ]
    movd          m1, [r1+1]
    add           r1, r2
    punpcklbw     m0, m7
    punpcklbw     m1, m7
    pmullw        m0, m4
    pmullw        m1, m2
    paddw         m1, m0
    movq          m0, m1

    pmullw        m6, m5
    pmullw        m1, m3
    paddw         m6, [rnd_2d_%2+rnd_bias*8]
    paddw         m1, m6
    psrlw         m1, 6
    packuswb      m1, m1
    CHROMAMC_AVG4 m1, m6, [r0]
    movd        [r0], m1
    add           r0, r2

    movd          m6, [r1  ]
    movd          m1, [r1+1]
    add           r1, r2
    punpcklbw     m6, m7
    punpcklbw     m1, m7
    pmullw        m6, m4
    pmullw        m1, m2
    paddw         m1, m6
    movq          m6, m1
    pmullw        m0, m5
    pmullw        m1, m3
    paddw         m0, [rnd_2d_%2+rnd_bias*8]
    paddw         m1, m0
    psrlw         m1, 6
    packuswb      m1, m1
    CHROMAMC_AVG4 m1, m0, [r0]
    movd        [r0], m1
    add           r0, r2
    sub          r3d, 2
    jnz .next2rows
    REP_RET
%endmacro
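
; chroma_mc2_mmx_func: 2-pixel-wide variant.  The bilinear weights are packed
; as {A,B,A,B} and {C,D,C,D} words and applied with pmaddwd to src[0,1,1,2],
; so both output pixels of a row come from two multiply-accumulates.  The
; pshufw used here requires MMXEXT, hence it is only instantiated for mmxext
; below.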
%macro chroma_mc2_mmx_func 2
cglobal %1_%2_chroma_mc2, 6, 7, 0
%if ARCH_X86_64
    movsxd        r2, r2d
%endif
    mov          r6d, r4d
    shl          r4d, 16
    sub          r4d, r6d
    add          r4d, 8
    imul         r5d, r4d       ; x*y<<16 | y*(8-x)
    shl          r4d, 3
    sub          r4d, r5d       ; x*(8-y)<<16 | (8-x)*(8-y)

    movd          m5, r4d
    movd          m6, r5d
    punpckldq     m5, m5        ; mm5 = {A,B,A,B}
    punpckldq     m6, m6        ; mm6 = {C,D,C,D}
    pxor          m7, m7
    movd          m2, [r1]
    punpcklbw     m2, m7
    pshufw        m2, m2, 0x94  ; mm2 = src[0,1,1,2]
.nextrow:
    add           r1, r2
    movq          m1, m2
    pmaddwd       m1, m5        ; mm1 = A * src[0,1] + B * src[1,2]
    movd          m0, [r1]
    punpcklbw     m0, m7
    pshufw        m0, m0, 0x94  ; mm0 = src[0,1,1,2]
    movq          m2, m0
    pmaddwd       m0, m6
    paddw         m1, [rnd_2d_%2]
    paddw         m1, m0        ; mm1 += C * src[0,1] + D * src[1,2]
    psrlw         m1, 6
    packssdw      m1, m7
    packuswb      m1, m7
    CHROMAMC_AVG4 m1, m3, [r0]
    movd         r5d, m1
    mov         [r0], r5w
    add           r0, r2
    sub          r3d, 1
    jnz .nextrow
    REP_RET
%endmacro
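
; 1-D/2-D rounding constants: H.264 uses 4 and 32, VC-1's "no rounding"
; variants use 3 and 28, and RV40 takes its constants from the rnd_rv40
; tables via rnd_bias.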
%define rnd_1d_h264 pw_4
%define rnd_2d_h264 pw_32
%define rnd_1d_vc1  pw_3
%define rnd_2d_vc1  pw_28

%macro NOTHING 2-3
%endmacro
%macro DIRECT_AVG 2
    PAVG          %1, %2
%endmacro
%macro COPY_AVG 3
    movd          %2, %3
    PAVG          %1, %2
%endmacro
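
; The put variants are instantiated with CHROMAMC_AVG/CHROMAMC_AVG4 defined to
; NOTHING (plain store); the avg variants use DIRECT_AVG/COPY_AVG so the
; result is averaged with dst through PAVG (pavgb on MMXEXT/SSSE3, pavgusb on
; 3DNow!).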
INIT_MMX mmx
%define CHROMAMC_AVG  NOTHING
%define CHROMAMC_AVG4 NOTHING
chroma_mc8_mmx_func put, h264, _rnd
chroma_mc8_mmx_func put, vc1,  _nornd
chroma_mc8_mmx_func put, rv40
chroma_mc4_mmx_func put, h264
chroma_mc4_mmx_func put, rv40

INIT_MMX mmxext
chroma_mc2_mmx_func put, h264

%define CHROMAMC_AVG  DIRECT_AVG
%define CHROMAMC_AVG4 COPY_AVG
%define PAVG          pavgb
chroma_mc8_mmx_func avg, h264, _rnd
chroma_mc8_mmx_func avg, vc1,  _nornd
chroma_mc8_mmx_func avg, rv40
chroma_mc4_mmx_func avg, h264
chroma_mc4_mmx_func avg, rv40
chroma_mc2_mmx_func avg, h264

%define PAVG          pavgusb
INIT_MMX 3dnow
chroma_mc8_mmx_func avg, h264, _rnd
chroma_mc8_mmx_func avg, vc1,  _nornd
chroma_mc8_mmx_func avg, rv40
chroma_mc4_mmx_func avg, h264
chroma_mc4_mmx_func avg, rv40
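
; chroma_mc8_ssse3_func: the SSSE3 path interleaves src[i] with src[i+1]
; (punpcklbw) and multiplies with byte-packed weights using pmaddubsw, so the
; horizontal (8-x)/x combine is a single multiply-add per row; two output rows
; are produced per loop iteration.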
%macro chroma_mc8_ssse3_func 2-3
cglobal %1_%2_chroma_mc8%3, 6, 7, 8
%if ARCH_X86_64
    movsxd        r2, r2d
%endif
    mov          r6d, r5d
    or           r6d, r4d
    jne .at_least_one_non_zero
    ; mx == 0 AND my == 0 - no filter needed
    mv0_pixels_mc8
    REP_RET

.at_least_one_non_zero:
    test         r5d, r5d
    je .my_is_zero
    test         r4d, r4d
    je .mx_is_zero

    ; general case, bilinear
    mov          r6d, r4d
    shl          r4d, 8
    sub           r4, r6
    mov           r6, 8
    add           r4, 8         ; x*255+8 = x<<8 | (8-x)
    sub          r6d, r5d
    imul          r6, r4        ; (8-y)*(x*255+8) = (8-y)*x<<8 | (8-y)*(8-x)
    imul         r4d, r5d       ;    y *(x*255+8) =    y *x<<8 |    y *(8-x)

    movd          m7, r6d
    movd          m6, r4d
    movdqa        m5, [rnd_2d_%2]
    movq          m0, [r1  ]
    movq          m1, [r1+1]
    pshuflw       m7, m7, 0
    pshuflw       m6, m6, 0
    punpcklbw     m0, m1
    movlhps       m7, m7
    movlhps       m6, m6

.next2rows:
    movq          m1, [r1+r2*1  ]
    movq          m2, [r1+r2*1+1]
    movq          m3, [r1+r2*2  ]
    movq          m4, [r1+r2*2+1]
    lea           r1, [r1+r2*2]
    punpcklbw     m1, m2
    movdqa        m2, m1
    punpcklbw     m3, m4
    movdqa        m4, m3
    pmaddubsw     m0, m7
    pmaddubsw     m1, m6
    pmaddubsw     m2, m7
    pmaddubsw     m3, m6
    paddw         m0, m5
    paddw         m2, m5
    paddw         m1, m0
    paddw         m3, m2
    psrlw         m1, 6
    movdqa        m0, m4
    psrlw         m3, 6
%ifidn %1, avg
    movq          m2, [r0   ]
    movhps        m2, [r0+r2]
%endif
    packuswb      m1, m3
    CHROMAMC_AVG  m1, m2
    movq     [r0   ], m1
    movhps   [r0+r2], m1
    sub          r3d, 2
    lea           r0, [r0+r2*2]
    jg .next2rows
    REP_RET

.my_is_zero:
    mov          r5d, r4d
    shl          r4d, 8
    add           r4, 8
    sub           r4, r5        ; 255*x+8 = x<<8 | (8-x)
    movd          m7, r4d
    movdqa        m6, [rnd_1d_%2]
    pshuflw       m7, m7, 0
    movlhps       m7, m7

.next2xrows:
    movq          m0, [r1     ]
    movq          m1, [r1   +1]
    movq          m2, [r1+r2  ]
    movq          m3, [r1+r2+1]
    punpcklbw     m0, m1
    punpcklbw     m2, m3
    pmaddubsw     m0, m7
    pmaddubsw     m2, m7
%ifidn %1, avg
    movq          m4, [r0   ]
    movhps        m4, [r0+r2]
%endif
    paddw         m0, m6
    paddw         m2, m6
    psrlw         m0, 3
    psrlw         m2, 3
    packuswb      m0, m2
    CHROMAMC_AVG  m0, m4
    movq     [r0   ], m0
    movhps   [r0+r2], m0
    sub          r3d, 2
    lea           r0, [r0+r2*2]
    lea           r1, [r1+r2*2]
    jg .next2xrows
    REP_RET

.mx_is_zero:
    mov          r4d, r5d
    shl          r5d, 8
    add           r5, 8
    sub           r5, r4        ; 255*y+8 = y<<8 | (8-y)
    movd          m7, r5d
    movdqa        m6, [rnd_1d_%2]
    pshuflw       m7, m7, 0
    movlhps       m7, m7

.next2yrows:
    movq          m0, [r1     ]
    movq          m1, [r1+r2  ]
    movdqa        m2, m1
    movq          m3, [r1+r2*2]
    lea           r1, [r1+r2*2]
    punpcklbw     m0, m1
    punpcklbw     m2, m3
    pmaddubsw     m0, m7
    pmaddubsw     m2, m7
%ifidn %1, avg
    movq          m4, [r0   ]
    movhps        m4, [r0+r2]
%endif
    paddw         m0, m6
    paddw         m2, m6
    psrlw         m0, 3
    psrlw         m2, 3
    packuswb      m0, m2
    CHROMAMC_AVG  m0, m4
    movq     [r0   ], m0
    movhps   [r0+r2], m0
    sub          r3d, 2
    lea           r0, [r0+r2*2]
    jg .next2yrows
    REP_RET
%endmacro
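
; chroma_mc4_ssse3_func: same pmaddubsw scheme for 4-pixel rows, working in
; MMX registers (see the INIT_MMX ssse3 instantiations below); the interleaved
; data of the previous bottom row is carried over in m0, so each source row is
; loaded only once.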
%macro chroma_mc4_ssse3_func 2
cglobal %1_%2_chroma_mc4, 6, 7, 0
%if ARCH_X86_64
    movsxd        r2, r2d
%endif
    mov           r6, r4
    shl          r4d, 8
    sub          r4d, r6d
    mov           r6, 8
    add          r4d, 8         ; x*255+8
    sub          r6d, r5d
    imul         r6d, r4d       ; (8-y)*(x*255+8) = (8-y)*x<<8 | (8-y)*(8-x)
    imul         r4d, r5d       ;    y *(x*255+8) =    y *x<<8 |    y *(8-x)

    movd          m7, r6d
    movd          m6, r4d
    movq          m5, [pw_32]
    movd          m0, [r1  ]
    pshufw        m7, m7, 0
    punpcklbw     m0, [r1+1]
    pshufw        m6, m6, 0

.next2rows:
    movd          m1, [r1+r2*1  ]
    movd          m3, [r1+r2*2  ]
    punpcklbw     m1, [r1+r2*1+1]
    punpcklbw     m3, [r1+r2*2+1]
    lea           r1, [r1+r2*2]
    movq          m2, m1
    movq          m4, m3
    pmaddubsw     m0, m7
    pmaddubsw     m1, m6
    pmaddubsw     m2, m7
    pmaddubsw     m3, m6
    paddw         m0, m5
    paddw         m2, m5
    paddw         m1, m0
    paddw         m3, m2
    psrlw         m1, 6
    movq          m0, m4
    psrlw         m3, 6
    packuswb      m1, m1
    packuswb      m3, m3
    CHROMAMC_AVG  m1, [r0   ]
    CHROMAMC_AVG  m3, [r0+r2]
    movd     [r0   ], m1
    movd     [r0+r2], m3
    sub          r3d, 2
    lea           r0, [r0+r2*2]
    jg .next2rows
    REP_RET
%endmacro
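
; SSSE3 entry points: only the h264 and vc1 put/avg variants are built here;
; the avg versions again go through DIRECT_AVG with pavgb.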
%define CHROMAMC_AVG NOTHING
INIT_XMM ssse3
chroma_mc8_ssse3_func put, h264, _rnd
chroma_mc8_ssse3_func put, vc1,  _nornd
INIT_MMX ssse3
chroma_mc4_ssse3_func put, h264

%define CHROMAMC_AVG DIRECT_AVG
%define PAVG         pavgb
INIT_XMM ssse3
chroma_mc8_ssse3_func avg, h264, _rnd
chroma_mc8_ssse3_func avg, vc1,  _nornd
INIT_MMX ssse3
chroma_mc4_ssse3_func avg, h264