;******************************************************************************
;* MMX/SSSE3-optimized functions for H264 chroma MC
;* Copyright (c) 2005 Zoltan Hidvegi <hzoli -a- hzoli -d- com>,
;*               2005-2008 Loren Merritt
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "x86inc.asm"
%include "x86util.asm"

SECTION_RODATA

rnd_rv40_2d_tbl: times 4 dw  0
                 times 4 dw 16
                 times 4 dw 32
                 times 4 dw 16
                 times 4 dw 32
                 times 4 dw 28
                 times 4 dw 32
                 times 4 dw 28
                 times 4 dw  0
                 times 4 dw 32
                 times 4 dw 16
                 times 4 dw 32
                 times 4 dw 32
                 times 4 dw 28
                 times 4 dw 32
                 times 4 dw 28
rnd_rv40_1d_tbl: times 4 dw 0
                 times 4 dw 2
                 times 4 dw 4
                 times 4 dw 2
                 times 4 dw 4
                 times 4 dw 3
                 times 4 dw 4
                 times 4 dw 3
                 times 4 dw 0
                 times 4 dw 4
                 times 4 dw 2
                 times 4 dw 4
                 times 4 dw 4
                 times 4 dw 3
                 times 4 dw 4
                 times 4 dw 3
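
; Each table holds 16 entries of 4 broadcast words: the RV40 rounding constant
; for a given (mx, my) pair.  The rnd_bias computed below is
; ((my & ~1) * 4 + mx) >> 1, i.e. the tables are indexed by
; (my / 2) * 4 + (mx / 2), at 8 bytes per entry.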
cextern pw_3
cextern pw_4
cextern pw_8
cextern pw_28
cextern pw_32
cextern pw_64

SECTION .text
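
; mv0_pixels_mc8: fast path for mx == my == 0 - no interpolation is needed, so
; 8-byte rows are simply copied (or, for the avg functions, averaged into the
; destination), four rows per iteration.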
%macro mv0_pixels_mc8 0
    lea r4, [r2*2]
.next4rows
    movq mm0, [r1]
    movq mm1, [r1+r2]
    CHROMAMC_AVG mm0, [r0]
    CHROMAMC_AVG mm1, [r0+r2]
    movq [r0], mm0
    movq [r0+r2], mm1
    add r0, r4
    add r1, r4
    movq mm0, [r1]
    movq mm1, [r1+r2]
    CHROMAMC_AVG mm0, [r0]
    CHROMAMC_AVG mm1, [r0+r2]
    add r1, r4
    movq [r0], mm0
    movq [r0+r2], mm1
    add r0, r4
    sub r3d, 4
    jne .next4rows
%endmacro

%macro chroma_mc8_mmx_func 3
; put/avg_h264_chroma_mc8_mmx_*(uint8_t *dst /*align 8*/, uint8_t *src /*align 1*/,
;                               int stride, int h, int mx, int my)
cglobal %1_%2_chroma_mc8_%3, 6, 7, 0
%ifdef ARCH_X86_64
    movsxd r2, r2d
%endif
    mov r6d, r5d
    or r6d, r4d
    jne .at_least_one_non_zero
    ; mx == 0 AND my == 0 - no filter needed
    mv0_pixels_mc8
    REP_RET
.at_least_one_non_zero
%ifidn %2, rv40
%ifdef PIC
%define rnd_1d_rv40 r11
%define rnd_2d_rv40 r11
%else ; no-PIC
%define rnd_1d_rv40 rnd_rv40_1d_tbl
%define rnd_2d_rv40 rnd_rv40_2d_tbl
%endif
%ifdef ARCH_X86_64
    mov r10, r5
    and r10, 6 ; &~1 for mx/my=[0,7]
    lea r10, [r10*4+r4]
    sar r10d, 1
%define rnd_bias r10
%define dest_reg r0
%else ; x86-32
    mov r0, r5
    and r0, 6 ; &~1 for mx/my=[0,7]
    lea r0, [r0*4+r4]
    sar r0d, 1
%define rnd_bias r0
%define dest_reg r5
%endif
%else ; vc1, h264
%define rnd_bias 0
%define dest_reg r0
%endif
    test r5d, r5d
    mov r6, 1
    je .my_is_zero
    test r4d, r4d
    mov r6, r2 ; dxy = x ? 1 : stride
    jne .both_non_zero
.my_is_zero
    ; mx == 0 XOR my == 0 - 1 dimensional filter only
    or r4d, r5d ; x + y
%ifidn %2, rv40
%ifdef PIC
    lea r11, [rnd_rv40_1d_tbl]
%endif
%ifndef ARCH_X86_64
    mov r5, r0m
%endif
%endif
    movd m5, r4d
    movq m4, [pw_8]
    movq m6, [rnd_1d_%2+rnd_bias*8] ; mm6 = rnd >> 3
    punpcklwd m5, m5
    punpckldq m5, m5 ; mm5 = B = x
    pxor m7, m7
    psubw m4, m5 ; mm4 = A = 8-x
.next1drow
    movq m0, [r1] ; mm0 = src[0..7]
    movq m2, [r1+r6] ; mm2 = src[1..8]
    movq m1, m0
    movq m3, m2
    punpcklbw m0, m7
    punpckhbw m1, m7
    punpcklbw m2, m7
    punpckhbw m3, m7
    pmullw m0, m4 ; [mm0,mm1] = A * src[0..7]
    pmullw m1, m4
    pmullw m2, m5 ; [mm2,mm3] = B * src[1..8]
    pmullw m3, m5
    paddw m0, m6
    paddw m1, m6
    paddw m0, m2
    paddw m1, m3
    psrlw m0, 3
    psrlw m1, 3
    packuswb m0, m1
    CHROMAMC_AVG m0, [dest_reg]
    movq [dest_reg], m0 ; dst[0..7] = (A * src[0..7] + B * src[1..8] + (rnd >> 3)) >> 3
    add dest_reg, r2
    add r1, r2
    dec r3d
    jne .next1drow
    REP_RET
.both_non_zero ; general case, bilinear
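    ; 2D case: each output pixel is a bilinear blend of four source pixels,
    ;   dst[i] = (A*src[i] + B*src[i+1] + C*src[i+stride] + D*src[i+stride+1] + rnd) >> 6
    ; with A = (8-x)*(8-y), B = x*(8-y), C = (8-x)*y, D = x*y, computed below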
    movd m4, r4d ; x
    movd m6, r5d ; y
%ifidn %2, rv40
%ifdef PIC
    lea r11, [rnd_rv40_2d_tbl]
%endif
%ifndef ARCH_X86_64
    mov r5, r0m
%endif
%endif
    mov r6, rsp ; backup stack pointer
    and rsp, ~(mmsize-1) ; align stack
    sub rsp, 16 ; AA and DD
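    ; all eight MMX registers are needed inside the loop, so the A and D word
    ; vectors are kept on the mmsize-aligned stack ([rsp] and [rsp+8]) instead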
    punpcklwd m4, m4
    punpcklwd m6, m6
    punpckldq m4, m4 ; mm4 = x words
    punpckldq m6, m6 ; mm6 = y words
    movq m5, m4
    pmullw m4, m6 ; mm4 = x * y
    psllw m5, 3
    psllw m6, 3
    movq m7, m5
    paddw m7, m6
    movq [rsp+8], m4 ; DD = x * y
    psubw m5, m4 ; mm5 = B = 8x - xy
    psubw m6, m4 ; mm6 = C = 8y - xy
    paddw m4, [pw_64]
    psubw m4, m7 ; mm4 = A = xy - (8x+8y) + 64
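    ; note A = xy - 8x - 8y + 64 = (8-x)*(8-y), matching the formula above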
    pxor m7, m7
    movq [rsp], m4
    movq m0, [r1] ; mm0 = src[0..7]
    movq m1, [r1+1] ; mm1 = src[1..8]
.next2drow
    add r1, r2
    movq m2, m0
    movq m3, m1
    punpckhbw m0, m7
    punpcklbw m1, m7
    punpcklbw m2, m7
    punpckhbw m3, m7
    pmullw m0, [rsp]
    pmullw m2, [rsp]
    pmullw m1, m5
    pmullw m3, m5
    paddw m2, m1 ; mm2 = A * src[0..3] + B * src[1..4]
    paddw m3, m0 ; mm3 = A * src[4..7] + B * src[5..8]
    movq m0, [r1]
    movq m1, m0
    punpcklbw m0, m7
    punpckhbw m1, m7
    pmullw m0, m6
    pmullw m1, m6
    paddw m2, m0
    paddw m3, m1 ; [mm2,mm3] += C * src[0..7]
    movq m1, [r1+1]
    movq m0, m1
    movq m4, m1
    punpcklbw m0, m7
    punpckhbw m4, m7
    pmullw m0, [rsp+8]
    pmullw m4, [rsp+8]
    paddw m2, m0
    paddw m3, m4 ; [mm2,mm3] += D * src[1..8]
    movq m0, [r1]
    paddw m2, [rnd_2d_%2+rnd_bias*8]
    paddw m3, [rnd_2d_%2+rnd_bias*8]
    psrlw m2, 6
    psrlw m3, 6
    packuswb m2, m3
    CHROMAMC_AVG m2, [dest_reg]
    movq [dest_reg], m2 ; dst[0..7] = ([mm2,mm3] + rnd) >> 6
    add dest_reg, r2
    dec r3d
    jne .next2drow
    mov rsp, r6 ; restore stack pointer
    RET
%endmacro

%macro chroma_mc4_mmx_func 3
cglobal %1_%2_chroma_mc4_%3, 6, 6, 0
%ifdef ARCH_X86_64
    movsxd r2, r2d
%endif
    pxor m7, m7
    movd m2, r4d ; x
    movd m3, r5d ; y
    movq m4, [pw_8]
    movq m5, [pw_8]
    punpcklwd m2, m2
    punpcklwd m3, m3
    punpcklwd m2, m2
    punpcklwd m3, m3
    psubw m4, m2
    psubw m5, m3
%ifidn %2, rv40
%ifdef PIC
    lea r11, [rnd_rv40_2d_tbl]
%define rnd_2d_rv40 r11
%else
%define rnd_2d_rv40 rnd_rv40_2d_tbl
%endif
    and r5, 6 ; &~1 for mx/my=[0,7]
    lea r5, [r5*4+r4]
    sar r5d, 1
%define rnd_bias r5
%else ; vc1, h264
%define rnd_bias 0
%endif
    movd m0, [r1]
    movd m6, [r1+1]
    add r1, r2
    punpcklbw m0, m7
    punpcklbw m6, m7
    pmullw m0, m4
    pmullw m6, m2
    paddw m6, m0
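    ; the loop below is software-pipelined: the horizontally filtered previous
    ; row (kept in m6, then m0) is carried across iterations, so each output
    ; row only needs one new horizontal filter plus the vertical blend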
.next2rows
    movd m0, [r1]
    movd m1, [r1+1]
    add r1, r2
    punpcklbw m0, m7
    punpcklbw m1, m7
    pmullw m0, m4
    pmullw m1, m2
    paddw m1, m0
    movq m0, m1
    pmullw m6, m5
    pmullw m1, m3
    paddw m6, [rnd_2d_%2+rnd_bias*8]
    paddw m1, m6
    psrlw m1, 6
    packuswb m1, m1
    CHROMAMC_AVG4 m1, m6, [r0]
    movd [r0], m1
    add r0, r2
    movd m6, [r1]
    movd m1, [r1+1]
    add r1, r2
    punpcklbw m6, m7
    punpcklbw m1, m7
    pmullw m6, m4
    pmullw m1, m2
    paddw m1, m6
    movq m6, m1
    pmullw m0, m5
    pmullw m1, m3
    paddw m0, [rnd_2d_%2+rnd_bias*8]
    paddw m1, m0
    psrlw m1, 6
    packuswb m1, m1
    CHROMAMC_AVG4 m1, m0, [r0]
    movd [r0], m1
    add r0, r2
    sub r3d, 2
    jnz .next2rows
    REP_RET
%endmacro

%macro chroma_mc2_mmx_func 3
cglobal %1_%2_chroma_mc2_%3, 6, 7, 0
%ifdef ARCH_X86_64
    movsxd r2, r2d
%endif
    mov r6d, r4d
    shl r4d, 16
    sub r4d, r6d
    add r4d, 8
    imul r5d, r4d ; x*y<<16 | y*(8-x)
    shl r4d, 3
    sub r4d, r5d ; x*(8-y)<<16 | (8-x)*(8-y)
    movd m5, r4d
    movd m6, r5d
    punpckldq m5, m5 ; mm5 = {A,B,A,B}
    punpckldq m6, m6 ; mm6 = {C,D,C,D}
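    ; packing the two 16-bit weights of each source pair into one dword lets
    ; pmaddwd produce A*src[i] + B*src[i+1] (and C/D for the next row) per
    ; dword lane in a single instruction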
    pxor m7, m7
    movd m2, [r1]
    punpcklbw m2, m7
    pshufw m2, m2, 0x94 ; mm2 = src[0,1,1,2]
.nextrow
    add r1, r2
    movq m1, m2
    pmaddwd m1, m5 ; mm1 = A * src[0,1] + B * src[1,2]
    movd m0, [r1]
    punpcklbw m0, m7
    pshufw m0, m0, 0x94 ; mm0 = src[0,1,1,2]
    movq m2, m0
    pmaddwd m0, m6
    paddw m1, [rnd_2d_%2]
    paddw m1, m0 ; mm1 += C * src[0,1] + D * src[1,2]
    psrlw m1, 6
    packssdw m1, m7
    packuswb m1, m7
    CHROMAMC_AVG4 m1, m3, [r0]
    movd r5d, m1
    mov [r0], r5w
    add r0, r2
    sub r3d, 1
    jnz .nextrow
    REP_RET
%endmacro
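
; rounding constants: H.264 adds 4 before the >>3 of the 1-D filter and 32
; before the >>6 of the 2-D filter; the VC-1 "no rounding" variants use 3 and
; 28 instead, and RV40 takes its constant from the tables above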
%define rnd_1d_h264 pw_4
%define rnd_2d_h264 pw_32
%define rnd_1d_vc1 pw_3
%define rnd_2d_vc1 pw_28
%macro NOTHING 2-3
%endmacro
%macro DIRECT_AVG 2
    PAVG %1, %2
%endmacro
%macro COPY_AVG 3
    movd %2, %3
    PAVG %1, %2
%endmacro
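
; CHROMAMC_AVG/CHROMAMC_AVG4 expand to NOTHING for the put functions and to a
; packed byte average with the existing destination for the avg functions
; (PAVG = pavgb for the MMX2/SSSE3 versions, pavgusb for the 3DNow! versions)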
INIT_MMX
%define CHROMAMC_AVG NOTHING
%define CHROMAMC_AVG4 NOTHING
chroma_mc8_mmx_func put, h264, mmx_rnd
chroma_mc8_mmx_func put, vc1, mmx_nornd
chroma_mc8_mmx_func put, rv40, mmx
chroma_mc4_mmx_func put, h264, mmx
chroma_mc4_mmx_func put, rv40, mmx
chroma_mc2_mmx_func put, h264, mmx2
%define CHROMAMC_AVG DIRECT_AVG
%define CHROMAMC_AVG4 COPY_AVG
%define PAVG pavgb
chroma_mc8_mmx_func avg, h264, mmx2_rnd
chroma_mc8_mmx_func avg, vc1, mmx2_nornd
chroma_mc8_mmx_func avg, rv40, mmx2
chroma_mc4_mmx_func avg, h264, mmx2
chroma_mc4_mmx_func avg, rv40, mmx2
chroma_mc2_mmx_func avg, h264, mmx2
%define PAVG pavgusb
chroma_mc8_mmx_func avg, h264, 3dnow_rnd
chroma_mc8_mmx_func avg, vc1, 3dnow_nornd
chroma_mc8_mmx_func avg, rv40, 3dnow
chroma_mc4_mmx_func avg, h264, 3dnow
chroma_mc4_mmx_func avg, rv40, 3dnow

%macro chroma_mc8_ssse3_func 3
cglobal %1_%2_chroma_mc8_%3, 6, 7, 8
%ifdef ARCH_X86_64
    movsxd r2, r2d
%endif
    mov r6d, r5d
    or r6d, r4d
    jne .at_least_one_non_zero
    ; mx == 0 AND my == 0 - no filter needed
    mv0_pixels_mc8
    REP_RET
.at_least_one_non_zero
    test r5d, r5d
    je .my_is_zero
    test r4d, r4d
    je .mx_is_zero
    ; general case, bilinear
    mov r6d, r4d
    shl r4d, 8
    sub r4, r6
    add r4, 8 ; x*255+8 = x<<8 | (8-x)
    mov r6, 8
    sub r6d, r5d
    imul r6, r4 ; (8-y)*(x*255+8) = (8-y)*x<<8 | (8-y)*(8-x)
    imul r4d, r5d ; y *(x*255+8) = y *x<<8 | y *(8-x)
    movd m7, r6d
    movd m6, r4d
    movdqa m5, [rnd_2d_%2]
    pshuflw m7, m7, 0
    pshuflw m6, m6, 0
    movlhps m7, m7
    movlhps m6, m6
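    ; m7 now holds the byte pairs {(8-x)*(8-y), x*(8-y)} and m6 the pairs
    ; {(8-x)*y, x*y}, broadcast across the register; with the source bytes
    ; interleaved as src[i],src[i+1] below, pmaddubsw performs the weighted
    ; horizontal pair sums for a whole row per instruction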
    movq m0, [r1]
    movq m1, [r1+1]
    punpcklbw m0, m1
    add r1, r2
.next2rows
    movq m1, [r1]
    movq m2, [r1+1]
    movq m3, [r1+r2]
    movq m4, [r1+r2+1]
    lea r1, [r1+r2*2]
    punpcklbw m1, m2
    punpcklbw m3, m4
    movdqa m2, m1
    movdqa m4, m3
    pmaddubsw m0, m7
    pmaddubsw m1, m6
    pmaddubsw m2, m7
    pmaddubsw m3, m6
    paddw m0, m5
    paddw m2, m5
    paddw m1, m0
    paddw m3, m2
    movdqa m0, m4
    psrlw m1, 6
    psrlw m3, 6
%ifidn %1, avg
    movq m2, [r0]
    movhps m2, [r0+r2]
%endif
    packuswb m1, m3
    CHROMAMC_AVG m1, m2
    movq [r0], m1
    movhps [r0+r2], m1
    sub r3d, 2
    lea r0, [r0+r2*2]
    jg .next2rows
    REP_RET
.my_is_zero
    mov r5d, r4d
    shl r4d, 8
    add r4, 8
    sub r4, r5 ; 255*x+8 = x<<8 | (8-x)
    movd m7, r4d
    movq m6, [rnd_1d_%2]
    pshuflw m7, m7, 0
    movlhps m6, m6
    movlhps m7, m7
.next2xrows
    movq m0, [r1]
    movq m1, [r1+1]
    movq m2, [r1+r2]
    movq m3, [r1+r2+1]
    punpcklbw m0, m1
    punpcklbw m2, m3
    pmaddubsw m0, m7
    pmaddubsw m2, m7
%ifidn %1, avg
    movq m4, [r0]
    movhps m4, [r0+r2]
%endif
    paddw m0, m6
    paddw m2, m6
    psrlw m0, 3
    psrlw m2, 3
    packuswb m0, m2
    CHROMAMC_AVG m0, m4
    movq [r0], m0
    movhps [r0+r2], m0
    sub r3d, 2
    lea r0, [r0+r2*2]
    lea r1, [r1+r2*2]
    jg .next2xrows
    REP_RET
.mx_is_zero
    mov r4d, r5d
    shl r5d, 8
    add r5, 8
    sub r5, r4 ; 255*y+8 = y<<8 | (8-y)
    movd m7, r5d
    movq m6, [rnd_1d_%2]
    pshuflw m7, m7, 0
    movlhps m6, m6
    movlhps m7, m7
.next2yrows
    movq m0, [r1]
    movq m1, [r1+r2]
    movdqa m2, m1
    movq m3, [r1+r2*2]
    punpcklbw m0, m1
    punpcklbw m2, m3
    pmaddubsw m0, m7
    pmaddubsw m2, m7
%ifidn %1, avg
    movq m4, [r0]
    movhps m4, [r0+r2]
%endif
    paddw m0, m6
    paddw m2, m6
    psrlw m0, 3
    psrlw m2, 3
    packuswb m0, m2
    CHROMAMC_AVG m0, m4
    movq [r0], m0
    movhps [r0+r2], m0
    sub r3d, 2
    lea r0, [r0+r2*2]
    lea r1, [r1+r2*2]
    jg .next2yrows
    REP_RET
%endmacro

%macro chroma_mc4_ssse3_func 3
cglobal %1_%2_chroma_mc4_%3, 6, 7, 0
%ifdef ARCH_X86_64
    movsxd r2, r2d
%endif
    mov r6, r4
    shl r4d, 8
    sub r4d, r6d
    add r4d, 8 ; x*255+8
    mov r6, 8
    sub r6d, r5d
    imul r6d, r4d ; (8-y)*(x*255+8) = (8-y)*x<<8 | (8-y)*(8-x)
    imul r4d, r5d ; y *(x*255+8) = y *x<<8 | y *(8-x)
    movd m7, r6d
    movd m6, r4d
    movq m5, [pw_32]
    pshufw m7, m7, 0
    pshufw m6, m6, 0
    movd m0, [r1]
    punpcklbw m0, [r1+1]
    add r1, r2
.next2rows
    movd m1, [r1]
    movd m3, [r1+r2]
    punpcklbw m1, [r1+1]
    punpcklbw m3, [r1+r2+1]
    lea r1, [r1+r2*2]
    movq m2, m1
    movq m4, m3
    pmaddubsw m0, m7
    pmaddubsw m1, m6
    pmaddubsw m2, m7
    pmaddubsw m3, m6
    paddw m0, m5
    paddw m2, m5
    paddw m1, m0
    paddw m3, m2
    movq m0, m4
    psrlw m1, 6
    psrlw m3, 6
    packuswb m1, m1
    packuswb m3, m3
    CHROMAMC_AVG m1, [r0]
    CHROMAMC_AVG m3, [r0+r2]
    movd [r0], m1
    movd [r0+r2], m3
    sub r3d, 2
    lea r0, [r0+r2*2]
    jg .next2rows
    REP_RET
%endmacro

%define CHROMAMC_AVG NOTHING
INIT_XMM
chroma_mc8_ssse3_func put, h264, ssse3_rnd
chroma_mc8_ssse3_func put, vc1, ssse3_nornd
INIT_MMX
chroma_mc4_ssse3_func put, h264, ssse3
%define CHROMAMC_AVG DIRECT_AVG
%define PAVG pavgb
INIT_XMM
chroma_mc8_ssse3_func avg, h264, ssse3_rnd
chroma_mc8_ssse3_func avg, vc1, ssse3_nornd
INIT_MMX
chroma_mc4_ssse3_func avg, h264, ssse3