| @@ -282,18 +282,18 @@ cextern pb_3 | |||||
| ; int8_t *tc0) | ; int8_t *tc0) | ||||
| ;----------------------------------------------------------------------------- | ;----------------------------------------------------------------------------- | ||||
| %macro DEBLOCK_LUMA 0 | %macro DEBLOCK_LUMA 0 | ||||
| cglobal deblock_v_luma_8, 5,5,10 | |||||
| cglobal deblock_v_luma_8, 5,5,10, pix_, stride_, alpha_, beta_, base3_ | |||||
| movd m8, [r4] ; tc0 | movd m8, [r4] ; tc0 | ||||
| lea r4, [r1*3] | |||||
| dec r2d ; alpha-1 | |||||
| lea r4, [stride_q*3] | |||||
| dec alpha_d ; alpha-1 | |||||
| neg r4 | neg r4 | ||||
| dec r3d ; beta-1 | |||||
| add r4, r0 ; pix-3*stride | |||||
| dec beta_d ; beta-1 | |||||
| add base3_q, pix_q ; pix-3*stride | |||||
| mova m0, [r4+r1] ; p1 | |||||
| mova m1, [r4+2*r1] ; p0 | |||||
| mova m2, [r0] ; q0 | |||||
| mova m3, [r0+r1] ; q1 | |||||
| mova m0, [base3_q + stride_q] ; p1 | |||||
| mova m1, [base3_q + 2*stride_q] ; p0 | |||||
| mova m2, [pix_q] ; q0 | |||||
| mova m3, [pix_q + stride_q] ; q1 | |||||
| LOAD_MASK r2d, r3d | LOAD_MASK r2d, r3d | ||||
| punpcklbw m8, m8 | punpcklbw m8, m8 | ||||
| @@ -303,24 +303,24 @@ cglobal deblock_v_luma_8, 5,5,10 | |||||
| pandn m9, m7 | pandn m9, m7 | ||||
| pand m8, m9 | pand m8, m9 | ||||
| movdqa m3, [r4] ; p2 | |||||
| movdqa m3, [base3_q] ; p2 | |||||
| DIFF_GT2 m1, m3, m5, m6, m7 ; |p2-p0| > beta-1 | DIFF_GT2 m1, m3, m5, m6, m7 ; |p2-p0| > beta-1 | ||||
| pand m6, m9 | pand m6, m9 | ||||
| psubb m7, m8, m6 | psubb m7, m8, m6 | ||||
| pand m6, m8 | pand m6, m8 | ||||
| LUMA_Q1 m0, m3, [r4], [r4+r1], m6, m4 | |||||
| LUMA_Q1 m0, m3, [base3_q], [base3_q + stride_q], m6, m4 | |||||
| movdqa m4, [r0+2*r1] ; q2 | |||||
| movdqa m4, [pix_q + 2*stride_q] ; q2 | |||||
| DIFF_GT2 m2, m4, m5, m6, m3 ; |q2-q0| > beta-1 | DIFF_GT2 m2, m4, m5, m6, m3 ; |q2-q0| > beta-1 | ||||
| pand m6, m9 | pand m6, m9 | ||||
| pand m8, m6 | pand m8, m6 | ||||
| psubb m7, m6 | psubb m7, m6 | ||||
| mova m3, [r0+r1] | |||||
| LUMA_Q1 m3, m4, [r0+2*r1], [r0+r1], m8, m6 | |||||
| mova m3, [pix_q + stride_q] | |||||
| LUMA_Q1 m3, m4, [pix_q + 2*stride_q], [pix_q + stride_q], m8, m6 | |||||
| DEBLOCK_P0_Q0 | DEBLOCK_P0_Q0 | ||||
| mova [r4+2*r1], m1 | |||||
| mova [r0], m2 | |||||
| mova [base3_q + 2*stride_q], m1 | |||||
| mova [pix_q], m2 | |||||
| RET | RET | ||||
| ;----------------------------------------------------------------------------- | ;----------------------------------------------------------------------------- | ||||