Browse Source

H264: change weight/biweight functions to take a height argument.

Neon parts by Mans Rullgard <mans@mansr.com>.
tags/n0.9
Ronald S. Bultje 14 years ago
parent
commit
c2d337429c
10 changed files with 337 additions and 592 deletions
  1. +21
    -56
      libavcodec/arm/h264dsp_init_arm.c
  2. +25
    -61
      libavcodec/arm/h264dsp_neon.S
  3. +53
    -73
      libavcodec/h264.c
  4. +8
    -20
      libavcodec/h264dsp.c
  5. +6
    -4
      libavcodec/h264dsp.h
  6. +13
    -15
      libavcodec/h264dsp_template.c
  7. +19
    -25
      libavcodec/ppc/h264_altivec.c
  8. +79
    -131
      libavcodec/x86/h264_weight.asm
  9. +50
    -95
      libavcodec/x86/h264_weight_10bit.asm
  10. +63
    -112
      libavcodec/x86/h264dsp_mmx.c

+ 21
- 56
libavcodec/arm/h264dsp_init_arm.c View File

@@ -32,47 +32,22 @@ void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha,
void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha, void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha,
int beta, int8_t *tc0); int beta, int8_t *tc0);


void ff_weight_h264_pixels_16x16_neon(uint8_t *ds, int stride, int log2_den,
int weight, int offset);
void ff_weight_h264_pixels_16x8_neon(uint8_t *ds, int stride, int log2_den,
int weight, int offset);
void ff_weight_h264_pixels_8x16_neon(uint8_t *ds, int stride, int log2_den,
int weight, int offset);
void ff_weight_h264_pixels_8x8_neon(uint8_t *ds, int stride, int log2_den,
int weight, int offset);
void ff_weight_h264_pixels_8x4_neon(uint8_t *ds, int stride, int log2_den,
int weight, int offset);
void ff_weight_h264_pixels_4x8_neon(uint8_t *ds, int stride, int log2_den,
int weight, int offset);
void ff_weight_h264_pixels_4x4_neon(uint8_t *ds, int stride, int log2_den,
int weight, int offset);
void ff_weight_h264_pixels_4x2_neon(uint8_t *ds, int stride, int log2_den,
int weight, int offset);
void ff_weight_h264_pixels_16_neon(uint8_t *dst, int stride, int height,
int log2_den, int weight, int offset);
void ff_weight_h264_pixels_8_neon(uint8_t *dst, int stride, int height,
int log2_den, int weight, int offset);
void ff_weight_h264_pixels_4_neon(uint8_t *dst, int stride, int height,
int log2_den, int weight, int offset);


void ff_biweight_h264_pixels_16x16_neon(uint8_t *dst, uint8_t *src, int stride,
int log2_den, int weightd, int weights,
int offset);
void ff_biweight_h264_pixels_16x8_neon(uint8_t *dst, uint8_t *src, int stride,
int log2_den, int weightd, int weights,
int offset);
void ff_biweight_h264_pixels_8x16_neon(uint8_t *dst, uint8_t *src, int stride,
int log2_den, int weightd, int weights,
int offset);
void ff_biweight_h264_pixels_8x8_neon(uint8_t *dst, uint8_t *src, int stride,
int log2_den, int weightd, int weights,
int offset);
void ff_biweight_h264_pixels_8x4_neon(uint8_t *dst, uint8_t *src, int stride,
int log2_den, int weightd, int weights,
int offset);
void ff_biweight_h264_pixels_4x8_neon(uint8_t *dst, uint8_t *src, int stride,
int log2_den, int weightd, int weights,
int offset);
void ff_biweight_h264_pixels_4x4_neon(uint8_t *dst, uint8_t *src, int stride,
int log2_den, int weightd, int weights,
int offset);
void ff_biweight_h264_pixels_4x2_neon(uint8_t *dst, uint8_t *src, int stride,
int log2_den, int weightd, int weights,
int offset);
void ff_biweight_h264_pixels_16_neon(uint8_t *dst, uint8_t *src, int stride,
int height, int log2_den, int weightd,
int weights, int offset);
void ff_biweight_h264_pixels_8_neon(uint8_t *dst, uint8_t *src, int stride,
int height, int log2_den, int weightd,
int weights, int offset);
void ff_biweight_h264_pixels_4_neon(uint8_t *dst, uint8_t *src, int stride,
int height, int log2_den, int weightd,
int weights, int offset);


void ff_h264_idct_add_neon(uint8_t *dst, DCTELEM *block, int stride); void ff_h264_idct_add_neon(uint8_t *dst, DCTELEM *block, int stride);
void ff_h264_idct_dc_add_neon(uint8_t *dst, DCTELEM *block, int stride); void ff_h264_idct_dc_add_neon(uint8_t *dst, DCTELEM *block, int stride);
@@ -100,23 +75,13 @@ static void ff_h264dsp_init_neon(H264DSPContext *c, const int bit_depth, const i
c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon; c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon;
c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon; c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon;


c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels_16x16_neon;
c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels_16x8_neon;
c->weight_h264_pixels_tab[2] = ff_weight_h264_pixels_8x16_neon;
c->weight_h264_pixels_tab[3] = ff_weight_h264_pixels_8x8_neon;
c->weight_h264_pixels_tab[4] = ff_weight_h264_pixels_8x4_neon;
c->weight_h264_pixels_tab[5] = ff_weight_h264_pixels_4x8_neon;
c->weight_h264_pixels_tab[6] = ff_weight_h264_pixels_4x4_neon;
c->weight_h264_pixels_tab[7] = ff_weight_h264_pixels_4x2_neon;
c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels_16_neon;
c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels_8_neon;
c->weight_h264_pixels_tab[2] = ff_weight_h264_pixels_4_neon;


c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels_16x16_neon;
c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels_16x8_neon;
c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels_8x16_neon;
c->biweight_h264_pixels_tab[3] = ff_biweight_h264_pixels_8x8_neon;
c->biweight_h264_pixels_tab[4] = ff_biweight_h264_pixels_8x4_neon;
c->biweight_h264_pixels_tab[5] = ff_biweight_h264_pixels_4x8_neon;
c->biweight_h264_pixels_tab[6] = ff_biweight_h264_pixels_4x4_neon;
c->biweight_h264_pixels_tab[7] = ff_biweight_h264_pixels_4x2_neon;
c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels_16_neon;
c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels_8_neon;
c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels_4_neon;


c->h264_idct_add = ff_h264_idct_add_neon; c->h264_idct_add = ff_h264_idct_add_neon;
c->h264_idct_dc_add = ff_h264_idct_dc_add_neon; c->h264_idct_dc_add = ff_h264_idct_dc_add_neon;


+ 25
- 61
libavcodec/arm/h264dsp_neon.S View File

@@ -1592,7 +1592,7 @@ endfunc
vdup.8 d1, r5 vdup.8 d1, r5
vmov q2, q8 vmov q2, q8
vmov q3, q8 vmov q3, q8
1: subs ip, ip, #2
1: subs r3, r3, #2
vld1.8 {d20-d21},[r0,:128], r2 vld1.8 {d20-d21},[r0,:128], r2
\macd q2, d0, d20 \macd q2, d0, d20
pld [r0] pld [r0]
@@ -1632,7 +1632,7 @@ endfunc
vdup.8 d1, r5 vdup.8 d1, r5
vmov q1, q8 vmov q1, q8
vmov q10, q8 vmov q10, q8
1: subs ip, ip, #2
1: subs r3, r3, #2
vld1.8 {d4},[r0,:64], r2 vld1.8 {d4},[r0,:64], r2
\macd q1, d0, d4 \macd q1, d0, d4
pld [r0] pld [r0]
@@ -1662,7 +1662,7 @@ endfunc
vdup.8 d1, r5 vdup.8 d1, r5
vmov q1, q8 vmov q1, q8
vmov q10, q8 vmov q10, q8
1: subs ip, ip, #4
1: subs r3, r3, #4
vld1.32 {d4[0]},[r0,:32], r2 vld1.32 {d4[0]},[r0,:32], r2
vld1.32 {d4[1]},[r0,:32], r2 vld1.32 {d4[1]},[r0,:32], r2
\macd q1, d0, d4 \macd q1, d0, d4
@@ -1700,16 +1700,17 @@ endfunc
.endm .endm


.macro biweight_func w .macro biweight_func w
function biweight_h264_pixels_\w\()_neon
function ff_biweight_h264_pixels_\w\()_neon, export=1
push {r4-r6, lr} push {r4-r6, lr}
add r4, sp, #16
ldr r12, [sp, #16]
add r4, sp, #20
ldm r4, {r4-r6} ldm r4, {r4-r6}
lsr lr, r4, #31 lsr lr, r4, #31
add r6, r6, #1 add r6, r6, #1
eors lr, lr, r5, lsr #30 eors lr, lr, r5, lsr #30
orr r6, r6, #1 orr r6, r6, #1
vdup.16 q9, r3
lsl r6, r6, r3
vdup.16 q9, r12
lsl r6, r6, r12
vmvn q9, q9 vmvn q9, q9
vdup.16 q8, r6 vdup.16 q8, r6
mov r6, r0 mov r6, r0
@@ -1730,34 +1731,15 @@ function biweight_h264_pixels_\w\()_neon
endfunc endfunc
.endm .endm


.macro biweight_entry w, h, b=1
function ff_biweight_h264_pixels_\w\()x\h\()_neon, export=1
mov ip, #\h
.if \b
b biweight_h264_pixels_\w\()_neon
.endif
endfunc
.endm

biweight_entry 16, 8
biweight_entry 16, 16, b=0
biweight_func 16 biweight_func 16

biweight_entry 8, 16
biweight_entry 8, 4
biweight_entry 8, 8, b=0
biweight_func 8 biweight_func 8

biweight_entry 4, 8
biweight_entry 4, 2
biweight_entry 4, 4, b=0
biweight_func 4 biweight_func 4


@ Weighted prediction @ Weighted prediction


.macro weight_16 add .macro weight_16 add
vdup.8 d0, r3
1: subs ip, ip, #2
vdup.8 d0, r12
1: subs r2, r2, #2
vld1.8 {d20-d21},[r0,:128], r1 vld1.8 {d20-d21},[r0,:128], r1
vmull.u8 q2, d0, d20 vmull.u8 q2, d0, d20
pld [r0] pld [r0]
@@ -1785,8 +1767,8 @@ endfunc
.endm .endm


.macro weight_8 add .macro weight_8 add
vdup.8 d0, r3
1: subs ip, ip, #2
vdup.8 d0, r12
1: subs r2, r2, #2
vld1.8 {d4},[r0,:64], r1 vld1.8 {d4},[r0,:64], r1
vmull.u8 q1, d0, d4 vmull.u8 q1, d0, d4
pld [r0] pld [r0]
@@ -1806,10 +1788,10 @@ endfunc
.endm .endm


.macro weight_4 add .macro weight_4 add
vdup.8 d0, r3
vdup.8 d0, r12
vmov q1, q8 vmov q1, q8
vmov q10, q8 vmov q10, q8
1: subs ip, ip, #4
1: subs r2, r2, #4
vld1.32 {d4[0]},[r0,:32], r1 vld1.32 {d4[0]},[r0,:32], r1
vld1.32 {d4[1]},[r0,:32], r1 vld1.32 {d4[1]},[r0,:32], r1
vmull.u8 q1, d0, d4 vmull.u8 q1, d0, d4
@@ -1842,50 +1824,32 @@ endfunc
.endm .endm


.macro weight_func w .macro weight_func w
function weight_h264_pixels_\w\()_neon
function ff_weight_h264_pixels_\w\()_neon, export=1
push {r4, lr} push {r4, lr}
ldr r4, [sp, #8]
cmp r2, #1
lsl r4, r4, r2
ldr r12, [sp, #8]
ldr r4, [sp, #12]
cmp r3, #1
lsl r4, r4, r3
vdup.16 q8, r4 vdup.16 q8, r4
mov r4, r0 mov r4, r0
ble 20f ble 20f
rsb lr, r2, #1
rsb lr, r3, #1
vdup.16 q9, lr vdup.16 q9, lr
cmp r3, #0
cmp r12, #0
blt 10f blt 10f
weight_\w vhadd.s16 weight_\w vhadd.s16
10: rsb r3, r3, #0
10: rsb r12, r12, #0
weight_\w vhsub.s16 weight_\w vhsub.s16
20: rsb lr, r2, #0
20: rsb lr, r3, #0
vdup.16 q9, lr vdup.16 q9, lr
cmp r3, #0
cmp r12, #0
blt 10f blt 10f
weight_\w vadd.s16 weight_\w vadd.s16
10: rsb r3, r3, #0
10: rsb r12, r12, #0
weight_\w vsub.s16 weight_\w vsub.s16
endfunc endfunc
.endm .endm


.macro weight_entry w, h, b=1
function ff_weight_h264_pixels_\w\()x\h\()_neon, export=1
mov ip, #\h
.if \b
b weight_h264_pixels_\w\()_neon
.endif
endfunc
.endm

weight_entry 16, 8
weight_entry 16, 16, b=0
weight_func 16 weight_func 16

weight_entry 8, 16
weight_entry 8, 4
weight_entry 8, 8, b=0
weight_func 8 weight_func 8

weight_entry 4, 8
weight_entry 4, 2
weight_entry 4, 4, b=0
weight_func 4 weight_func 4

+ 53
- 73
libavcodec/h264.c View File

@@ -438,7 +438,8 @@ static void chroma_dc_dct_c(DCTELEM *block){
} }
#endif #endif


static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square,
int height, int delta, int list,
uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
int src_x_offset, int src_y_offset, int src_x_offset, int src_y_offset,
qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op, qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op,
@@ -518,16 +519,16 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square,
s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, (16>>!(CHROMA422)) + 1, (mx>>3), (my>>ysh), pic_width>>1, pic_height>>!(CHROMA422)); s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, (16>>!(CHROMA422)) + 1, (mx>>3), (my>>ysh), pic_width>>1, pic_height>>!(CHROMA422));
src_cb= s->edge_emu_buffer; src_cb= s->edge_emu_buffer;
} }
chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height << !!(CHROMA422), mx&7, (my << !!(CHROMA422)) &7);
chroma_op(dest_cb, src_cb, h->mb_uvlinesize, height >> !(CHROMA422), mx&7, (my << !!(CHROMA422)) &7);


if(emu){ if(emu){
s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, (16>>!(CHROMA422)) + 1, (mx>>3), (my>>ysh), pic_width>>1, pic_height>>!(CHROMA422)); s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, (16>>!(CHROMA422)) + 1, (mx>>3), (my>>ysh), pic_width>>1, pic_height>>!(CHROMA422));
src_cr= s->edge_emu_buffer; src_cr= s->edge_emu_buffer;
} }
chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height << !!(CHROMA422), mx&7, (my << !!(CHROMA422)) &7);
chroma_op(dest_cr, src_cr, h->mb_uvlinesize, height >> !(CHROMA422), mx&7, (my << !!(CHROMA422)) &7);
} }


static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
static inline void mc_part_std(H264Context *h, int n, int square, int height, int delta,
uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
int x_offset, int y_offset, int x_offset, int y_offset,
qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
@@ -553,7 +554,7 @@ static inline void mc_part_std(H264Context *h, int n, int square, int chroma_hei


if(list0){ if(list0){
Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ]; Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
mc_dir_part(h, ref, n, square, height, delta, 0,
dest_y, dest_cb, dest_cr, x_offset, y_offset, dest_y, dest_cb, dest_cr, x_offset, y_offset,
qpix_op, chroma_op, pixel_shift, chroma444); qpix_op, chroma_op, pixel_shift, chroma444);


@@ -563,13 +564,13 @@ static inline void mc_part_std(H264Context *h, int n, int square, int chroma_hei


if(list1){ if(list1){
Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ]; Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
mc_dir_part(h, ref, n, square, height, delta, 1,
dest_y, dest_cb, dest_cr, x_offset, y_offset, dest_y, dest_cb, dest_cr, x_offset, y_offset,
qpix_op, chroma_op, pixel_shift, chroma444); qpix_op, chroma_op, pixel_shift, chroma444);
} }
} }


static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
static inline void mc_part_weighted(H264Context *h, int n, int square, int height, int delta,
uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
int x_offset, int y_offset, int x_offset, int y_offset,
qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
@@ -577,17 +578,21 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom
h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg, h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
int list0, int list1, int pixel_shift, int chroma444){ int list0, int list1, int pixel_shift, int chroma444){
MpegEncContext * const s = &h->s; MpegEncContext * const s = &h->s;
int chroma_height;


dest_y += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize; dest_y += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
if(chroma444){ if(chroma444){
chroma_height = height;
chroma_weight_avg = luma_weight_avg; chroma_weight_avg = luma_weight_avg;
chroma_weight_op = luma_weight_op; chroma_weight_op = luma_weight_op;
dest_cb += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize; dest_cb += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
dest_cr += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize; dest_cr += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
} else if (CHROMA422) { } else if (CHROMA422) {
chroma_height = height;
dest_cb += ( x_offset << pixel_shift) + 2*y_offset*h->mb_uvlinesize; dest_cb += ( x_offset << pixel_shift) + 2*y_offset*h->mb_uvlinesize;
dest_cr += ( x_offset << pixel_shift) + 2*y_offset*h->mb_uvlinesize; dest_cr += ( x_offset << pixel_shift) + 2*y_offset*h->mb_uvlinesize;
}else{ }else{
chroma_height = height >> 1;
dest_cb += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize; dest_cb += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize;
dest_cr += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize; dest_cr += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize;
} }
@@ -603,78 +608,53 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom
int refn0 = h->ref_cache[0][ scan8[n] ]; int refn0 = h->ref_cache[0][ scan8[n] ];
int refn1 = h->ref_cache[1][ scan8[n] ]; int refn1 = h->ref_cache[1][ scan8[n] ];


mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
mc_dir_part(h, &h->ref_list[0][refn0], n, square, height, delta, 0,
dest_y, dest_cb, dest_cr, dest_y, dest_cb, dest_cr,
x_offset, y_offset, qpix_put, chroma_put, pixel_shift, chroma444); x_offset, y_offset, qpix_put, chroma_put, pixel_shift, chroma444);
mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
mc_dir_part(h, &h->ref_list[1][refn1], n, square, height, delta, 1,
tmp_y, tmp_cb, tmp_cr, tmp_y, tmp_cb, tmp_cr,
x_offset, y_offset, qpix_put, chroma_put, pixel_shift, chroma444); x_offset, y_offset, qpix_put, chroma_put, pixel_shift, chroma444);


if(h->use_weight == 2){ if(h->use_weight == 2){
int weight0 = h->implicit_weight[refn0][refn1][s->mb_y&1]; int weight0 = h->implicit_weight[refn0][refn1][s->mb_y&1];
int weight1 = 64 - weight0; int weight1 = 64 - weight0;
luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
if (CHROMA422) {
chroma_weight_avg(dest_cb + chroma_height * h->mb_uvlinesize,
tmp_cb + chroma_height * h->mb_uvlinesize,
h->mb_uvlinesize, 5, weight0, weight1, 0);
chroma_weight_avg(dest_cr + chroma_height * h->mb_uvlinesize,
tmp_cr + chroma_height * h->mb_uvlinesize,
h->mb_uvlinesize, 5, weight0, weight1, 0);
}
luma_weight_avg( dest_y, tmp_y, h-> mb_linesize,
height, 5, weight0, weight1, 0);
chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize,
chroma_height, 5, weight0, weight1, 0);
chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize,
chroma_height, 5, weight0, weight1, 0);
}else{ }else{
luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
luma_weight_avg(dest_y, tmp_y, h->mb_linesize, height, h->luma_log2_weight_denom,
h->luma_weight[refn0][0][0] , h->luma_weight[refn1][1][0], h->luma_weight[refn0][0][0] , h->luma_weight[refn1][1][0],
h->luma_weight[refn0][0][1] + h->luma_weight[refn1][1][1]); h->luma_weight[refn0][0][1] + h->luma_weight[refn1][1][1]);
chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, chroma_height, h->chroma_log2_weight_denom,
h->chroma_weight[refn0][0][0][0] , h->chroma_weight[refn1][1][0][0], h->chroma_weight[refn0][0][0][0] , h->chroma_weight[refn1][1][0][0],
h->chroma_weight[refn0][0][0][1] + h->chroma_weight[refn1][1][0][1]); h->chroma_weight[refn0][0][0][1] + h->chroma_weight[refn1][1][0][1]);
chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, chroma_height, h->chroma_log2_weight_denom,
h->chroma_weight[refn0][0][1][0] , h->chroma_weight[refn1][1][1][0], h->chroma_weight[refn0][0][1][0] , h->chroma_weight[refn1][1][1][0],
h->chroma_weight[refn0][0][1][1] + h->chroma_weight[refn1][1][1][1]); h->chroma_weight[refn0][0][1][1] + h->chroma_weight[refn1][1][1][1]);
if (CHROMA422) {
chroma_weight_avg(dest_cb + chroma_height * h->mb_uvlinesize,
tmp_cb + chroma_height * h->mb_uvlinesize,
h->mb_uvlinesize, h->chroma_log2_weight_denom,
h->chroma_weight[refn0][0][0][0] , h->chroma_weight[refn1][1][0][0],
h->chroma_weight[refn0][0][0][1] + h->chroma_weight[refn1][1][0][1]);
chroma_weight_avg(dest_cr + chroma_height * h->mb_uvlinesize,
tmp_cr + chroma_height * h->mb_uvlinesize,
h->mb_uvlinesize, h->chroma_log2_weight_denom,
h->chroma_weight[refn0][0][1][0] , h->chroma_weight[refn1][1][1][0],
h->chroma_weight[refn0][0][1][1] + h->chroma_weight[refn1][1][1][1]);
}
} }
}else{ }else{
int list = list1 ? 1 : 0; int list = list1 ? 1 : 0;
int refn = h->ref_cache[list][ scan8[n] ]; int refn = h->ref_cache[list][ scan8[n] ];
Picture *ref= &h->ref_list[list][refn]; Picture *ref= &h->ref_list[list][refn];
mc_dir_part(h, ref, n, square, chroma_height, delta, list,
mc_dir_part(h, ref, n, square, height, delta, list,
dest_y, dest_cb, dest_cr, x_offset, y_offset, dest_y, dest_cb, dest_cr, x_offset, y_offset,
qpix_put, chroma_put, pixel_shift, chroma444); qpix_put, chroma_put, pixel_shift, chroma444);


luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
luma_weight_op(dest_y, h->mb_linesize, height, h->luma_log2_weight_denom,
h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]); h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]);
if(h->use_weight_chroma){ if(h->use_weight_chroma){
chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
chroma_weight_op(dest_cb, h->mb_uvlinesize, chroma_height, h->chroma_log2_weight_denom,
h->chroma_weight[refn][list][0][0], h->chroma_weight[refn][list][0][1]); h->chroma_weight[refn][list][0][0], h->chroma_weight[refn][list][0][1]);
chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
chroma_weight_op(dest_cr, h->mb_uvlinesize, chroma_height, h->chroma_log2_weight_denom,
h->chroma_weight[refn][list][1][0], h->chroma_weight[refn][list][1][1]); h->chroma_weight[refn][list][1][0], h->chroma_weight[refn][list][1][1]);
if (CHROMA422) {
chroma_weight_op(dest_cb + chroma_height * h->mb_uvlinesize,
h->mb_uvlinesize, h->chroma_log2_weight_denom,
h->chroma_weight[refn][list][0][0], h->chroma_weight[refn][list][0][1]);
chroma_weight_op(dest_cr + chroma_height * h->mb_uvlinesize,
h->mb_uvlinesize, h->chroma_log2_weight_denom,
h->chroma_weight[refn][list][1][0], h->chroma_weight[refn][list][1][1]);
}
} }
} }
} }


static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
static inline void mc_part(H264Context *h, int n, int square, int height, int delta,
uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
int x_offset, int y_offset, int x_offset, int y_offset,
qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
@@ -684,12 +664,12 @@ static inline void mc_part(H264Context *h, int n, int square, int chroma_height,
if((h->use_weight==2 && list0 && list1 if((h->use_weight==2 && list0 && list1
&& (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ][h->s.mb_y&1] != 32)) && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ][h->s.mb_y&1] != 32))
|| h->use_weight==1) || h->use_weight==1)
mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
mc_part_weighted(h, n, square, height, delta, dest_y, dest_cb, dest_cr,
x_offset, y_offset, qpix_put, chroma_put, x_offset, y_offset, qpix_put, chroma_put,
weight_op[0], weight_op[3], weight_avg[0],
weight_avg[3], list0, list1, pixel_shift, chroma444);
weight_op[0], weight_op[1], weight_avg[0],
weight_avg[1], list0, list1, pixel_shift, chroma444);
else else
mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
mc_part_std(h, n, square, height, delta, dest_y, dest_cb, dest_cr,
x_offset, y_offset, qpix_put, chroma_put, qpix_avg, x_offset, y_offset, qpix_put, chroma_put, qpix_avg,
chroma_avg, list0, list1, pixel_shift, chroma444); chroma_avg, list0, list1, pixel_shift, chroma444);
} }
@@ -731,31 +711,31 @@ static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t
prefetch_motion(h, 0, pixel_shift, chroma444); prefetch_motion(h, 0, pixel_shift, chroma444);


if(IS_16X16(mb_type)){ if(IS_16X16(mb_type)){
mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
mc_part(h, 0, 1, 16, 0, dest_y, dest_cb, dest_cr, 0, 0,
qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0], qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
weight_op, weight_avg, weight_op, weight_avg,
IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
pixel_shift, chroma444); pixel_shift, chroma444);
}else if(IS_16X8(mb_type)){ }else if(IS_16X8(mb_type)){
mc_part(h, 0, 0, 4, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 0,
mc_part(h, 0, 0, 8, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 0,
qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0], qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
&weight_op[1], &weight_avg[1],
weight_op, weight_avg,
IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
pixel_shift, chroma444); pixel_shift, chroma444);
mc_part(h, 8, 0, 4, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 4,
mc_part(h, 8, 0, 8, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 4,
qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0], qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
&weight_op[1], &weight_avg[1],
weight_op, weight_avg,
IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
pixel_shift, chroma444); pixel_shift, chroma444);
}else if(IS_8X16(mb_type)){ }else if(IS_8X16(mb_type)){
mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
mc_part(h, 0, 0, 16, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
&weight_op[2], &weight_avg[2],
&weight_op[1], &weight_avg[1],
IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
pixel_shift, chroma444); pixel_shift, chroma444);
mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
mc_part(h, 4, 0, 16, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
&weight_op[2], &weight_avg[2],
&weight_op[1], &weight_avg[1],
IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
pixel_shift, chroma444); pixel_shift, chroma444);
}else{ }else{
@@ -770,31 +750,31 @@ static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t
int y_offset= (i&2)<<1; int y_offset= (i&2)<<1;


if(IS_SUB_8X8(sub_mb_type)){ if(IS_SUB_8X8(sub_mb_type)){
mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
mc_part(h, n, 1, 8, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
&weight_op[3], &weight_avg[3],
&weight_op[1], &weight_avg[1],
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
pixel_shift, chroma444); pixel_shift, chroma444);
}else if(IS_SUB_8X4(sub_mb_type)){ }else if(IS_SUB_8X4(sub_mb_type)){
mc_part(h, n , 0, 2, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset,
mc_part(h, n , 0, 4, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset,
qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1], qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
&weight_op[4], &weight_avg[4],
&weight_op[1], &weight_avg[1],
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
pixel_shift, chroma444); pixel_shift, chroma444);
mc_part(h, n+2, 0, 2, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
mc_part(h, n+2, 0, 4, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1], qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
&weight_op[4], &weight_avg[4],
&weight_op[1], &weight_avg[1],
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
pixel_shift, chroma444); pixel_shift, chroma444);
}else if(IS_SUB_4X8(sub_mb_type)){ }else if(IS_SUB_4X8(sub_mb_type)){
mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
mc_part(h, n , 0, 8, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
&weight_op[5], &weight_avg[5],
&weight_op[2], &weight_avg[2],
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
pixel_shift, chroma444); pixel_shift, chroma444);
mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
mc_part(h, n+1, 0, 8, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
&weight_op[5], &weight_avg[5],
&weight_op[2], &weight_avg[2],
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
pixel_shift, chroma444); pixel_shift, chroma444);
}else{ }else{
@@ -803,9 +783,9 @@ static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t
for(j=0; j<4; j++){ for(j=0; j<4; j++){
int sub_x_offset= x_offset + 2*(j&1); int sub_x_offset= x_offset + 2*(j&1);
int sub_y_offset= y_offset + (j&2); int sub_y_offset= y_offset + (j&2);
mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
mc_part(h, n+j, 1, 4, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
&weight_op[6], &weight_avg[6],
&weight_op[2], &weight_avg[2],
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
pixel_shift, chroma444); pixel_shift, chroma444);
} }


+ 8
- 20
libavcodec/h264dsp.c View File

@@ -64,26 +64,14 @@ void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, const int chroma_fo
else\ else\
c->h264_chroma_dc_dequant_idct= FUNC(ff_h264_chroma422_dc_dequant_idct, depth);\ c->h264_chroma_dc_dequant_idct= FUNC(ff_h264_chroma422_dc_dequant_idct, depth);\
\ \
c->weight_h264_pixels_tab[0]= FUNC(weight_h264_pixels16x16, depth);\
c->weight_h264_pixels_tab[1]= FUNC(weight_h264_pixels16x8, depth);\
c->weight_h264_pixels_tab[2]= FUNC(weight_h264_pixels8x16, depth);\
c->weight_h264_pixels_tab[3]= FUNC(weight_h264_pixels8x8, depth);\
c->weight_h264_pixels_tab[4]= FUNC(weight_h264_pixels8x4, depth);\
c->weight_h264_pixels_tab[5]= FUNC(weight_h264_pixels4x8, depth);\
c->weight_h264_pixels_tab[6]= FUNC(weight_h264_pixels4x4, depth);\
c->weight_h264_pixels_tab[7]= FUNC(weight_h264_pixels4x2, depth);\
c->weight_h264_pixels_tab[8]= FUNC(weight_h264_pixels2x4, depth);\
c->weight_h264_pixels_tab[9]= FUNC(weight_h264_pixels2x2, depth);\
c->biweight_h264_pixels_tab[0]= FUNC(biweight_h264_pixels16x16, depth);\
c->biweight_h264_pixels_tab[1]= FUNC(biweight_h264_pixels16x8, depth);\
c->biweight_h264_pixels_tab[2]= FUNC(biweight_h264_pixels8x16, depth);\
c->biweight_h264_pixels_tab[3]= FUNC(biweight_h264_pixels8x8, depth);\
c->biweight_h264_pixels_tab[4]= FUNC(biweight_h264_pixels8x4, depth);\
c->biweight_h264_pixels_tab[5]= FUNC(biweight_h264_pixels4x8, depth);\
c->biweight_h264_pixels_tab[6]= FUNC(biweight_h264_pixels4x4, depth);\
c->biweight_h264_pixels_tab[7]= FUNC(biweight_h264_pixels4x2, depth);\
c->biweight_h264_pixels_tab[8]= FUNC(biweight_h264_pixels2x4, depth);\
c->biweight_h264_pixels_tab[9]= FUNC(biweight_h264_pixels2x2, depth);\
c->weight_h264_pixels_tab[0]= FUNC(weight_h264_pixels16, depth);\
c->weight_h264_pixels_tab[1]= FUNC(weight_h264_pixels8, depth);\
c->weight_h264_pixels_tab[2]= FUNC(weight_h264_pixels4, depth);\
c->weight_h264_pixels_tab[3]= FUNC(weight_h264_pixels2, depth);\
c->biweight_h264_pixels_tab[0]= FUNC(biweight_h264_pixels16, depth);\
c->biweight_h264_pixels_tab[1]= FUNC(biweight_h264_pixels8, depth);\
c->biweight_h264_pixels_tab[2]= FUNC(biweight_h264_pixels4, depth);\
c->biweight_h264_pixels_tab[3]= FUNC(biweight_h264_pixels2, depth);\
\ \
c->h264_v_loop_filter_luma= FUNC(h264_v_loop_filter_luma, depth);\ c->h264_v_loop_filter_luma= FUNC(h264_v_loop_filter_luma, depth);\
c->h264_h_loop_filter_luma= FUNC(h264_h_loop_filter_luma, depth);\ c->h264_h_loop_filter_luma= FUNC(h264_h_loop_filter_luma, depth);\


+ 6
- 4
libavcodec/h264dsp.h View File

@@ -31,16 +31,18 @@
#include "dsputil.h" #include "dsputil.h"


//typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y); //typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y);
typedef void (*h264_weight_func)(uint8_t *block, int stride, int log2_denom, int weight, int offset);
typedef void (*h264_biweight_func)(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset);
typedef void (*h264_weight_func)(uint8_t *block, int stride, int height,
int log2_denom, int weight, int offset);
typedef void (*h264_biweight_func)(uint8_t *dst, uint8_t *src, int stride, int height,
int log2_denom, int weightd, int weights, int offset);


/** /**
* Context for storing H.264 DSP functions * Context for storing H.264 DSP functions
*/ */
typedef struct H264DSPContext{ typedef struct H264DSPContext{
/* weighted MC */ /* weighted MC */
h264_weight_func weight_h264_pixels_tab[10];
h264_biweight_func biweight_h264_pixels_tab[10];
h264_weight_func weight_h264_pixels_tab[4];
h264_biweight_func biweight_h264_pixels_tab[4];


/* loop filter */ /* loop filter */
void (*h264_v_loop_filter_luma)(uint8_t *pix/*align 16*/, int stride, int alpha, int beta, int8_t *tc0); void (*h264_v_loop_filter_luma)(uint8_t *pix/*align 16*/, int stride, int alpha, int beta, int8_t *tc0);


+ 13
- 15
libavcodec/h264dsp_template.c View File

@@ -29,14 +29,16 @@


#define op_scale1(x) block[x] = av_clip_pixel( (block[x]*weight + offset) >> log2_denom ) #define op_scale1(x) block[x] = av_clip_pixel( (block[x]*weight + offset) >> log2_denom )
#define op_scale2(x) dst[x] = av_clip_pixel( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1)) #define op_scale2(x) dst[x] = av_clip_pixel( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1))
#define H264_WEIGHT(W,H) \
static void FUNCC(weight_h264_pixels ## W ## x ## H)(uint8_t *_block, int stride, int log2_denom, int weight, int offset){ \
#define H264_WEIGHT(W) \
static void FUNCC(weight_h264_pixels ## W)(uint8_t *_block, int stride, int height, \
int log2_denom, int weight, int offset) \
{ \
int y; \ int y; \
pixel *block = (pixel*)_block; \ pixel *block = (pixel*)_block; \
stride /= sizeof(pixel); \ stride /= sizeof(pixel); \
offset <<= (log2_denom + (BIT_DEPTH-8)); \ offset <<= (log2_denom + (BIT_DEPTH-8)); \
if(log2_denom) offset += 1<<(log2_denom-1); \ if(log2_denom) offset += 1<<(log2_denom-1); \
for(y=0; y<H; y++, block += stride){ \
for (y = 0; y < height; y++, block += stride) { \
op_scale1(0); \ op_scale1(0); \
op_scale1(1); \ op_scale1(1); \
if(W==2) continue; \ if(W==2) continue; \
@@ -58,14 +60,16 @@ static void FUNCC(weight_h264_pixels ## W ## x ## H)(uint8_t *_block, int stride
op_scale1(15); \ op_scale1(15); \
} \ } \
} \ } \
static void FUNCC(biweight_h264_pixels ## W ## x ## H)(uint8_t *_dst, uint8_t *_src, int stride, int log2_denom, int weightd, int weights, int offset){ \
static void FUNCC(biweight_h264_pixels ## W)(uint8_t *_dst, uint8_t *_src, int stride, int height, \
int log2_denom, int weightd, int weights, int offset) \
{ \
int y; \ int y; \
pixel *dst = (pixel*)_dst; \ pixel *dst = (pixel*)_dst; \
pixel *src = (pixel*)_src; \ pixel *src = (pixel*)_src; \
stride /= sizeof(pixel); \ stride /= sizeof(pixel); \
offset <<= (BIT_DEPTH-8); \ offset <<= (BIT_DEPTH-8); \
offset = ((offset + 1) | 1) << log2_denom; \ offset = ((offset + 1) | 1) << log2_denom; \
for(y=0; y<H; y++, dst += stride, src += stride){ \
for (y = 0; y < height; y++, dst += stride, src += stride) { \
op_scale2(0); \ op_scale2(0); \
op_scale2(1); \ op_scale2(1); \
if(W==2) continue; \ if(W==2) continue; \
@@ -88,16 +92,10 @@ static void FUNCC(biweight_h264_pixels ## W ## x ## H)(uint8_t *_dst, uint8_t *_
} \ } \
} }


H264_WEIGHT(16,16)
H264_WEIGHT(16,8)
H264_WEIGHT(8,16)
H264_WEIGHT(8,8)
H264_WEIGHT(8,4)
H264_WEIGHT(4,8)
H264_WEIGHT(4,4)
H264_WEIGHT(4,2)
H264_WEIGHT(2,4)
H264_WEIGHT(2,2)
H264_WEIGHT(16)
H264_WEIGHT(8)
H264_WEIGHT(4)
H264_WEIGHT(2)


#undef op_scale1 #undef op_scale1
#undef op_scale2 #undef op_scale2


+ 19
- 25
libavcodec/ppc/h264_altivec.c View File

@@ -843,7 +843,8 @@ static void h264_h_loop_filter_luma_altivec(uint8_t *pix, int stride, int alpha,
} }


static av_always_inline static av_always_inline
void weight_h264_WxH_altivec(uint8_t *block, int stride, int log2_denom, int weight, int offset, int w, int h)
void weight_h264_W_altivec(uint8_t *block, int stride, int height,
int log2_denom, int weight, int offset, int w)
{ {
int y, aligned; int y, aligned;
vec_u8 vblock; vec_u8 vblock;
@@ -864,7 +865,7 @@ void weight_h264_WxH_altivec(uint8_t *block, int stride, int log2_denom, int wei
voffset = vec_splat(vtemp, 5); voffset = vec_splat(vtemp, 5);
aligned = !((unsigned long)block & 0xf); aligned = !((unsigned long)block & 0xf);


for (y=0; y<h; y++) {
for (y = 0; y < height; y++) {
vblock = vec_ld(0, block); vblock = vec_ld(0, block);


v0 = (vec_s16)vec_mergeh(zero_u8v, vblock); v0 = (vec_s16)vec_mergeh(zero_u8v, vblock);
@@ -888,8 +889,8 @@ void weight_h264_WxH_altivec(uint8_t *block, int stride, int log2_denom, int wei
} }


static av_always_inline static av_always_inline
void biweight_h264_WxH_altivec(uint8_t *dst, uint8_t *src, int stride, int log2_denom,
int weightd, int weights, int offset, int w, int h)
void biweight_h264_W_altivec(uint8_t *dst, uint8_t *src, int stride, int height,
int log2_denom, int weightd, int weights, int offset, int w)
{ {
int y, dst_aligned, src_aligned; int y, dst_aligned, src_aligned;
vec_u8 vsrc, vdst; vec_u8 vsrc, vdst;
@@ -912,7 +913,7 @@ void biweight_h264_WxH_altivec(uint8_t *dst, uint8_t *src, int stride, int log2_
dst_aligned = !((unsigned long)dst & 0xf); dst_aligned = !((unsigned long)dst & 0xf);
src_aligned = !((unsigned long)src & 0xf); src_aligned = !((unsigned long)src & 0xf);


for (y=0; y<h; y++) {
for (y = 0; y < height; y++) {
vdst = vec_ld(0, dst); vdst = vec_ld(0, dst);
vsrc = vec_ld(0, src); vsrc = vec_ld(0, src);


@@ -952,19 +953,18 @@ void biweight_h264_WxH_altivec(uint8_t *dst, uint8_t *src, int stride, int log2_
} }
} }


#define H264_WEIGHT(W,H) \
static void ff_weight_h264_pixels ## W ## x ## H ## _altivec(uint8_t *block, int stride, int log2_denom, int weight, int offset){ \
weight_h264_WxH_altivec(block, stride, log2_denom, weight, offset, W, H); \
#define H264_WEIGHT(W) \
static void ff_weight_h264_pixels ## W ## _altivec(uint8_t *block, int stride, int height, \
int log2_denom, int weight, int offset){ \
weight_h264_W_altivec(block, stride, height, log2_denom, weight, offset, W); \
}\ }\
static void ff_biweight_h264_pixels ## W ## x ## H ## _altivec(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset){ \
biweight_h264_WxH_altivec(dst, src, stride, log2_denom, weightd, weights, offset, W, H); \
static void ff_biweight_h264_pixels ## W ## _altivec(uint8_t *dst, uint8_t *src, int stride, int height, \
int log2_denom, int weightd, int weights, int offset){ \
biweight_h264_W_altivec(dst, src, stride, height, log2_denom, weightd, weights, offset, W); \
} }


H264_WEIGHT(16,16)
H264_WEIGHT(16, 8)
H264_WEIGHT( 8,16)
H264_WEIGHT( 8, 8)
H264_WEIGHT( 8, 4)
H264_WEIGHT(16)
H264_WEIGHT( 8)


void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) { void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) {
const int high_bit_depth = avctx->bits_per_raw_sample > 8; const int high_bit_depth = avctx->bits_per_raw_sample > 8;
@@ -1015,16 +1015,10 @@ void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth, const int chrom
c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_altivec; c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_altivec;
c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_altivec; c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_altivec;


c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels16x16_altivec;
c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels16x8_altivec;
c->weight_h264_pixels_tab[2] = ff_weight_h264_pixels8x16_altivec;
c->weight_h264_pixels_tab[3] = ff_weight_h264_pixels8x8_altivec;
c->weight_h264_pixels_tab[4] = ff_weight_h264_pixels8x4_altivec;
c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels16x16_altivec;
c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels16x8_altivec;
c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels8x16_altivec;
c->biweight_h264_pixels_tab[3] = ff_biweight_h264_pixels8x8_altivec;
c->biweight_h264_pixels_tab[4] = ff_biweight_h264_pixels8x4_altivec;
c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels16_altivec;
c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels8_altivec;
c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels16_altivec;
c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels8_altivec;
} }
} }
} }

+ 79
- 131
libavcodec/x86/h264_weight.asm View File

@@ -28,21 +28,20 @@ SECTION .text
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; biweight pred: ; biweight pred:
; ;
; void h264_biweight_16x16_sse2(uint8_t *dst, uint8_t *src, int stride,
; int log2_denom, int weightd, int weights,
; int offset);
; void h264_biweight_16_sse2(uint8_t *dst, uint8_t *src, int stride,
; int height, int log2_denom, int weightd,
; int weights, int offset);
; and ; and
; void h264_weight_16x16_sse2(uint8_t *dst, int stride,
; int log2_denom, int weight,
; int offset);
; void h264_weight_16_sse2(uint8_t *dst, int stride, int height,
; int log2_denom, int weight, int offset);
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------


%macro WEIGHT_SETUP 0 %macro WEIGHT_SETUP 0
add r4, r4
inc r4
movd m3, r3d
movd m5, r4d
movd m6, r2d
add r5, r5
inc r5
movd m3, r4d
movd m5, r5d
movd m6, r3d
pslld m5, m6 pslld m5, m6
psrld m5, 1 psrld m5, 1
%if mmsize == 16 %if mmsize == 16
@@ -71,60 +70,41 @@ SECTION .text
packuswb m0, m1 packuswb m0, m1
%endmacro %endmacro


%macro WEIGHT_FUNC_DBL_MM 1
cglobal h264_weight_16x%1_mmx2, 5, 5, 0
INIT_MMX
cglobal h264_weight_16_mmx2, 6, 6, 0
WEIGHT_SETUP WEIGHT_SETUP
mov r2, %1
%if %1 == 16
.nextrow .nextrow
WEIGHT_OP 0, 4 WEIGHT_OP 0, 4
mova [r0 ], m0 mova [r0 ], m0
WEIGHT_OP 8, 12 WEIGHT_OP 8, 12
mova [r0+8], m0 mova [r0+8], m0
add r0, r1 add r0, r1
dec r2
dec r2d
jnz .nextrow jnz .nextrow
REP_RET REP_RET
%else
jmp mangle(ff_h264_weight_16x16_mmx2.nextrow)
%endif
%endmacro


INIT_MMX
WEIGHT_FUNC_DBL_MM 16
WEIGHT_FUNC_DBL_MM 8

%macro WEIGHT_FUNC_MM 4
cglobal h264_weight_%1x%2_%4, 7, 7, %3
%macro WEIGHT_FUNC_MM 3
cglobal h264_weight_%1_%3, 6, 6, %2
WEIGHT_SETUP WEIGHT_SETUP
mov r2, %2
%if %2 == 16
.nextrow .nextrow
WEIGHT_OP 0, mmsize/2 WEIGHT_OP 0, mmsize/2
mova [r0], m0 mova [r0], m0
add r0, r1 add r0, r1
dec r2
dec r2d
jnz .nextrow jnz .nextrow
REP_RET REP_RET
%else
jmp mangle(ff_h264_weight_%1x16_%4.nextrow)
%endif
%endmacro %endmacro


INIT_MMX INIT_MMX
WEIGHT_FUNC_MM 8, 16, 0, mmx2
WEIGHT_FUNC_MM 8, 8, 0, mmx2
WEIGHT_FUNC_MM 8, 4, 0, mmx2
WEIGHT_FUNC_MM 8, 0, mmx2
INIT_XMM INIT_XMM
WEIGHT_FUNC_MM 16, 16, 8, sse2
WEIGHT_FUNC_MM 16, 8, 8, sse2
WEIGHT_FUNC_MM 16, 8, sse2


%macro WEIGHT_FUNC_HALF_MM 5
cglobal h264_weight_%1x%2_%5, 5, 5, %4
%macro WEIGHT_FUNC_HALF_MM 3
cglobal h264_weight_%1_%3, 6, 6, %2
WEIGHT_SETUP WEIGHT_SETUP
mov r2, %2/2
sar r2d, 1
lea r3, [r1*2] lea r3, [r1*2]
%if %2 == mmsize
.nextrow .nextrow
WEIGHT_OP 0, r1 WEIGHT_OP 0, r1
movh [r0], m0 movh [r0], m0
@@ -135,31 +115,34 @@ cglobal h264_weight_%1x%2_%5, 5, 5, %4
movh [r0+r1], m0 movh [r0+r1], m0
%endif %endif
add r0, r3 add r0, r3
dec r2
dec r2d
jnz .nextrow jnz .nextrow
REP_RET REP_RET
%else
jmp mangle(ff_h264_weight_%1x%3_%5.nextrow)
%endif
%endmacro %endmacro


INIT_MMX INIT_MMX
WEIGHT_FUNC_HALF_MM 4, 8, 8, 0, mmx2
WEIGHT_FUNC_HALF_MM 4, 4, 8, 0, mmx2
WEIGHT_FUNC_HALF_MM 4, 2, 8, 0, mmx2
WEIGHT_FUNC_HALF_MM 4, 0, mmx2
INIT_XMM INIT_XMM
WEIGHT_FUNC_HALF_MM 8, 16, 16, 8, sse2
WEIGHT_FUNC_HALF_MM 8, 8, 16, 8, sse2
WEIGHT_FUNC_HALF_MM 8, 4, 16, 8, sse2
WEIGHT_FUNC_HALF_MM 8, 8, sse2


%macro BIWEIGHT_SETUP 0 %macro BIWEIGHT_SETUP 0
add r6, 1
or r6, 1
add r3, 1
movd m3, r4d
movd m4, r5d
movd m5, r6d
movd m6, r3d
%ifdef ARCH_X86_64
%define off_regd r11d
%else
%define off_regd r3d
%endif
mov off_regd, r7m
add off_regd, 1
or off_regd, 1
add r4, 1
movd m3, r5d
movd m4, r6d
movd m5, off_regd
movd m6, r4d
pslld m5, m6 pslld m5, m6
psrld m5, 1 psrld m5, 1
%if mmsize == 16 %if mmsize == 16
@@ -195,11 +178,10 @@ WEIGHT_FUNC_HALF_MM 8, 4, 16, 8, sse2
packuswb m0, m1 packuswb m0, m1
%endmacro %endmacro


%macro BIWEIGHT_FUNC_DBL_MM 1
cglobal h264_biweight_16x%1_mmx2, 7, 7, 0
INIT_MMX
cglobal h264_biweight_16_mmx2, 7, 7, 0
BIWEIGHT_SETUP BIWEIGHT_SETUP
mov r3, %1
%if %1 == 16
movifnidn r3d, r3m
.nextrow .nextrow
BIWEIGHT_STEPA 0, 1, 0 BIWEIGHT_STEPA 0, 1, 0
BIWEIGHT_STEPA 1, 2, 4 BIWEIGHT_STEPA 1, 2, 4
@@ -211,23 +193,14 @@ cglobal h264_biweight_16x%1_mmx2, 7, 7, 0
mova [r0+8], m0 mova [r0+8], m0
add r0, r2 add r0, r2
add r1, r2 add r1, r2
dec r3
dec r3d
jnz .nextrow jnz .nextrow
REP_RET REP_RET
%else
jmp mangle(ff_h264_biweight_16x16_mmx2.nextrow)
%endif
%endmacro


INIT_MMX
BIWEIGHT_FUNC_DBL_MM 16
BIWEIGHT_FUNC_DBL_MM 8

%macro BIWEIGHT_FUNC_MM 4
cglobal h264_biweight_%1x%2_%4, 7, 7, %3
%macro BIWEIGHT_FUNC_MM 3
cglobal h264_biweight_%1_%3, 7, 7, %2
BIWEIGHT_SETUP BIWEIGHT_SETUP
mov r3, %2
%if %2 == 16
movifnidn r3d, r3m
.nextrow .nextrow
BIWEIGHT_STEPA 0, 1, 0 BIWEIGHT_STEPA 0, 1, 0
BIWEIGHT_STEPA 1, 2, mmsize/2 BIWEIGHT_STEPA 1, 2, mmsize/2
@@ -235,28 +208,22 @@ cglobal h264_biweight_%1x%2_%4, 7, 7, %3
mova [r0], m0 mova [r0], m0
add r0, r2 add r0, r2
add r1, r2 add r1, r2
dec r3
dec r3d
jnz .nextrow jnz .nextrow
REP_RET REP_RET
%else
jmp mangle(ff_h264_biweight_%1x16_%4.nextrow)
%endif
%endmacro %endmacro


INIT_MMX INIT_MMX
BIWEIGHT_FUNC_MM 8, 16, 0, mmx2
BIWEIGHT_FUNC_MM 8, 8, 0, mmx2
BIWEIGHT_FUNC_MM 8, 4, 0, mmx2
BIWEIGHT_FUNC_MM 8, 0, mmx2
INIT_XMM INIT_XMM
BIWEIGHT_FUNC_MM 16, 16, 8, sse2
BIWEIGHT_FUNC_MM 16, 8, 8, sse2
BIWEIGHT_FUNC_MM 16, 8, sse2


%macro BIWEIGHT_FUNC_HALF_MM 5
cglobal h264_biweight_%1x%2_%5, 7, 7, %4
%macro BIWEIGHT_FUNC_HALF_MM 3
cglobal h264_biweight_%1_%3, 7, 7, %2
BIWEIGHT_SETUP BIWEIGHT_SETUP
mov r3, %2/2
movifnidn r3d, r3m
sar r3, 1
lea r4, [r2*2] lea r4, [r2*2]
%if %2 == mmsize
.nextrow .nextrow
BIWEIGHT_STEPA 0, 1, 0 BIWEIGHT_STEPA 0, 1, 0
BIWEIGHT_STEPA 1, 2, r2 BIWEIGHT_STEPA 1, 2, r2
@@ -270,31 +237,30 @@ cglobal h264_biweight_%1x%2_%5, 7, 7, %4
%endif %endif
add r0, r4 add r0, r4
add r1, r4 add r1, r4
dec r3
dec r3d
jnz .nextrow jnz .nextrow
REP_RET REP_RET
%else
jmp mangle(ff_h264_biweight_%1x%3_%5.nextrow)
%endif
%endmacro %endmacro


INIT_MMX INIT_MMX
BIWEIGHT_FUNC_HALF_MM 4, 8, 8, 0, mmx2
BIWEIGHT_FUNC_HALF_MM 4, 4, 8, 0, mmx2
BIWEIGHT_FUNC_HALF_MM 4, 2, 8, 0, mmx2
BIWEIGHT_FUNC_HALF_MM 4, 0, mmx2
INIT_XMM INIT_XMM
BIWEIGHT_FUNC_HALF_MM 8, 16, 16, 8, sse2
BIWEIGHT_FUNC_HALF_MM 8, 8, 16, 8, sse2
BIWEIGHT_FUNC_HALF_MM 8, 4, 16, 8, sse2
BIWEIGHT_FUNC_HALF_MM 8, 8, sse2


%macro BIWEIGHT_SSSE3_SETUP 0 %macro BIWEIGHT_SSSE3_SETUP 0
add r6, 1
or r6, 1
add r3, 1
movd m4, r4d
movd m0, r5d
movd m5, r6d
movd m6, r3d
%ifdef ARCH_X86_64
%define off_regd r11d
%else
%define off_regd r3d
%endif
mov off_regd, r7m
add off_regd, 1
or off_regd, 1
add r4, 1
movd m4, r5d
movd m0, r6d
movd m5, off_regd
movd m6, r4d
pslld m5, m6 pslld m5, m6
psrld m5, 1 psrld m5, 1
punpcklbw m4, m0 punpcklbw m4, m0
@@ -314,12 +280,11 @@ BIWEIGHT_FUNC_HALF_MM 8, 4, 16, 8, sse2
packuswb m0, m2 packuswb m0, m2
%endmacro %endmacro


%macro BIWEIGHT_SSSE3_16 1
cglobal h264_biweight_16x%1_ssse3, 7, 7, 8
INIT_XMM
cglobal h264_biweight_16_ssse3, 7, 7, 8
BIWEIGHT_SSSE3_SETUP BIWEIGHT_SSSE3_SETUP
mov r3, %1
movifnidn r3d, r3m


%if %1 == 16
.nextrow .nextrow
movh m0, [r0] movh m0, [r0]
movh m2, [r0+8] movh m2, [r0+8]
@@ -330,25 +295,17 @@ cglobal h264_biweight_16x%1_ssse3, 7, 7, 8
mova [r0], m0 mova [r0], m0
add r0, r2 add r0, r2
add r1, r2 add r1, r2
dec r3
dec r3d
jnz .nextrow jnz .nextrow
REP_RET REP_RET
%else
jmp mangle(ff_h264_biweight_16x16_ssse3.nextrow)
%endif
%endmacro


INIT_XMM INIT_XMM
BIWEIGHT_SSSE3_16 16
BIWEIGHT_SSSE3_16 8

%macro BIWEIGHT_SSSE3_8 1
cglobal h264_biweight_8x%1_ssse3, 7, 7, 8
cglobal h264_biweight_8_ssse3, 7, 7, 8
BIWEIGHT_SSSE3_SETUP BIWEIGHT_SSSE3_SETUP
mov r3, %1/2
movifnidn r3d, r3m
sar r3, 1
lea r4, [r2*2] lea r4, [r2*2]


%if %1 == 16
.nextrow .nextrow
movh m0, [r0] movh m0, [r0]
movh m1, [r1] movh m1, [r1]
@@ -361,15 +318,6 @@ cglobal h264_biweight_8x%1_ssse3, 7, 7, 8
movhps [r0+r2], m0 movhps [r0+r2], m0
add r0, r4 add r0, r4
add r1, r4 add r1, r4
dec r3
dec r3d
jnz .nextrow jnz .nextrow
REP_RET REP_RET
%else
jmp mangle(ff_h264_biweight_8x16_ssse3.nextrow)
%endif
%endmacro

INIT_XMM
BIWEIGHT_SSSE3_8 16
BIWEIGHT_SSSE3_8 8
BIWEIGHT_SSSE3_8 4

+ 50
- 95
libavcodec/x86/h264_weight_10bit.asm View File

@@ -36,33 +36,26 @@ cextern pw_1
SECTION .text SECTION .text


;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void h264_weight(uint8_t *dst, int stride, int log2_denom,
; void h264_weight(uint8_t *dst, int stride, int height, int log2_denom,
; int weight, int offset); ; int weight, int offset);
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
%ifdef ARCH_X86_32
DECLARE_REG_TMP 2
%else
DECLARE_REG_TMP 10
%endif

%macro WEIGHT_PROLOGUE 1
mov t0, %1
%macro WEIGHT_PROLOGUE 0
.prologue .prologue
PROLOGUE 0,5,8
PROLOGUE 0,6,8
movifnidn r0, r0mp movifnidn r0, r0mp
movifnidn r1d, r1m movifnidn r1d, r1m
movifnidn r3d, r3m
movifnidn r4d, r4m movifnidn r4d, r4m
movifnidn r5d, r5m
%endmacro %endmacro


%macro WEIGHT_SETUP 1 %macro WEIGHT_SETUP 1
mova m0, [pw_1] mova m0, [pw_1]
movd m2, r2m
movd m2, r3m
pslld m0, m2 ; 1<<log2_denom pslld m0, m2 ; 1<<log2_denom
SPLATW m0, m0 SPLATW m0, m0
shl r4, 19 ; *8, move to upper half of dword
lea r4, [r4+r3*2+0x10000]
movd m3, r4d ; weight<<1 | 1+(offset<<(3))
shl r5, 19 ; *8, move to upper half of dword
lea r5, [r5+r4*2+0x10000]
movd m3, r5d ; weight<<1 | 1+(offset<<(3))
pshufd m3, m3, 0 pshufd m3, m3, 0
mova m4, [pw_pixel_max] mova m4, [pw_pixel_max]
paddw m2, [sq_1] ; log2_denom+1 paddw m2, [sq_1] ; log2_denom+1
@@ -96,8 +89,8 @@ DECLARE_REG_TMP 10
%endmacro %endmacro


%macro WEIGHT_FUNC_DBL 1 %macro WEIGHT_FUNC_DBL 1
cglobal h264_weight_16x16_10_%1
WEIGHT_PROLOGUE 16
cglobal h264_weight_16_10_%1
WEIGHT_PROLOGUE
WEIGHT_SETUP %1 WEIGHT_SETUP %1
.nextrow .nextrow
WEIGHT_OP %1, 0 WEIGHT_OP %1, 0
@@ -105,13 +98,9 @@ cglobal h264_weight_16x16_10_%1
WEIGHT_OP %1, 16 WEIGHT_OP %1, 16
mova [r0+16], m5 mova [r0+16], m5
add r0, r1 add r0, r1
dec t0
dec r2d
jnz .nextrow jnz .nextrow
REP_RET REP_RET

cglobal h264_weight_16x8_10_%1
mov t0, 8
jmp mangle(ff_h264_weight_16x16_10_%1.prologue)
%endmacro %endmacro


INIT_XMM INIT_XMM
@@ -120,24 +109,16 @@ WEIGHT_FUNC_DBL sse4




%macro WEIGHT_FUNC_MM 1 %macro WEIGHT_FUNC_MM 1
cglobal h264_weight_8x16_10_%1
WEIGHT_PROLOGUE 16
cglobal h264_weight_8_10_%1
WEIGHT_PROLOGUE
WEIGHT_SETUP %1 WEIGHT_SETUP %1
.nextrow .nextrow
WEIGHT_OP %1, 0 WEIGHT_OP %1, 0
mova [r0], m5 mova [r0], m5
add r0, r1 add r0, r1
dec t0
dec r2d
jnz .nextrow jnz .nextrow
REP_RET REP_RET

cglobal h264_weight_8x8_10_%1
mov t0, 8
jmp mangle(ff_h264_weight_8x16_10_%1.prologue)

cglobal h264_weight_8x4_10_%1
mov t0, 4
jmp mangle(ff_h264_weight_8x16_10_%1.prologue)
%endmacro %endmacro


INIT_XMM INIT_XMM
@@ -146,8 +127,9 @@ WEIGHT_FUNC_MM sse4




%macro WEIGHT_FUNC_HALF_MM 1 %macro WEIGHT_FUNC_HALF_MM 1
cglobal h264_weight_4x8_10_%1
WEIGHT_PROLOGUE 4
cglobal h264_weight_4_10_%1
WEIGHT_PROLOGUE
sar r2d, 1
WEIGHT_SETUP %1 WEIGHT_SETUP %1
lea r3, [r1*2] lea r3, [r1*2]
.nextrow .nextrow
@@ -155,17 +137,9 @@ cglobal h264_weight_4x8_10_%1
movh [r0], m5 movh [r0], m5
movhps [r0+r1], m5 movhps [r0+r1], m5
add r0, r3 add r0, r3
dec t0
dec r2d
jnz .nextrow jnz .nextrow
REP_RET REP_RET

cglobal h264_weight_4x4_10_%1
mov t0, 2
jmp mangle(ff_h264_weight_4x8_10_%1.prologue)

cglobal h264_weight_4x2_10_%1
mov t0, 1
jmp mangle(ff_h264_weight_4x8_10_%1.prologue)
%endmacro %endmacro


INIT_XMM INIT_XMM
@@ -174,40 +148,40 @@ WEIGHT_FUNC_HALF_MM sse4




;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void h264_biweight(uint8_t *dst, uint8_t *src, int stride, int log2_denom,
; int weightd, int weights, int offset);
; void h264_biweight(uint8_t *dst, uint8_t *src, int stride, int height,
; int log2_denom, int weightd, int weights, int offset);
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
%ifdef ARCH_X86_32 %ifdef ARCH_X86_32
DECLARE_REG_TMP 2,3
DECLARE_REG_TMP 3
%else %else
DECLARE_REG_TMP 10,2
DECLARE_REG_TMP 10
%endif %endif


%macro BIWEIGHT_PROLOGUE 1
mov t0, %1
%macro BIWEIGHT_PROLOGUE 0
.prologue .prologue
PROLOGUE 0,7,8 PROLOGUE 0,7,8
movifnidn r0, r0mp movifnidn r0, r0mp
movifnidn r1, r1mp movifnidn r1, r1mp
movifnidn t1d, r2m
movifnidn r4d, r4m
movifnidn r2d, r2m
movifnidn r5d, r5m movifnidn r5d, r5m
movifnidn r6d, r6m movifnidn r6d, r6m
movifnidn t0d, r7m
%endmacro %endmacro


%macro BIWEIGHT_SETUP 1 %macro BIWEIGHT_SETUP 1
lea r6, [r6*4+1] ; (offset<<2)+1
or r6, 1
shl r5, 16
or r4, r5
movd m4, r4d ; weightd | weights
movd m5, r6d ; (offset+1)|1
movd m6, r3m ; log2_denom
lea t0, [t0*4+1] ; (offset<<2)+1
or t0, 1
shl r6, 16
or r5, r6
movd m4, r5d ; weightd | weights
movd m5, t0d ; (offset+1)|1
movd m6, r4m ; log2_denom
pslld m5, m6 ; (((offset<<2)+1)|1)<<log2_denom pslld m5, m6 ; (((offset<<2)+1)|1)<<log2_denom
paddd m6, [sq_1] paddd m6, [sq_1]
pshufd m4, m4, 0 pshufd m4, m4, 0
pshufd m5, m5, 0 pshufd m5, m5, 0
mova m3, [pw_pixel_max] mova m3, [pw_pixel_max]
movifnidn r3d, r3m
%ifnidn %1, sse4 %ifnidn %1, sse4
pxor m7, m7 pxor m7, m7
%endif %endif
@@ -243,23 +217,19 @@ DECLARE_REG_TMP 10,2
%endmacro %endmacro


%macro BIWEIGHT_FUNC_DBL 1 %macro BIWEIGHT_FUNC_DBL 1
cglobal h264_biweight_16x16_10_%1
BIWEIGHT_PROLOGUE 16
cglobal h264_biweight_16_10_%1
BIWEIGHT_PROLOGUE
BIWEIGHT_SETUP %1 BIWEIGHT_SETUP %1
.nextrow .nextrow
BIWEIGHT %1, 0 BIWEIGHT %1, 0
mova [r0 ], m0 mova [r0 ], m0
BIWEIGHT %1, 16 BIWEIGHT %1, 16
mova [r0+16], m0 mova [r0+16], m0
add r0, t1
add r1, t1
dec t0
add r0, r2
add r1, r2
dec r3d
jnz .nextrow jnz .nextrow
REP_RET REP_RET

cglobal h264_biweight_16x8_10_%1
mov t0, 8
jmp mangle(ff_h264_biweight_16x16_10_%1.prologue)
%endmacro %endmacro


INIT_XMM INIT_XMM
@@ -267,25 +237,17 @@ BIWEIGHT_FUNC_DBL sse2
BIWEIGHT_FUNC_DBL sse4 BIWEIGHT_FUNC_DBL sse4


%macro BIWEIGHT_FUNC 1 %macro BIWEIGHT_FUNC 1
cglobal h264_biweight_8x16_10_%1
BIWEIGHT_PROLOGUE 16
cglobal h264_biweight_8_10_%1
BIWEIGHT_PROLOGUE
BIWEIGHT_SETUP %1 BIWEIGHT_SETUP %1
.nextrow .nextrow
BIWEIGHT %1, 0 BIWEIGHT %1, 0
mova [r0], m0 mova [r0], m0
add r0, t1
add r1, t1
dec t0
add r0, r2
add r1, r2
dec r3d
jnz .nextrow jnz .nextrow
REP_RET REP_RET

cglobal h264_biweight_8x8_10_%1
mov t0, 8
jmp mangle(ff_h264_biweight_8x16_10_%1.prologue)

cglobal h264_biweight_8x4_10_%1
mov t0, 4
jmp mangle(ff_h264_biweight_8x16_10_%1.prologue)
%endmacro %endmacro


INIT_XMM INIT_XMM
@@ -293,27 +255,20 @@ BIWEIGHT_FUNC sse2
BIWEIGHT_FUNC sse4 BIWEIGHT_FUNC sse4


%macro BIWEIGHT_FUNC_HALF 1 %macro BIWEIGHT_FUNC_HALF 1
cglobal h264_biweight_4x8_10_%1
BIWEIGHT_PROLOGUE 4
cglobal h264_biweight_4_10_%1
BIWEIGHT_PROLOGUE
BIWEIGHT_SETUP %1 BIWEIGHT_SETUP %1
lea r4, [t1*2]
sar r3d, 1
lea r4, [r2*2]
.nextrow .nextrow
BIWEIGHT %1, 0, t1
BIWEIGHT %1, 0, r2
movh [r0 ], m0 movh [r0 ], m0
movhps [r0+t1], m0
movhps [r0+r2], m0
add r0, r4 add r0, r4
add r1, r4 add r1, r4
dec t0
dec r3d
jnz .nextrow jnz .nextrow
REP_RET REP_RET

cglobal h264_biweight_4x4_10_%1
mov t0, 2
jmp mangle(ff_h264_biweight_4x8_10_%1.prologue)

cglobal h264_biweight_4x2_10_%1
mov t0, 1
jmp mangle(ff_h264_biweight_4x8_10_%1.prologue)
%endmacro %endmacro


INIT_XMM INIT_XMM


+ 63
- 112
libavcodec/x86/h264dsp_mmx.c View File

@@ -298,57 +298,47 @@ LF_IFUNC(v, luma_intra, 10, mmxext)
/***********************************/ /***********************************/
/* weighted prediction */ /* weighted prediction */


#define H264_WEIGHT(W, H, OPT) \
void ff_h264_weight_ ## W ## x ## H ## _ ## OPT(uint8_t *dst, \
int stride, int log2_denom, int weight, int offset);
#define H264_WEIGHT(W, OPT) \
void ff_h264_weight_ ## W ## _ ## OPT(uint8_t *dst, \
int stride, int height, int log2_denom, int weight, int offset);


#define H264_BIWEIGHT(W, H, OPT) \
void ff_h264_biweight_ ## W ## x ## H ## _ ## OPT(uint8_t *dst, \
uint8_t *src, int stride, int log2_denom, int weightd, \
#define H264_BIWEIGHT(W, OPT) \
void ff_h264_biweight_ ## W ## _ ## OPT(uint8_t *dst, \
uint8_t *src, int stride, int height, int log2_denom, int weightd, \
int weights, int offset); int weights, int offset);


#define H264_BIWEIGHT_MMX(W,H) \
H264_WEIGHT (W, H, mmx2) \
H264_BIWEIGHT(W, H, mmx2)

#define H264_BIWEIGHT_MMX_SSE(W,H) \
H264_BIWEIGHT_MMX(W, H) \
H264_WEIGHT (W, H, sse2) \
H264_BIWEIGHT (W, H, sse2) \
H264_BIWEIGHT (W, H, ssse3)

H264_BIWEIGHT_MMX_SSE(16, 16)
H264_BIWEIGHT_MMX_SSE(16, 8)
H264_BIWEIGHT_MMX_SSE( 8, 16)
H264_BIWEIGHT_MMX_SSE( 8, 8)
H264_BIWEIGHT_MMX_SSE( 8, 4)
H264_BIWEIGHT_MMX ( 4, 8)
H264_BIWEIGHT_MMX ( 4, 4)
H264_BIWEIGHT_MMX ( 4, 2)

#define H264_WEIGHT_10(W, H, DEPTH, OPT) \
void ff_h264_weight_ ## W ## x ## H ## _ ## DEPTH ## _ ## OPT(uint8_t *dst, \
int stride, int log2_denom, int weight, int offset);

#define H264_BIWEIGHT_10(W, H, DEPTH, OPT) \
void ff_h264_biweight_ ## W ## x ## H ## _ ## DEPTH ## _ ## OPT \
(uint8_t *dst, uint8_t *src, int stride, int log2_denom, \
#define H264_BIWEIGHT_MMX(W) \
H264_WEIGHT (W, mmx2) \
H264_BIWEIGHT(W, mmx2)

#define H264_BIWEIGHT_MMX_SSE(W) \
H264_BIWEIGHT_MMX(W) \
H264_WEIGHT (W, sse2) \
H264_BIWEIGHT (W, sse2) \
H264_BIWEIGHT (W, ssse3)

H264_BIWEIGHT_MMX_SSE(16)
H264_BIWEIGHT_MMX_SSE( 8)
H264_BIWEIGHT_MMX ( 4)

#define H264_WEIGHT_10(W, DEPTH, OPT) \
void ff_h264_weight_ ## W ## _ ## DEPTH ## _ ## OPT(uint8_t *dst, \
int stride, int height, int log2_denom, int weight, int offset);

#define H264_BIWEIGHT_10(W, DEPTH, OPT) \
void ff_h264_biweight_ ## W ## _ ## DEPTH ## _ ## OPT \
(uint8_t *dst, uint8_t *src, int stride, int height, int log2_denom, \
int weightd, int weights, int offset); int weightd, int weights, int offset);


#define H264_BIWEIGHT_10_SSE(W, H, DEPTH) \
H264_WEIGHT_10 (W, H, DEPTH, sse2) \
H264_WEIGHT_10 (W, H, DEPTH, sse4) \
H264_BIWEIGHT_10(W, H, DEPTH, sse2) \
H264_BIWEIGHT_10(W, H, DEPTH, sse4)

H264_BIWEIGHT_10_SSE(16, 16, 10)
H264_BIWEIGHT_10_SSE(16, 8, 10)
H264_BIWEIGHT_10_SSE( 8, 16, 10)
H264_BIWEIGHT_10_SSE( 8, 8, 10)
H264_BIWEIGHT_10_SSE( 8, 4, 10)
H264_BIWEIGHT_10_SSE( 4, 8, 10)
H264_BIWEIGHT_10_SSE( 4, 4, 10)
H264_BIWEIGHT_10_SSE( 4, 2, 10)
#define H264_BIWEIGHT_10_SSE(W, DEPTH) \
H264_WEIGHT_10 (W, DEPTH, sse2) \
H264_WEIGHT_10 (W, DEPTH, sse4) \
H264_BIWEIGHT_10(W, DEPTH, sse2) \
H264_BIWEIGHT_10(W, DEPTH, sse4)

H264_BIWEIGHT_10_SSE(16, 10)
H264_BIWEIGHT_10_SSE( 8, 10)
H264_BIWEIGHT_10_SSE( 4, 10)


void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chroma_format_idc) void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chroma_format_idc)
{ {
@@ -394,23 +384,13 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chrom
c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_mmxext; c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_mmxext;
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_mmxext; c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_mmxext;
#endif #endif
c->weight_h264_pixels_tab[0]= ff_h264_weight_16x16_mmx2;
c->weight_h264_pixels_tab[1]= ff_h264_weight_16x8_mmx2;
c->weight_h264_pixels_tab[2]= ff_h264_weight_8x16_mmx2;
c->weight_h264_pixels_tab[3]= ff_h264_weight_8x8_mmx2;
c->weight_h264_pixels_tab[4]= ff_h264_weight_8x4_mmx2;
c->weight_h264_pixels_tab[5]= ff_h264_weight_4x8_mmx2;
c->weight_h264_pixels_tab[6]= ff_h264_weight_4x4_mmx2;
c->weight_h264_pixels_tab[7]= ff_h264_weight_4x2_mmx2;

c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_mmx2;
c->biweight_h264_pixels_tab[1]= ff_h264_biweight_16x8_mmx2;
c->biweight_h264_pixels_tab[2]= ff_h264_biweight_8x16_mmx2;
c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_mmx2;
c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_mmx2;
c->biweight_h264_pixels_tab[5]= ff_h264_biweight_4x8_mmx2;
c->biweight_h264_pixels_tab[6]= ff_h264_biweight_4x4_mmx2;
c->biweight_h264_pixels_tab[7]= ff_h264_biweight_4x2_mmx2;
c->weight_h264_pixels_tab[0]= ff_h264_weight_16_mmx2;
c->weight_h264_pixels_tab[1]= ff_h264_weight_8_mmx2;
c->weight_h264_pixels_tab[2]= ff_h264_weight_4_mmx2;

c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16_mmx2;
c->biweight_h264_pixels_tab[1]= ff_h264_biweight_8_mmx2;
c->biweight_h264_pixels_tab[2]= ff_h264_biweight_4_mmx2;


if (mm_flags&AV_CPU_FLAG_SSE2) { if (mm_flags&AV_CPU_FLAG_SSE2) {
c->h264_idct8_add = ff_h264_idct8_add_8_sse2; c->h264_idct8_add = ff_h264_idct8_add_8_sse2;
@@ -422,17 +402,11 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chrom
c->h264_idct_add16intra = ff_h264_idct_add16intra_8_sse2; c->h264_idct_add16intra = ff_h264_idct_add16intra_8_sse2;
c->h264_luma_dc_dequant_idct= ff_h264_luma_dc_dequant_idct_sse2; c->h264_luma_dc_dequant_idct= ff_h264_luma_dc_dequant_idct_sse2;


c->weight_h264_pixels_tab[0]= ff_h264_weight_16x16_sse2;
c->weight_h264_pixels_tab[1]= ff_h264_weight_16x8_sse2;
c->weight_h264_pixels_tab[2]= ff_h264_weight_8x16_sse2;
c->weight_h264_pixels_tab[3]= ff_h264_weight_8x8_sse2;
c->weight_h264_pixels_tab[4]= ff_h264_weight_8x4_sse2;
c->weight_h264_pixels_tab[0]= ff_h264_weight_16_sse2;
c->weight_h264_pixels_tab[1]= ff_h264_weight_8_sse2;


c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_sse2;
c->biweight_h264_pixels_tab[1]= ff_h264_biweight_16x8_sse2;
c->biweight_h264_pixels_tab[2]= ff_h264_biweight_8x16_sse2;
c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_sse2;
c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_sse2;
c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16_sse2;
c->biweight_h264_pixels_tab[1]= ff_h264_biweight_8_sse2;


#if HAVE_ALIGNED_STACK #if HAVE_ALIGNED_STACK
c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_sse2; c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_sse2;
@@ -442,11 +416,8 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chrom
#endif #endif
} }
if (mm_flags&AV_CPU_FLAG_SSSE3) { if (mm_flags&AV_CPU_FLAG_SSSE3) {
c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_ssse3;
c->biweight_h264_pixels_tab[1]= ff_h264_biweight_16x8_ssse3;
c->biweight_h264_pixels_tab[2]= ff_h264_biweight_8x16_ssse3;
c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_ssse3;
c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_ssse3;
c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16_ssse3;
c->biweight_h264_pixels_tab[1]= ff_h264_biweight_8_ssse3;
} }
if (mm_flags&AV_CPU_FLAG_AVX) { if (mm_flags&AV_CPU_FLAG_AVX) {
#if HAVE_ALIGNED_STACK #if HAVE_ALIGNED_STACK
@@ -485,23 +456,13 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chrom
c->h264_idct8_add4 = ff_h264_idct8_add4_10_sse2; c->h264_idct8_add4 = ff_h264_idct8_add4_10_sse2;
#endif #endif


c->weight_h264_pixels_tab[0] = ff_h264_weight_16x16_10_sse2;
c->weight_h264_pixels_tab[1] = ff_h264_weight_16x8_10_sse2;
c->weight_h264_pixels_tab[2] = ff_h264_weight_8x16_10_sse2;
c->weight_h264_pixels_tab[3] = ff_h264_weight_8x8_10_sse2;
c->weight_h264_pixels_tab[4] = ff_h264_weight_8x4_10_sse2;
c->weight_h264_pixels_tab[5] = ff_h264_weight_4x8_10_sse2;
c->weight_h264_pixels_tab[6] = ff_h264_weight_4x4_10_sse2;
c->weight_h264_pixels_tab[7] = ff_h264_weight_4x2_10_sse2;

c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16x16_10_sse2;
c->biweight_h264_pixels_tab[1] = ff_h264_biweight_16x8_10_sse2;
c->biweight_h264_pixels_tab[2] = ff_h264_biweight_8x16_10_sse2;
c->biweight_h264_pixels_tab[3] = ff_h264_biweight_8x8_10_sse2;
c->biweight_h264_pixels_tab[4] = ff_h264_biweight_8x4_10_sse2;
c->biweight_h264_pixels_tab[5] = ff_h264_biweight_4x8_10_sse2;
c->biweight_h264_pixels_tab[6] = ff_h264_biweight_4x4_10_sse2;
c->biweight_h264_pixels_tab[7] = ff_h264_biweight_4x2_10_sse2;
c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse2;
c->weight_h264_pixels_tab[1] = ff_h264_weight_8_10_sse2;
c->weight_h264_pixels_tab[2] = ff_h264_weight_4_10_sse2;

c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_10_sse2;
c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse2;
c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse2;


c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_10_sse2; c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_10_sse2;
c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_10_sse2; c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_10_sse2;
@@ -513,23 +474,13 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chrom
#endif #endif
} }
if (mm_flags&AV_CPU_FLAG_SSE4) { if (mm_flags&AV_CPU_FLAG_SSE4) {
c->weight_h264_pixels_tab[0] = ff_h264_weight_16x16_10_sse4;
c->weight_h264_pixels_tab[1] = ff_h264_weight_16x8_10_sse4;
c->weight_h264_pixels_tab[2] = ff_h264_weight_8x16_10_sse4;
c->weight_h264_pixels_tab[3] = ff_h264_weight_8x8_10_sse4;
c->weight_h264_pixels_tab[4] = ff_h264_weight_8x4_10_sse4;
c->weight_h264_pixels_tab[5] = ff_h264_weight_4x8_10_sse4;
c->weight_h264_pixels_tab[6] = ff_h264_weight_4x4_10_sse4;
c->weight_h264_pixels_tab[7] = ff_h264_weight_4x2_10_sse4;

c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16x16_10_sse4;
c->biweight_h264_pixels_tab[1] = ff_h264_biweight_16x8_10_sse4;
c->biweight_h264_pixels_tab[2] = ff_h264_biweight_8x16_10_sse4;
c->biweight_h264_pixels_tab[3] = ff_h264_biweight_8x8_10_sse4;
c->biweight_h264_pixels_tab[4] = ff_h264_biweight_8x4_10_sse4;
c->biweight_h264_pixels_tab[5] = ff_h264_biweight_4x8_10_sse4;
c->biweight_h264_pixels_tab[6] = ff_h264_biweight_4x4_10_sse4;
c->biweight_h264_pixels_tab[7] = ff_h264_biweight_4x2_10_sse4;
c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse4;
c->weight_h264_pixels_tab[1] = ff_h264_weight_8_10_sse4;
c->weight_h264_pixels_tab[2] = ff_h264_weight_4_10_sse4;

c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_10_sse4;
c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse4;
c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse4;
} }
#if HAVE_AVX #if HAVE_AVX
if (mm_flags&AV_CPU_FLAG_AVX) { if (mm_flags&AV_CPU_FLAG_AVX) {


Loading…
Cancel
Save