Using macro templates allows the vp[56]_adjust functions to be inlined instead of called through function pointers. The new function pointers enable optimised implementations of the filters. 4% faster VP6 decoding on Cortex-A8. Originally committed as revision 22992 to svn://svn.ffmpeg.org/ffmpeg/trunk (tags/v0.6)
| @@ -358,9 +358,9 @@ OBJS-$(CONFIG_VORBIS_DECODER) += vorbis_dec.o vorbis.o \ | |||
| OBJS-$(CONFIG_VORBIS_ENCODER) += vorbis_enc.o vorbis.o \ | |||
| vorbis_data.o | |||
| OBJS-$(CONFIG_VP3_DECODER) += vp3.o vp3dsp.o | |||
| OBJS-$(CONFIG_VP5_DECODER) += vp5.o vp56.o vp56data.o \ | |||
| OBJS-$(CONFIG_VP5_DECODER) += vp5.o vp56.o vp56data.o vp56dsp.o \ | |||
| vp3dsp.o | |||
| OBJS-$(CONFIG_VP6_DECODER) += vp6.o vp56.o vp56data.o \ | |||
| OBJS-$(CONFIG_VP6_DECODER) += vp6.o vp56.o vp56data.o vp56dsp.o \ | |||
| vp3dsp.o vp6dsp.o huffman.o | |||
| OBJS-$(CONFIG_VQA_DECODER) += vqavideo.o | |||
| OBJS-$(CONFIG_WAVPACK_DECODER) += wavpack.o | |||
| @@ -69,23 +69,6 @@ static int vp5_parse_header(VP56Context *s, const uint8_t *buf, int buf_size, | |||
| return 1; | |||
| } | |||
/**
 * VP5 flavour of the edge-filter value adjustment.
 *
 * Works on the magnitude of v and re-applies the sign of v at the end.
 * For t >= 0 the magnitude of the result is
 *   - |v|        when |v| <= t,
 *   - 2*t - |v|  when t < |v| < 2*t,
 *   - 0          when |v| >= 2*t.
 * Gives results very similar to the vp6 version except in a few cases.
 *
 * @param v filter value to adjust
 * @param t filter threshold
 * @return the adjusted value, carrying the sign of v
 */
static int vp5_adjust(int v, int t)
{
    const int sign = v >> 31;       /* 0 for v >= 0, -1 for v < 0 */
    int mag = (v ^ sign) - sign;    /* |v| */
    int neg;

    if (!(mag < 2*t))               /* out of range: treat as zero */
        mag = 0;
    mag -= t;
    neg  = mag >> 31;
    mag  = (mag ^ neg) - neg;       /* | |v| - t | */
    mag  = t - mag;
    return (mag + sign) ^ sign;     /* negate again if v was negative */
}
| static void vp5_parse_vector_adjustment(VP56Context *s, VP56mv *vect) | |||
| { | |||
| VP56RangeCoder *c = &s->c; | |||
| @@ -274,7 +257,6 @@ static av_cold int vp5_decode_init(AVCodecContext *avctx) | |||
| vp56_init(avctx, 1, 0); | |||
| s->vp56_coord_div = vp5_coord_div; | |||
| s->parse_vector_adjustment = vp5_parse_vector_adjustment; | |||
| s->adjust = vp5_adjust; | |||
| s->parse_coeff = vp5_parse_coeff; | |||
| s->default_models_init = vp5_default_models_init; | |||
| s->parse_vector_models = vp5_parse_vector_models; | |||
| @@ -300,27 +300,12 @@ static void vp56_add_predictors_dc(VP56Context *s, VP56Frame ref_frame) | |||
| } | |||
| } | |||
| static void vp56_edge_filter(VP56Context *s, uint8_t *yuv, | |||
| int pix_inc, int line_inc, int t) | |||
| { | |||
| int pix2_inc = 2 * pix_inc; | |||
| int i, v; | |||
| for (i=0; i<12; i++) { | |||
| v = (yuv[-pix2_inc] + 3*(yuv[0]-yuv[-pix_inc]) - yuv[pix_inc] + 4) >>3; | |||
| v = s->adjust(v, t); | |||
| yuv[-pix_inc] = av_clip_uint8(yuv[-pix_inc] + v); | |||
| yuv[0] = av_clip_uint8(yuv[0] - v); | |||
| yuv += line_inc; | |||
| } | |||
| } | |||
/*
 * Run the edge filter on a block: horizontally when dx is non-zero and
 * vertically when dy is non-zero. The threshold t is looked up in
 * vp56_filter_threshold[] using the current quantizer.
 *
 * NOTE(review): this listing comes from a diff view that lost its +/-
 * markers. Presumably the two vp56_edge_filter() calls are the pre-change
 * lines and the two s->vp56dsp.edge_filter_*() calls are their replacements,
 * so only one pair is meant to be present at a time -- confirm against the
 * actual file.
 */
| static void vp56_deblock_filter(VP56Context *s, uint8_t *yuv, | |||
| int stride, int dx, int dy) | |||
| { | |||
| int t = vp56_filter_threshold[s->quantizer]; | |||
| if (dx) vp56_edge_filter(s, yuv + 10-dx , 1, stride, t); | |||
| if (dy) vp56_edge_filter(s, yuv + stride*(10-dy), stride, 1, t); | |||
| if (dx) s->vp56dsp.edge_filter_hor(yuv + 10-dx , stride, t); | |||
| if (dy) s->vp56dsp.edge_filter_ver(yuv + stride*(10-dy), stride, t); | |||
| } | |||
| static void vp56_mc(VP56Context *s, int b, int plane, uint8_t *src, | |||
| @@ -665,6 +650,7 @@ av_cold void vp56_init(AVCodecContext *avctx, int flip, int has_alpha) | |||
| if (avctx->idct_algo == FF_IDCT_AUTO) | |||
| avctx->idct_algo = FF_IDCT_VP3; | |||
| dsputil_init(&s->dsp, avctx); | |||
| ff_vp56dsp_init(&s->vp56dsp, avctx->codec->id); | |||
| ff_init_scantable(s->dsp.idct_permutation, &s->scantable,ff_zigzag_direct); | |||
| for (i=0; i<4; i++) | |||
| @@ -28,14 +28,13 @@ | |||
| #include "dsputil.h" | |||
| #include "get_bits.h" | |||
| #include "bytestream.h" | |||
| #include "vp56dsp.h" | |||
| typedef struct vp56_context VP56Context; | |||
| typedef struct vp56_mv VP56mv; | |||
| typedef void (*VP56ParseVectorAdjustment)(VP56Context *s, | |||
| VP56mv *vect); | |||
| typedef int (*VP56Adjust)(int v, int t); | |||
| typedef void (*VP56Filter)(VP56Context *s, uint8_t *dst, uint8_t *src, | |||
| int offset1, int offset2, int stride, | |||
| VP56mv mv, int mask, int select, int luma); | |||
| @@ -90,6 +89,7 @@ typedef struct { | |||
| struct vp56_context { | |||
| AVCodecContext *avctx; | |||
| DSPContext dsp; | |||
| VP56DSPContext vp56dsp; | |||
| ScanTable scantable; | |||
| AVFrame frames[4]; | |||
| AVFrame *framep[6]; | |||
| @@ -149,7 +149,6 @@ struct vp56_context { | |||
| const uint8_t *vp56_coord_div; | |||
| VP56ParseVectorAdjustment parse_vector_adjustment; | |||
| VP56Adjust adjust; | |||
| VP56Filter filter; | |||
| VP56ParseCoeff parse_coeff; | |||
| VP56DefaultModelsInit default_models_init; | |||
| @@ -0,0 +1,86 @@ | |||
| /* | |||
| * Copyright (c) 2006 Aurelien Jacobs <aurel@gnuage.org> | |||
| * Copyright (c) 2010 Mans Rullgard <mans@mansr.com> | |||
| * | |||
| * This file is part of FFmpeg. | |||
| * | |||
| * FFmpeg is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * FFmpeg is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with FFmpeg; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #include <stdint.h> | |||
| #include "avcodec.h" | |||
| #include "vp56dsp.h" | |||
/**
 * Adjust an edge-filter value for VP5.
 *
 * The sign of v is stripped, the magnitude is processed and the sign is put
 * back on the result. For t >= 0 the resulting magnitude is |v| while
 * |v| <= t, falls off as 2*t - |v| for t < |v| < 2*t and is 0 from
 * |v| >= 2*t on.
 * Gives results very similar to the vp6 version except in a few cases.
 *
 * @param v filter value to adjust
 * @param t filter threshold
 * @return the adjusted value, carrying the sign of v
 */
static int vp5_adjust(int v, int t)
{
    const int sign = v >> 31;               /* all ones iff v is negative */
    int mag = (v ^ sign) - sign;            /* |v| */
    int flip;

    mag  = (mag < 2*t ? mag : 0) - t;       /* zero out-of-range input first */
    flip = mag >> 31;
    mag  = t - ((mag ^ flip) - flip);       /* t - | |v| - t | */
    return (mag + sign) ^ sign;             /* restore the sign of v */
}
/**
 * Adjust an edge-filter value for VP6.
 *
 * For t > 0: v is returned untouched when |v| <= t or |v| >= 2*t; in between
 * the magnitude becomes 2*t - |v| and the sign of v is kept. Both range
 * checks are folded into a single unsigned comparison.
 *
 * @param v filter value to adjust
 * @param t filter threshold
 * @return the adjusted value
 */
static int vp6_adjust(int v, int t)
{
    const int sign = v >> 31;               /* all ones iff v is negative */
    const int mag  = (v ^ sign) - sign;     /* |v| */

    /* true exactly when the original test "V-t-1 >= (unsigned)(t-1)" fails */
    if ((unsigned)(mag - t - 1) < (unsigned)(t - 1))
        return ((2*t - mag) + sign) ^ sign; /* reflect, then restore sign */

    return v;
}
/**
 * Template generating one edge filter, pfx##_edge_filter_##suf().
 *
 * The generated function visits 12 positions. At each one it combines the
 * samples at offsets -2*pix_inc, -pix_inc, 0 and +pix_inc into a correction
 * value, runs it through pfx##_adjust() with threshold t, adds the result to
 * the sample at -pix_inc and subtracts it from the sample at 0 (both clipped
 * with av_clip_uint8()), then advances yuv by line_inc.
 *
 * pix_inc and line_inc are expressions evaluated inside the generated
 * function, so they may refer to its `stride` parameter. Each is captured
 * once in a parenthesized local so that arbitrary expressions can be passed
 * without operator-precedence surprises.
 *
 * @param pfx      codec prefix; selects pfx##_adjust() and names the function
 * @param suf      suffix of the generated function name
 * @param pix_inc  distance between neighbouring samples of one position
 * @param line_inc step from one position to the next
 */
#define VP56_EDGE_FILTER(pfx, suf, pix_inc, line_inc)                         \
static void pfx##_edge_filter_##suf(uint8_t *yuv, int stride, int t)          \
{                                                                             \
    const int pix1_inc = (pix_inc);                                           \
    const int pix2_inc = 2 * pix1_inc;                                        \
    const int step     = (line_inc);                                          \
    int i, v;                                                                 \
                                                                              \
    for (i=0; i<12; i++) {                                                    \
        v = (yuv[-pix2_inc] + 3*(yuv[0]-yuv[-pix1_inc])                       \
             - yuv[pix1_inc] + 4)>>3;                                         \
        v = pfx##_adjust(v, t);                                               \
        yuv[-pix1_inc] = av_clip_uint8(yuv[-pix1_inc] + v);                   \
        yuv[0] = av_clip_uint8(yuv[0] - v);                                   \
        yuv += step;                                                          \
    }                                                                         \
}

/* "hor": neighbouring samples are 1 apart, positions are `stride` apart. */
/* "ver": neighbouring samples are `stride` apart, positions are 1 apart. */
VP56_EDGE_FILTER(vp5, hor, 1,      stride)
VP56_EDGE_FILTER(vp5, ver, stride, 1)
VP56_EDGE_FILTER(vp6, hor, 1,      stride)
VP56_EDGE_FILTER(vp6, ver, stride, 1)
| void ff_vp56dsp_init(VP56DSPContext *s, enum CodecID codec) | |||
| { | |||
| if (codec == CODEC_ID_VP5) { | |||
| s->edge_filter_hor = vp5_edge_filter_hor; | |||
| s->edge_filter_ver = vp5_edge_filter_ver; | |||
| } else { | |||
| s->edge_filter_hor = vp6_edge_filter_hor; | |||
| s->edge_filter_ver = vp6_edge_filter_ver; | |||
| } | |||
| } | |||
| @@ -0,0 +1,33 @@ | |||
| /* | |||
| * Copyright (c) 2010 Mans Rullgard <mans@mansr.com> | |||
| * | |||
| * This file is part of FFmpeg. | |||
| * | |||
| * FFmpeg is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * FFmpeg is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with FFmpeg; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #ifndef AVCODEC_VP56DSP_H | |||
| #define AVCODEC_VP56DSP_H | |||
#include <stdint.h>
#include "avcodec.h"
/**
 * Table of edge-filter entry points shared by the VP5 and VP6 decoders.
 * Both functions take a pointer into the picture, the line stride and the
 * filter threshold.
 */
struct VP56DSPContext {
    /** edge filter used for the horizontal direction */
    void (*edge_filter_hor)(uint8_t *yuv, int stride, int t);
    /** edge filter used for the vertical direction */
    void (*edge_filter_ver)(uint8_t *yuv, int stride, int t);
};

typedef struct VP56DSPContext VP56DSPContext;
/**
 * Set the edge-filter function pointers of a VP56DSPContext.
 *
 * @param s     context to fill in
 * @param codec codec id selecting which filter implementations are installed
 */
| void ff_vp56dsp_init(VP56DSPContext *s, enum CodecID codec); | |||
| #endif /* AVCODEC_VP56DSP_H */ | |||
| @@ -481,19 +481,6 @@ static void vp6_parse_coeff(VP56Context *s) | |||
| } | |||
| } | |||
/**
 * VP6 flavour of the edge-filter value adjustment.
 *
 * For t > 0 the input is passed through unchanged when |v| <= t or
 * |v| >= 2*t. Otherwise the magnitude is replaced by 2*t - |v| and the sign
 * of v is preserved. A single unsigned comparison covers both bounds.
 *
 * @param v filter value to adjust
 * @param t filter threshold
 * @return the adjusted value
 */
static int vp6_adjust(int v, int t)
{
    const int sign = v >> 31;                   /* 0 or -1 */
    const int mag  = (v ^ sign) - sign;         /* |v| */
    const int in_window = (unsigned)(mag - t - 1) < (unsigned)(t - 1);

    if (!in_window)
        return v;

    return ((2*t - mag) + sign) ^ sign;         /* re-apply the sign of v */
}
| static int vp6_block_variance(uint8_t *src, int stride) | |||
| { | |||
| int sum = 0, square_sum = 0; | |||
| @@ -592,7 +579,6 @@ static av_cold int vp6_decode_init(AVCodecContext *avctx) | |||
| avctx->codec->id == CODEC_ID_VP6A); | |||
| s->vp56_coord_div = vp6_coord_div; | |||
| s->parse_vector_adjustment = vp6_parse_vector_adjustment; | |||
| s->adjust = vp6_adjust; | |||
| s->filter = vp6_filter; | |||
| s->default_models_init = vp6_default_models_init; | |||
| s->parse_vector_models = vp6_parse_vector_models; | |||