Merge remote-tracking branch 'qatar/master'

* qatar/master: (28 commits) dfa: use more meaningful return codes eatgv: check vector_bits eatgv: check motion vectors Mark a number of variables only used in av_dlog() calls as av_unused. dvdec: drop const qualifier from variable to eliminate a warning avcodec: Improve comment for thread_safe_callbacks to avoid misinterpretation. tests/utils: don't ignore the return value of fwrite() lavfi/formats: use sizeof(var) instead of sizeof(type). lavfi: remove avfilter_default_config_input_link() declaration lavfi: always enable the scale filter and depend on sws. vf_split: support user-specifiable number of outputs. avconv: remove stray useless comment. mpegmux: add stuffing to avoid incomplete PCM frames rtsp: avoid const warnings from strtol() call avserver: check return value of ftruncate() lagarith: make offset array type unsigned dfa: add some checks to ensure that decoder won't write past frame end aacps: NEON optimisations aacps: align some arrays aacps: move some loops to function pointers ... Conflicts: configure doc/filters.texi libavcodec/dfa.c libavcodec/eatgv.c libavfilter/Makefile libavfilter/allfilters.c libavfilter/avfilter.h libavfilter/formats.c libavfilter/vf_split.c Merged-by: Michael Niedermayer <michaelni@gmx.at>
13 years ago · 715c8a5a50
--- a/configure
+++ b/configure
@@ -1697,6 +1697,7 @@ yadif_filter_deps="gpl"

 # libraries
 avdevice_deps="avcodec avformat"
 avfilter_deps="swscale"
 avformat_deps="avcodec"
 postproc_deps="gpl"

--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -2735,8 +2735,16 @@ faster due to better use of the memory cache.

@section split

 Pass on the input video to two outputs. Both outputs are identical to
 the input video.
 Split input video into several identical outputs.

 The filter accepts a single parameter which specifies the number of outputs. If
 unspecified, it defaults to 2.

 For example
@example
 ffmpeg -i INPUT -filter_complex split=5 OUTPUT
@end example
 will create 5 copies of the input video.

 For example:
@example
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -728,7 +728,6 @@ static int configure_video_filters(FilterGraph *fg)
    InputStream  *ist = fg->inputs[0]->ist;
    OutputStream *ost = fg->outputs[0]->ost;
    AVFilterContext *last_filter, *filter;
    /** filter graph containing all filters including input & output */
    AVCodecContext *codec = ost->st->codec;
    enum PixelFormat *pix_fmts = choose_pixel_fmts(ost);
    AVBufferSinkParams *buffersink_params = av_buffersink_params_alloc();
--- a/ffserver.c
+++ b/ffserver.c
@@ -2585,8 +2585,11 @@ static int http_start_receive_data(HTTPContext *c)
    if (c->stream->truncate) {
        /* truncate feed file */
        ffm_write_write_index(c->feed_fd, FFM_PACKET_SIZE);
        ftruncate(c->feed_fd, FFM_PACKET_SIZE);
        http_log("Truncating feed file '%s'\n", c->stream->feed_filename);
        if (ftruncate(c->feed_fd, FFM_PACKET_SIZE) < 0) {
            http_log("Error truncating feed file: %s\n", strerror(errno));
            return -1;
        }
    } else {
        if ((c->stream->feed_write_index = ffm_read_write_index(fd)) < 0) {
            http_log("Error reading write index from feed file: %s\n", strerror(errno));
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -57,7 +57,7 @@ OBJS-$(CONFIG_A64MULTI_ENCODER)        += a64multienc.o elbg.o
 OBJS-$(CONFIG_A64MULTI5_ENCODER)       += a64multienc.o elbg.o
 OBJS-$(CONFIG_AAC_DECODER)             += aacdec.o aactab.o aacsbr.o aacps.o \
                                          aacadtsdec.o mpeg4audio.o kbdwin.o \
                                          sbrdsp.o
                                          sbrdsp.o aacpsdsp.o
 OBJS-$(CONFIG_AAC_ENCODER)             += aacenc.o aaccoder.o    \
                                          aacpsy.o aactab.o      \
                                          psymodel.o iirfilter.o \
--- a/libavcodec/aacps.c
+++ b/libavcodec/aacps.c
@@ -27,6 +27,7 @@
 #include "aacps.h"
 #include "aacps_tablegen.h"
 #include "aacpsdata.c"
 #include "dsputil.h"

 #define PS_BASELINE 0  ///< Operate in Baseline PS mode
                       ///< Baseline implies 10 or 20 stereo bands,
@@ -284,7 +285,7 @@ err:

 /** Split one subband into 2 subsubbands with a symmetric real filter.
 * The filter must have its non-center even coefficients equal to zero. */
 static void hybrid2_re(float (*in)[2], float (*out)[32][2], const float filter[7], int len, int reverse)
 static void hybrid2_re(float (*in)[2], float (*out)[32][2], const float filter[8], int len, int reverse)
 {
    int i, j;
    for (i = 0; i < len; i++, in++) {
@@ -304,26 +305,14 @@ static void hybrid2_re(float (*in)[2], float (*out)[32][2], const float filter[7
 }

 /** Split one subband into 6 subsubbands with a complex filter */
 static void hybrid6_cx(float (*in)[2], float (*out)[32][2], const float (*filter)[7][2], int len)
 static void hybrid6_cx(PSDSPContext *dsp, float (*in)[2], float (*out)[32][2], const float (*filter)[8][2], int len)
 {
    int i, j, ssb;
    int i;
    int N = 8;
    float temp[8][2];
    LOCAL_ALIGNED_16(float, temp, [8], [2]);

    for (i = 0; i < len; i++, in++) {
        for (ssb = 0; ssb < N; ssb++) {
            float sum_re = filter[ssb][6][0] * in[6][0], sum_im = filter[ssb][6][0] * in[6][1];
            for (j = 0; j < 6; j++) {
                float in0_re = in[j][0];
                float in0_im = in[j][1];
                float in1_re = in[12-j][0];
                float in1_im = in[12-j][1];
                sum_re += filter[ssb][j][0] * (in0_re + in1_re) - filter[ssb][j][1] * (in0_im - in1_im);
                sum_im += filter[ssb][j][0] * (in0_im + in1_im) + filter[ssb][j][1] * (in0_re - in1_re);
            }
            temp[ssb][0] = sum_re;
            temp[ssb][1] = sum_im;
        }
        dsp->hybrid_analysis(temp, in, filter, 1, N);
        out[0][i][0] = temp[6][0];
        out[0][i][1] = temp[6][1];
        out[1][i][0] = temp[7][0];
@@ -339,28 +328,18 @@ static void hybrid6_cx(float (*in)[2], float (*out)[32][2], const float (*filter
    }
 }

 static void hybrid4_8_12_cx(float (*in)[2], float (*out)[32][2], const float (*filter)[7][2], int N, int len)
 static void hybrid4_8_12_cx(PSDSPContext *dsp, float (*in)[2], float (*out)[32][2], const float (*filter)[8][2], int N, int len)
 {
    int i, j, ssb;
    int i;

    for (i = 0; i < len; i++, in++) {
        for (ssb = 0; ssb < N; ssb++) {
            float sum_re = filter[ssb][6][0] * in[6][0], sum_im = filter[ssb][6][0] * in[6][1];
            for (j = 0; j < 6; j++) {
                float in0_re = in[j][0];
                float in0_im = in[j][1];
                float in1_re = in[12-j][0];
                float in1_im = in[12-j][1];
                sum_re += filter[ssb][j][0] * (in0_re + in1_re) - filter[ssb][j][1] * (in0_im - in1_im);
                sum_im += filter[ssb][j][0] * (in0_im + in1_im) + filter[ssb][j][1] * (in0_re - in1_re);
            }
            out[ssb][i][0] = sum_re;
            out[ssb][i][1] = sum_im;
        }
        dsp->hybrid_analysis(out[0] + i, in, filter, 32, N);
    }
 }

 static void hybrid_analysis(float out[91][32][2], float in[5][44][2], float L[2][38][64], int is34, int len)
 static void hybrid_analysis(PSDSPContext *dsp, float out[91][32][2],
                            float in[5][44][2], float L[2][38][64],
                            int is34, int len)
 {
    int i, j;
    for (i = 0; i < 5; i++) {
@@ -370,27 +349,17 @@ static void hybrid_analysis(float out[91][32][2], float in[5][44][2], float L[2]
        }
    }
    if (is34) {
        hybrid4_8_12_cx(in[0], out,    f34_0_12, 12, len);
        hybrid4_8_12_cx(in[1], out+12, f34_1_8,   8, len);
        hybrid4_8_12_cx(in[2], out+20, f34_2_4,   4, len);
        hybrid4_8_12_cx(in[3], out+24, f34_2_4,   4, len);
        hybrid4_8_12_cx(in[4], out+28, f34_2_4,   4, len);
        for (i = 0; i < 59; i++) {
            for (j = 0; j < len; j++) {
                out[i+32][j][0] = L[0][j][i+5];
                out[i+32][j][1] = L[1][j][i+5];
            }
        }
        hybrid4_8_12_cx(dsp, in[0], out,    f34_0_12, 12, len);
        hybrid4_8_12_cx(dsp, in[1], out+12, f34_1_8,   8, len);
        hybrid4_8_12_cx(dsp, in[2], out+20, f34_2_4,   4, len);
        hybrid4_8_12_cx(dsp, in[3], out+24, f34_2_4,   4, len);
        hybrid4_8_12_cx(dsp, in[4], out+28, f34_2_4,   4, len);
        dsp->hybrid_analysis_ileave(out + 27, L, 5, len);
    } else {
        hybrid6_cx(in[0], out, f20_0_8, len);
        hybrid6_cx(dsp, in[0], out, f20_0_8, len);
        hybrid2_re(in[1], out+6, g1_Q2, len, 1);
        hybrid2_re(in[2], out+8, g1_Q2, len, 0);
        for (i = 0; i < 61; i++) {
            for (j = 0; j < len; j++) {
                out[i+10][j][0] = L[0][j][i+3];
                out[i+10][j][1] = L[1][j][i+3];
            }
        }
        dsp->hybrid_analysis_ileave(out + 7, L, 3, len);
    }
    //update in_buf
    for (i = 0; i < 5; i++) {
@@ -398,7 +367,8 @@ static void hybrid_analysis(float out[91][32][2], float in[5][44][2], float L[2]
    }
 }

 static void hybrid_synthesis(float out[2][38][64], float in[91][32][2], int is34, int len)
 static void hybrid_synthesis(PSDSPContext *dsp, float out[2][38][64],
                             float in[91][32][2], int is34, int len)
 {
    int i, n;
    if (is34) {
@@ -422,12 +392,7 @@ static void hybrid_synthesis(float out[2][38][64], float in[91][32][2], int is34
                out[1][n][4] += in[28+i][n][1];
            }
        }
        for (i = 0; i < 59; i++) {
            for (n = 0; n < len; n++) {
                out[0][n][i+5] = in[i+32][n][0];
                out[1][n][i+5] = in[i+32][n][1];
            }
        }
        dsp->hybrid_synthesis_deint(out, in + 27, 5, len);
    } else {
        for (n = 0; n < len; n++) {
            out[0][n][0] = in[0][n][0] + in[1][n][0] + in[2][n][0] +
@@ -439,12 +404,7 @@ static void hybrid_synthesis(float out[2][38][64], float in[91][32][2], int is34
            out[0][n][2] = in[8][n][0] + in[9][n][0];
            out[1][n][2] = in[8][n][1] + in[9][n][1];
        }
        for (i = 0; i < 61; i++) {
            for (n = 0; n < len; n++) {
                out[0][n][i+3] = in[i+10][n][0];
                out[1][n][i+3] = in[i+10][n][1];
            }
        }
        dsp->hybrid_synthesis_deint(out, in + 7, 3, len);
    }
 }

@@ -648,8 +608,8 @@ static void map_val_20_to_34(float par[PS_MAX_NR_IIDICC])

 static void decorrelation(PSContext *ps, float (*out)[32][2], const float (*s)[32][2], int is34)
 {
    float power[34][PS_QMF_TIME_SLOTS] = {{0}};
    float transient_gain[34][PS_QMF_TIME_SLOTS];
    LOCAL_ALIGNED_16(float, power, [34], [PS_QMF_TIME_SLOTS]);
    LOCAL_ALIGNED_16(float, transient_gain, [34], [PS_QMF_TIME_SLOTS]);
    float *peak_decay_nrg = ps->peak_decay_nrg;
    float *power_smooth = ps->power_smooth;
    float *peak_decay_diff_smooth = ps->peak_decay_diff_smooth;
@@ -661,10 +621,8 @@ static void decorrelation(PSContext *ps, float (*out)[32][2], const float (*s)[3
    const float a_smooth          = 0.25f; ///< Smoothing coefficient
    int i, k, m, n;
    int n0 = 0, nL = 32;
    static const int link_delay[] = { 3, 4, 5 };
    static const float a[] = { 0.65143905753106f,
                               0.56471812200776f,
                               0.48954165955695f };

    memset(power, 0, 34 * sizeof(*power));

    if (is34 != ps->is34bands_old) {
        memset(ps->peak_decay_nrg,         0, sizeof(ps->peak_decay_nrg));
@@ -674,11 +632,9 @@ static void decorrelation(PSContext *ps, float (*out)[32][2], const float (*s)[3
        memset(ps->ap_delay,               0, sizeof(ps->ap_delay));
    }

    for (n = n0; n < nL; n++) {
        for (k = 0; k < NR_BANDS[is34]; k++) {
            int i = k_to_i[k];
            power[i][n] += s[k][n][0] * s[k][n][0] + s[k][n][1] * s[k][n][1];
        }
    for (k = 0; k < NR_BANDS[is34]; k++) {
        int i = k_to_i[k];
        ps->dsp.add_squares(power[i], s[k], nL - n0);
    }

    //Transient detection
@@ -706,54 +662,31 @@ static void decorrelation(PSContext *ps, float (*out)[32][2], const float (*s)[3
    for (k = 0; k < NR_ALLPASS_BANDS[is34]; k++) {
        int b = k_to_i[k];
        float g_decay_slope = 1.f - DECAY_SLOPE * (k - DECAY_CUTOFF[is34]);
        float ag[PS_AP_LINKS];
        g_decay_slope = av_clipf(g_decay_slope, 0.f, 1.f);
        memcpy(delay[k], delay[k]+nL, PS_MAX_DELAY*sizeof(delay[k][0]));
        memcpy(delay[k]+PS_MAX_DELAY, s[k], numQMFSlots*sizeof(delay[k][0]));
        for (m = 0; m < PS_AP_LINKS; m++) {
            memcpy(ap_delay[k][m],   ap_delay[k][m]+numQMFSlots,           5*sizeof(ap_delay[k][m][0]));
            ag[m] = a[m] * g_decay_slope;
        }
        for (n = n0; n < nL; n++) {
            float in_re = delay[k][n+PS_MAX_DELAY-2][0] * phi_fract[is34][k][0] -
                          delay[k][n+PS_MAX_DELAY-2][1] * phi_fract[is34][k][1];
            float in_im = delay[k][n+PS_MAX_DELAY-2][0] * phi_fract[is34][k][1] +
                          delay[k][n+PS_MAX_DELAY-2][1] * phi_fract[is34][k][0];
            for (m = 0; m < PS_AP_LINKS; m++) {
                float a_re                = ag[m] * in_re;
                float a_im                = ag[m] * in_im;
                float link_delay_re       = ap_delay[k][m][n+5-link_delay[m]][0];
                float link_delay_im       = ap_delay[k][m][n+5-link_delay[m]][1];
                float fractional_delay_re = Q_fract_allpass[is34][k][m][0];
                float fractional_delay_im = Q_fract_allpass[is34][k][m][1];
                ap_delay[k][m][n+5][0] = in_re;
                ap_delay[k][m][n+5][1] = in_im;
                in_re = link_delay_re * fractional_delay_re - link_delay_im * fractional_delay_im - a_re;
                in_im = link_delay_re * fractional_delay_im + link_delay_im * fractional_delay_re - a_im;
                ap_delay[k][m][n+5][0] += ag[m] * in_re;
                ap_delay[k][m][n+5][1] += ag[m] * in_im;
            }
            out[k][n][0] = transient_gain[b][n] * in_re;
            out[k][n][1] = transient_gain[b][n] * in_im;
        }
        ps->dsp.decorrelate(out[k], delay[k] + PS_MAX_DELAY - 2, ap_delay[k],
                            phi_fract[is34][k], Q_fract_allpass[is34][k],
                            transient_gain[b], g_decay_slope, nL - n0);
    }
    for (; k < SHORT_DELAY_BAND[is34]; k++) {
        int i = k_to_i[k];
        memcpy(delay[k], delay[k]+nL, PS_MAX_DELAY*sizeof(delay[k][0]));
        memcpy(delay[k]+PS_MAX_DELAY, s[k], numQMFSlots*sizeof(delay[k][0]));
        for (n = n0; n < nL; n++) {
            //H = delay 14
            out[k][n][0] = transient_gain[k_to_i[k]][n] * delay[k][n+PS_MAX_DELAY-14][0];
            out[k][n][1] = transient_gain[k_to_i[k]][n] * delay[k][n+PS_MAX_DELAY-14][1];
        }
        //H = delay 14
        ps->dsp.mul_pair_single(out[k], delay[k] + PS_MAX_DELAY - 14,
                                transient_gain[i], nL - n0);
    }
    for (; k < NR_BANDS[is34]; k++) {
        int i = k_to_i[k];
        memcpy(delay[k], delay[k]+nL, PS_MAX_DELAY*sizeof(delay[k][0]));
        memcpy(delay[k]+PS_MAX_DELAY, s[k], numQMFSlots*sizeof(delay[k][0]));
        for (n = n0; n < nL; n++) {
            //H = delay 1
            out[k][n][0] = transient_gain[k_to_i[k]][n] * delay[k][n+PS_MAX_DELAY-1][0];
            out[k][n][1] = transient_gain[k_to_i[k]][n] * delay[k][n+PS_MAX_DELAY-1][1];
        }
        //H = delay 1
        ps->dsp.mul_pair_single(out[k], delay[k] + PS_MAX_DELAY - 1,
                                transient_gain[i], nL - n0);
    }
 }

@@ -797,7 +730,7 @@ static void remap20(int8_t (**p_par_mapped)[PS_MAX_NR_IIDICC],

 static void stereo_processing(PSContext *ps, float (*l)[32][2], float (*r)[32][2], int is34)
 {
    int e, b, k, n;
    int e, b, k;

    float (*H11)[PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC] = ps->H11;
    float (*H12)[PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC] = ps->H12;
@@ -909,78 +842,52 @@ static void stereo_processing(PSContext *ps, float (*l)[32][2], float (*r)[32][2
            H22[0][e+1][b] = h22;
        }
        for (k = 0; k < NR_BANDS[is34]; k++) {
            float h11r, h12r, h21r, h22r;
            float h11i, h12i, h21i, h22i;
            float h11r_step, h12r_step, h21r_step, h22r_step;
            float h11i_step, h12i_step, h21i_step, h22i_step;
            float h[2][4];
            float h_step[2][4];
            int start = ps->border_position[e];
            int stop  = ps->border_position[e+1];
            float width = 1.f / (stop - start);
            b = k_to_i[k];
            h11r = H11[0][e][b];
            h12r = H12[0][e][b];
            h21r = H21[0][e][b];
            h22r = H22[0][e][b];
            h[0][0] = H11[0][e][b];
            h[0][1] = H12[0][e][b];
            h[0][2] = H21[0][e][b];
            h[0][3] = H22[0][e][b];
            if (!PS_BASELINE && ps->enable_ipdopd) {
            //Is this necessary? ps_04_new seems unchanged
            if ((is34 && k <= 13 && k >= 9) || (!is34 && k <= 1)) {
                h11i = -H11[1][e][b];
                h12i = -H12[1][e][b];
                h21i = -H21[1][e][b];
                h22i = -H22[1][e][b];
                h[1][0] = -H11[1][e][b];
                h[1][1] = -H12[1][e][b];
                h[1][2] = -H21[1][e][b];
                h[1][3] = -H22[1][e][b];
            } else {
                h11i = H11[1][e][b];
                h12i = H12[1][e][b];
                h21i = H21[1][e][b];
                h22i = H22[1][e][b];
                h[1][0] = H11[1][e][b];
                h[1][1] = H12[1][e][b];
                h[1][2] = H21[1][e][b];
                h[1][3] = H22[1][e][b];
            }
            }
            //Interpolation
            h11r_step = (H11[0][e+1][b] - h11r) * width;
            h12r_step = (H12[0][e+1][b] - h12r) * width;
            h21r_step = (H21[0][e+1][b] - h21r) * width;
            h22r_step = (H22[0][e+1][b] - h22r) * width;
            h_step[0][0] = (H11[0][e+1][b] - h[0][0]) * width;
            h_step[0][1] = (H12[0][e+1][b] - h[0][1]) * width;
            h_step[0][2] = (H21[0][e+1][b] - h[0][2]) * width;
            h_step[0][3] = (H22[0][e+1][b] - h[0][3]) * width;
            if (!PS_BASELINE && ps->enable_ipdopd) {
                h11i_step = (H11[1][e+1][b] - h11i) * width;
                h12i_step = (H12[1][e+1][b] - h12i) * width;
                h21i_step = (H21[1][e+1][b] - h21i) * width;
                h22i_step = (H22[1][e+1][b] - h22i) * width;
            }
            for (n = start + 1; n <= stop; n++) {
                //l is s, r is d
                float l_re = l[k][n][0];
                float l_im = l[k][n][1];
                float r_re = r[k][n][0];
                float r_im = r[k][n][1];
                h11r += h11r_step;
                h12r += h12r_step;
                h21r += h21r_step;
                h22r += h22r_step;
                if (!PS_BASELINE && ps->enable_ipdopd) {
                    h11i += h11i_step;
                    h12i += h12i_step;
                    h21i += h21i_step;
                    h22i += h22i_step;

                    l[k][n][0] = h11r*l_re + h21r*r_re - h11i*l_im - h21i*r_im;
                    l[k][n][1] = h11r*l_im + h21r*r_im + h11i*l_re + h21i*r_re;
                    r[k][n][0] = h12r*l_re + h22r*r_re - h12i*l_im - h22i*r_im;
                    r[k][n][1] = h12r*l_im + h22r*r_im + h12i*l_re + h22i*r_re;
                } else {
                    l[k][n][0] = h11r*l_re + h21r*r_re;
                    l[k][n][1] = h11r*l_im + h21r*r_im;
                    r[k][n][0] = h12r*l_re + h22r*r_re;
                    r[k][n][1] = h12r*l_im + h22r*r_im;
                }
                h_step[1][0] = (H11[1][e+1][b] - h[1][0]) * width;
                h_step[1][1] = (H12[1][e+1][b] - h[1][1]) * width;
                h_step[1][2] = (H21[1][e+1][b] - h[1][2]) * width;
                h_step[1][3] = (H22[1][e+1][b] - h[1][3]) * width;
            }
            ps->dsp.stereo_interpolate[!PS_BASELINE && ps->enable_ipdopd](
                l[k] + start + 1, r[k] + start + 1,
                h, h_step, stop - start);
        }
    }
 }

 int ff_ps_apply(AVCodecContext *avctx, PSContext *ps, float L[2][38][64], float R[2][38][64], int top)
 {
    float Lbuf[91][32][2];
    float Rbuf[91][32][2];
    LOCAL_ALIGNED_16(float, Lbuf, [91], [32][2]);
    LOCAL_ALIGNED_16(float, Rbuf, [91], [32][2]);
    const int len = 32;
    int is34 = ps->is34bands;

@@ -989,11 +896,11 @@ int ff_ps_apply(AVCodecContext *avctx, PSContext *ps, float L[2][38][64], float
    if (top < NR_ALLPASS_BANDS[is34])
        memset(ps->ap_delay + top, 0, (NR_ALLPASS_BANDS[is34] - top)*sizeof(ps->ap_delay[0]));

    hybrid_analysis(Lbuf, ps->in_buf, L, is34, len);
    hybrid_analysis(&ps->dsp, Lbuf, ps->in_buf, L, is34, len);
    decorrelation(ps, Rbuf, Lbuf, is34);
    stereo_processing(ps, Lbuf, Rbuf, is34);
    hybrid_synthesis(L, Lbuf, is34, len);
    hybrid_synthesis(R, Rbuf, is34, len);
    hybrid_synthesis(&ps->dsp, L, Lbuf, is34, len);
    hybrid_synthesis(&ps->dsp, R, Rbuf, is34, len);

    return 0;
 }
@@ -1041,4 +948,5 @@ av_cold void ff_ps_init(void) {

 av_cold void ff_ps_ctx_init(PSContext *ps)
 {
    ff_psdsp_init(&ps->dsp);
 }
--- a/libavcodec/aacps.h
+++ b/libavcodec/aacps.h
@@ -24,6 +24,7 @@

 #include <stdint.h>

 #include "aacpsdsp.h"
 #include "avcodec.h"
 #include "get_bits.h"

@@ -60,18 +61,19 @@ typedef struct {
    int    is34bands;
    int    is34bands_old;

    float  in_buf[5][44][2];
    float  delay[PS_MAX_SSB][PS_QMF_TIME_SLOTS + PS_MAX_DELAY][2];
    float  ap_delay[PS_MAX_AP_BANDS][PS_AP_LINKS][PS_QMF_TIME_SLOTS + PS_MAX_AP_DELAY][2];
    float  peak_decay_nrg[34];
    float  power_smooth[34];
    float  peak_decay_diff_smooth[34];
    float  H11[2][PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC];
    float  H12[2][PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC];
    float  H21[2][PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC];
    float  H22[2][PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC];
    DECLARE_ALIGNED(16, float, in_buf)[5][44][2];
    DECLARE_ALIGNED(16, float, delay)[PS_MAX_SSB][PS_QMF_TIME_SLOTS + PS_MAX_DELAY][2];
    DECLARE_ALIGNED(16, float, ap_delay)[PS_MAX_AP_BANDS][PS_AP_LINKS][PS_QMF_TIME_SLOTS + PS_MAX_AP_DELAY][2];
    DECLARE_ALIGNED(16, float, peak_decay_nrg)[34];
    DECLARE_ALIGNED(16, float, power_smooth)[34];
    DECLARE_ALIGNED(16, float, peak_decay_diff_smooth)[34];
    DECLARE_ALIGNED(16, float, H11)[2][PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC];
    DECLARE_ALIGNED(16, float, H12)[2][PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC];
    DECLARE_ALIGNED(16, float, H21)[2][PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC];
    DECLARE_ALIGNED(16, float, H22)[2][PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC];
    int8_t opd_hist[PS_MAX_NR_IIDICC];
    int8_t ipd_hist[PS_MAX_NR_IIDICC];
    PSDSPContext dsp;
 } PSContext;

 void ff_ps_init(void);
--- a/libavcodec/aacps_tablegen.c
+++ b/libavcodec/aacps_tablegen.c
@@ -69,23 +69,23 @@ int main(void)
    write_float_3d_array(HB, 46, 8, 4);
    printf("};\n");

    printf("static const float f20_0_8[8][7][2] = {\n");
    write_float_3d_array(f20_0_8, 8, 7, 2);
    printf("static const DECLARE_ALIGNED(16, float, f20_0_8)[8][8][2] = {\n");
    write_float_3d_array(f20_0_8, 8, 8, 2);
    printf("};\n");
    printf("static const float f34_0_12[12][7][2] = {\n");
    write_float_3d_array(f34_0_12, 12, 7, 2);
    printf("static const DECLARE_ALIGNED(16, float, f34_0_12)[12][8][2] = {\n");
    write_float_3d_array(f34_0_12, 12, 8, 2);
    printf("};\n");
    printf("static const float f34_1_8[8][7][2] = {\n");
    write_float_3d_array(f34_1_8, 8, 7, 2);
    printf("static const DECLARE_ALIGNED(16, float, f34_1_8)[8][8][2] = {\n");
    write_float_3d_array(f34_1_8, 8, 8, 2);
    printf("};\n");
    printf("static const float f34_2_4[4][7][2] = {\n");
    write_float_3d_array(f34_2_4, 4, 7, 2);
    printf("static const DECLARE_ALIGNED(16, float, f34_2_4)[4][8][2] = {\n");
    write_float_3d_array(f34_2_4, 4, 8, 2);
    printf("};\n");

    printf("static const float Q_fract_allpass[2][50][3][2] = {\n");
    printf("static const DECLARE_ALIGNED(16, float, Q_fract_allpass)[2][50][3][2] = {\n");
    write_float_4d_array(Q_fract_allpass, 2, 50, 3, 2);
    printf("};\n");
    printf("static const float phi_fract[2][50][2] = {\n");
    printf("static const DECLARE_ALIGNED(16, float, phi_fract)[2][50][2] = {\n");
    write_float_3d_array(phi_fract, 2, 50, 2);
    printf("};\n");

--- a/libavcodec/aacps_tablegen.h
+++ b/libavcodec/aacps_tablegen.h
@@ -31,6 +31,7 @@
 #else
 #include "libavutil/common.h"
 #include "libavutil/mathematics.h"
 #include "libavutil/mem.h"
 #define NR_ALLPASS_BANDS20 30
 #define NR_ALLPASS_BANDS34 50
 #define PS_AP_LINKS 3
@@ -38,12 +39,12 @@ static float pd_re_smooth[8*8*8];
 static float pd_im_smooth[8*8*8];
 static float HA[46][8][4];
 static float HB[46][8][4];
 static float f20_0_8 [ 8][7][2];
 static float f34_0_12[12][7][2];
 static float f34_1_8 [ 8][7][2];
 static float f34_2_4 [ 4][7][2];
 static float Q_fract_allpass[2][50][3][2];
 static float phi_fract[2][50][2];
 static DECLARE_ALIGNED(16, float, f20_0_8) [ 8][8][2];
 static DECLARE_ALIGNED(16, float, f34_0_12)[12][8][2];
 static DECLARE_ALIGNED(16, float, f34_1_8) [ 8][8][2];
 static DECLARE_ALIGNED(16, float, f34_2_4) [ 4][8][2];
 static DECLARE_ALIGNED(16, float, Q_fract_allpass)[2][50][3][2];
 static DECLARE_ALIGNED(16, float, phi_fract)[2][50][2];

 static const float g0_Q8[] = {
    0.00746082949812f, 0.02270420949825f, 0.04546865930473f, 0.07266113929591f,
@@ -65,7 +66,7 @@ static const float g2_Q4[] = {
     0.16486303567403f,  0.23279856662996f, 0.25f
 };

 static void make_filters_from_proto(float (*filter)[7][2], const float *proto, int bands)
 static void make_filters_from_proto(float (*filter)[8][2], const float *proto, int bands)
 {
    int q, n;
    for (q = 0; q < bands; q++) {
--- a/libavcodec/aacpsdsp.c
+++ b/libavcodec/aacpsdsp.c
@@ -0,0 +1,214 @@
 /*
 * Copyright (c) 2010 Alex Converse <alex.converse@gmail.com>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

 #include "config.h"
 #include "libavutil/attributes.h"
 #include "aacpsdsp.h"

 static void ps_add_squares_c(float *dst, const float (*src)[2], int n)
 {
    int i;
    for (i = 0; i < n; i++)
        dst[i] += src[i][0] * src[i][0] + src[i][1] * src[i][1];
 }

 static void ps_mul_pair_single_c(float (*dst)[2], float (*src0)[2], float *src1,
                                 int n)
 {
    int i;
    for (i = 0; i < n; i++) {
        dst[i][0] = src0[i][0] * src1[i];
        dst[i][1] = src0[i][1] * src1[i];
    }
 }

 static void ps_hybrid_analysis_c(float (*out)[2], float (*in)[2],
                                 const float (*filter)[8][2],
                                 int stride, int n)
 {
    int i, j;

    for (i = 0; i < n; i++) {
        float sum_re = filter[i][6][0] * in[6][0];
        float sum_im = filter[i][6][0] * in[6][1];

        for (j = 0; j < 6; j++) {
            float in0_re = in[j][0];
            float in0_im = in[j][1];
            float in1_re = in[12-j][0];
            float in1_im = in[12-j][1];
            sum_re += filter[i][j][0] * (in0_re + in1_re) -
                      filter[i][j][1] * (in0_im - in1_im);
            sum_im += filter[i][j][0] * (in0_im + in1_im) +
                      filter[i][j][1] * (in0_re - in1_re);
        }
        out[i * stride][0] = sum_re;
        out[i * stride][1] = sum_im;
    }
 }

 static void ps_hybrid_analysis_ileave_c(float (*out)[32][2], float L[2][38][64],
                                        int i, int len)
 {
    int j;

    for (; i < 64; i++) {
        for (j = 0; j < len; j++) {
            out[i][j][0] = L[0][j][i];
            out[i][j][1] = L[1][j][i];
        }
    }
 }

 static void ps_hybrid_synthesis_deint_c(float out[2][38][64],
                                        float (*in)[32][2],
                                        int i, int len)
 {
    int n;

    for (; i < 64; i++) {
        for (n = 0; n < len; n++) {
            out[0][n][i] = in[i][n][0];
            out[1][n][i] = in[i][n][1];
        }
    }
 }

 static void ps_decorrelate_c(float (*out)[2], float (*delay)[2],
                             float (*ap_delay)[PS_QMF_TIME_SLOTS + PS_MAX_AP_DELAY][2],
                             const float phi_fract[2], float (*Q_fract)[2],
                             const float *transient_gain,
                             float g_decay_slope,
                             int len)
 {
    static const float a[] = { 0.65143905753106f,
                               0.56471812200776f,
                               0.48954165955695f };
    float ag[PS_AP_LINKS];
    int m, n;

    for (m = 0; m < PS_AP_LINKS; m++)
        ag[m] = a[m] * g_decay_slope;

    for (n = 0; n < len; n++) {
        float in_re = delay[n][0] * phi_fract[0] - delay[n][1] * phi_fract[1];
        float in_im = delay[n][0] * phi_fract[1] + delay[n][1] * phi_fract[0];
        for (m = 0; m < PS_AP_LINKS; m++) {
            float a_re                = ag[m] * in_re;
            float a_im                = ag[m] * in_im;
            float link_delay_re       = ap_delay[m][n+2-m][0];
            float link_delay_im       = ap_delay[m][n+2-m][1];
            float fractional_delay_re = Q_fract[m][0];
            float fractional_delay_im = Q_fract[m][1];
            float apd_re = in_re;
            float apd_im = in_im;
            in_re = link_delay_re * fractional_delay_re -
                    link_delay_im * fractional_delay_im - a_re;
            in_im = link_delay_re * fractional_delay_im +
                    link_delay_im * fractional_delay_re - a_im;
            ap_delay[m][n+5][0] = apd_re + ag[m] * in_re;
            ap_delay[m][n+5][1] = apd_im + ag[m] * in_im;
        }
        out[n][0] = transient_gain[n] * in_re;
        out[n][1] = transient_gain[n] * in_im;
    }
 }

 static void ps_stereo_interpolate_c(float (*l)[2], float (*r)[2],
                                    float h[2][4], float h_step[2][4],
                                    int len)
 {
    float h0 = h[0][0];
    float h1 = h[0][1];
    float h2 = h[0][2];
    float h3 = h[0][3];
    float hs0 = h_step[0][0];
    float hs1 = h_step[0][1];
    float hs2 = h_step[0][2];
    float hs3 = h_step[0][3];
    int n;

    for (n = 0; n < len; n++) {
        //l is s, r is d
        float l_re = l[n][0];
        float l_im = l[n][1];
        float r_re = r[n][0];
        float r_im = r[n][1];
        h0 += hs0;
        h1 += hs1;
        h2 += hs2;
        h3 += hs3;
        l[n][0] = h0 * l_re + h2 * r_re;
        l[n][1] = h0 * l_im + h2 * r_im;
        r[n][0] = h1 * l_re + h3 * r_re;
        r[n][1] = h1 * l_im + h3 * r_im;
    }
 }

 static void ps_stereo_interpolate_ipdopd_c(float (*l)[2], float (*r)[2],
                                           float h[2][4], float h_step[2][4],
                                           int len)
 {
    float h00  = h[0][0],      h10  = h[1][0];
    float h01  = h[0][1],      h11  = h[1][1];
    float h02  = h[0][2],      h12  = h[1][2];
    float h03  = h[0][3],      h13  = h[1][3];
    float hs00 = h_step[0][0], hs10 = h_step[1][0];
    float hs01 = h_step[0][1], hs11 = h_step[1][1];
    float hs02 = h_step[0][2], hs12 = h_step[1][2];
    float hs03 = h_step[0][3], hs13 = h_step[1][3];
    int n;

    for (n = 0; n < len; n++) {
        //l is s, r is d
        float l_re = l[n][0];
        float l_im = l[n][1];
        float r_re = r[n][0];
        float r_im = r[n][1];
        h00 += hs00;
        h01 += hs01;
        h02 += hs02;
        h03 += hs03;
        h10 += hs10;
        h11 += hs11;
        h12 += hs12;
        h13 += hs13;

        l[n][0] = h00 * l_re + h02 * r_re - h10 * l_im - h12 * r_im;
        l[n][1] = h00 * l_im + h02 * r_im + h10 * l_re + h12 * r_re;
        r[n][0] = h01 * l_re + h03 * r_re - h11 * l_im - h13 * r_im;
        r[n][1] = h01 * l_im + h03 * r_im + h11 * l_re + h13 * r_re;
    }
 }

 av_cold void ff_psdsp_init(PSDSPContext *s)
 {
    s->add_squares            = ps_add_squares_c;
    s->mul_pair_single        = ps_mul_pair_single_c;
    s->hybrid_analysis        = ps_hybrid_analysis_c;
    s->hybrid_analysis_ileave = ps_hybrid_analysis_ileave_c;
    s->hybrid_synthesis_deint = ps_hybrid_synthesis_deint_c;
    s->decorrelate            = ps_decorrelate_c;
    s->stereo_interpolate[0]  = ps_stereo_interpolate_c;
    s->stereo_interpolate[1]  = ps_stereo_interpolate_ipdopd_c;

    if (ARCH_ARM)
        ff_psdsp_init_arm(s);
 }
--- a/libavcodec/aacpsdsp.h
+++ b/libavcodec/aacpsdsp.h
@@ -0,0 +1,53 @@
 /*
 * Copyright (c) 2012 Mans Rullgard
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

 #ifndef LIBAVCODEC_AACPSDSP_H
 #define LIBAVCODEC_AACPSDSP_H

 #define PS_QMF_TIME_SLOTS 32
 #define PS_AP_LINKS 3
 #define PS_MAX_AP_DELAY 5

 typedef struct PSDSPContext {
    void (*add_squares)(float *dst, const float (*src)[2], int n);
    void (*mul_pair_single)(float (*dst)[2], float (*src0)[2], float *src1,
                            int n);
    void (*hybrid_analysis)(float (*out)[2], float (*in)[2],
                            const float (*filter)[8][2],
                            int stride, int n);
    void (*hybrid_analysis_ileave)(float (*out)[32][2], float L[2][38][64],
                                   int i, int len);
    void (*hybrid_synthesis_deint)(float out[2][38][64], float (*in)[32][2],
                                   int i, int len);
    void (*decorrelate)(float (*out)[2], float (*delay)[2],
                        float (*ap_delay)[PS_QMF_TIME_SLOTS+PS_MAX_AP_DELAY][2],
                        const float phi_fract[2], float (*Q_fract)[2],
                        const float *transient_gain,
                        float g_decay_slope,
                        int len);
    void (*stereo_interpolate[2])(float (*l)[2], float (*r)[2],
                                  float h[2][4], float h_step[2][4],
                                  int len);
 } PSDSPContext;

 void ff_psdsp_init(PSDSPContext *s);
 void ff_psdsp_init_arm(PSDSPContext *s);

 #endif /* LIBAVCODEC_AACPSDSP_H */
--- a/libavcodec/arm/Makefile
+++ b/libavcodec/arm/Makefile
@@ -1,7 +1,8 @@
 OBJS-$(CONFIG_AC3DSP)                  += arm/ac3dsp_init_arm.o         \
                                          arm/ac3dsp_arm.o

 OBJS-$(CONFIG_AAC_DECODER)             += arm/sbrdsp_init_arm.o
 OBJS-$(CONFIG_AAC_DECODER)             += arm/sbrdsp_init_arm.o         \
                                          arm/aacpsdsp_init_arm.o

 OBJS-$(CONFIG_DCA_DECODER)             += arm/dcadsp_init_arm.o         \

@@ -59,7 +60,8 @@ NEON-OBJS-$(CONFIG_H264PRED)           += arm/h264pred_neon.o           \

 NEON-OBJS-$(CONFIG_AC3DSP)             += arm/ac3dsp_neon.o

 NEON-OBJS-$(CONFIG_AAC_DECODER)        += arm/sbrdsp_neon.o
 NEON-OBJS-$(CONFIG_AAC_DECODER)        += arm/sbrdsp_neon.o             \
                                          arm/aacpsdsp_neon.o

 NEON-OBJS-$(CONFIG_DCA_DECODER)        += arm/dcadsp_neon.o             \
                                          arm/synth_filter_neon.o       \
--- a/libavcodec/arm/aacpsdsp_init_arm.c
+++ b/libavcodec/arm/aacpsdsp_init_arm.c
@@ -0,0 +1,57 @@
 /*
 * Copyright (c) 2012 Mans Rullgard
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

 #include "config.h"

 #include "libavutil/arm/cpu.h"
 #include "libavutil/attributes.h"
 #include "libavcodec/aacpsdsp.h"

 void ff_ps_add_squares_neon(float *dst, const float (*src)[2], int n);
 void ff_ps_mul_pair_single_neon(float (*dst)[2], float (*src0)[2],
                                float *src1, int n);
 void ff_ps_hybrid_analysis_neon(float (*out)[2], float (*in)[2],
                                const float (*filter)[8][2],
                                int stride, int n);
 void ff_ps_hybrid_analysis_ileave_neon(float (*out)[32][2], float L[2][38][64],
                                       int i, int len);
 void ff_ps_hybrid_synthesis_deint_neon(float out[2][38][64], float (*in)[32][2],
                                       int i, int len);
 void ff_ps_decorrelate_neon(float (*out)[2], float (*delay)[2],
                            float (*ap_delay)[PS_QMF_TIME_SLOTS+PS_MAX_AP_DELAY][2],
                            const float phi_fract[2], float (*Q_fract)[2],
                            const float *transient_gain, float g_decay_slope,
                            int len);
 void ff_ps_stereo_interpolate_neon(float (*l)[2], float (*r)[2],
                                   float h[2][4], float h_step[2][4],
                                   int len);

 av_cold void ff_psdsp_init_arm(PSDSPContext *s)
 {
    int cpu_flags = av_get_cpu_flags();

    if (have_neon(cpu_flags)) {
        s->add_squares            = ff_ps_add_squares_neon;
        s->mul_pair_single        = ff_ps_mul_pair_single_neon;
        s->hybrid_synthesis_deint = ff_ps_hybrid_synthesis_deint_neon;
        s->hybrid_analysis        = ff_ps_hybrid_analysis_neon;
        s->stereo_interpolate[0]  = ff_ps_stereo_interpolate_neon;
    }
 }
--- a/libavcodec/arm/aacpsdsp_neon.S
+++ b/libavcodec/arm/aacpsdsp_neon.S
@@ -0,0 +1,272 @@
 /*
 * Copyright (c) 2012 Mans Rullgard
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

 #include "asm.S"

 function ff_ps_add_squares_neon, export=1
        mov             r3,  r0
        sub             r2,  r2,  #4
        vld1.32         {q0},     [r1,:128]!
        vmul.f32        q0,  q0,  q0
        vld1.32         {q2},     [r1,:128]!
        vmul.f32        q2,  q2,  q2
        vld1.32         {q1},     [r0,:128]!
 1:
        vpadd.f32       d6,  d0,  d1
        vld1.32         {q0},     [r1,:128]!
        vpadd.f32       d7,  d4,  d5
        vmul.f32        q0,  q0,  q0
        vld1.32         {q2},     [r1,:128]!
        vadd.f32        q3,  q1,  q3
        vld1.32         {q1},     [r0,:128]!
        vmul.f32        q2,  q2,  q2
        vst1.32         {q3},     [r3,:128]!
        subs            r2,  r2,  #4
        bgt             1b
        vpadd.f32       d6,  d0,  d1
        vpadd.f32       d7,  d4,  d5
        vadd.f32        q1,  q1,  q3
        vst1.32         {q1},     [r3,:128]!
        bx              lr
 endfunc

 function ff_ps_mul_pair_single_neon, export=1
        sub             r3,  r3,  #4
        tst             r1,  #8
        bne             2f
        vld1.32         {q0},     [r1,:128]!
 1:
        vld1.32         {q3},     [r2,:128]!
        vmul.f32        d4,  d0,  d6[0]
        vmul.f32        d5,  d1,  d6[1]
        vld1.32         {q1},     [r1,:128]!
        vmul.f32        d6,  d2,  d7[0]
        vmul.f32        d7,  d3,  d7[1]
        vld1.32         {q0},     [r1,:128]!
        vst1.32         {q2,q3},  [r0,:128]!
        subs            r3,  r3,  #4
        bgt             1b
        vld1.32         {q3},     [r2,:128]!
        vmul.f32        d4,  d0,  d6[0]
        vmul.f32        d5,  d1,  d6[1]
        vld1.32         {q1},     [r1,:128]!
        vmul.f32        d6,  d2,  d7[0]
        vmul.f32        d7,  d3,  d7[1]
        vst1.32         {q2,q3},  [r0,:128]!
        bx              lr
 2:
        vld1.32         {d0},     [r1,:64]!
        vld1.32         {d1,d2},  [r1,:128]!
 1:
        vld1.32         {q3},     [r2,:128]!
        vmul.f32        d4,  d0,  d6[0]
        vmul.f32        d5,  d1,  d6[1]
        vld1.32         {d0,d1},  [r1,:128]!
        vmul.f32        d6,  d2,  d7[0]
        vmul.f32        d7,  d0,  d7[1]
        vmov            d0,  d1
        vld1.32         {d1,d2},  [r1,:128]!
        vst1.32         {q2,q3},  [r0,:128]!
        subs            r3,  r3,  #4
        bgt             1b
        vld1.32         {q3},     [r2,:128]!
        vmul.f32        d4,  d0,  d6[0]
        vmul.f32        d5,  d1,  d6[1]
        vld1.32         {d0},     [r1,:64]!
        vmul.f32        d6,  d2,  d7[0]
        vmul.f32        d7,  d0,  d7[1]
        vst1.32         {q2,q3},  [r0,:128]!
        bx              lr
 endfunc

 function ff_ps_hybrid_synthesis_deint_neon, export=1
        push            {r4-r8,lr}
        add             r0,  r0,  r2,  lsl #2
        add             r1,  r1,  r2,  lsl #5+1+2
        rsb             r2,  r2,  #64
        mov             r5,  #64*4
        mov             lr,  r0
        add             r4,  r0,  #38*64*4
        mov             r12, r3
 2:
        vld1.32         {d0,d1},  [r1,:128]!
        vst1.32         {d0[0]},  [lr,:32], r5
        vst1.32         {d0[1]},  [r4,:32], r5
        vst1.32         {d1[0]},  [lr,:32], r5
        vst1.32         {d1[1]},  [r4,:32], r5
        subs            r12, r12, #2
        bgt             2b
        add             r0,  r0,  #4
        sub             r2,  r2,  #1
        tst             r2,  #2
        bne             6f
 1:
        mov             lr,  r0
        add             r4,  r0,  #38*64*4
        add             r6,  r1,  #  32*2*4
        add             r7,  r1,  #2*32*2*4
        add             r8,  r1,  #3*32*2*4
        mov             r12, r3
 2:
        vld1.32         {d0,d1},  [r1,:128]!
        vld1.32         {d2,d3},  [r6,:128]!
        vld1.32         {d4,d5},  [r7,:128]!
        vld1.32         {d6,d7},  [r8,:128]!
        vst4.32         {d0[0],d2[0],d4[0],d6[0]}, [lr,:128], r5
        vst4.32         {d0[1],d2[1],d4[1],d6[1]}, [r4,:128], r5
        vst4.32         {d1[0],d3[0],d5[0],d7[0]}, [lr,:128], r5
        vst4.32         {d1[1],d3[1],d5[1],d7[1]}, [r4,:128], r5
        subs            r12, r12, #2
        bgt             2b
        add             r0,  r0,  #16
        add             r1,  r1,  #3*32*2*4
        subs            r2,  r2,  #4
        bgt             1b
        pop             {r4-r8,pc}
 6:
        mov             lr,  r0
        add             r4,  r0,  #38*64*4
        add             r6,  r1,  #32*2*4
        mov             r12, r3
 2:
        vld1.32         {d0,d1},  [r1,:128]!
        vld1.32         {d2,d3},  [r6,:128]!
        vst2.32         {d0[0],d2[0]}, [lr,:64], r5
        vst2.32         {d0[1],d2[1]}, [r4,:64], r5
        vst2.32         {d1[0],d3[0]}, [lr,:64], r5
        vst2.32         {d1[1],d3[1]}, [r4,:64], r5
        subs            r12, r12, #2
        bgt             2b
        add             r0,  r0,  #8
        add             r1,  r1,  #32*2*4
        sub             r2,  r2,  #2
        b               1b
 endfunc

 function ff_ps_hybrid_analysis_neon, export=1
        vldm            r1,  {d19-d31}
        ldr             r12, [sp]
        lsl             r3,  r3,  #3
        vadd.f32        d16, d19, d31
        vadd.f32        d17, d20, d30
        vsub.f32        d18, d19, d31
        vsub.f32        d19, d20, d30
        vsub.f32        d0,  d21, d29
        vsub.f32        d1,  d22, d28
        vadd.f32        d2,  d21, d29
        vadd.f32        d3,  d22, d28
        vadd.f32        d20, d23, d27
        vadd.f32        d21, d24, d26
        vsub.f32        d22, d23, d27
        vsub.f32        d23, d24, d26
        vmov.i32        d6,  #1<<31
        vmov.i32        d7,  #0
        vmov.f32        q14, #0.0
        vmov.f32        q15, #0.0
        vtrn.32         d6,  d7
        vrev64.32       q9,  q9
        vrev64.32       q0,  q0
        vrev64.32       q11, q11
        veor            q9,  q9,  q3
        veor            q0,  q0,  q3
        veor            q11, q11, q3
        vld1.32         {q13},    [r2,:128]!
        vtrn.32         q8,  q9
        vtrn.32         q1,  q0
        vtrn.32         q10, q11
        sub             r12, r12, #1
        vmla.f32        q14, q8,  q13
        vld1.32         {q2},     [r2,:128]!
        vmla.f32        q15, q9,  q13
 1:
        vmla.f32        q14, q1,  q2
        vld1.32         {q13},    [r2,:128]!
        vmla.f32        q15, q0,  q2
        vmla.f32        q14, q10, q13
        vld1.32         {q2},     [r2,:128]!
        vmla.f32        q15, q11, q13
        vld1.32         {q13},    [r2,:128]!
        vadd.f32        d6,  d28, d29
        vadd.f32        d7,  d30, d31
        vmov.f32        q14, #0.0
        vmov.f32        q15, #0.0
        vmla.f32        q14, q8,  q13
        vpadd.f32       d6,  d6,  d7
        vmla.f32        q15, q9,  q13
        vmla.f32        d6,  d25, d4[0]
        vld1.32         {q2},     [r2,:128]!
        vst1.32         {d6},     [r0,:64], r3
        subs            r12, r12, #1
        bgt             1b
        vmla.f32        q14, q1,  q2
        vld1.32         {q13},    [r2,:128]!
        vmla.f32        q15, q0,  q2
        vmla.f32        q14, q10, q13
        vld1.32         {q2},     [r2,:128]!
        vmla.f32        q15, q11, q13
        vadd.f32        d6,  d28, d29
        vadd.f32        d7,  d30, d31
        vpadd.f32       d6,  d6,  d7
        vmla.f32        d6,  d25, d4[0]
        vst1.32         {d6},     [r0,:64], r3
        bx              lr
 endfunc

 function ff_ps_stereo_interpolate_neon, export=1
        vld1.32         {q0},     [r2]
        vld1.32         {q14},    [r3]
        vadd.f32        q15, q14, q14
        mov             r2,  r0
        mov             r3,  r1
        ldr             r12, [sp]
        vadd.f32        q1,  q0,  q14
        vadd.f32        q0,  q0,  q15
        vld1.32         {q2},     [r0,:64]!
        vld1.32         {q3},     [r1,:64]!
        subs            r12, r12, #1
        beq             2f
 1:
        vmul.f32        d16, d4,  d2[0]
        vmul.f32        d17, d5,  d0[0]
        vmul.f32        d18, d4,  d2[1]
        vmul.f32        d19, d5,  d0[1]
        vmla.f32        d16, d6,  d3[0]
        vmla.f32        d17, d7,  d1[0]
        vmla.f32        d18, d6,  d3[1]
        vmla.f32        d19, d7,  d1[1]
        vadd.f32        q1,  q1,  q15
        vadd.f32        q0,  q0,  q15
        vld1.32         {q2},     [r0,:64]!
        vld1.32         {q3},     [r1,:64]!
        vst1.32         {q8},     [r2,:64]!
        vst1.32         {q9},     [r3,:64]!
        subs            r12, r12, #2
        bgt             1b
        it              lt
        bxlt            lr
 2:
        vmul.f32        d16, d4,  d2[0]
        vmul.f32        d18, d4,  d2[1]
        vmla.f32        d16, d6,  d3[0]
        vmla.f32        d18, d6,  d3[1]
        vst1.32         {d16},    [r2,:64]!
        vst1.32         {d18},    [r3,:64]!
        bx              lr
 endfunc
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -2722,7 +2722,7 @@ typedef struct AVCodecContext {

    /**
     * Set by the client if its custom get_buffer() callback can be called
     * from another thread, which allows faster multithreaded decoding.
     * synchronously from another thread, which allows faster multithreaded decoding.
     * draw_horiz_band() will be called from other threads regardless of this setting.
     * Ignored if the default get_buffer() is used.
     * - encoding: Set by user.
--- a/libavcodec/dfa.c
+++ b/libavcodec/dfa.c
@@ -49,7 +49,7 @@ static int decode_copy(GetByteContext *gb, uint8_t *frame, int width, int height
    const int size = width * height;

    if (bytestream2_get_buffer(gb, frame, size) != size)
        return -1;
        return AVERROR_INVALIDDATA;
    return 0;
 }

@@ -66,23 +66,23 @@ static int decode_tsw1(GetByteContext *gb, uint8_t *frame, int width, int height
    if (segments == 0 && offset == frame_end - frame)
        return 0; // skip frame
    if (frame_end - frame <= offset)
        return -1;
        return AVERROR_INVALIDDATA;
    frame += offset;
    while (segments--) {
        if (bytestream2_get_bytes_left(gb) < 2)
            return -1;
            return AVERROR_INVALIDDATA;
        if (mask == 0x10000) {
            bitbuf = bytestream2_get_le16u(gb);
            mask = 1;
        }
        if (frame_end - frame < 2)
            return -1;
            return AVERROR_INVALIDDATA;
        if (bitbuf & mask) {
            v = bytestream2_get_le16(gb);
            offset = (v & 0x1FFF) << 1;
            count = ((v >> 13) + 2) << 1;
            if (frame - frame_start < offset || frame_end - frame < count)
                return -1;
                return AVERROR_INVALIDDATA;
            av_memcpy_backptr(frame, offset, count);
            frame += count;
        } else {
@@ -105,19 +105,19 @@ static int decode_dsw1(GetByteContext *gb, uint8_t *frame, int width, int height
    segments = bytestream2_get_le16(gb);
    while (segments--) {
        if (bytestream2_get_bytes_left(gb) < 2)
            return -1;
            return AVERROR_INVALIDDATA;
        if (mask == 0x10000) {
            bitbuf = bytestream2_get_le16u(gb);
            mask = 1;
        }
        if (frame_end - frame < 2)
            return -1;
            return AVERROR_INVALIDDATA;
        if (bitbuf & mask) {
            v = bytestream2_get_le16(gb);
            offset = (v & 0x1FFF) << 1;
            count = ((v >> 13) + 2) << 1;
            if (frame - frame_start < offset || frame_end - frame < count)
                return -1;
                return AVERROR_INVALIDDATA;
            // can't use av_memcpy_backptr() since it can overwrite following pixels
            for (v = 0; v < count; v++)
                frame[v] = frame[v - offset];
@@ -144,19 +144,19 @@ static int decode_dds1(GetByteContext *gb, uint8_t *frame, int width, int height
    segments = bytestream2_get_le16(gb);
    while (segments--) {
        if (bytestream2_get_bytes_left(gb) < 2)
            return -1;
            return AVERROR_INVALIDDATA;
        if (mask == 0x10000) {
            bitbuf = bytestream2_get_le16u(gb);
            mask = 1;
        }
        if (frame_end - frame < width + 2)
            return -1;
            return AVERROR_INVALIDDATA;
        if (bitbuf & mask) {
            v = bytestream2_get_le16(gb);
            offset = (v & 0x1FFF) << 2;
            count = ((v >> 13) + 2) << 1;
            if (frame - frame_start < offset || frame_end - frame < count*2 + width)
                return -1;
                return AVERROR_INVALIDDATA;
            for (i = 0; i < count; i++) {
                frame[0] = frame[1] =
                frame[width] = frame[width + 1] = frame[-offset];
@@ -166,6 +166,8 @@ static int decode_dds1(GetByteContext *gb, uint8_t *frame, int width, int height
        } else if (bitbuf & (mask << 1)) {
            frame += bytestream2_get_le16(gb) * 2;
        } else {
            if (frame_end - frame < width + 2)
                return AVERROR_INVALIDDATA;
            frame[0] = frame[1] =
            frame[width] = frame[width + 1] =  bytestream2_get_byte(gb);
            frame += 2;
@@ -186,32 +188,32 @@ static int decode_bdlt(GetByteContext *gb, uint8_t *frame, int width, int height

    count = bytestream2_get_le16(gb);
    if (count >= height)
        return -1;
        return AVERROR_INVALIDDATA;
    frame += width * count;
    lines = bytestream2_get_le16(gb);
    if (count + lines > height)
        return -1;
        return AVERROR_INVALIDDATA;

    while (lines--) {
        if (bytestream2_get_bytes_left(gb) < 1)
            return -1;
            return AVERROR_INVALIDDATA;
        line_ptr = frame;
        frame += width;
        segments = bytestream2_get_byteu(gb);
        while (segments--) {
            if (frame - line_ptr <= bytestream2_peek_byte(gb))
                return -1;
                return AVERROR_INVALIDDATA;
            line_ptr += bytestream2_get_byte(gb);
            count = (int8_t)bytestream2_get_byte(gb);
            if (count >= 0) {
                if (frame - line_ptr < count)
                    return -1;
                    return AVERROR_INVALIDDATA;
                if (bytestream2_get_buffer(gb, line_ptr, count) != count)
                    return -1;
                    return AVERROR_INVALIDDATA;
            } else {
                count = -count;
                if (frame - line_ptr < count)
                    return -1;
                    return AVERROR_INVALIDDATA;
                memset(line_ptr, bytestream2_get_byte(gb), count);
            }
            line_ptr += count;
@@ -226,20 +228,23 @@ static int decode_wdlt(GetByteContext *gb, uint8_t *frame, int width, int height
    const uint8_t *frame_end   = frame + width * height;
    uint8_t *line_ptr;
    int count, i, v, lines, segments;
    int y = 0;

    lines = bytestream2_get_le16(gb);
    if (lines > height)
        return -1;
        return AVERROR_INVALIDDATA;

    while (lines--) {
        if (bytestream2_get_bytes_left(gb) < 2)
            return -1;
            return AVERROR_INVALIDDATA;
        segments = bytestream2_get_le16u(gb);
        while ((segments & 0xC000) == 0xC000) {
            unsigned skip_lines = -(int16_t)segments;
            unsigned delta = -((int16_t)segments * width);
            if (frame_end - frame <= delta)
                return -1;
            if (frame_end - frame <= delta || y + lines + skip_lines > height)
                return AVERROR_INVALIDDATA;
            frame    += delta;
            y        += skip_lines;
            segments = bytestream2_get_le16(gb);
        }
        if (frame_end <= frame)
@@ -250,21 +255,22 @@ static int decode_wdlt(GetByteContext *gb, uint8_t *frame, int width, int height
        }
        line_ptr = frame;
        frame += width;
        y++;
        while (segments--) {
            if (frame - line_ptr <= bytestream2_peek_byte(gb))
                return -1;
                return AVERROR_INVALIDDATA;
            line_ptr += bytestream2_get_byte(gb);
            count = (int8_t)bytestream2_get_byte(gb);
            if (count >= 0) {
                if (frame - line_ptr < count * 2)
                    return -1;
                    return AVERROR_INVALIDDATA;
                if (bytestream2_get_buffer(gb, line_ptr, count * 2) != count * 2)
                    return -1;
                    return AVERROR_INVALIDDATA;
                line_ptr += count * 2;
            } else {
                count = -count;
                if (frame - line_ptr < count * 2)
                    return -1;
                    return AVERROR_INVALIDDATA;
                v = bytestream2_get_le16(gb);
                for (i = 0; i < count; i++)
                    bytestream_put_le16(&line_ptr, v);
@@ -277,7 +283,7 @@ static int decode_wdlt(GetByteContext *gb, uint8_t *frame, int width, int height

 static int decode_unk6(GetByteContext *gb, uint8_t *frame, int width, int height)
 {
    return -1;
    return AVERROR_PATCHWELCOME;
 }

 static int decode_blck(GetByteContext *gb, uint8_t *frame, int width, int height)
@@ -336,7 +342,7 @@ static int dfa_decode_frame(AVCodecContext *avctx,
            if (decoder[chunk_type - 2](&gb, s->frame_buf, avctx->width, avctx->height)) {
                av_log(avctx, AV_LOG_ERROR, "Error decoding %s chunk\n",
                       chunk_name[chunk_type - 2]);
                return -1;
                return AVERROR_INVALIDDATA;
            }
        } else {
            av_log(avctx, AV_LOG_WARNING, "Ignoring unknown chunk type %d\n",
--- a/libavcodec/dvdec.c
+++ b/libavcodec/dvdec.c
@@ -313,7 +313,7 @@ static int dvvideo_decode_frame(AVCodecContext *avctx,
                                 void *data, int *data_size,
                                 AVPacket *avpkt)
 {
    const uint8_t *buf = avpkt->data;
    uint8_t *buf = avpkt->data;
    int buf_size = avpkt->size;
    DVVideoContext *s = avctx->priv_data;
    const uint8_t* vsc_pack;
--- a/libavcodec/eatgv.c
+++ b/libavcodec/eatgv.c
@@ -138,7 +138,6 @@ static int unpack(const uint8_t *src, const uint8_t *src_end, unsigned char *dst
 * @return 0 on success, -1 on critical buffer underflow
 */
 static int tgv_decode_inter(TgvContext * s, const uint8_t *buf, const uint8_t *buf_end){
    unsigned last_frame_size = s->avctx->height*s->last_frame.linesize[0];
    int num_mvs;
    int num_blocks_raw;
    int num_blocks_packed;
@@ -158,7 +157,8 @@ static int tgv_decode_inter(TgvContext * s, const uint8_t *buf, const uint8_t *b
    buf += 12;

    if (vector_bits > MIN_CACHE_BITS || !vector_bits) {
        av_log(s->avctx, AV_LOG_ERROR, "vector_bits %d invalid\n", vector_bits);
        av_log(s->avctx, AV_LOG_ERROR,
               "Invalid value for motion vector bits: %d\n", vector_bits);
        return AVERROR_INVALIDDATA;
    }

@@ -212,14 +212,17 @@ static int tgv_decode_inter(TgvContext * s, const uint8_t *buf, const uint8_t *b
        int src_stride;

        if (vector < num_mvs) {
            unsigned offset =
                (y*4 + s->mv_codebook[vector][1])*s->last_frame.linesize[0] +
                 x*4 + s->mv_codebook[vector][0];
            int mx = x * 4 + s->mv_codebook[vector][0];
            int my = y * 4 + s->mv_codebook[vector][1];

            src_stride = s->last_frame.linesize[0];
            if (offset >= last_frame_size - (3*src_stride+3))
            if (   mx < 0 || mx + 4 > s->avctx->width
                || my < 0 || my + 4 > s->avctx->height) {
                av_log(s->avctx, AV_LOG_ERROR, "MV %d %d out of picture\n", mx, my);
                continue;
            src = s->last_frame.data[0] + offset;
            }

            src = s->last_frame.data[0] + mx + my * s->last_frame.linesize[0];
            src_stride = s->last_frame.linesize[0];
        }else{
            int offset = vector - num_mvs;
            if (offset<num_blocks_raw)
--- a/libavcodec/lagarith.c
+++ b/libavcodec/lagarith.c
@@ -467,7 +467,7 @@ static int lag_decode_frame(AVCodecContext *avctx,
    AVFrame *const p = &l->picture;
    uint8_t frametype = 0;
    uint32_t offset_gu = 0, offset_bv = 0, offset_ry = 9;
    int offs[4];
    uint32_t offs[4];
    uint8_t *srcs[4], *dst;
    int i, j, planes = 3;

--- a/libavcodec/wmalosslessdec.c
+++ b/libavcodec/wmalosslessdec.c
@@ -22,6 +22,7 @@
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

 #include "libavutil/attributes.h"
 #include "avcodec.h"
 #include "internal.h"
 #include "get_bits.h"
@@ -1045,7 +1046,7 @@ static int decode_frame(WmallDecodeCtx *s)
    /* no idea what these are for, might be the number of samples
       that need to be skipped at the beginning or end of a stream */
    if (get_bits1(gb)) {
        int skip;
        int av_unused skip;

        /* usually true for the first frame */
        if (get_bits1(gb)) {
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -1,15 +1,14 @@
 include $(SUBDIR)../config.mak

 NAME = avfilter
 FFLIBS = avutil
 FFLIBS = avutil swscale

 FFLIBS-$(CONFIG_ACONVERT_FILTER)             += swresample
 FFLIBS-$(CONFIG_AMOVIE_FILTER)               += avformat avcodec
 FFLIBS-$(CONFIG_ARESAMPLE_FILTER)            += swresample
 FFLIBS-$(CONFIG_MOVIE_FILTER)                += avformat avcodec
 FFLIBS-$(CONFIG_PAN_FILTER)                  += swresample
 FFLIBS-$(CONFIG_REMOVELOGO_FILTER)           += swscale avformat avcodec
 FFLIBS-$(CONFIG_SCALE_FILTER)                += swscale
 FFLIBS-$(CONFIG_REMOVELOGO_FILTER)           += avformat avcodec
 FFLIBS-$(CONFIG_MP_FILTER)                   += avcodec postproc

 HEADERS = asrc_abuffer.h avcodec.h avfilter.h avfiltergraph.h buffersink.h buffersrc.h version.h vsrc_buffer.h
@@ -24,6 +23,7 @@ OBJS = allfilters.o                                                     \
       graphparser.o                                                    \
       src_buffer.o                                                     \
       transform.o                                                      \
       vf_scale.o                                                       \

 OBJS-$(CONFIG_AVCODEC)                       += avcodec.o
 OBJS-$(CONFIG_AVFORMAT)                      += lavfutils.o
@@ -83,7 +83,6 @@ OBJS-$(CONFIG_OVERLAY_FILTER)                += vf_overlay.o
 OBJS-$(CONFIG_PAD_FILTER)                    += vf_pad.o
 OBJS-$(CONFIG_PIXDESCTEST_FILTER)            += vf_pixdesctest.o
 OBJS-$(CONFIG_REMOVELOGO_FILTER)             += bbox.o lswsutils.o lavfutils.o vf_removelogo.o
 OBJS-$(CONFIG_SCALE_FILTER)                  += vf_scale.o
 OBJS-$(CONFIG_SELECT_FILTER)                 += vf_select.o
 OBJS-$(CONFIG_SETDAR_FILTER)                 += vf_aspect.o
 OBJS-$(CONFIG_SETFIELD_FILTER)               += vf_setfield.o
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -89,7 +89,6 @@ void avfilter_register_all(void)
    REGISTER_FILTER (PAD,         pad,         vf);
    REGISTER_FILTER (PIXDESCTEST, pixdesctest, vf);
    REGISTER_FILTER (REMOVELOGO,  removelogo,  vf);
    REGISTER_FILTER (SCALE,       scale,       vf);
    REGISTER_FILTER (SELECT,      select,      vf);
    REGISTER_FILTER (SETDAR,      setdar,      vf);
    REGISTER_FILTER (SETFIELD,    setfield,    vf);
@@ -123,9 +122,14 @@ void avfilter_register_all(void)
    REGISTER_FILTER (BUFFERSINK,  buffersink,  vsink);
    REGISTER_FILTER (NULLSINK,    nullsink,    vsink);

    /* vsrc_buffer is a part of public API => registered unconditionally */
    /* those filters are part of public or internal API => registered
     * unconditionally */
    {
        extern AVFilter avfilter_vsrc_buffer;
        avfilter_register(&avfilter_vsrc_buffer);
    }
    {
        extern AVFilter avfilter_vf_scale;
        avfilter_register(&avfilter_vf_scale);
    }
 }
--- a/libavfilter/formats.c
+++ b/libavfilter/formats.c
@@ -49,7 +49,7 @@ AVFilterFormats *avfilter_merge_formats(AVFilterFormats *a, AVFilterFormats *b)

    if (a == b) return a;

    ret = av_mallocz(sizeof(AVFilterFormats));
    ret = av_mallocz(sizeof(*ret));

    /* merge list of formats */
    ret->formats = av_malloc(sizeof(*ret->formats) * FFMIN(a->format_count,
@@ -74,7 +74,7 @@ AVFilterFormats *avfilter_merge_formats(AVFilterFormats *a, AVFilterFormats *b)
        return NULL;
    }

    ret->refs = av_malloc(sizeof(AVFilterFormats**)*(a->refcount+b->refcount));
    ret->refs = av_malloc(sizeof(*ret->refs) * (a->refcount + b->refcount));

    merge_ref(ret, a);
    merge_ref(ret, b);
@@ -158,7 +158,7 @@ int avfilter_add_format(AVFilterFormats **avff, int64_t fmt)
 {
    int64_t *fmts;

    if (!(*avff) && !(*avff = av_mallocz(sizeof(AVFilterFormats))))
    if (!(*avff) && !(*avff = av_mallocz(sizeof(**avff))))
        return AVERROR(ENOMEM);

    fmts = av_realloc((*avff)->formats,
@@ -217,7 +217,7 @@ AVFilterFormats *avfilter_make_all_packing_formats(void)
 void avfilter_formats_ref(AVFilterFormats *f, AVFilterFormats **ref)
 {
    *ref = f;
    f->refs = av_realloc(f->refs, sizeof(AVFilterFormats**) * ++f->refcount);
    f->refs = av_realloc(f->refs, sizeof(*f->refs) * ++f->refcount);
    f->refs[f->refcount-1] = ref;
 }

@@ -239,9 +239,9 @@ void avfilter_formats_unref(AVFilterFormats **ref)

    idx = find_ref_index(ref);

    if (idx >= 0)
        memmove((*ref)->refs + idx, (*ref)->refs + idx+1,
            sizeof(AVFilterFormats**) * ((*ref)->refcount-idx-1));
    if(idx >= 0)
        memmove((*ref)->refs + idx, (*ref)->refs + idx + 1,
            sizeof(*(*ref)->refs) * ((*ref)->refcount - idx - 1));

    if (!--(*ref)->refcount) {
        av_free((*ref)->formats);
--- a/libavfilter/vf_split.c
+++ b/libavfilter/vf_split.c
@@ -25,24 +25,67 @@

 #include "avfilter.h"

 static int split_init(AVFilterContext *ctx, const char *args, void *opaque)
 {
    int i, nb_outputs = 2;

    if (args) {
        nb_outputs = strtol(args, NULL, 0);
        if (nb_outputs <= 0) {
            av_log(ctx, AV_LOG_ERROR, "Invalid number of outputs specified: %d.\n",
                   nb_outputs);
            return AVERROR(EINVAL);
        }
    }

    for (i = 0; i < nb_outputs; i++) {
        char name[32];
        AVFilterPad pad = { 0 };

        snprintf(name, sizeof(name), "output%d", i);
        pad.type = AVMEDIA_TYPE_VIDEO;
        pad.name = av_strdup(name);

        avfilter_insert_outpad(ctx, i, &pad);
    }

    return 0;
 }

 static void split_uninit(AVFilterContext *ctx)
 {
    int i;

    for (i = 0; i < ctx->output_count; i++)
        av_freep(&ctx->output_pads[i].name);
 }

 static void start_frame(AVFilterLink *inlink, AVFilterBufferRef *picref)
 {
    avfilter_start_frame(inlink->dst->outputs[0],
                         avfilter_ref_buffer(picref, ~AV_PERM_WRITE));
    avfilter_start_frame(inlink->dst->outputs[1],
                         avfilter_ref_buffer(picref, ~AV_PERM_WRITE));
    AVFilterContext *ctx = inlink->dst;
    int i;

    for (i = 0; i < ctx->output_count; i++)
        avfilter_start_frame(ctx->outputs[i],
                             avfilter_ref_buffer(picref, ~AV_PERM_WRITE));
 }

 static void draw_slice(AVFilterLink *inlink, int y, int h, int slice_dir)
 {
    avfilter_draw_slice(inlink->dst->outputs[0], y, h, slice_dir);
    avfilter_draw_slice(inlink->dst->outputs[1], y, h, slice_dir);
    AVFilterContext *ctx = inlink->dst;
    int i;

    for (i = 0; i < ctx->output_count; i++)
        avfilter_draw_slice(ctx->outputs[i], y, h, slice_dir);
 }

 static void end_frame(AVFilterLink *inlink)
 {
    avfilter_end_frame(inlink->dst->outputs[0]);
    avfilter_end_frame(inlink->dst->outputs[1]);
    AVFilterContext *ctx = inlink->dst;
    int i;

    for (i = 0; i < ctx->output_count; i++)
        avfilter_end_frame(ctx->outputs[i]);

    avfilter_unref_buffer(inlink->cur_buf);
 }
@@ -51,6 +94,9 @@ AVFilter avfilter_vf_split = {
    .name      = "split",
    .description = NULL_IF_CONFIG_SMALL("Pass on the input to two outputs."),

    .init   = split_init,
    .uninit = split_uninit,

    .inputs    = (const AVFilterPad[]) {{ .name      = "default",
                                    .type            = AVMEDIA_TYPE_VIDEO,
                                    .get_video_buffer= avfilter_null_get_video_buffer,
@@ -58,9 +104,5 @@ AVFilter avfilter_vf_split = {
                                    .draw_slice      = draw_slice,
                                    .end_frame       = end_frame, },
                                  { .name = NULL}},
    .outputs   = (const AVFilterPad[]) {{ .name      = "output1",
                                    .type            = AVMEDIA_TYPE_VIDEO, },
                                  { .name            = "output2",
                                    .type            = AVMEDIA_TYPE_VIDEO, },
                                  { .name = NULL}},
    .outputs   = (AVFilterPad[]) {{ .name = NULL}},
 };
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -25,6 +25,7 @@
 //#define DEBUG
 //#define MOV_EXPORT_ALL_METADATA

 #include "libavutil/attributes.h"
 #include "libavutil/audioconvert.h"
 #include "libavutil/intreadwrite.h"
 #include "libavutil/intfloat.h"
@@ -627,8 +628,9 @@ static int mov_read_dec3(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 static int mov_read_chan(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 {
    AVStream *st;
    uint8_t version;
    uint32_t flags, layout_tag, bitmap, num_descr, label_mask;
    uint8_t av_unused version;
    uint32_t av_unused flags;
    uint32_t layout_tag, bitmap, num_descr, label_mask;
    int i;

    if (c->fc->nb_streams < 1)
--- a/libavformat/mpegenc.c
+++ b/libavformat/mpegenc.c
@@ -835,6 +835,12 @@ static int flush_packet(AVFormatContext *ctx, int stream_index,

        if (stuffing_size < 0)
            stuffing_size = 0;

        if (startcode == PRIVATE_STREAM_1 && id >= 0xa0) {
            if (payload_size < av_fifo_size(stream->fifo))
                stuffing_size += payload_size % stream->lpcm_align;
        }

        if (stuffing_size > 16) {    /*<=16 for MPEG-1, <=32 for MPEG-2*/
            pad_packet_bytes += stuffing_size;
            packet_size      -= stuffing_size;
--- a/libavformat/nsvdec.c
+++ b/libavformat/nsvdec.c
@@ -19,6 +19,7 @@
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

 #include "libavutil/attributes.h"
 #include "libavutil/mathematics.h"
 #include "avformat.h"
 #include "internal.h"
@@ -272,7 +273,7 @@ static int nsv_parse_NSVf_header(AVFormatContext *s)
 {
    NSVContext *nsv = s->priv_data;
    AVIOContext *pb = s->pb;
    unsigned int file_size;
    unsigned int av_unused file_size;
    unsigned int size;
    int64_t duration;
    int strings_size;
@@ -594,7 +595,7 @@ null_chunk_retry:
    av_dlog(s, "NSV CHUNK %d aux, %u bytes video, %d bytes audio\n", auxcount, vsize, asize);
    /* skip aux stuff */
    for (i = 0; i < auxcount; i++) {
        uint32_t auxtag;
        uint32_t av_unused auxtag;
        auxsize = avio_rl16(pb);
        auxtag = avio_rl32(pb);
        av_dlog(s, "NSV aux data: '%c%c%c%c', %d bytes\n",
--- a/libavformat/rtpdec_h264.c
+++ b/libavformat/rtpdec_h264.c
@@ -20,7 +20,7 @@
 */

 /**
 * @file
 * @file
 * @brief H.264 / RTP Code (RFC3984)
 * @author Ryan Martell <rdm4@martellventures.com>
 *
@@ -29,7 +29,8 @@
 * This currently supports packetization mode:
 * Single Nal Unit Mode (0), or
 * Non-Interleaved Mode (1).  It currently does not support
 * Interleaved Mode (2). (This requires implementing STAP-B, MTAP16, MTAP24, FU-B packet types)
 * Interleaved Mode (2). (This requires implementing STAP-B, MTAP16, MTAP24,
 *                        FU-B packet types)
 */

 #include "libavutil/base64.h"
@@ -46,7 +47,7 @@
 #include "rtpdec_formats.h"

 struct PayloadContext {
    //sdp setup parameters
    // sdp setup parameters
    uint8_t profile_idc;
    uint8_t profile_iop;
    uint8_t level_idc;
@@ -56,8 +57,16 @@ struct PayloadContext {
 #endif
 };

 static int sdp_parse_fmtp_config_h264(AVStream * stream,
                                      PayloadContext * h264_data,
 #ifdef DEBUG
 #define COUNT_NAL_TYPE(data, nal) data->packet_types_received[(nal) & 0x1f]++
 #else
 #define COUNT_NAL_TYPE(data, nal) do { } while (0)
 #endif

 static const uint8_t start_sequence[] = { 0, 0, 0, 1 };

 static int sdp_parse_fmtp_config_h264(AVStream *stream,
                                      PayloadContext *h264_data,
                                      char *attr, char *value)
 {
    AVCodecContext *codec = stream->codec;
@@ -68,14 +77,15 @@ static int sdp_parse_fmtp_config_h264(AVStream * stream,
        av_log(codec, AV_LOG_DEBUG, "RTP Packetization Mode: %d\n", atoi(value));
        h264_data->packetization_mode = atoi(value);
        /*
           Packetization Mode:
           0 or not present: Single NAL mode (Only nals from 1-23 are allowed)
           1: Non-interleaved Mode: 1-23, 24 (STAP-A), 28 (FU-A) are allowed.
           2: Interleaved Mode: 25 (STAP-B), 26 (MTAP16), 27 (MTAP24), 28 (FU-A), and 29 (FU-B) are allowed.
         * Packetization Mode:
         * 0 or not present: Single NAL mode (Only nals from 1-23 are allowed)
         * 1: Non-interleaved Mode: 1-23, 24 (STAP-A), 28 (FU-A) are allowed.
         * 2: Interleaved Mode: 25 (STAP-B), 26 (MTAP16), 27 (MTAP24), 28 (FU-A),
         *                      and 29 (FU-B) are allowed.
         */
        if (h264_data->packetization_mode > 1)
            av_log(codec, AV_LOG_ERROR,
                   "Interleaved RTP mode is not supported yet.");
                   "Interleaved RTP mode is not supported yet.\n");
    } else if (!strcmp(attr, "profile-level-id")) {
        if (strlen(value) == 6) {
            char buffer[3];
@@ -84,24 +94,27 @@ static int sdp_parse_fmtp_config_h264(AVStream * stream,
            uint8_t profile_iop;
            uint8_t level_idc;

            buffer[0] = value[0]; buffer[1] = value[1]; buffer[2] = '\0';
            buffer[0]   = value[0];
            buffer[1]   = value[1];
            buffer[2]   = '\0';
            profile_idc = strtol(buffer, NULL, 16);
            buffer[0] = value[2]; buffer[1] = value[3];
            buffer[0]   = value[2];
            buffer[1]   = value[3];
            profile_iop = strtol(buffer, NULL, 16);
            buffer[0] = value[4]; buffer[1] = value[5];
            level_idc = strtol(buffer, NULL, 16);
            buffer[0]   = value[4];
            buffer[1]   = value[5];
            level_idc   = strtol(buffer, NULL, 16);

            av_log(codec, AV_LOG_DEBUG,
                   "RTP Profile IDC: %x Profile IOP: %x Level: %x\n",
                   profile_idc, profile_iop, level_idc);
            h264_data->profile_idc = profile_idc;
            h264_data->profile_iop = profile_iop;
            h264_data->level_idc = level_idc;
            h264_data->level_idc   = level_idc;
        }
    } else  if (!strcmp(attr, "sprop-parameter-sets")) {
        uint8_t start_sequence[] = { 0, 0, 0, 1 };
        codec->extradata_size= 0;
        codec->extradata= NULL;
    } else if (!strcmp(attr, "sprop-parameter-sets")) {
        codec->extradata_size = 0;
        av_freep(&codec->extradata);

        while (*value) {
            char base64packet[1024];
@@ -118,50 +131,47 @@ static int sdp_parse_fmtp_config_h264(AVStream * stream,
            if (*value == ',')
                value++;

            packet_size= av_base64_decode(decoded_packet, base64packet, sizeof(decoded_packet));
            packet_size = av_base64_decode(decoded_packet, base64packet,
                                           sizeof(decoded_packet));
            if (packet_size > 0) {
                uint8_t *dest = av_malloc(packet_size + sizeof(start_sequence) +
                                         codec->extradata_size +
                                         FF_INPUT_BUFFER_PADDING_SIZE);
                if(dest)
                {
                    if(codec->extradata_size)
                    {
                        memcpy(dest, codec->extradata, codec->extradata_size);
                        av_free(codec->extradata);
                    }

                    memcpy(dest+codec->extradata_size, start_sequence, sizeof(start_sequence));
                    memcpy(dest+codec->extradata_size+sizeof(start_sequence), decoded_packet, packet_size);
                    memset(dest+codec->extradata_size+sizeof(start_sequence)+
                           packet_size, 0, FF_INPUT_BUFFER_PADDING_SIZE);

                    codec->extradata= dest;
                    codec->extradata_size+= sizeof(start_sequence)+packet_size;
                } else {
                    av_log(codec, AV_LOG_ERROR, "Unable to allocate memory for extradata!");
                                          codec->extradata_size +
                                          FF_INPUT_BUFFER_PADDING_SIZE);
                if (!dest) {
                    av_log(codec, AV_LOG_ERROR,
                           "Unable to allocate memory for extradata!\n");
                    return AVERROR(ENOMEM);
                }
                if (codec->extradata_size) {
                    memcpy(dest, codec->extradata, codec->extradata_size);
                    av_free(codec->extradata);
                }

                memcpy(dest + codec->extradata_size, start_sequence,
                       sizeof(start_sequence));
                memcpy(dest + codec->extradata_size + sizeof(start_sequence),
                       decoded_packet, packet_size);
                memset(dest + codec->extradata_size + sizeof(start_sequence) +
                       packet_size, 0, FF_INPUT_BUFFER_PADDING_SIZE);

                codec->extradata       = dest;
                codec->extradata_size += sizeof(start_sequence) + packet_size;
            }
        }
        av_log(codec, AV_LOG_DEBUG, "Extradata set to %p (size: %d)!", codec->extradata, codec->extradata_size);
        av_log(codec, AV_LOG_DEBUG, "Extradata set to %p (size: %d)!\n",
               codec->extradata, codec->extradata_size);
    }
    return 0;
 }

 // return 0 on packet, no more left, 1 on packet, 1 on partial packet...
 static int h264_handle_packet(AVFormatContext *ctx,
                              PayloadContext *data,
                              AVStream *st,
                              AVPacket * pkt,
                              uint32_t * timestamp,
                              const uint8_t * buf,
                              int len, int flags)
 // return 0 on packet, no more left, 1 on packet, 1 on partial packet
 static int h264_handle_packet(AVFormatContext *ctx, PayloadContext *data,
                              AVStream *st, AVPacket *pkt, uint32_t *timestamp,
                              const uint8_t *buf, int len, int flags)
 {
    uint8_t nal;
    uint8_t type;
    int result= 0;
    uint8_t start_sequence[] = { 0, 0, 0, 1 };
    int result = 0;

    if (!len) {
        av_log(ctx, AV_LOG_ERROR, "Empty H264 RTP packet\n");
@@ -173,62 +183,60 @@ static int h264_handle_packet(AVFormatContext *ctx,
    assert(data);
    assert(buf);

    /* Simplify the case (these are all the nal types used internally by
     * the h264 codec). */
    if (type >= 1 && type <= 23)
        type = 1;              // simplify the case. (these are all the nal types used internally by the h264 codec)
        type = 1;
    switch (type) {
    case 0:                    // undefined, but pass them through
    case 1:
        av_new_packet(pkt, len+sizeof(start_sequence));
        av_new_packet(pkt, len + sizeof(start_sequence));
        memcpy(pkt->data, start_sequence, sizeof(start_sequence));
        memcpy(pkt->data+sizeof(start_sequence), buf, len);
 #ifdef DEBUG
        data->packet_types_received[nal & 0x1f]++;
 #endif
        memcpy(pkt->data + sizeof(start_sequence), buf, len);
        COUNT_NAL_TYPE(data, nal);
        break;

    case 24:                   // STAP-A (one packet, multiple nals)
        // consume the STAP-A NAL
        buf++;
        len--;
        // first we are going to figure out the total size....
        // first we are going to figure out the total size
        {
            int pass= 0;
            int total_length= 0;
            uint8_t *dst= NULL;
            int pass         = 0;
            int total_length = 0;
            uint8_t *dst     = NULL;

            for(pass= 0; pass<2; pass++) {
                const uint8_t *src= buf;
                int src_len= len;
            for (pass = 0; pass < 2; pass++) {
                const uint8_t *src = buf;
                int src_len        = len;

                while (src_len > 2) {
                    uint16_t nal_size = AV_RB16(src);

                    // consume the length of the aggregate...
                    src += 2;
                    // consume the length of the aggregate
                    src     += 2;
                    src_len -= 2;

                    if (nal_size <= src_len) {
                        if(pass==0) {
                            // counting...
                            total_length+= sizeof(start_sequence)+nal_size;
                        if (pass == 0) {
                            // counting
                            total_length += sizeof(start_sequence) + nal_size;
                        } else {
                            // copying
                            assert(dst);
                            memcpy(dst, start_sequence, sizeof(start_sequence));
                            dst+= sizeof(start_sequence);
                            dst += sizeof(start_sequence);
                            memcpy(dst, src, nal_size);
 #ifdef DEBUG
                            data->packet_types_received[*src & 0x1f]++;
 #endif
                            dst+= nal_size;
                            COUNT_NAL_TYPE(data, *src);
                            dst += nal_size;
                        }
                    } else {
                        av_log(ctx, AV_LOG_ERROR,
                               "nal size exceeds length: %d %d\n", nal_size, src_len);
                    }

                    // eat what we handled...
                    src += nal_size;
                    // eat what we handled
                    src     += nal_size;
                    src_len -= nal_size;

                    if (src_len < 0)
@@ -236,12 +244,13 @@ static int h264_handle_packet(AVFormatContext *ctx,
                               "Consumed more bytes than we got! (%d)\n", src_len);
                }

                if(pass==0) {
                    // now we know the total size of the packet (with the start sequences added)
                if (pass == 0) {
                    /* now we know the total size of the packet (with the
                     * start sequences added) */
                    av_new_packet(pkt, total_length);
                    dst= pkt->data;
                    dst = pkt->data;
                } else {
                    assert(dst-pkt->data==total_length);
                    assert(dst - pkt->data == total_length);
                }
            }
        }
@@ -259,34 +268,34 @@ static int h264_handle_packet(AVFormatContext *ctx,

    case 28:                   // FU-A (fragmented nal)
        buf++;
        len--;                  // skip the fu_indicator
        len--;                 // skip the fu_indicator
        if (len > 1) {
            // these are the same as above, we just redo them here for clarity...
            uint8_t fu_indicator = nal;
            uint8_t fu_header = *buf;
            uint8_t start_bit = fu_header >> 7;
 //            uint8_t end_bit = (fu_header & 0x40) >> 6;
            uint8_t nal_type = (fu_header & 0x1f);
            // these are the same as above, we just redo them here for clarity
            uint8_t fu_indicator      = nal;
            uint8_t fu_header         = *buf;
            uint8_t start_bit         = fu_header >> 7;
            uint8_t av_unused end_bit = (fu_header & 0x40) >> 6;
            uint8_t nal_type          = fu_header & 0x1f;
            uint8_t reconstructed_nal;

            // reconstruct this packet's true nal; only the data follows..
            reconstructed_nal = fu_indicator & (0xe0);  // the original nal forbidden bit and NRI are stored in this packet's nal;
            // Reconstruct this packet's true nal; only the data follows.
            /* The original nal forbidden bit and NRI are stored in this
             * packet's nal. */
            reconstructed_nal  = fu_indicator & 0xe0;
            reconstructed_nal |= nal_type;

            // skip the fu_header...
            // skip the fu_header
            buf++;
            len--;

 #ifdef DEBUG
            if (start_bit)
                data->packet_types_received[nal_type]++;
 #endif
            if(start_bit) {
                // copy in the start sequence, and the reconstructed nal....
                av_new_packet(pkt, sizeof(start_sequence)+sizeof(nal)+len);
                COUNT_NAL_TYPE(data, nal_type);
            if (start_bit) {
                /* copy in the start sequence, and the reconstructed nal */
                av_new_packet(pkt, sizeof(start_sequence) + sizeof(nal) + len);
                memcpy(pkt->data, start_sequence, sizeof(start_sequence));
                pkt->data[sizeof(start_sequence)]= reconstructed_nal;
                memcpy(pkt->data+sizeof(start_sequence)+sizeof(nal), buf, len);
                pkt->data[sizeof(start_sequence)] = reconstructed_nal;
                memcpy(pkt->data + sizeof(start_sequence) + sizeof(nal), buf, len);
            } else {
                av_new_packet(pkt, len);
                memcpy(pkt->data, buf, len);
@@ -300,7 +309,7 @@ static int h264_handle_packet(AVFormatContext *ctx,
    case 30:                   // undefined
    case 31:                   // undefined
    default:
        av_log(ctx, AV_LOG_ERROR, "Undefined type (%d)", type);
        av_log(ctx, AV_LOG_ERROR, "Undefined type (%d)\n", type);
        result = AVERROR_INVALIDDATA;
        break;
    }
@@ -341,25 +350,27 @@ static int parse_h264_sdp_line(AVFormatContext *s, int st_index,
        return 0;

    stream = s->streams[st_index];
    codec = stream->codec;
    codec  = stream->codec;

    if (av_strstart(p, "framesize:", &p)) {
        char buf1[50];
        char *dst = buf1;

        // remove the protocol identifier..
        while (*p && *p == ' ') p++; // strip spaces.
        while (*p && *p != ' ') p++; // eat protocol identifier
        while (*p && *p == ' ') p++; // strip trailing spaces.
        while (*p && *p != '-' && (dst - buf1) < sizeof(buf1) - 1) {
        // remove the protocol identifier
        while (*p && *p == ' ')
            p++;                     // strip spaces.
        while (*p && *p != ' ')
            p++;                     // eat protocol identifier
        while (*p && *p == ' ')
            p++;                     // strip trailing spaces.
        while (*p && *p != '-' && (dst - buf1) < sizeof(buf1) - 1)
            *dst++ = *p++;
        }
        *dst = '\0';

        // a='framesize:96 320-240'
        // set our parameters..
        codec->width = atoi(buf1);
        codec->height = atoi(p + 1); // skip the -
        // set our parameters
        codec->width   = atoi(buf1);
        codec->height  = atoi(p + 1); // skip the -
        codec->pix_fmt = PIX_FMT_YUV420P;
    } else if (av_strstart(p, "fmtp:", &p)) {
        return ff_parse_fmtp(stream, h264_data, p, sdp_parse_fmtp_config_h264);
--- a/libavformat/rtsp.c
+++ b/libavformat/rtsp.c
@@ -637,16 +637,17 @@ static int rtsp_open_transport_ctx(AVFormatContext *s, RTSPStream *rtsp_st)
 #if CONFIG_RTSP_DEMUXER || CONFIG_RTSP_MUXER
 static void rtsp_parse_range(int *min_ptr, int *max_ptr, const char **pp)
 {
    const char *p;
    const char *q;
    char *p;
    int v;

    p = *pp;
    p += strspn(p, SPACE_CHARS);
    v = strtol(p, (char **)&p, 10);
    q = *pp;
    q += strspn(q, SPACE_CHARS);
    v = strtol(q, &p, 10);
    if (*p == '-') {
        p++;
        *min_ptr = v;
        v = strtol(p, (char **)&p, 10);
        v = strtol(p, &p, 10);
        *max_ptr = v;
    } else {
        *min_ptr = v;
--- a/tests/utils.c
+++ b/tests/utils.c
@@ -18,13 +18,22 @@
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

 #include <errno.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>

 #define SCALEBITS 8
 #define ONE_HALF  (1 << (SCALEBITS - 1))
 #define FIX(x)    ((int) ((x) * (1L << SCALEBITS) + 0.5))

 #define err_if(expr) do {                                              \
    if (expr) {                                                        \
        fprintf(stderr, "%s\n", strerror(errno));                      \
        exit(1);                                                       \
    }                                                                  \
 } while (0)

 static void rgb24_to_yuv420p(unsigned char *lum, unsigned char *cb,
                             unsigned char *cr, unsigned char *src,
                             int width, int height)
@@ -110,14 +119,14 @@ static void pgmyuv_save(const char *filename, int w, int h,

    f = fopen(filename, "wb");
    fprintf(f, "P5\n%d %d\n%d\n", w, h * 3 / 2, 255);
    fwrite(lum_tab, 1, w * h, f);
    err_if(fwrite(lum_tab, 1, w * h, f) != w * h);
    h2 = h / 2;
    w2 = w / 2;
    cb = cb_tab;
    cr = cr_tab;
    for (i = 0; i < h2; i++) {
        fwrite(cb, 1, w2, f);
        fwrite(cr, 1, w2, f);
        err_if(fwrite(cb, 1, w2, f) != w2);
        err_if(fwrite(cr, 1, w2, f) != w2);
        cb += w2;
        cr += w2;
    }