Browse Source

Merge commit '8d686ca59db14900ad5c12b547fb8a7afc8b0b94'

* commit '8d686ca59db14900ad5c12b547fb8a7afc8b0b94':
  dsputil: Split off *_8x8basis to a separate context

Conflicts:
	libavcodec/dsputil.c
	libavcodec/mpegvideo_enc.c
	libavcodec/x86/dsputilenc_mmx.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
tags/n2.3
Michael Niedermayer 11 years ago
parent
commit
462c6cdb8e
11 changed files with 247 additions and 126 deletions
  1. +2
    -1
      libavcodec/Makefile
  2. +0
    -32
      libavcodec/dsputil.c
  3. +0
    -6
      libavcodec/dsputil.h
  4. +2
    -0
      libavcodec/mpegvideo.h
  5. +8
    -5
      libavcodec/mpegvideo_enc.c
  6. +65
    -0
      libavcodec/mpegvideoencdsp.c
  7. +41
    -0
      libavcodec/mpegvideoencdsp.h
  8. +2
    -1
      libavcodec/x86/Makefile
  9. +0
    -79
      libavcodec/x86/dsputilenc_mmx.c
  10. +2
    -2
      libavcodec/x86/mpegvideoenc_qns_template.c
  11. +125
    -0
      libavcodec/x86/mpegvideoencdsp_init.c

+ 2
- 1
libavcodec/Makefile View File

@@ -77,7 +77,8 @@ OBJS-$(CONFIG_MPEGAUDIODSP) += mpegaudiodsp.o \
OBJS-$(CONFIG_MPEGVIDEO) += mpegvideo.o mpegvideodsp.o \
mpegvideo_motion.o mpegutils.o
OBJS-$(CONFIG_MPEGVIDEOENC) += mpegvideo_enc.o mpeg12data.o \
motion_est.o ratecontrol.o
motion_est.o ratecontrol.o \
mpegvideoencdsp.o
OBJS-$(CONFIG_QPELDSP) += qpeldsp.o
OBJS-$(CONFIG_RANGECODER) += rangecoder.o
RDFT-OBJS-$(CONFIG_HARDCODED_TABLES) += sin_tables.o


+ 0
- 32
libavcodec/dsputil.c View File

@@ -465,35 +465,6 @@ static int nsse8_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2, int stride, int
return score1 + FFABS(score2) * 8;
}

/*
 * C implementation of the "try" step for an 8x8 basis change.
 *
 * For each of the 64 entries, basis[i] * scale is rounded and shifted right
 * by (BASIS_SHIFT - RECON_SHIFT), added to rem[i], and the total is shifted
 * right by RECON_SHIFT. That value is multiplied by weight[i], squared,
 * shifted right by 4 and accumulated.
 *
 * None of the input arrays is written to.
 * Returns the accumulated sum shifted right by 2.
 */
static int try_8x8basis_c(int16_t rem[64], int16_t weight[64],
int16_t basis[64], int scale)
{
int i;
unsigned int sum = 0;

for (i = 0; i < 8 * 8; i++) {
/* rem[i] plus the rounded, down-shifted contribution of the basis entry */
int b = rem[i] + ((basis[i] * scale +
(1 << (BASIS_SHIFT - RECON_SHIFT - 1))) >>
(BASIS_SHIFT - RECON_SHIFT));
int w = weight[i];
b >>= RECON_SHIFT;
av_assert2(-512 < b && b < 512);

/* '*' binds tighter than '>>': the whole square is shifted right by 4 */
sum += (w * b) * (w * b) >> 4;
}
return sum >> 2;
}

/*
 * C implementation of the "add" step for an 8x8 basis change.
 *
 * Updates rem[] in place: every one of the 64 entries receives
 * basis[i] * scale, rounded and shifted right by
 * (BASIS_SHIFT - RECON_SHIFT). The sum is stored back into an int16_t.
 */
static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale)
{
int i;

for (i = 0; i < 8 * 8; i++)
rem[i] += (basis[i] * scale +
(1 << (BASIS_SHIFT - RECON_SHIFT - 1))) >>
(BASIS_SHIFT - RECON_SHIFT);
}

static int zero_cmp(MpegEncContext *s, uint8_t *a, uint8_t *b,
int stride, int h)
{
@@ -1170,9 +1141,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx)
ff_dsputil_init_dwt(c);
#endif

c->try_8x8basis = try_8x8basis_c;
c->add_8x8basis = add_8x8basis_c;

c->shrink[0] = av_image_copy_plane;
c->shrink[1] = ff_shrink22;
c->shrink[2] = ff_shrink44;


+ 0
- 6
libavcodec/dsputil.h View File

@@ -103,12 +103,6 @@ typedef struct DSPContext {
void (*fdct)(int16_t *block /* align 16 */);
void (*fdct248)(int16_t *block /* align 16 */);

int (*try_8x8basis)(int16_t rem[64], int16_t weight[64],
int16_t basis[64], int scale);
void (*add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale);
#define BASIS_SHIFT 16
#define RECON_SHIFT 6

void (*draw_edges)(uint8_t *buf, int wrap, int width, int height,
int w, int h, int sides);
#define EDGE_WIDTH 16


+ 2
- 0
libavcodec/mpegvideo.h View File

@@ -38,6 +38,7 @@
#include "hpeldsp.h"
#include "idctdsp.h"
#include "mpegvideodsp.h"
#include "mpegvideoencdsp.h"
#include "put_bits.h"
#include "ratecontrol.h"
#include "parser.h"
@@ -365,6 +366,7 @@ typedef struct MpegEncContext {
HpelDSPContext hdsp;
IDCTDSPContext idsp;
MpegVideoDSPContext mdsp;
MpegvideoEncDSPContext mpvencdsp;
QpelDSPContext qdsp;
VideoDSPContext vdsp;
H263DSPContext h263dsp;


+ 8
- 5
libavcodec/mpegvideo_enc.c View File

@@ -818,6 +818,7 @@ av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
if (ff_MPV_common_init(s) < 0)
return -1;

ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
ff_qpeldsp_init(&s->qdsp);

s->avctx->coded_frame = s->current_picture.f;
@@ -4090,7 +4091,7 @@ STOP_TIMER("memset rem[]")}
run_tab[rle_index++]=run;
run=0;

s->dsp.add_8x8basis(rem, basis[j], coeff);
s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
}else{
run++;
}
@@ -4104,7 +4105,7 @@ STOP_TIMER("init rem[]")
{START_TIMER
#endif
for(;;){
int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
int best_coeff=0;
int best_change=0;
int run2, best_unquant_change=0, analyze_gradient;
@@ -4148,7 +4149,8 @@ STOP_TIMER("dct")}
if(new_coeff >= 2048 || new_coeff < 0)
continue;

score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
new_coeff - old_coeff);
if(score<best_score){
best_score= score;
best_coeff= 0;
@@ -4271,7 +4273,8 @@ STOP_TIMER("dct")}
unquant_change= new_coeff - old_coeff;
av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);

score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
unquant_change);
if(score<best_score){
best_score= score;
best_coeff= i;
@@ -4345,7 +4348,7 @@ if(256*256*256*64 % count == 0){
}
}

s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
}else{
break;
}


+ 65
- 0
libavcodec/mpegvideoencdsp.c View File

@@ -0,0 +1,65 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/

#include <assert.h>
#include <stdint.h>

#include "config.h"
#include "libavutil/avassert.h"
#include "libavutil/attributes.h"
#include "avcodec.h"
#include "mpegvideoencdsp.h"

/**
 * Evaluate the weighted squared magnitude that rem[] would have after
 * adding basis[] multiplied by scale, without modifying any array.
 *
 * @param rem    64 residual values
 * @param weight 64 per-entry weights
 * @param basis  64 basis values
 * @param scale  multiplier applied to every basis entry
 * @return accumulated ((weight * value)^2 >> 4) over all entries, >> 2
 */
static int try_8x8basis_c(int16_t rem[64], int16_t weight[64],
                          int16_t basis[64], int scale)
{
    /* basis * scale is brought down by this many bits, with rounding. */
    const int shift = BASIS_SHIFT - RECON_SHIFT;
    const int round = 1 << (shift - 1);
    unsigned int acc = 0;
    int k;

    for (k = 0; k < 64; k++) {
        int delta = (basis[k] * scale + round) >> shift;
        int v     = (rem[k] + delta) >> RECON_SHIFT;
        int wv;

        av_assert2(-512 < v && v < 512);

        /* The square is formed in int and shifted before accumulation. */
        wv   = weight[k] * v;
        acc += (wv * wv) >> 4;
    }

    return acc >> 2;
}

/**
 * Add basis[] multiplied by scale to rem[] in place.
 *
 * Each product is rounded and shifted right by
 * (BASIS_SHIFT - RECON_SHIFT) before being added to the matching rem entry.
 *
 * @param rem   64 residual values, updated in place
 * @param basis 64 basis values
 * @param scale multiplier applied to every basis entry
 */
static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale)
{
    const int shift = BASIS_SHIFT - RECON_SHIFT;
    const int round = 1 << (shift - 1);
    int k;

    for (k = 0; k < 64; k++) {
        int delta = (basis[k] * scale + round) >> shift;

        rem[k] += delta;
    }
}

/**
 * Populate an MpegvideoEncDSPContext function table.
 *
 * The portable C routines are installed first; when ARCH_X86 is nonzero the
 * x86 initializer is then called and may replace them.
 *
 * @param c     function table to fill in
 * @param avctx codec context, forwarded unchanged to the x86 initializer
 */
av_cold void ff_mpegvideoencdsp_init(MpegvideoEncDSPContext *c,
                                     AVCodecContext *avctx)
{
    /* Defaults: plain C versions. */
    c->add_8x8basis = add_8x8basis_c;
    c->try_8x8basis = try_8x8basis_c;

    /* Kept as a run-time style test on the ARCH_X86 constant, not #if. */
    if (ARCH_X86) {
        ff_mpegvideoencdsp_init_x86(c, avctx);
    }
}

+ 41
- 0
libavcodec/mpegvideoencdsp.h View File

@@ -0,0 +1,41 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/

#ifndef AVCODEC_MPEGVIDEOENCDSP_H
#define AVCODEC_MPEGVIDEOENCDSP_H

#include <stdint.h>

#include "avcodec.h"

/*
 * Shift amounts used by the 8x8 basis helpers. In the C implementations,
 * basis[i] * scale is rounded and shifted right by
 * (BASIS_SHIFT - RECON_SHIFT) before being combined with rem[i];
 * try_8x8basis additionally shifts that result right by RECON_SHIFT.
 */
#define BASIS_SHIFT 16
#define RECON_SHIFT 6

/**
 * Function table for the 8x8 basis helpers used by the MPEG video encoder.
 * Filled in by ff_mpegvideoencdsp_init().
 */
typedef struct MpegvideoEncDSPContext {
/* Scores rem[] as if basis[] * scale had been added; the C version leaves
 * all arrays untouched and returns the weighted squared sum. */
int (*try_8x8basis)(int16_t rem[64], int16_t weight[64],
int16_t basis[64], int scale);
/* Adds basis[] * scale (rounded and shifted down) to rem[] in place. */
void (*add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale);

} MpegvideoEncDSPContext;

/**
 * Install the C implementations of try_8x8basis/add_8x8basis into c and,
 * when ARCH_X86 is set, call ff_mpegvideoencdsp_init_x86() afterwards.
 */
void ff_mpegvideoencdsp_init(MpegvideoEncDSPContext *c,
AVCodecContext *avctx);
/**
 * x86 initializer: depending on the detected CPU flags, replaces entries of
 * c with inline-asm variants. try_8x8basis is only replaced when
 * CODEC_FLAG_BITEXACT is not set in avctx->flags.
 */
void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c,
AVCodecContext *avctx);

#endif /* AVCODEC_MPEGVIDEOENCDSP_H */

+ 2
- 1
libavcodec/x86/Makefile View File

@@ -28,7 +28,8 @@ OBJS-$(CONFIG_LPC) += x86/lpc.o
OBJS-$(CONFIG_MPEGAUDIODSP) += x86/mpegaudiodsp.o
OBJS-$(CONFIG_MPEGVIDEO) += x86/mpegvideo.o \
x86/mpegvideodsp.o
OBJS-$(CONFIG_MPEGVIDEOENC) += x86/mpegvideoenc.o
OBJS-$(CONFIG_MPEGVIDEOENC) += x86/mpegvideoenc.o \
x86/mpegvideoencdsp_init.o
OBJS-$(CONFIG_QPELDSP) += x86/qpeldsp_init.o
OBJS-$(CONFIG_VIDEODSP) += x86/videodsp_init.o
OBJS-$(CONFIG_VP3DSP) += x86/vp3dsp_init.o


+ 0
- 79
libavcodec/x86/dsputilenc_mmx.c View File

@@ -352,72 +352,6 @@ static int vsad16_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
#undef SUM


#define PHADDD(a, t) \
"movq " #a ", " #t " \n\t" \
"psrlq $32, " #a " \n\t" \
"paddd " #t ", " #a " \n\t"

/*
* pmulhw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15])[16 - 31]
* pmulhrw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15] + 0x8000)[16 - 31]
* pmulhrsw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15] + 0x4000)[15 - 30]
*/
#define PMULHRW(x, y, s, o) \
"pmulhw " #s ", " #x " \n\t" \
"pmulhw " #s ", " #y " \n\t" \
"paddw " #o ", " #x " \n\t" \
"paddw " #o ", " #y " \n\t" \
"psraw $1, " #x " \n\t" \
"psraw $1, " #y " \n\t"
#define DEF(x) x ## _mmx
#define SET_RND MOVQ_WONE
#define SCALE_OFFSET 1

#include "dsputil_qns_template.c"

#undef DEF
#undef SET_RND
#undef SCALE_OFFSET
#undef PMULHRW

#define DEF(x) x ## _3dnow
#define SET_RND(x)
#define SCALE_OFFSET 0
#define PMULHRW(x, y, s, o) \
"pmulhrw " #s ", " #x " \n\t" \
"pmulhrw " #s ", " #y " \n\t"

#include "dsputil_qns_template.c"

#undef DEF
#undef SET_RND
#undef SCALE_OFFSET
#undef PMULHRW

#if HAVE_SSSE3_INLINE
#undef PHADDD
#define DEF(x) x ## _ssse3
#define SET_RND(x)
#define SCALE_OFFSET -1

#define PHADDD(a, t) \
"pshufw $0x0E, " #a ", " #t " \n\t" \
/* faster than phaddd on core2 */ \
"paddd " #t ", " #a " \n\t"

#define PMULHRW(x, y, s, o) \
"pmulhrsw " #s ", " #x " \n\t" \
"pmulhrsw " #s ", " #y " \n\t"

#include "dsputil_qns_template.c"

#undef DEF
#undef SET_RND
#undef SCALE_OFFSET
#undef PMULHRW
#undef PHADDD
#endif /* HAVE_SSSE3_INLINE */

#endif /* HAVE_INLINE_ASM */

av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx,
@@ -448,16 +382,7 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx,

if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
c->vsad[0] = vsad16_mmx;
c->try_8x8basis = try_8x8basis_mmx;
}
c->add_8x8basis = add_8x8basis_mmx;
}

if (INLINE_AMD3DNOW(cpu_flags)) {
if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
c->try_8x8basis = try_8x8basis_3dnow;
}
c->add_8x8basis = add_8x8basis_3dnow;
}

if (INLINE_MMXEXT(cpu_flags)) {
@@ -480,10 +405,6 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx,

#if HAVE_SSSE3_INLINE
if (INLINE_SSSE3(cpu_flags)) {
if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
c->try_8x8basis = try_8x8basis_ssse3;
}
c->add_8x8basis = add_8x8basis_ssse3;
}
#endif
#endif /* HAVE_INLINE_ASM */


libavcodec/x86/dsputil_qns_template.c → libavcodec/x86/mpegvideoenc_qns_template.c View File

@@ -1,5 +1,5 @@
/*
* DSP utils : QNS functions are compiled 3 times for mmx/3dnow/ssse3
* QNS functions are compiled 3 times for MMX/3DNOW/SSSE3
* Copyright (c) 2004 Michael Niedermayer
*
* MMX optimization by Michael Niedermayer <michaelni@gmx.at>
@@ -22,9 +22,9 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/

#include <assert.h>
#include <stdint.h>

#include "libavutil/avassert.h"
#include "libavutil/common.h"
#include "libavutil/x86/asm.h"


+ 125
- 0
libavcodec/x86/mpegvideoencdsp_init.c View File

@@ -0,0 +1,125 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/

#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/avcodec.h"
#include "libavcodec/mpegvideoencdsp.h"

/*
 * Inline-asm variants of try_8x8basis/add_8x8basis.
 *
 * mpegvideoenc_qns_template.c is included up to three times below. Before
 * each inclusion DEF, SET_RND, SCALE_OFFSET, PMULHRW and PHADDD are set up
 * for one instruction set, and DEF() appends the matching suffix
 * (_mmx, _3dnow, _ssse3). The template itself is not visible here, so how it
 * consumes SET_RND and SCALE_OFFSET is not documented in this file.
 */
#if HAVE_INLINE_ASM

/*
 * Horizontal add used by the MMX and 3DNow! builds: copy a into t, shift a
 * right by 32 bits, then add t to a as packed dwords. The low dword of a
 * ends up holding the sum of its two original dwords; t is clobbered.
 */
#define PHADDD(a, t) \
"movq " #a ", " #t " \n\t" \
"psrlq $32, " #a " \n\t" \
"paddd " #t ", " #a " \n\t"

/*
 * pmulhw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15])[16 - 31]
 * pmulhrw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15] + 0x8000)[16 - 31]
 * pmulhrsw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15] + 0x4000)[15 - 30]
 */
/*
 * MMX build: multiply x and y by s with pmulhw, add o to both, then shift
 * both right arithmetically by one bit.
 * NOTE(review): o presumably holds the rounding constant prepared by
 * SET_RND (MOVQ_WONE) inside the template - confirm against the template.
 */
#define PMULHRW(x, y, s, o) \
"pmulhw " #s ", " #x " \n\t" \
"pmulhw " #s ", " #y " \n\t" \
"paddw " #o ", " #x " \n\t" \
"paddw " #o ", " #y " \n\t" \
"psraw $1, " #x " \n\t" \
"psraw $1, " #y " \n\t"
#define DEF(x) x ## _mmx
#define SET_RND MOVQ_WONE
#define SCALE_OFFSET 1

/* First instantiation: functions with the _mmx suffix. */
#include "mpegvideoenc_qns_template.c"

#undef DEF
#undef SET_RND
#undef SCALE_OFFSET
#undef PMULHRW

/*
 * 3DNow! build: PMULHRW maps directly onto the pmulhrw instruction, the o
 * argument is unused and SET_RND expands to nothing. PHADDD is still the
 * movq/psrlq/paddd version defined above.
 */
#define DEF(x) x ## _3dnow
#define SET_RND(x)
#define SCALE_OFFSET 0
#define PMULHRW(x, y, s, o) \
"pmulhrw " #s ", " #x " \n\t" \
"pmulhrw " #s ", " #y " \n\t"

/* Second instantiation: functions with the _3dnow suffix. */
#include "mpegvideoenc_qns_template.c"

#undef DEF
#undef SET_RND
#undef SCALE_OFFSET
#undef PMULHRW

/* The SSSE3 variant is only built when SSSE3 inline asm is available. */
#if HAVE_SSSE3_INLINE
#undef PHADDD
#define DEF(x) x ## _ssse3
#define SET_RND(x)
#define SCALE_OFFSET -1

/*
 * SSSE3 build: pshufw with 0x0E places the high dword of a in the low dword
 * of t, and paddd then leaves the sum of both dwords in the low dword of a.
 */
#define PHADDD(a, t) \
"pshufw $0x0E, " #a ", " #t " \n\t" \
/* faster than phaddd on core2 */ \
"paddd " #t ", " #a " \n\t"

/* PMULHRW maps directly onto pmulhrsw; the o argument is unused. */
#define PMULHRW(x, y, s, o) \
"pmulhrsw " #s ", " #x " \n\t" \
"pmulhrsw " #s ", " #y " \n\t"

/* Third instantiation: functions with the _ssse3 suffix. */
#include "mpegvideoenc_qns_template.c"

#undef DEF
#undef SET_RND
#undef SCALE_OFFSET
#undef PMULHRW
#undef PHADDD
#endif /* HAVE_SSSE3_INLINE */

#endif /* HAVE_INLINE_ASM */

/**
 * Replace entries of the function table with x86 inline-asm variants.
 *
 * The CPU checks run in the order MMX, 3DNow!, SSSE3, and every matching
 * check overwrites what an earlier one installed, so the last matching
 * instruction set wins. add_8x8basis is replaced whenever a check matches;
 * try_8x8basis is replaced only if CODEC_FLAG_BITEXACT is not set in
 * avctx->flags. Without HAVE_INLINE_ASM the function does nothing.
 *
 * @param c     function table to update
 * @param avctx codec context whose flags are consulted
 */
av_cold void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c,
                                         AVCodecContext *avctx)
{
#if HAVE_INLINE_ASM
    int cpu = av_get_cpu_flags();

    if (INLINE_MMX(cpu)) {
        c->add_8x8basis = add_8x8basis_mmx;
        if (!(avctx->flags & CODEC_FLAG_BITEXACT))
            c->try_8x8basis = try_8x8basis_mmx;
    }

    if (INLINE_AMD3DNOW(cpu)) {
        c->add_8x8basis = add_8x8basis_3dnow;
        if (!(avctx->flags & CODEC_FLAG_BITEXACT))
            c->try_8x8basis = try_8x8basis_3dnow;
    }

#if HAVE_SSSE3_INLINE
    if (INLINE_SSSE3(cpu)) {
        c->add_8x8basis = add_8x8basis_ssse3;
        if (!(avctx->flags & CODEC_FLAG_BITEXACT))
            c->try_8x8basis = try_8x8basis_ssse3;
    }
#endif /* HAVE_SSSE3_INLINE */

#endif /* HAVE_INLINE_ASM */
}

Loading…
Cancel
Save