Browse Source

aarch64: implement videodsp.prefetch

8% faster h264 decoding on Apple A7.
tags/n2.3
Janne Grunau Janne Grunau 11 years ago
parent
commit
d3789eeeed
5 changed files with 66 additions and 0 deletions
  1. +3
    -0
      libavcodec/aarch64/Makefile
  2. +28
    -0
      libavcodec/aarch64/videodsp.S
  3. +32
    -0
      libavcodec/aarch64/videodsp_init.c
  4. +2
    -0
      libavcodec/videodsp.c
  5. +1
    -0
      libavcodec/videodsp.h

+ 3
- 0
libavcodec/aarch64/Makefile View File

@@ -3,10 +3,13 @@ OBJS-$(CONFIG_H264DSP) += aarch64/h264dsp_init_aarch64.o
OBJS-$(CONFIG_H264QPEL) += aarch64/h264qpel_init_aarch64.o OBJS-$(CONFIG_H264QPEL) += aarch64/h264qpel_init_aarch64.o
OBJS-$(CONFIG_HPELDSP) += aarch64/hpeldsp_init_aarch64.o OBJS-$(CONFIG_HPELDSP) += aarch64/hpeldsp_init_aarch64.o
OBJS-$(CONFIG_NEON_CLOBBER_TEST) += aarch64/neontest.o OBJS-$(CONFIG_NEON_CLOBBER_TEST) += aarch64/neontest.o
OBJS-$(CONFIG_VIDEODSP) += aarch64/videodsp_init.o


OBJS-$(CONFIG_RV40_DECODER) += aarch64/rv40dsp_init_aarch64.o OBJS-$(CONFIG_RV40_DECODER) += aarch64/rv40dsp_init_aarch64.o
OBJS-$(CONFIG_VC1_DECODER) += aarch64/vc1dsp_init_aarch64.o OBJS-$(CONFIG_VC1_DECODER) += aarch64/vc1dsp_init_aarch64.o


ARMV8-OBJS-$(CONFIG_VIDEODSP) += aarch64/videodsp.o

NEON-OBJS-$(CONFIG_H264CHROMA) += aarch64/h264cmc_neon.o NEON-OBJS-$(CONFIG_H264CHROMA) += aarch64/h264cmc_neon.o
NEON-OBJS-$(CONFIG_H264DSP) += aarch64/h264dsp_neon.o \ NEON-OBJS-$(CONFIG_H264DSP) += aarch64/h264dsp_neon.o \
aarch64/h264idct_neon.o aarch64/h264idct_neon.o


+ 28
- 0
libavcodec/aarch64/videodsp.S View File

@@ -0,0 +1,28 @@
/*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/

#include "libavutil/aarch64/asm.S"

// Issue prefetch hints for a run of rows, two rows per loop iteration.
//
// Register use below assumes the standard AArch64 argument mapping of the
// C prototype ff_prefetch_aarch64(uint8_t *mem, ptrdiff_t stride, int h):
//   x0 = mem, x1 = stride, w2 = h   (NOTE(review): confirm against the ABI in use)
//
// The body always runs at least once and consumes two rows per pass, so an
// odd row count results in one extra row being hinted.
function ff_prefetch_aarch64, export=1
subs w2, w2, #2                 // h -= 2, setting the flags tested by b.gt below
prfm pldl1strm, [x0]            // prefetch-for-load hint (L1, streaming) at the current row
prfm pldl1strm, [x0, x1]        // same hint for the following row (mem + stride)
add x0, x0, x1, lsl #1          // mem += 2 * stride; plain add leaves the flags untouched
b.gt X(ff_prefetch_aarch64)     // rows remain: branch back to the function entry to loop
ret
endfunc

+ 32
- 0
libavcodec/aarch64/videodsp_init.c View File

@@ -0,0 +1,32 @@
/*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/

#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/aarch64/cpu.h"
#include "libavcodec/videodsp.h"

/* Prefetch routine; implemented in assembly in libavcodec/aarch64/videodsp.S. */
void ff_prefetch_aarch64(uint8_t *mem, ptrdiff_t stride, int h);

/**
 * Install the AArch64-specific VideoDSP routines into a context.
 *
 * Queries the runtime CPU flags and, when have_armv8() reports support,
 * points ctx->prefetch at ff_prefetch_aarch64. Otherwise the context is
 * left exactly as the caller set it up.
 *
 * @param ctx  context whose function pointers may be overridden
 * @param bpc  not used by this function
 */
av_cold void ff_videodsp_init_aarch64(VideoDSPContext *ctx, int bpc)
{
    const int flags = av_get_cpu_flags();

    /* Nothing to install without ARMv8 support. */
    if (!have_armv8(flags))
        return;

    ctx->prefetch = ff_prefetch_aarch64;
}

+ 2
- 0
libavcodec/videodsp.c View File

@@ -43,6 +43,8 @@ av_cold void ff_videodsp_init(VideoDSPContext *ctx, int bpc)
ctx->emulated_edge_mc = ff_emulated_edge_mc_16; ctx->emulated_edge_mc = ff_emulated_edge_mc_16;
} }


if (ARCH_AARCH64)
ff_videodsp_init_aarch64(ctx, bpc);
if (ARCH_ARM) if (ARCH_ARM)
ff_videodsp_init_arm(ctx, bpc); ff_videodsp_init_arm(ctx, bpc);
if (ARCH_PPC) if (ARCH_PPC)


+ 1
- 0
libavcodec/videodsp.h View File

@@ -68,6 +68,7 @@ typedef struct VideoDSPContext {
void ff_videodsp_init(VideoDSPContext *ctx, int bpc); void ff_videodsp_init(VideoDSPContext *ctx, int bpc);


/* for internal use only (i.e. called by ff_videodsp_init()) */ /* for internal use only (i.e. called by ff_videodsp_init()) */
void ff_videodsp_init_aarch64(VideoDSPContext *ctx, int bpc);
void ff_videodsp_init_arm(VideoDSPContext *ctx, int bpc); void ff_videodsp_init_arm(VideoDSPContext *ctx, int bpc);
void ff_videodsp_init_ppc(VideoDSPContext *ctx, int bpc); void ff_videodsp_init_ppc(VideoDSPContext *ctx, int bpc);
void ff_videodsp_init_x86(VideoDSPContext *ctx, int bpc); void ff_videodsp_init_x86(VideoDSPContext *ctx, int bpc);


Loading…
Cancel
Save