avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC intra prediction functions

This patch adds MSA (MIPS-SIMD-Arch) optimizations for HEVC intra prediction functions in new file hevcpred_msa.c. Adds new generic macros (needed for this patch) in libavutil/mips/generic_macros_msa.h. Signed-off-by: Shivraj Patil <shivraj.patil@imgtec.com> Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
10 years ago · d6d98237ed
--- a/libavcodec/hevcpred.c
+++ b/libavcodec/hevcpred.c
@@ -74,4 +74,7 @@ void ff_hevc_pred_init(HEVCPredContext *hpc, int bit_depth)
        HEVC_PRED(8);
        break;
    }

    if (ARCH_MIPS)
        ff_hevc_pred_init_mips(hpc, bit_depth);
 }
--- a/libavcodec/hevcpred.h
+++ b/libavcodec/hevcpred.h
@@ -41,5 +41,6 @@ typedef struct HEVCPredContext {
 } HEVCPredContext;

 void ff_hevc_pred_init(HEVCPredContext *hpc, int bit_depth);
 void ff_hevc_pred_init_mips(HEVCPredContext *hpc, int bit_depth);

 #endif /* AVCODEC_HEVCPRED_H */
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -18,7 +18,8 @@ OBJS-$(CONFIG_AAC_DECODER)                += mips/aacdec_mips.o            \
                                             mips/aacpsdsp_mips.o
 MIPSDSPR1-OBJS-$(CONFIG_AAC_ENCODER)      += mips/aaccoder_mips.o
 MIPSFPU-OBJS-$(CONFIG_AAC_ENCODER)        += mips/iirfilter_mips.o
 OBJS-$(CONFIG_HEVC_DECODER)               += mips/hevcdsp_init_mips.o
 OBJS-$(CONFIG_HEVC_DECODER)               += mips/hevcdsp_init_mips.o      \
                                             mips/hevcpred_init_mips.o
 OBJS-$(CONFIG_H264DSP)                    += mips/h264dsp_init_mips.o
 OBJS-$(CONFIG_H264CHROMA)                 += mips/h264chroma_init_mips.o
 MSA-OBJS-$(CONFIG_HEVC_DECODER)           += mips/hevcdsp_msa.o            \
@@ -27,7 +28,8 @@ MSA-OBJS-$(CONFIG_HEVC_DECODER)           += mips/hevcdsp_msa.o            \
                                             mips/hevc_mc_bi_msa.o         \
                                             mips/hevc_mc_biw_msa.o        \
                                             mips/hevc_idct_msa.o          \
                                             mips/hevc_lpf_sao_msa.o
                                             mips/hevc_lpf_sao_msa.o       \
                                             mips/hevcpred_msa.o
 MSA-OBJS-$(CONFIG_H264DSP)                += mips/h264dsp_msa.o
 LOONGSON3-OBJS-$(CONFIG_H264DSP)          += mips/h264dsp_mmi.o
 LOONGSON3-OBJS-$(CONFIG_H264CHROMA)       += mips/h264chroma_mmi.o
--- a/libavcodec/mips/hevcpred_init_mips.c
+++ b/libavcodec/mips/hevcpred_init_mips.c
@@ -0,0 +1,48 @@
 /*
 * Copyright (c) 2015 Shivraj Patil (Shivraj.Patil@imgtec.com)
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

 #include "libavcodec/hevc.h"
 #include "libavcodec/mips/hevcpred_mips.h"

 #if HAVE_MSA
 /**
  * Install the MSA intra prediction routines into a prediction context.
  *
  * Only 8-bit content is handled: for any other bit depth the context is
  * left exactly as the caller passed it in.  For 8-bit content the planar,
  * DC and angular predictors are replaced, together with slots 2 and 3 of
  * intra_pred[]; slots 0 and 1 of intra_pred[] are not touched.
  *
  * @param c         prediction context whose function pointers are updated
  * @param bit_depth bit depth the context is being initialised for
  */
 static av_cold void hevc_pred_init_msa(HEVCPredContext *c, const int bit_depth)
 {
    /* Nothing to install unless the stream is 8-bit. */
    if (bit_depth != 8)
        return;

    c->intra_pred[2]   = ff_intra_pred_8_16x16_msa;
    c->intra_pred[3]   = ff_intra_pred_8_32x32_msa;

    c->pred_planar[0]  = ff_hevc_intra_pred_planar_0_msa;
    c->pred_planar[1]  = ff_hevc_intra_pred_planar_1_msa;
    c->pred_planar[2]  = ff_hevc_intra_pred_planar_2_msa;
    c->pred_planar[3]  = ff_hevc_intra_pred_planar_3_msa;

    c->pred_dc         = ff_hevc_intra_pred_dc_msa;

    c->pred_angular[0] = ff_pred_intra_pred_angular_0_msa;
    c->pred_angular[1] = ff_pred_intra_pred_angular_1_msa;
    c->pred_angular[2] = ff_pred_intra_pred_angular_2_msa;
    c->pred_angular[3] = ff_pred_intra_pred_angular_3_msa;
 }
 #endif  // #if HAVE_MSA

 /**
  * MIPS-specific initialisation hook for the HEVC prediction context.
  *
  * Called from ff_hevc_pred_init() when ARCH_MIPS is set.  When the build
  * has HAVE_MSA enabled it forwards to hevc_pred_init_msa(); otherwise the
  * body is empty and the context is left unchanged.
  *
  * @param c         prediction context to update
  * @param bit_depth bit depth the context is being initialised for
  */
 void ff_hevc_pred_init_mips(HEVCPredContext *c, const int bit_depth)
 {
 #if HAVE_MSA
    hevc_pred_init_msa(c, bit_depth);
 #endif  // #if HAVE_MSA
 }
--- a/libavcodec/mips/hevcpred_mips.h
+++ b/libavcodec/mips/hevcpred_mips.h
@@ -0,0 +1,73 @@
 /*
 * Copyright (c) 2015 Shivraj Patil (Shivraj.Patil@imgtec.com)
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

 #ifndef AVCODEC_MIPS_HEVCPRED_MIPS_H
 #define AVCODEC_MIPS_HEVCPRED_MIPS_H

 #include "libavcodec/hevcdsp.h"

 /*
  * MSA planar intra prediction entry points.
  *
  * hevc_pred_init_msa() installs these as pred_planar[0] .. pred_planar[3]
  * when the bit depth is 8.
  *
  * NOTE(review): the numeric suffix matches the pred_planar[] slot; it
  * presumably selects the block size, but the definitions live in
  * hevcpred_msa.c and the size mapping should be confirmed there.
  * NOTE(review): dst is presumably the output block, src_top/src_left the
  * neighbouring reference samples and stride the dst line pitch - confirm.
  */
 void ff_hevc_intra_pred_planar_0_msa(uint8_t *dst,
                                     const uint8_t *src_top,
                                     const uint8_t *src_left,
                                     ptrdiff_t stride);

 void ff_hevc_intra_pred_planar_1_msa(uint8_t *dst,
                                     const uint8_t *src_top,
                                     const uint8_t *src_left,
                                     ptrdiff_t stride);

 void ff_hevc_intra_pred_planar_2_msa(uint8_t *dst,
                                     const uint8_t *src_top,
                                     const uint8_t *src_left,
                                     ptrdiff_t stride);

 void ff_hevc_intra_pred_planar_3_msa(uint8_t *dst,
                                     const uint8_t *src_top,
                                     const uint8_t *src_left,
                                     ptrdiff_t stride);

 /*
  * MSA DC intra prediction entry point.
  *
  * hevc_pred_init_msa() installs this as pred_dc when the bit depth is 8.
  *
  * NOTE(review): log2 is presumably the base-2 logarithm of the block size
  * and c_idx the colour component index - confirm against hevcpred_msa.c.
  */
 void ff_hevc_intra_pred_dc_msa(uint8_t *dst, const uint8_t *src_top,
                               const uint8_t *src_left,
                               ptrdiff_t stride, int log2, int c_idx);

 /*
  * MSA angular intra prediction entry points.
  *
  * hevc_pred_init_msa() installs these as pred_angular[0] .. pred_angular[3]
  * when the bit depth is 8.
  *
  * NOTE(review): the numeric suffix matches the pred_angular[] slot; it
  * presumably selects the block size - confirm against hevcpred_msa.c.
  * NOTE(review): mode is presumably the angular prediction mode number and
  * c_idx the colour component index - confirm.
  */
 void ff_pred_intra_pred_angular_0_msa(uint8_t *dst,
                                      const uint8_t *src_top,
                                      const uint8_t *src_left,
                                      ptrdiff_t stride, int c_idx, int mode);

 void ff_pred_intra_pred_angular_1_msa(uint8_t *dst,
                                      const uint8_t *src_top,
                                      const uint8_t *src_left,
                                      ptrdiff_t stride, int c_idx, int mode);

 void ff_pred_intra_pred_angular_2_msa(uint8_t *dst,
                                      const uint8_t *src_top,
                                      const uint8_t *src_left,
                                      ptrdiff_t stride, int c_idx, int mode);

 void ff_pred_intra_pred_angular_3_msa(uint8_t *dst,
                                      const uint8_t *src_top,
                                      const uint8_t *src_left,
                                      ptrdiff_t stride, int c_idx, int mode);

 /*
  * MSA top-level intra prediction entry points.
  *
  * hevc_pred_init_msa() installs these as intra_pred[2] and intra_pred[3]
  * respectively when the bit depth is 8.
  *
  * NOTE(review): HEVCContext must already be declared when this header is
  * parsed; hevcpred_init_mips.c includes libavcodec/hevc.h before this
  * header.  Confirm whether libavcodec/hevcdsp.h alone provides it.
  */
 void ff_intra_pred_8_16x16_msa(HEVCContext *s, int x0, int y0, int c_idx);
 void ff_intra_pred_8_32x32_msa(HEVCContext *s, int x0, int y0, int c_idx);

 #endif  // #ifndef AVCODEC_MIPS_HEVCPRED_MIPS_H
--- a/libavcodec/mips/hevcpred_msa.c
+++ b/libavcodec/mips/hevcpred_msa.c
--- a/libavutil/mips/generic_macros_msa.h
+++ b/libavutil/mips/generic_macros_msa.h
@@ -770,7 +770,9 @@
    SLDI_B2_0(RTYPE, in0, in1, out0, out1, slide_val);  \
    SLDI_B2_0(RTYPE, in2, in3, out2, out3, slide_val);  \
 }
 #define SLDI_B4_0_UB(...) SLDI_B4_0(v16u8, __VA_ARGS__)
 #define SLDI_B4_0_SB(...) SLDI_B4_0(v16i8, __VA_ARGS__)
 #define SLDI_B4_0_SH(...) SLDI_B4_0(v8i16, __VA_ARGS__)

 /* Description : Immediate number of columns to slide
   Arguments   : Inputs  - in0_0, in0_1, in1_0, in1_1, slide_val
@@ -1037,6 +1039,21 @@
    out_m;                                                \
 } )

 /* Description : Horizontal addition of unsigned byte vector elements
    Arguments   : Inputs  - in0, in1
                  Outputs - out0, out1
                  Return Type - as per RTYPE
    Details     : Each unsigned odd byte element from 'in0' is added to
                  even unsigned byte element from 'in0' (pairwise) and the
                  halfword result is stored in 'out0'.
                  'in1' is processed the same way and its halfword result
                  is stored in 'out1'.
                  Each input is passed as both operands of
                  __msa_hadd_u_h, so the two inputs are never mixed.
 */
 #define HADD_UB2(RTYPE, in0, in1, out0, out1)                 \
 {                                                             \
    out0 = (RTYPE) __msa_hadd_u_h((v16u8) in0, (v16u8) in0);  \
    out1 = (RTYPE) __msa_hadd_u_h((v16u8) in1, (v16u8) in1);  \
 }
 #define HADD_UB2_UH(...) HADD_UB2(v8u16, __VA_ARGS__)

 /* Description : Horizontal subtraction of unsigned byte vector elements
   Arguments   : Inputs  - in0, in1
                 Outputs - out0, out1
@@ -1053,6 +1070,20 @@
 #define HSUB_UB2_UH(...) HSUB_UB2(v8u16, __VA_ARGS__)
 #define HSUB_UB2_SH(...) HSUB_UB2(v8i16, __VA_ARGS__)

 /* Description : Insert specified word elements from inputs into 1
                  destination vector
    Arguments   : Inputs  - in0, in1 (2 word values)
                  Outputs - out      (output vector)
                  Return Type - as per RTYPE
    Details     : 'in0' is inserted into word element 0 of 'out' and 'in1'
                  into word element 1.  'out' is also read as the source
                  vector of each insert, so it is both an input and an
                  output of the macro.
                  INSERT_W4 below takes four values (in0, in1, in2, in3).
 */
 #define INSERT_W2(RTYPE, in0, in1, out)                 \
 {                                                       \
    out = (RTYPE) __msa_insert_w((v4i32) out, 0, in0);  \
    out = (RTYPE) __msa_insert_w((v4i32) out, 1, in1);  \
 }
 #define INSERT_W2_UB(...) INSERT_W2(v16u8, __VA_ARGS__)
 #define INSERT_W2_SB(...) INSERT_W2(v16i8, __VA_ARGS__)

 #define INSERT_W4(RTYPE, in0, in1, in2, in3, out)       \
 {                                                       \
    out = (RTYPE) __msa_insert_w((v4i32) out, 0, in0);  \
@@ -1364,8 +1395,11 @@
    out0 = (RTYPE) __msa_ilvr_b((v16i8) in0, (v16i8) in1);  \
    out1 = (RTYPE) __msa_ilvl_b((v16i8) in0, (v16i8) in1);  \
 }
 #define ILVRL_B2_UB(...) ILVRL_B2(v16u8, __VA_ARGS__)
 #define ILVRL_B2_SB(...) ILVRL_B2(v16i8, __VA_ARGS__)
 #define ILVRL_B2_UH(...) ILVRL_B2(v8u16, __VA_ARGS__)
 #define ILVRL_B2_SH(...) ILVRL_B2(v8i16, __VA_ARGS__)
 #define ILVRL_B2_SW(...) ILVRL_B2(v4i32, __VA_ARGS__)

 #define ILVRL_H2(RTYPE, in0, in1, out0, out1)               \
 {                                                           \
@@ -1923,6 +1957,18 @@
    ADD2(in4, in5, in6, in7, out2, out3);                                     \
 }

 /* Description : Subtraction of 2 pairs of vectors
    Arguments   : Inputs  - in0, in1, in2, in3
                  Outputs - out0, out1
    Details     : Each element from 2 pairs vectors is subtracted and 2 results
                  are produced: out0 = in0 - in1 and out1 = in2 - in3.
                  The operands are parenthesized so that an expression
                  argument such as 'a + b' is subtracted as a whole instead
                  of being split by operator precedence.
 */
 #define SUB2(in0, in1, in2, in3, out0, out1)  \
 {                                             \
    out0 = (in0) - (in1);                     \
    out1 = (in2) - (in3);                     \
 }

 /* Description : Sign extend byte elements from input vector and return
                 halfword results in pair of vectors
   Arguments   : Inputs  - in           (1 input byte vector)