This patch adds MSA (MIPS-SIMD-Arch) optimizations for HEVC intra prediction functions in new file hevcpred_msa.c. Adds new generic macros (needed for this patch) in libavutil/mips/generic_macros_msa.h. Signed-off-by: Shivraj Patil <shivraj.patil@imgtec.com> Signed-off-by: Michael Niedermayer <michaelni@gmx.at> tags/n2.8
| @@ -74,4 +74,7 @@ void ff_hevc_pred_init(HEVCPredContext *hpc, int bit_depth) | |||
| HEVC_PRED(8); | |||
| break; | |||
| } | |||
| if (ARCH_MIPS) | |||
| ff_hevc_pred_init_mips(hpc, bit_depth); | |||
| } | |||
| @@ -41,5 +41,6 @@ typedef struct HEVCPredContext { | |||
| } HEVCPredContext; | |||
| void ff_hevc_pred_init(HEVCPredContext *hpc, int bit_depth); | |||
| void ff_hevc_pred_init_mips(HEVCPredContext *hpc, int bit_depth); | |||
| #endif /* AVCODEC_HEVCPRED_H */ | |||
| @@ -18,7 +18,8 @@ OBJS-$(CONFIG_AAC_DECODER) += mips/aacdec_mips.o \ | |||
| mips/aacpsdsp_mips.o | |||
| MIPSDSPR1-OBJS-$(CONFIG_AAC_ENCODER) += mips/aaccoder_mips.o | |||
| MIPSFPU-OBJS-$(CONFIG_AAC_ENCODER) += mips/iirfilter_mips.o | |||
| OBJS-$(CONFIG_HEVC_DECODER) += mips/hevcdsp_init_mips.o | |||
| OBJS-$(CONFIG_HEVC_DECODER) += mips/hevcdsp_init_mips.o \ | |||
| mips/hevcpred_init_mips.o | |||
| OBJS-$(CONFIG_H264DSP) += mips/h264dsp_init_mips.o | |||
| OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_init_mips.o | |||
| MSA-OBJS-$(CONFIG_HEVC_DECODER) += mips/hevcdsp_msa.o \ | |||
| @@ -27,7 +28,8 @@ MSA-OBJS-$(CONFIG_HEVC_DECODER) += mips/hevcdsp_msa.o \ | |||
| mips/hevc_mc_bi_msa.o \ | |||
| mips/hevc_mc_biw_msa.o \ | |||
| mips/hevc_idct_msa.o \ | |||
| mips/hevc_lpf_sao_msa.o | |||
| mips/hevc_lpf_sao_msa.o \ | |||
| mips/hevcpred_msa.o | |||
| MSA-OBJS-$(CONFIG_H264DSP) += mips/h264dsp_msa.o | |||
| LOONGSON3-OBJS-$(CONFIG_H264DSP) += mips/h264dsp_mmi.o | |||
| LOONGSON3-OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_mmi.o | |||
| @@ -0,0 +1,48 @@ | |||
| /* | |||
| * Copyright (c) 2015 Shivraj Patil (Shivraj.Patil@imgtec.com) | |||
| * | |||
| * This file is part of FFmpeg. | |||
| * | |||
| * FFmpeg is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * FFmpeg is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with FFmpeg; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #include "libavcodec/hevc.h" | |||
| #include "libavcodec/mips/hevcpred_mips.h" | |||
| #if HAVE_MSA | |||
| /* Install the MSA (MIPS SIMD) intra prediction routines into the predictor | |||
| * context. Only 8-bit content is covered: for every other bit depth the | |||
| * function returns without touching the context. | |||
| */ | |||
| static av_cold void hevc_pred_init_msa(HEVCPredContext *c, const int bit_depth) | |||
| { | |||
|     if (bit_depth != 8) | |||
|         return; | |||
|     /* Block-level entry points: only slots 2 and 3 are overridden. */ | |||
|     c->intra_pred[2] = ff_intra_pred_8_16x16_msa; | |||
|     c->intra_pred[3] = ff_intra_pred_8_32x32_msa; | |||
|     /* Mode-specific predictors: DC, then the planar and angular tables. */ | |||
|     c->pred_dc = ff_hevc_intra_pred_dc_msa; | |||
|     c->pred_planar[0] = ff_hevc_intra_pred_planar_0_msa; | |||
|     c->pred_planar[1] = ff_hevc_intra_pred_planar_1_msa; | |||
|     c->pred_planar[2] = ff_hevc_intra_pred_planar_2_msa; | |||
|     c->pred_planar[3] = ff_hevc_intra_pred_planar_3_msa; | |||
|     c->pred_angular[0] = ff_pred_intra_pred_angular_0_msa; | |||
|     c->pred_angular[1] = ff_pred_intra_pred_angular_1_msa; | |||
|     c->pred_angular[2] = ff_pred_intra_pred_angular_2_msa; | |||
|     c->pred_angular[3] = ff_pred_intra_pred_angular_3_msa; | |||
| } | |||
| #endif // #if HAVE_MSA | |||
| /* MIPS-specific initialisation hook for the HEVC intra predictor context; | |||
| * ff_hevc_pred_init() calls it when ARCH_MIPS is set. Without HAVE_MSA the | |||
| * body is empty and the context is left unchanged. | |||
| */ void ff_hevc_pred_init_mips(HEVCPredContext *c, const int bit_depth) | |||
| { | |||
| #if HAVE_MSA | |||
| hevc_pred_init_msa(c, bit_depth); /* only acts when bit_depth is 8 */ | |||
| #endif // #if HAVE_MSA | |||
| } | |||
| @@ -0,0 +1,73 @@ | |||
| /* | |||
| * Copyright (c) 2015 Shivraj Patil (Shivraj.Patil@imgtec.com) | |||
| * | |||
| * This file is part of FFmpeg. | |||
| * | |||
| * FFmpeg is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * FFmpeg is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with FFmpeg; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #ifndef AVCODEC_MIPS_HEVCPRED_MIPS_H | |||
| #define AVCODEC_MIPS_HEVCPRED_MIPS_H | |||
| #include "libavcodec/hevcdsp.h" | |||
| /* Prototypes of the MSA intra prediction routines. The trailing notes name | |||
| * the HEVCPredContext slot each routine is assigned to by | |||
| * hevc_pred_init_msa(), which only installs them when bit_depth is 8. | |||
| */ void ff_hevc_intra_pred_planar_0_msa(uint8_t *dst, | |||
| const uint8_t *src_top, | |||
| const uint8_t *src_left, | |||
| ptrdiff_t stride); /* pred_planar[0] */ | |||
| void ff_hevc_intra_pred_planar_1_msa(uint8_t *dst, | |||
| const uint8_t *src_top, | |||
| const uint8_t *src_left, | |||
| ptrdiff_t stride); /* pred_planar[1] */ | |||
| void ff_hevc_intra_pred_planar_2_msa(uint8_t *dst, | |||
| const uint8_t *src_top, | |||
| const uint8_t *src_left, | |||
| ptrdiff_t stride); /* pred_planar[2] */ | |||
| void ff_hevc_intra_pred_planar_3_msa(uint8_t *dst, | |||
| const uint8_t *src_top, | |||
| const uint8_t *src_left, | |||
| ptrdiff_t stride); /* pred_planar[3] */ | |||
| void ff_hevc_intra_pred_dc_msa(uint8_t *dst, const uint8_t *src_top, | |||
| const uint8_t *src_left, | |||
| ptrdiff_t stride, int log2, int c_idx); /* pred_dc */ | |||
| void ff_pred_intra_pred_angular_0_msa(uint8_t *dst, | |||
| const uint8_t *src_top, | |||
| const uint8_t *src_left, | |||
| ptrdiff_t stride, int c_idx, int mode); /* pred_angular[0] */ | |||
| void ff_pred_intra_pred_angular_1_msa(uint8_t *dst, | |||
| const uint8_t *src_top, | |||
| const uint8_t *src_left, | |||
| ptrdiff_t stride, int c_idx, int mode); /* pred_angular[1] */ | |||
| void ff_pred_intra_pred_angular_2_msa(uint8_t *dst, | |||
| const uint8_t *src_top, | |||
| const uint8_t *src_left, | |||
| ptrdiff_t stride, int c_idx, int mode); /* pred_angular[2] */ | |||
| void ff_pred_intra_pred_angular_3_msa(uint8_t *dst, | |||
| const uint8_t *src_top, | |||
| const uint8_t *src_left, | |||
| ptrdiff_t stride, int c_idx, int mode); /* pred_angular[3] */ | |||
| void ff_intra_pred_8_16x16_msa(HEVCContext *s, int x0, int y0, int c_idx); /* intra_pred[2] */ | |||
| void ff_intra_pred_8_32x32_msa(HEVCContext *s, int x0, int y0, int c_idx); /* intra_pred[3] */ | |||
| #endif // #ifndef AVCODEC_MIPS_HEVCPRED_MIPS_H | |||
| @@ -770,7 +770,9 @@ | |||
| SLDI_B2_0(RTYPE, in0, in1, out0, out1, slide_val); \ | |||
| SLDI_B2_0(RTYPE, in2, in3, out2, out3, slide_val); \ | |||
| } | |||
| #define SLDI_B4_0_UB(...) SLDI_B4_0(v16u8, __VA_ARGS__) | |||
| #define SLDI_B4_0_SB(...) SLDI_B4_0(v16i8, __VA_ARGS__) | |||
| #define SLDI_B4_0_SH(...) SLDI_B4_0(v8i16, __VA_ARGS__) | |||
| /* Description : Immediate number of columns to slide | |||
| Arguments : Inputs - in0_0, in0_1, in1_0, in1_1, slide_val | |||
| @@ -1037,6 +1039,21 @@ | |||
| out_m; \ | |||
| } ) | |||
| /* Description : Horizontal addition of unsigned byte vector elements | |||
|    Arguments   : Inputs  - in0, in1 | |||
|                  Outputs - out0, out1 | |||
|                  Return Type - as per RTYPE | |||
|    Details     : Each unsigned odd byte element from 'in0' is added to | |||
|                  even unsigned byte element from 'in0' (pairwise) and the | |||
|                  halfword result is stored in 'out0'. | |||
|                  'in1' is processed the same way and the halfword result | |||
|                  is stored in 'out1' | |||
| */ | |||
| #define HADD_UB2(RTYPE, in0, in1, out0, out1) \ | |||
| { \ | |||
| out0 = (RTYPE) __msa_hadd_u_h((v16u8) in0, (v16u8) in0); \ | |||
| out1 = (RTYPE) __msa_hadd_u_h((v16u8) in1, (v16u8) in1); \ | |||
| } | |||
| #define HADD_UB2_UH(...) HADD_UB2(v8u16, __VA_ARGS__) | |||
| /* Description : Horizontal subtraction of unsigned byte vector elements | |||
| Arguments : Inputs - in0, in1 | |||
| Outputs - out0, out1 | |||
| @@ -1053,6 +1070,20 @@ | |||
| #define HSUB_UB2_UH(...) HSUB_UB2(v8u16, __VA_ARGS__) | |||
| #define HSUB_UB2_SH(...) HSUB_UB2(v8i16, __VA_ARGS__) | |||
| /* Description : Insert specified word elements from input vectors to 1 | |||
|                  destination vector | |||
|    Arguments   : Inputs  - in0, in1 (INSERT_W2) | |||
|                            in0, in1, in2, in3 (INSERT_W4, 4 input vectors) | |||
|                  Outputs - out (output vector) | |||
|                  Return Type - as per RTYPE | |||
|    Details     : INSERT_W2 writes 'in0' to word element 0 and 'in1' to word | |||
|                  element 1 of 'out'. | |||
|                  NOTE(review): the inputs are passed straight to | |||
|                  __msa_insert_w, so they are presumably scalar word values | |||
|                  rather than vectors - confirm against the intrinsic. | |||
| */ | |||
| #define INSERT_W2(RTYPE, in0, in1, out) \ | |||
| { \ | |||
| out = (RTYPE) __msa_insert_w((v4i32) out, 0, in0); \ | |||
| out = (RTYPE) __msa_insert_w((v4i32) out, 1, in1); \ | |||
| } | |||
| #define INSERT_W2_UB(...) INSERT_W2(v16u8, __VA_ARGS__) | |||
| #define INSERT_W2_SB(...) INSERT_W2(v16i8, __VA_ARGS__) | |||
| #define INSERT_W4(RTYPE, in0, in1, in2, in3, out) \ | |||
| { \ | |||
| out = (RTYPE) __msa_insert_w((v4i32) out, 0, in0); \ | |||
| @@ -1364,8 +1395,11 @@ | |||
| out0 = (RTYPE) __msa_ilvr_b((v16i8) in0, (v16i8) in1); \ | |||
| out1 = (RTYPE) __msa_ilvl_b((v16i8) in0, (v16i8) in1); \ | |||
| } | |||
| #define ILVRL_B2_UB(...) ILVRL_B2(v16u8, __VA_ARGS__) | |||
| #define ILVRL_B2_SB(...) ILVRL_B2(v16i8, __VA_ARGS__) | |||
| #define ILVRL_B2_UH(...) ILVRL_B2(v8u16, __VA_ARGS__) | |||
| #define ILVRL_B2_SH(...) ILVRL_B2(v8i16, __VA_ARGS__) | |||
| #define ILVRL_B2_SW(...) ILVRL_B2(v4i32, __VA_ARGS__) | |||
| #define ILVRL_H2(RTYPE, in0, in1, out0, out1) \ | |||
| { \ | |||
| @@ -1923,6 +1957,18 @@ | |||
| ADD2(in4, in5, in6, in7, out2, out3); \ | |||
| } | |||
| /* Description : Subtraction of 2 pairs of vectors | |||
|    Arguments   : Inputs  - in0, in1, in2, in3 | |||
|                  Outputs - out0, out1 | |||
|    Details     : Each element of 'in1' is subtracted from the matching | |||
|                  element of 'in0' and stored in 'out0'; likewise | |||
|                  'in2' - 'in3' is stored in 'out1', so 2 results | |||
|                  are produced | |||
| */ | |||
| #define SUB2(in0, in1, in2, in3, out0, out1) \ | |||
| { \ | |||
| out0 = in0 - in1; \ | |||
| out1 = in2 - in3; \ | |||
| } | |||
| /* Description : Sign extend byte elements from input vector and return | |||
| halfword results in pair of vectors | |||
| Arguments : Inputs - in (1 input byte vector) | |||