You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

106 lines
3.1KB

  1. /* SIMD-optimized IDCT functions for HEVC decoding
  2. * Copyright (c) Alexandra Hajkova
  3. *
  4. * This file is part of Libav.
  5. *
  6. * Libav is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * Libav is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with Libav; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. #include "config.h"
  21. #include "libavutil/attributes.h"
  22. #include "libavutil/cpu.h"
  23. #include "libavutil/ppc/cpu.h"
  24. #include "libavutil/ppc/util_altivec.h"
  25. #include "libavcodec/hevcdsp.h"
  26. #if HAVE_ALTIVEC
  27. static const vec_s16 trans4[4] = {
  28. { 64, 64, 64, 64, 64, 64, 64, 64 },
  29. { 83, 36, 83, 36, 83, 36, 83, 36 },
  30. { 64, -64, 64, -64, 64, -64, 64, -64 },
  31. { 36, -83, 36, -83, 36, -83, 36, -83 },
  32. };
  33. static const vec_u8 mask[2] = {
  34. { 0x00, 0x01, 0x08, 0x09, 0x10, 0x11, 0x18, 0x19, 0x02, 0x03, 0x0A, 0x0B, 0x12, 0x13, 0x1A, 0x1B },
  35. { 0x04, 0x05, 0x0C, 0x0D, 0x14, 0x15, 0x1C, 0x1D, 0x06, 0x07, 0x0E, 0x0F, 0x16, 0x17, 0x1E, 0x1F },
  36. };
  37. static void transform4x4(vec_s16 src_01, vec_s16 src_23, vec_s32 res[4],
  38. const int shift, int16_t *coeffs)
  39. {
  40. vec_s16 src_02, src_13;
  41. vec_s32 zero = vec_splat_s32(0);
  42. vec_s32 e0, o0, e1, o1;
  43. vec_s32 add;
  44. src_13 = vec_mergel(src_01, src_23);
  45. src_02 = vec_mergeh(src_01, src_23);
  46. e0 = vec_msums(src_02, trans4[0], zero);
  47. o0 = vec_msums(src_13, trans4[1], zero);
  48. e1 = vec_msums(src_02, trans4[2], zero);
  49. o1 = vec_msums(src_13, trans4[3], zero);
  50. add = vec_sl(vec_splat_s32(1), vec_splat_u32(shift - 1));
  51. e0 = vec_add(e0, add);
  52. e1 = vec_add(e1, add);
  53. res[0] = vec_add(e0, o0);
  54. res[1] = vec_add(e1, o1);
  55. res[2] = vec_sub(e1, o1);
  56. res[3] = vec_sub(e0, o0);
  57. }
  58. static void scale(vec_s32 res[4], vec_s16 res_packed[2], int shift)
  59. {
  60. int i;
  61. vec_u32 v_shift = vec_splat_u32(shift);
  62. for (i = 0; i < 4; i++)
  63. res[i] = vec_sra(res[i], v_shift);
  64. // clip16
  65. res_packed[0] = vec_packs(res[0], res[1]);
  66. res_packed[1] = vec_packs(res[2], res[3]);
  67. }
  68. #define FUNCDECL(a, depth) a ## _ ## depth ## _altivec
  69. #define FUNC(a, b) FUNCDECL(a, b)
  70. #define BIT_DEPTH 8
  71. #include "hevcdsp_template.c"
  72. #undef BIT_DEPTH
  73. #define BIT_DEPTH 10
  74. #include "hevcdsp_template.c"
  75. #undef BIT_DEPTH
  76. #endif /* HAVE_ALTIVEC */
  77. av_cold void ff_hevc_dsp_init_ppc(HEVCDSPContext *c, const int bit_depth)
  78. {
  79. #if HAVE_ALTIVEC
  80. if (!PPC_ALTIVEC(av_get_cpu_flags()))
  81. return;
  82. if (bit_depth == 8)
  83. c->idct[0] = ff_hevc_idct_4x4_8_altivec;
  84. if (bit_depth == 10)
  85. c->idct[0] = ff_hevc_idct_4x4_10_altivec;
  86. #endif /* HAVE_ALTIVEC */
  87. }