You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

195 lines
5.6KB

  1. /*
  2. * Copyright (c) 2002 Brian Foley
  3. * Copyright (c) 2002 Dieter Shirley
  4. * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
  5. *
  6. * This file is part of Libav.
  7. *
  8. * Libav is free software; you can redistribute it and/or
  9. * modify it under the terms of the GNU Lesser General Public
  10. * License as published by the Free Software Foundation; either
  11. * version 2.1 of the License, or (at your option) any later version.
  12. *
  13. * Libav is distributed in the hope that it will be useful,
  14. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16. * Lesser General Public License for more details.
  17. *
  18. * You should have received a copy of the GNU Lesser General Public
  19. * License along with Libav; if not, write to the Free Software
  20. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21. */
  22. #include "config.h"
  23. #if HAVE_ALTIVEC_H
  24. #include <altivec.h>
  25. #endif
  26. #include "libavutil/attributes.h"
  27. #include "libavutil/cpu.h"
  28. #include "libavutil/ppc/cpu.h"
  29. #include "libavutil/ppc/types_altivec.h"
  30. #include "libavutil/ppc/util_altivec.h"
  31. #include "libavcodec/avcodec.h"
  32. #include "libavcodec/pixblockdsp.h"
  33. #if HAVE_ALTIVEC && HAVE_BIGENDIAN
  34. static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
  35. int line_size)
  36. {
  37. int i;
  38. vec_u8 perm = vec_lvsl(0, pixels);
  39. const vec_u8 zero = (const vec_u8)vec_splat_u8(0);
  40. for (i = 0; i < 8; i++) {
  41. /* Read potentially unaligned pixels.
  42. * We're reading 16 pixels, and actually only want 8,
  43. * but we simply ignore the extras. */
  44. vec_u8 pixl = vec_ld(0, pixels);
  45. vec_u8 pixr = vec_ld(7, pixels);
  46. vec_u8 bytes = vec_perm(pixl, pixr, perm);
  47. // Convert the bytes into shorts.
  48. vec_s16 shorts = (vec_s16)vec_mergeh(zero, bytes);
  49. // Save the data to the block, we assume the block is 16-byte aligned.
  50. vec_st(shorts, i * 16, (vec_s16 *)block);
  51. pixels += line_size;
  52. }
  53. }
  54. static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
  55. const uint8_t *s2, int stride)
  56. {
  57. int i;
  58. vec_u8 perm1 = vec_lvsl(0, s1);
  59. vec_u8 perm2 = vec_lvsl(0, s2);
  60. const vec_u8 zero = (const vec_u8)vec_splat_u8(0);
  61. vec_s16 shorts1, shorts2;
  62. for (i = 0; i < 4; i++) {
  63. /* Read potentially unaligned pixels.
  64. * We're reading 16 pixels, and actually only want 8,
  65. * but we simply ignore the extras. */
  66. vec_u8 pixl = vec_ld(0, s1);
  67. vec_u8 pixr = vec_ld(15, s1);
  68. vec_u8 bytes = vec_perm(pixl, pixr, perm1);
  69. // Convert the bytes into shorts.
  70. shorts1 = (vec_s16)vec_mergeh(zero, bytes);
  71. // Do the same for the second block of pixels.
  72. pixl = vec_ld(0, s2);
  73. pixr = vec_ld(15, s2);
  74. bytes = vec_perm(pixl, pixr, perm2);
  75. // Convert the bytes into shorts.
  76. shorts2 = (vec_s16)vec_mergeh(zero, bytes);
  77. // Do the subtraction.
  78. shorts1 = vec_sub(shorts1, shorts2);
  79. // Save the data to the block, we assume the block is 16-byte aligned.
  80. vec_st(shorts1, 0, (vec_s16 *)block);
  81. s1 += stride;
  82. s2 += stride;
  83. block += 8;
  84. /* The code below is a copy of the code above...
  85. * This is a manual unroll. */
  86. /* Read potentially unaligned pixels.
  87. * We're reading 16 pixels, and actually only want 8,
  88. * but we simply ignore the extras. */
  89. pixl = vec_ld(0, s1);
  90. pixr = vec_ld(15, s1);
  91. bytes = vec_perm(pixl, pixr, perm1);
  92. // Convert the bytes into shorts.
  93. shorts1 = (vec_s16)vec_mergeh(zero, bytes);
  94. // Do the same for the second block of pixels.
  95. pixl = vec_ld(0, s2);
  96. pixr = vec_ld(15, s2);
  97. bytes = vec_perm(pixl, pixr, perm2);
  98. // Convert the bytes into shorts.
  99. shorts2 = (vec_s16)vec_mergeh(zero, bytes);
  100. // Do the subtraction.
  101. shorts1 = vec_sub(shorts1, shorts2);
  102. // Save the data to the block, we assume the block is 16-byte aligned.
  103. vec_st(shorts1, 0, (vec_s16 *)block);
  104. s1 += stride;
  105. s2 += stride;
  106. block += 8;
  107. }
  108. }
  109. #endif /* HAVE_ALTIVEC && HAVE_BIGENDIAN */
  110. #if HAVE_VSX
  111. static void get_pixels_vsx(int16_t *restrict block, const uint8_t *pixels,
  112. int line_size)
  113. {
  114. int i;
  115. for (i = 0; i < 8; i++) {
  116. vec_s16 shorts = vsx_ld_u8_s16(0, pixels);
  117. vec_vsx_st(shorts, i * 16, block);
  118. pixels += line_size;
  119. }
  120. }
  121. static void diff_pixels_vsx(int16_t *restrict block, const uint8_t *s1,
  122. const uint8_t *s2, int stride)
  123. {
  124. int i;
  125. vec_s16 shorts1, shorts2;
  126. for (i = 0; i < 8; i++) {
  127. shorts1 = vsx_ld_u8_s16(0, s1);
  128. shorts2 = vsx_ld_u8_s16(0, s2);
  129. shorts1 = vec_sub(shorts1, shorts2);
  130. vec_vsx_st(shorts1, 0, block);
  131. s1 += stride;
  132. s2 += stride;
  133. block += 8;
  134. }
  135. }
  136. #endif /* HAVE_VSX */
  137. av_cold void ff_pixblockdsp_init_ppc(PixblockDSPContext *c,
  138. AVCodecContext *avctx,
  139. unsigned high_bit_depth)
  140. {
  141. #if HAVE_ALTIVEC && HAVE_BIGENDIAN
  142. if (!PPC_ALTIVEC(av_get_cpu_flags()))
  143. return;
  144. c->diff_pixels = diff_pixels_altivec;
  145. if (!high_bit_depth) {
  146. c->get_pixels = get_pixels_altivec;
  147. }
  148. #endif /* HAVE_ALTIVEC */
  149. #if HAVE_VSX
  150. if (!PPC_VSX(av_get_cpu_flags()))
  151. return;
  152. c->diff_pixels = diff_pixels_vsx;
  153. if (!high_bit_depth)
  154. c->get_pixels = get_pixels_vsx;
  155. #endif /* HAVE_VSX */
  156. }