You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

192 lines
5.6KB

  1. /*
  2. * Copyright (c) 2002 Brian Foley
  3. * Copyright (c) 2002 Dieter Shirley
  4. * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
  5. *
  6. * This file is part of Libav.
  7. *
  8. * Libav is free software; you can redistribute it and/or
  9. * modify it under the terms of the GNU Lesser General Public
  10. * License as published by the Free Software Foundation; either
  11. * version 2.1 of the License, or (at your option) any later version.
  12. *
  13. * Libav is distributed in the hope that it will be useful,
  14. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16. * Lesser General Public License for more details.
  17. *
  18. * You should have received a copy of the GNU Lesser General Public
  19. * License along with Libav; if not, write to the Free Software
  20. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21. */
  22. #include "config.h"
  23. #include "libavutil/attributes.h"
  24. #include "libavutil/cpu.h"
  25. #include "libavutil/ppc/cpu.h"
  26. #include "libavutil/ppc/util_altivec.h"
  27. #include "libavcodec/avcodec.h"
  28. #include "libavcodec/pixblockdsp.h"
  29. #if HAVE_ALTIVEC && HAVE_BIGENDIAN
  30. static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
  31. ptrdiff_t stride)
  32. {
  33. int i;
  34. vec_u8 perm = vec_lvsl(0, pixels);
  35. const vec_u8 zero = (const vec_u8)vec_splat_u8(0);
  36. for (i = 0; i < 8; i++) {
  37. /* Read potentially unaligned pixels.
  38. * We're reading 16 pixels, and actually only want 8,
  39. * but we simply ignore the extras. */
  40. vec_u8 pixl = vec_ld(0, pixels);
  41. vec_u8 pixr = vec_ld(7, pixels);
  42. vec_u8 bytes = vec_perm(pixl, pixr, perm);
  43. // Convert the bytes into shorts.
  44. vec_s16 shorts = (vec_s16)vec_mergeh(zero, bytes);
  45. // Save the data to the block, we assume the block is 16-byte aligned.
  46. vec_st(shorts, i * 16, (vec_s16 *)block);
  47. pixels += stride;
  48. }
  49. }
  50. static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
  51. const uint8_t *s2, ptrdiff_t stride)
  52. {
  53. int i;
  54. vec_u8 perm1 = vec_lvsl(0, s1);
  55. vec_u8 perm2 = vec_lvsl(0, s2);
  56. const vec_u8 zero = (const vec_u8)vec_splat_u8(0);
  57. vec_s16 shorts1, shorts2;
  58. for (i = 0; i < 4; i++) {
  59. /* Read potentially unaligned pixels.
  60. * We're reading 16 pixels, and actually only want 8,
  61. * but we simply ignore the extras. */
  62. vec_u8 pixl = vec_ld(0, s1);
  63. vec_u8 pixr = vec_ld(15, s1);
  64. vec_u8 bytes = vec_perm(pixl, pixr, perm1);
  65. // Convert the bytes into shorts.
  66. shorts1 = (vec_s16)vec_mergeh(zero, bytes);
  67. // Do the same for the second block of pixels.
  68. pixl = vec_ld(0, s2);
  69. pixr = vec_ld(15, s2);
  70. bytes = vec_perm(pixl, pixr, perm2);
  71. // Convert the bytes into shorts.
  72. shorts2 = (vec_s16)vec_mergeh(zero, bytes);
  73. // Do the subtraction.
  74. shorts1 = vec_sub(shorts1, shorts2);
  75. // Save the data to the block, we assume the block is 16-byte aligned.
  76. vec_st(shorts1, 0, (vec_s16 *)block);
  77. s1 += stride;
  78. s2 += stride;
  79. block += 8;
  80. /* The code below is a copy of the code above...
  81. * This is a manual unroll. */
  82. /* Read potentially unaligned pixels.
  83. * We're reading 16 pixels, and actually only want 8,
  84. * but we simply ignore the extras. */
  85. pixl = vec_ld(0, s1);
  86. pixr = vec_ld(15, s1);
  87. bytes = vec_perm(pixl, pixr, perm1);
  88. // Convert the bytes into shorts.
  89. shorts1 = (vec_s16)vec_mergeh(zero, bytes);
  90. // Do the same for the second block of pixels.
  91. pixl = vec_ld(0, s2);
  92. pixr = vec_ld(15, s2);
  93. bytes = vec_perm(pixl, pixr, perm2);
  94. // Convert the bytes into shorts.
  95. shorts2 = (vec_s16)vec_mergeh(zero, bytes);
  96. // Do the subtraction.
  97. shorts1 = vec_sub(shorts1, shorts2);
  98. // Save the data to the block, we assume the block is 16-byte aligned.
  99. vec_st(shorts1, 0, (vec_s16 *)block);
  100. s1 += stride;
  101. s2 += stride;
  102. block += 8;
  103. }
  104. }
  105. #endif /* HAVE_ALTIVEC && HAVE_BIGENDIAN */
  106. #if HAVE_VSX
  107. static void get_pixels_vsx(int16_t *restrict block, const uint8_t *pixels,
  108. ptrdiff_t stride)
  109. {
  110. int i;
  111. for (i = 0; i < 8; i++) {
  112. vec_s16 shorts = vsx_ld_u8_s16(0, pixels);
  113. vec_vsx_st(shorts, i * 16, block);
  114. pixels += stride;
  115. }
  116. }
  117. static void diff_pixels_vsx(int16_t *restrict block, const uint8_t *s1,
  118. const uint8_t *s2, ptrdiff_t stride)
  119. {
  120. int i;
  121. vec_s16 shorts1, shorts2;
  122. for (i = 0; i < 8; i++) {
  123. shorts1 = vsx_ld_u8_s16(0, s1);
  124. shorts2 = vsx_ld_u8_s16(0, s2);
  125. shorts1 = vec_sub(shorts1, shorts2);
  126. vec_vsx_st(shorts1, 0, block);
  127. s1 += stride;
  128. s2 += stride;
  129. block += 8;
  130. }
  131. }
  132. #endif /* HAVE_VSX */
  133. av_cold void ff_pixblockdsp_init_ppc(PixblockDSPContext *c,
  134. AVCodecContext *avctx,
  135. unsigned high_bit_depth)
  136. {
  137. #if HAVE_ALTIVEC && HAVE_BIGENDIAN
  138. if (!PPC_ALTIVEC(av_get_cpu_flags()))
  139. return;
  140. c->diff_pixels = diff_pixels_altivec;
  141. if (!high_bit_depth) {
  142. c->get_pixels = get_pixels_altivec;
  143. }
  144. #endif /* HAVE_ALTIVEC && HAVE_BIGENDIAN */
  145. #if HAVE_VSX
  146. if (!PPC_VSX(av_get_cpu_flags()))
  147. return;
  148. c->diff_pixels = diff_pixels_vsx;
  149. if (!high_bit_depth)
  150. c->get_pixels = get_pixels_vsx;
  151. #endif /* HAVE_VSX */
  152. }