You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

154 lines
5.0KB

  1. /*
  2. * Copyright (c) 2002 Brian Foley
  3. * Copyright (c) 2002 Dieter Shirley
  4. * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
  5. *
  6. * This file is part of Libav.
  7. *
  8. * Libav is free software; you can redistribute it and/or
  9. * modify it under the terms of the GNU Lesser General Public
  10. * License as published by the Free Software Foundation; either
  11. * version 2.1 of the License, or (at your option) any later version.
  12. *
  13. * Libav is distributed in the hope that it will be useful,
  14. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16. * Lesser General Public License for more details.
  17. *
  18. * You should have received a copy of the GNU Lesser General Public
  19. * License along with Libav; if not, write to the Free Software
  20. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21. */
  22. #include "config.h"
  23. #if HAVE_ALTIVEC_H
  24. #include <altivec.h>
  25. #endif
  26. #include "libavutil/attributes.h"
  27. #include "libavutil/cpu.h"
  28. #include "libavutil/ppc/cpu.h"
  29. #include "libavutil/ppc/types_altivec.h"
  30. #include "libavutil/ppc/util_altivec.h"
  31. #include "libavcodec/avcodec.h"
  32. #include "libavcodec/pixblockdsp.h"
  33. #if HAVE_ALTIVEC
  /**
   * Load an 8x8 block of 8-bit pixels and widen every pixel to 16 bits.
   *
   * @param block     destination for the 64 values, stored row after row
   *                  (8 shorts = 16 bytes per row); assumed 16-byte aligned
   * @param pixels    first source pixel of the top row; may be unaligned
   * @param line_size distance in bytes between consecutive source rows
   *
   * NOTE(review): vec_mergeh(zero, bytes) places the zero byte first in each
   * pair, which looks like a zero-extension only for big-endian element
   * order - confirm before using this on a little-endian target.
   */
  static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
                                 int line_size)
  {
      const vector unsigned char zero =
          (const vector unsigned char) vec_splat_u8(0);
      int i;

      for (i = 0; i < 8; i++) {
          /* The realignment permute is derived from the address of *this*
           * row. Computing it once before the loop is only correct when
           * line_size is a multiple of 16, which nothing here guarantees. */
          const vector unsigned char perm = vec_lvsl(0, pixels);

          /* Two aligned loads cover the 8 wanted bytes. Offset 7 addresses
           * the last wanted byte, so the second load never touches a chunk
           * that holds none of the row. */
          const vector unsigned char pixl  = vec_ld(0, pixels);
          const vector unsigned char pixr  = vec_ld(7, pixels);
          const vector unsigned char bytes = vec_perm(pixl, pixr, perm);

          /* Interleave zero bytes with the first 8 pixels to widen them. */
          const vector signed short shorts =
              (vector signed short) vec_mergeh(zero, bytes);

          /* Row i lands at byte offset i * 16 of the destination block. */
          vec_st(shorts, i * 16, (vector signed short *) block);
          pixels += line_size;
      }
  }
  /* Load 8 possibly unaligned bytes starting at src and widen them to shorts.
   * The permute is derived from src itself, so any alignment is handled.
   * The second load uses offset 7 (the last wanted byte) so it never touches
   * a 16-byte chunk that contains none of the requested bytes. */
  static inline vector signed short load8_widen_altivec(const uint8_t *src)
  {
      const vector unsigned char zero =
          (const vector unsigned char) vec_splat_u8(0);
      const vector unsigned char perm  = vec_lvsl(0, src);
      const vector unsigned char pixl  = vec_ld(0, src);
      const vector unsigned char pixr  = vec_ld(7, src);
      const vector unsigned char bytes = vec_perm(pixl, pixr, perm);

      return (vector signed short) vec_mergeh(zero, bytes);
  }

  /**
   * Compute the per-pixel difference s1 - s2 of two 8x8 blocks of 8-bit
   * pixels and store the 64 results as 16-bit values.
   *
   * @param block  destination, stored row after row (8 shorts per row);
   *               assumed 16-byte aligned
   * @param s1     first pixel of the minuend block; may be unaligned
   * @param s2     first pixel of the subtrahend block; may be unaligned
   * @param stride distance in bytes between consecutive rows, applied to
   *               both s1 and s2
   *
   * NOTE(review): the widening in the helper puts the zero byte first in each
   * pair, which looks like a zero-extension only for big-endian element
   * order - confirm before using this on a little-endian target.
   */
  static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
                                  const uint8_t *s2, int stride)
  {
      int i;

      /* One row per iteration. The permutes are recomputed for every row
       * inside the helper because the alignment of s1 and s2 changes from
       * row to row unless stride is a multiple of 16. */
      for (i = 0; i < 8; i++) {
          const vector signed short shorts1 = load8_widen_altivec(s1);
          const vector signed short shorts2 = load8_widen_altivec(s2);

          vec_st(vec_sub(shorts1, shorts2), 0, (vector signed short *) block);

          s1    += stride;
          s2    += stride;
          block += 8;
      }
  }
  112. #endif /* HAVE_ALTIVEC */
  113. av_cold void ff_pixblockdsp_init_ppc(PixblockDSPContext *c,
  114. AVCodecContext *avctx,
  115. unsigned high_bit_depth)
  116. {
  117. #if HAVE_ALTIVEC
  118. if (!PPC_ALTIVEC(av_get_cpu_flags()))
  119. return;
  120. c->diff_pixels = diff_pixels_altivec;
  121. if (!high_bit_depth) {
  122. c->get_pixels = get_pixels_altivec;
  123. }
  124. #endif /* HAVE_ALTIVEC */
  125. }