You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

83 lines
2.5KB

  1. /*
  2. * Copyright (c) 2007 Luca Barbato <lu_zero@gentoo.org>
  3. *
  4. * This file is part of Libav.
  5. *
  6. * Libav is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * Libav is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with Libav; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. #include "config.h"
  21. #include <stdint.h>
  22. #include "libavutil/attributes.h"
  23. #include "libavutil/cpu.h"
  24. #include "libavutil/ppc/cpu.h"
  25. #include "libavutil/ppc/util_altivec.h"
  26. #include "libavcodec/svq1enc.h"
  27. #if HAVE_ALTIVEC && HAVE_BIGENDIAN
  28. static int ssd_int8_vs_int16_altivec(const int8_t *pix1, const int16_t *pix2,
  29. int size)
  30. {
  31. int i, size16 = size >> 4;
  32. vector signed char vpix1;
  33. vector signed short vpix2, vdiff, vpix1l, vpix1h;
  34. union {
  35. vector signed int vscore;
  36. int32_t score[4];
  37. } u = { .vscore = vec_splat_s32(0) };
  38. while (size16) {
  39. // score += (pix1[i] - pix2[i]) * (pix1[i] - pix2[i]);
  40. // load pix1 and the first batch of pix2
  41. vpix1 = vec_unaligned_load(pix1);
  42. vpix2 = vec_unaligned_load(pix2);
  43. pix2 += 8;
  44. // unpack
  45. vpix1h = vec_unpackh(vpix1);
  46. vdiff = vec_sub(vpix1h, vpix2);
  47. vpix1l = vec_unpackl(vpix1);
  48. // load another batch from pix2
  49. vpix2 = vec_unaligned_load(pix2);
  50. u.vscore = vec_msum(vdiff, vdiff, u.vscore);
  51. vdiff = vec_sub(vpix1l, vpix2);
  52. u.vscore = vec_msum(vdiff, vdiff, u.vscore);
  53. pix1 += 16;
  54. pix2 += 8;
  55. size16--;
  56. }
  57. u.vscore = vec_sums(u.vscore, vec_splat_s32(0));
  58. size %= 16;
  59. for (i = 0; i < size; i++)
  60. u.score[3] += (pix1[i] - pix2[i]) * (pix1[i] - pix2[i]);
  61. return u.score[3];
  62. }
  63. #endif /* HAVE_ALTIVEC && HAVE_BIGENDIAN */
  64. av_cold void ff_svq1enc_init_ppc(SVQ1EncContext *c)
  65. {
  66. #if HAVE_ALTIVEC && HAVE_BIGENDIAN
  67. if (!PPC_ALTIVEC(av_get_cpu_flags()))
  68. return;
  69. c->ssd_int8_vs_int16 = ssd_int8_vs_int16_altivec;
  70. #endif /* HAVE_ALTIVEC && HAVE_BIGENDIAN */
  71. }