You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

86 lines
2.5KB

  1. /*
  2. * Copyright (c) 2007 Luca Barbato <lu_zero@gentoo.org>
  3. *
  4. * This file is part of Libav.
  5. *
  6. * Libav is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * Libav is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with Libav; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. #include <stdint.h>
  21. #include "config.h"
  22. #if HAVE_ALTIVEC_H
  23. #include <altivec.h>
  24. #endif
  25. #include "libavutil/attributes.h"
  26. #include "libavutil/cpu.h"
  27. #include "libavutil/ppc/cpu.h"
  28. #include "libavutil/ppc/types_altivec.h"
  29. #include "libavutil/ppc/util_altivec.h"
  30. #include "libavcodec/svq1enc.h"
  31. #if HAVE_ALTIVEC
  32. static int ssd_int8_vs_int16_altivec(const int8_t *pix1, const int16_t *pix2,
  33. int size)
  34. {
  35. int i, size16 = size >> 4;
  36. vector signed char vpix1;
  37. vector signed short vpix2, vdiff, vpix1l, vpix1h;
  38. union {
  39. vector signed int vscore;
  40. int32_t score[4];
  41. } u = { .vscore = vec_splat_s32(0) };
  42. while (size16) {
  43. // score += (pix1[i] - pix2[i]) * (pix1[i] - pix2[i]);
  44. // load pix1 and the first batch of pix2
  45. vpix1 = vec_unaligned_load(pix1);
  46. vpix2 = vec_unaligned_load(pix2);
  47. pix2 += 8;
  48. // unpack
  49. vpix1h = vec_unpackh(vpix1);
  50. vdiff = vec_sub(vpix1h, vpix2);
  51. vpix1l = vec_unpackl(vpix1);
  52. // load another batch from pix2
  53. vpix2 = vec_unaligned_load(pix2);
  54. u.vscore = vec_msum(vdiff, vdiff, u.vscore);
  55. vdiff = vec_sub(vpix1l, vpix2);
  56. u.vscore = vec_msum(vdiff, vdiff, u.vscore);
  57. pix1 += 16;
  58. pix2 += 8;
  59. size16--;
  60. }
  61. u.vscore = vec_sums(u.vscore, vec_splat_s32(0));
  62. size %= 16;
  63. for (i = 0; i < size; i++)
  64. u.score[3] += (pix1[i] - pix2[i]) * (pix1[i] - pix2[i]);
  65. return u.score[3];
  66. }
  67. #endif /* HAVE_ALTIVEC */
  68. av_cold void ff_svq1enc_init_ppc(SVQ1EncContext *c)
  69. {
  70. #if HAVE_ALTIVEC
  71. if (!PPC_ALTIVEC(av_get_cpu_flags()))
  72. return;
  73. c->ssd_int8_vs_int16 = ssd_int8_vs_int16_altivec;
  74. #endif /* HAVE_ALTIVEC */
  75. }