You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

119 lines
4.2KB

  1. /*
  2. * Lossless video DSP utils
  3. *
  4. * This file is part of FFmpeg.
  5. *
  6. * FFmpeg is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * FFmpeg is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with FFmpeg; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. #include "config.h"
  21. #include "libavutil/x86/asm.h"
  22. #include "../lossless_videodsp.h"
  23. #include "libavutil/x86/cpu.h"
  24. void ff_add_bytes_mmx(uint8_t *dst, uint8_t *src, ptrdiff_t w);
  25. void ff_add_bytes_sse2(uint8_t *dst, uint8_t *src, ptrdiff_t w);
  26. void ff_add_median_pred_mmxext(uint8_t *dst, const uint8_t *top,
  27. const uint8_t *diff, ptrdiff_t w,
  28. int *left, int *left_top);
  29. void ff_add_median_pred_sse2(uint8_t *dst, const uint8_t *top,
  30. const uint8_t *diff, ptrdiff_t w,
  31. int *left, int *left_top);
  32. int ff_add_left_pred_ssse3(uint8_t *dst, const uint8_t *src,
  33. ptrdiff_t w, int left);
  34. int ff_add_left_pred_unaligned_ssse3(uint8_t *dst, const uint8_t *src,
  35. ptrdiff_t w, int left);
  36. int ff_add_left_pred_int16_ssse3(uint16_t *dst, const uint16_t *src, unsigned mask, ptrdiff_t w, unsigned acc);
  37. int ff_add_left_pred_int16_sse4(uint16_t *dst, const uint16_t *src, unsigned mask, ptrdiff_t w, unsigned acc);
  38. #if HAVE_INLINE_ASM && HAVE_7REGS && ARCH_X86_32
  39. static void add_median_pred_cmov(uint8_t *dst, const uint8_t *top,
  40. const uint8_t *diff, ptrdiff_t w,
  41. int *left, int *left_top)
  42. {
  43. x86_reg w2 = -w;
  44. x86_reg x;
  45. int l = *left & 0xff;
  46. int tl = *left_top & 0xff;
  47. int t;
  48. __asm__ volatile (
  49. "mov %7, %3 \n"
  50. "1: \n"
  51. "movzbl (%3, %4), %2 \n"
  52. "mov %2, %k3 \n"
  53. "sub %b1, %b3 \n"
  54. "add %b0, %b3 \n"
  55. "mov %2, %1 \n"
  56. "cmp %0, %2 \n"
  57. "cmovg %0, %2 \n"
  58. "cmovg %1, %0 \n"
  59. "cmp %k3, %0 \n"
  60. "cmovg %k3, %0 \n"
  61. "mov %7, %3 \n"
  62. "cmp %2, %0 \n"
  63. "cmovl %2, %0 \n"
  64. "add (%6, %4), %b0 \n"
  65. "mov %b0, (%5, %4) \n"
  66. "inc %4 \n"
  67. "jl 1b \n"
  68. : "+&q"(l), "+&q"(tl), "=&r"(t), "=&q"(x), "+&r"(w2)
  69. : "r"(dst + w), "r"(diff + w), "rm"(top + w)
  70. );
  71. *left = l;
  72. *left_top = tl;
  73. }
  74. #endif
  75. void ff_llviddsp_init_x86(LLVidDSPContext *c)
  76. {
  77. int cpu_flags = av_get_cpu_flags();
  78. #if HAVE_INLINE_ASM && HAVE_7REGS && ARCH_X86_32
  79. if (cpu_flags & AV_CPU_FLAG_CMOV)
  80. c->add_median_pred = add_median_pred_cmov;
  81. #endif
  82. if (ARCH_X86_32 && EXTERNAL_MMX(cpu_flags)) {
  83. c->add_bytes = ff_add_bytes_mmx;
  84. }
  85. if (ARCH_X86_32 && EXTERNAL_MMXEXT(cpu_flags)) {
  86. /* slower than cmov version on AMD */
  87. if (!(cpu_flags & AV_CPU_FLAG_3DNOW))
  88. c->add_median_pred = ff_add_median_pred_mmxext;
  89. }
  90. if (EXTERNAL_SSE2(cpu_flags)) {
  91. c->add_bytes = ff_add_bytes_sse2;
  92. c->add_median_pred = ff_add_median_pred_sse2;
  93. }
  94. if (EXTERNAL_SSSE3(cpu_flags)) {
  95. c->add_left_pred = ff_add_left_pred_ssse3;
  96. c->add_left_pred_int16 = ff_add_left_pred_int16_ssse3;
  97. }
  98. if (EXTERNAL_SSSE3_FAST(cpu_flags)) {
  99. c->add_left_pred = ff_add_left_pred_unaligned_ssse3;
  100. }
  101. if (EXTERNAL_SSE4(cpu_flags)) {
  102. c->add_left_pred_int16 = ff_add_left_pred_int16_sse4;
  103. }
  104. }