You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

228 lines
8.5KB

  1. /*
  2. * The simplest mpeg encoder (well, it was the simplest!)
  3. * Copyright (c) 2000,2001 Fabrice Bellard
  4. *
  5. * This file is part of Libav.
  6. *
  7. * Libav is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * Libav is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with Libav; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. #include "libavutil/attributes.h"
  22. #include "libavutil/cpu.h"
  23. #include "libavutil/x86/asm.h"
  24. #include "libavutil/x86/cpu.h"
  25. #include "libavcodec/avcodec.h"
  26. #include "libavcodec/dct.h"
  27. #include "libavcodec/mpegvideo.h"
  28. /* not permutated inverse zigzag_direct + 1 for MMX quantizer */
  29. DECLARE_ALIGNED(16, static uint16_t, inv_zigzag_direct16)[64];
  30. #if HAVE_MMX_INLINE
  31. #define COMPILE_TEMPLATE_MMXEXT 0
  32. #define COMPILE_TEMPLATE_SSE2 0
  33. #define COMPILE_TEMPLATE_SSSE3 0
  34. #define RENAME(a) a ## _MMX
  35. #define RENAMEl(a) a ## _mmx
  36. #include "mpegvideoenc_template.c"
  37. #endif /* HAVE_MMX_INLINE */
  38. #if HAVE_MMXEXT_INLINE
  39. #undef COMPILE_TEMPLATE_SSSE3
  40. #undef COMPILE_TEMPLATE_SSE2
  41. #undef COMPILE_TEMPLATE_MMXEXT
  42. #define COMPILE_TEMPLATE_MMXEXT 1
  43. #define COMPILE_TEMPLATE_SSE2 0
  44. #define COMPILE_TEMPLATE_SSSE3 0
  45. #undef RENAME
  46. #undef RENAMEl
  47. #define RENAME(a) a ## _MMXEXT
  48. #define RENAMEl(a) a ## _mmxext
  49. #include "mpegvideoenc_template.c"
  50. #endif /* HAVE_MMXEXT_INLINE */
  51. #if HAVE_SSE2_INLINE
  52. #undef COMPILE_TEMPLATE_MMXEXT
  53. #undef COMPILE_TEMPLATE_SSE2
  54. #undef COMPILE_TEMPLATE_SSSE3
  55. #define COMPILE_TEMPLATE_MMXEXT 0
  56. #define COMPILE_TEMPLATE_SSE2 1
  57. #define COMPILE_TEMPLATE_SSSE3 0
  58. #undef RENAME
  59. #undef RENAMEl
  60. #define RENAME(a) a ## _SSE2
  61. #define RENAMEl(a) a ## _sse2
  62. #include "mpegvideoenc_template.c"
  63. #endif /* HAVE_SSE2_INLINE */
  64. #if HAVE_SSSE3_INLINE
  65. #undef COMPILE_TEMPLATE_MMXEXT
  66. #undef COMPILE_TEMPLATE_SSE2
  67. #undef COMPILE_TEMPLATE_SSSE3
  68. #define COMPILE_TEMPLATE_MMXEXT 0
  69. #define COMPILE_TEMPLATE_SSE2 1
  70. #define COMPILE_TEMPLATE_SSSE3 1
  71. #undef RENAME
  72. #undef RENAMEl
  73. #define RENAME(a) a ## _SSSE3
  74. #define RENAMEl(a) a ## _sse2
  75. #include "mpegvideoenc_template.c"
  76. #endif /* HAVE_SSSE3_INLINE */
  77. #if HAVE_INLINE_ASM
  78. static void denoise_dct_mmx(MpegEncContext *s, int16_t *block){
  79. const int intra= s->mb_intra;
  80. int *sum= s->dct_error_sum[intra];
  81. uint16_t *offset= s->dct_offset[intra];
  82. s->dct_count[intra]++;
  83. __asm__ volatile(
  84. "pxor %%mm7, %%mm7 \n\t"
  85. "1: \n\t"
  86. "pxor %%mm0, %%mm0 \n\t"
  87. "pxor %%mm1, %%mm1 \n\t"
  88. "movq (%0), %%mm2 \n\t"
  89. "movq 8(%0), %%mm3 \n\t"
  90. "pcmpgtw %%mm2, %%mm0 \n\t"
  91. "pcmpgtw %%mm3, %%mm1 \n\t"
  92. "pxor %%mm0, %%mm2 \n\t"
  93. "pxor %%mm1, %%mm3 \n\t"
  94. "psubw %%mm0, %%mm2 \n\t"
  95. "psubw %%mm1, %%mm3 \n\t"
  96. "movq %%mm2, %%mm4 \n\t"
  97. "movq %%mm3, %%mm5 \n\t"
  98. "psubusw (%2), %%mm2 \n\t"
  99. "psubusw 8(%2), %%mm3 \n\t"
  100. "pxor %%mm0, %%mm2 \n\t"
  101. "pxor %%mm1, %%mm3 \n\t"
  102. "psubw %%mm0, %%mm2 \n\t"
  103. "psubw %%mm1, %%mm3 \n\t"
  104. "movq %%mm2, (%0) \n\t"
  105. "movq %%mm3, 8(%0) \n\t"
  106. "movq %%mm4, %%mm2 \n\t"
  107. "movq %%mm5, %%mm3 \n\t"
  108. "punpcklwd %%mm7, %%mm4 \n\t"
  109. "punpckhwd %%mm7, %%mm2 \n\t"
  110. "punpcklwd %%mm7, %%mm5 \n\t"
  111. "punpckhwd %%mm7, %%mm3 \n\t"
  112. "paddd (%1), %%mm4 \n\t"
  113. "paddd 8(%1), %%mm2 \n\t"
  114. "paddd 16(%1), %%mm5 \n\t"
  115. "paddd 24(%1), %%mm3 \n\t"
  116. "movq %%mm4, (%1) \n\t"
  117. "movq %%mm2, 8(%1) \n\t"
  118. "movq %%mm5, 16(%1) \n\t"
  119. "movq %%mm3, 24(%1) \n\t"
  120. "add $16, %0 \n\t"
  121. "add $32, %1 \n\t"
  122. "add $16, %2 \n\t"
  123. "cmp %3, %0 \n\t"
  124. " jb 1b \n\t"
  125. : "+r" (block), "+r" (sum), "+r" (offset)
  126. : "r"(block+64)
  127. );
  128. }
  129. static void denoise_dct_sse2(MpegEncContext *s, int16_t *block){
  130. const int intra= s->mb_intra;
  131. int *sum= s->dct_error_sum[intra];
  132. uint16_t *offset= s->dct_offset[intra];
  133. s->dct_count[intra]++;
  134. __asm__ volatile(
  135. "pxor %%xmm7, %%xmm7 \n\t"
  136. "1: \n\t"
  137. "pxor %%xmm0, %%xmm0 \n\t"
  138. "pxor %%xmm1, %%xmm1 \n\t"
  139. "movdqa (%0), %%xmm2 \n\t"
  140. "movdqa 16(%0), %%xmm3 \n\t"
  141. "pcmpgtw %%xmm2, %%xmm0 \n\t"
  142. "pcmpgtw %%xmm3, %%xmm1 \n\t"
  143. "pxor %%xmm0, %%xmm2 \n\t"
  144. "pxor %%xmm1, %%xmm3 \n\t"
  145. "psubw %%xmm0, %%xmm2 \n\t"
  146. "psubw %%xmm1, %%xmm3 \n\t"
  147. "movdqa %%xmm2, %%xmm4 \n\t"
  148. "movdqa %%xmm3, %%xmm5 \n\t"
  149. "psubusw (%2), %%xmm2 \n\t"
  150. "psubusw 16(%2), %%xmm3 \n\t"
  151. "pxor %%xmm0, %%xmm2 \n\t"
  152. "pxor %%xmm1, %%xmm3 \n\t"
  153. "psubw %%xmm0, %%xmm2 \n\t"
  154. "psubw %%xmm1, %%xmm3 \n\t"
  155. "movdqa %%xmm2, (%0) \n\t"
  156. "movdqa %%xmm3, 16(%0) \n\t"
  157. "movdqa %%xmm4, %%xmm6 \n\t"
  158. "movdqa %%xmm5, %%xmm0 \n\t"
  159. "punpcklwd %%xmm7, %%xmm4 \n\t"
  160. "punpckhwd %%xmm7, %%xmm6 \n\t"
  161. "punpcklwd %%xmm7, %%xmm5 \n\t"
  162. "punpckhwd %%xmm7, %%xmm0 \n\t"
  163. "paddd (%1), %%xmm4 \n\t"
  164. "paddd 16(%1), %%xmm6 \n\t"
  165. "paddd 32(%1), %%xmm5 \n\t"
  166. "paddd 48(%1), %%xmm0 \n\t"
  167. "movdqa %%xmm4, (%1) \n\t"
  168. "movdqa %%xmm6, 16(%1) \n\t"
  169. "movdqa %%xmm5, 32(%1) \n\t"
  170. "movdqa %%xmm0, 48(%1) \n\t"
  171. "add $32, %0 \n\t"
  172. "add $64, %1 \n\t"
  173. "add $32, %2 \n\t"
  174. "cmp %3, %0 \n\t"
  175. " jb 1b \n\t"
  176. : "+r" (block), "+r" (sum), "+r" (offset)
  177. : "r"(block+64)
  178. XMM_CLOBBERS_ONLY("%xmm0", "%xmm1", "%xmm2", "%xmm3",
  179. "%xmm4", "%xmm5", "%xmm6", "%xmm7")
  180. );
  181. }
  182. #endif /* HAVE_INLINE_ASM */
  183. av_cold void ff_MPV_encode_init_x86(MpegEncContext *s)
  184. {
  185. const int dct_algo = s->avctx->dct_algo;
  186. int i;
  187. for (i = 0; i < 64; i++)
  188. inv_zigzag_direct16[ff_zigzag_direct[i]] = i + 1;
  189. if (dct_algo == FF_DCT_AUTO || dct_algo == FF_DCT_MMX) {
  190. #if HAVE_MMX_INLINE
  191. int cpu_flags = av_get_cpu_flags();
  192. if (INLINE_MMX(cpu_flags)) {
  193. s->dct_quantize = dct_quantize_MMX;
  194. s->denoise_dct = denoise_dct_mmx;
  195. }
  196. #endif
  197. #if HAVE_MMXEXT_INLINE
  198. if (INLINE_MMXEXT(cpu_flags))
  199. s->dct_quantize = dct_quantize_MMXEXT;
  200. #endif
  201. #if HAVE_SSE2_INLINE
  202. if (INLINE_SSE2(cpu_flags)) {
  203. s->dct_quantize = dct_quantize_SSE2;
  204. s->denoise_dct = denoise_dct_sse2;
  205. }
  206. #endif
  207. #if HAVE_SSSE3_INLINE
  208. if (INLINE_SSSE3(cpu_flags))
  209. s->dct_quantize = dct_quantize_SSSE3;
  210. #endif
  211. }
  212. }