You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

229 lines
8.7KB

  1. /*
  2. * The simplest mpeg encoder (well, it was the simplest!)
  3. * Copyright (c) 2000,2001 Fabrice Bellard
  4. *
  5. * This file is part of Libav.
  6. *
  7. * Libav is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * Libav is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with Libav; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. #include "libavutil/attributes.h"
  22. #include "libavutil/cpu.h"
  23. #include "libavutil/x86/asm.h"
  24. #include "libavutil/x86/cpu.h"
  25. #include "libavcodec/avcodec.h"
  26. #include "libavcodec/dct.h"
  27. #include "libavcodec/mpegvideo.h"
  28. /* not permutated inverse zigzag_direct + 1 for MMX quantizer */
  29. DECLARE_ALIGNED(16, static const uint16_t, inv_zigzag_direct16)[64] = {
  30. 1, 2, 6, 7, 15, 16, 28, 29, 3, 5, 8, 14, 17, 27, 30, 43, 4, 9, 13,
  31. 18, 26, 31, 42, 44, 10, 12, 19, 25, 32, 41, 45, 54, 11, 20, 24, 33, 40, 46,
  32. 53, 55, 21, 23, 34, 39, 47, 52, 56, 61, 22, 35, 38, 48, 51, 57, 60, 62, 36,
  33. 37, 49, 50, 58, 59, 63, 64,
  34. };
  35. #if HAVE_MMX_INLINE
  36. #define COMPILE_TEMPLATE_MMXEXT 0
  37. #define COMPILE_TEMPLATE_SSE2 0
  38. #define COMPILE_TEMPLATE_SSSE3 0
  39. #define RENAME(a) a ## _mmx
  40. #define RENAME_FDCT(a) a ## _mmx
  41. #include "mpegvideoenc_template.c"
  42. #endif /* HAVE_MMX_INLINE */
  43. #if HAVE_MMXEXT_INLINE
  44. #undef COMPILE_TEMPLATE_SSSE3
  45. #undef COMPILE_TEMPLATE_SSE2
  46. #undef COMPILE_TEMPLATE_MMXEXT
  47. #define COMPILE_TEMPLATE_MMXEXT 1
  48. #define COMPILE_TEMPLATE_SSE2 0
  49. #define COMPILE_TEMPLATE_SSSE3 0
  50. #undef RENAME
  51. #undef RENAME_FDCT
  52. #define RENAME(a) a ## _mmxext
  53. #define RENAME_FDCT(a) a ## _mmxext
  54. #include "mpegvideoenc_template.c"
  55. #endif /* HAVE_MMXEXT_INLINE */
  56. #if HAVE_SSE2_INLINE
  57. #undef COMPILE_TEMPLATE_MMXEXT
  58. #undef COMPILE_TEMPLATE_SSE2
  59. #undef COMPILE_TEMPLATE_SSSE3
  60. #define COMPILE_TEMPLATE_MMXEXT 0
  61. #define COMPILE_TEMPLATE_SSE2 1
  62. #define COMPILE_TEMPLATE_SSSE3 0
  63. #undef RENAME
  64. #undef RENAME_FDCT
  65. #define RENAME(a) a ## _sse2
  66. #define RENAME_FDCT(a) a ## _sse2
  67. #include "mpegvideoenc_template.c"
  68. #endif /* HAVE_SSE2_INLINE */
  69. #if HAVE_SSSE3_INLINE
  70. #undef COMPILE_TEMPLATE_MMXEXT
  71. #undef COMPILE_TEMPLATE_SSE2
  72. #undef COMPILE_TEMPLATE_SSSE3
  73. #define COMPILE_TEMPLATE_MMXEXT 0
  74. #define COMPILE_TEMPLATE_SSE2 1
  75. #define COMPILE_TEMPLATE_SSSE3 1
  76. #undef RENAME
  77. #undef RENAME_FDCT
  78. #define RENAME(a) a ## _ssse3
  79. #define RENAME_FDCT(a) a ## _sse2
  80. #include "mpegvideoenc_template.c"
  81. #endif /* HAVE_SSSE3_INLINE */
  82. #if HAVE_INLINE_ASM
  83. static void denoise_dct_mmx(MpegEncContext *s, int16_t *block){
  84. const int intra= s->mb_intra;
  85. int *sum= s->dct_error_sum[intra];
  86. uint16_t *offset= s->dct_offset[intra];
  87. s->dct_count[intra]++;
  88. __asm__ volatile(
  89. "pxor %%mm7, %%mm7 \n\t"
  90. "1: \n\t"
  91. "pxor %%mm0, %%mm0 \n\t"
  92. "pxor %%mm1, %%mm1 \n\t"
  93. "movq (%0), %%mm2 \n\t"
  94. "movq 8(%0), %%mm3 \n\t"
  95. "pcmpgtw %%mm2, %%mm0 \n\t"
  96. "pcmpgtw %%mm3, %%mm1 \n\t"
  97. "pxor %%mm0, %%mm2 \n\t"
  98. "pxor %%mm1, %%mm3 \n\t"
  99. "psubw %%mm0, %%mm2 \n\t"
  100. "psubw %%mm1, %%mm3 \n\t"
  101. "movq %%mm2, %%mm4 \n\t"
  102. "movq %%mm3, %%mm5 \n\t"
  103. "psubusw (%2), %%mm2 \n\t"
  104. "psubusw 8(%2), %%mm3 \n\t"
  105. "pxor %%mm0, %%mm2 \n\t"
  106. "pxor %%mm1, %%mm3 \n\t"
  107. "psubw %%mm0, %%mm2 \n\t"
  108. "psubw %%mm1, %%mm3 \n\t"
  109. "movq %%mm2, (%0) \n\t"
  110. "movq %%mm3, 8(%0) \n\t"
  111. "movq %%mm4, %%mm2 \n\t"
  112. "movq %%mm5, %%mm3 \n\t"
  113. "punpcklwd %%mm7, %%mm4 \n\t"
  114. "punpckhwd %%mm7, %%mm2 \n\t"
  115. "punpcklwd %%mm7, %%mm5 \n\t"
  116. "punpckhwd %%mm7, %%mm3 \n\t"
  117. "paddd (%1), %%mm4 \n\t"
  118. "paddd 8(%1), %%mm2 \n\t"
  119. "paddd 16(%1), %%mm5 \n\t"
  120. "paddd 24(%1), %%mm3 \n\t"
  121. "movq %%mm4, (%1) \n\t"
  122. "movq %%mm2, 8(%1) \n\t"
  123. "movq %%mm5, 16(%1) \n\t"
  124. "movq %%mm3, 24(%1) \n\t"
  125. "add $16, %0 \n\t"
  126. "add $32, %1 \n\t"
  127. "add $16, %2 \n\t"
  128. "cmp %3, %0 \n\t"
  129. " jb 1b \n\t"
  130. : "+r" (block), "+r" (sum), "+r" (offset)
  131. : "r"(block+64)
  132. );
  133. }
  134. static void denoise_dct_sse2(MpegEncContext *s, int16_t *block){
  135. const int intra= s->mb_intra;
  136. int *sum= s->dct_error_sum[intra];
  137. uint16_t *offset= s->dct_offset[intra];
  138. s->dct_count[intra]++;
  139. __asm__ volatile(
  140. "pxor %%xmm7, %%xmm7 \n\t"
  141. "1: \n\t"
  142. "pxor %%xmm0, %%xmm0 \n\t"
  143. "pxor %%xmm1, %%xmm1 \n\t"
  144. "movdqa (%0), %%xmm2 \n\t"
  145. "movdqa 16(%0), %%xmm3 \n\t"
  146. "pcmpgtw %%xmm2, %%xmm0 \n\t"
  147. "pcmpgtw %%xmm3, %%xmm1 \n\t"
  148. "pxor %%xmm0, %%xmm2 \n\t"
  149. "pxor %%xmm1, %%xmm3 \n\t"
  150. "psubw %%xmm0, %%xmm2 \n\t"
  151. "psubw %%xmm1, %%xmm3 \n\t"
  152. "movdqa %%xmm2, %%xmm4 \n\t"
  153. "movdqa %%xmm3, %%xmm5 \n\t"
  154. "psubusw (%2), %%xmm2 \n\t"
  155. "psubusw 16(%2), %%xmm3 \n\t"
  156. "pxor %%xmm0, %%xmm2 \n\t"
  157. "pxor %%xmm1, %%xmm3 \n\t"
  158. "psubw %%xmm0, %%xmm2 \n\t"
  159. "psubw %%xmm1, %%xmm3 \n\t"
  160. "movdqa %%xmm2, (%0) \n\t"
  161. "movdqa %%xmm3, 16(%0) \n\t"
  162. "movdqa %%xmm4, %%xmm6 \n\t"
  163. "movdqa %%xmm5, %%xmm0 \n\t"
  164. "punpcklwd %%xmm7, %%xmm4 \n\t"
  165. "punpckhwd %%xmm7, %%xmm6 \n\t"
  166. "punpcklwd %%xmm7, %%xmm5 \n\t"
  167. "punpckhwd %%xmm7, %%xmm0 \n\t"
  168. "paddd (%1), %%xmm4 \n\t"
  169. "paddd 16(%1), %%xmm6 \n\t"
  170. "paddd 32(%1), %%xmm5 \n\t"
  171. "paddd 48(%1), %%xmm0 \n\t"
  172. "movdqa %%xmm4, (%1) \n\t"
  173. "movdqa %%xmm6, 16(%1) \n\t"
  174. "movdqa %%xmm5, 32(%1) \n\t"
  175. "movdqa %%xmm0, 48(%1) \n\t"
  176. "add $32, %0 \n\t"
  177. "add $64, %1 \n\t"
  178. "add $32, %2 \n\t"
  179. "cmp %3, %0 \n\t"
  180. " jb 1b \n\t"
  181. : "+r" (block), "+r" (sum), "+r" (offset)
  182. : "r"(block+64)
  183. XMM_CLOBBERS_ONLY("%xmm0", "%xmm1", "%xmm2", "%xmm3",
  184. "%xmm4", "%xmm5", "%xmm6", "%xmm7")
  185. );
  186. }
  187. #endif /* HAVE_INLINE_ASM */
  188. av_cold void ff_mpv_encode_init_x86(MpegEncContext *s)
  189. {
  190. const int dct_algo = s->avctx->dct_algo;
  191. if (dct_algo == FF_DCT_AUTO || dct_algo == FF_DCT_MMX) {
  192. #if HAVE_MMX_INLINE
  193. int cpu_flags = av_get_cpu_flags();
  194. if (INLINE_MMX(cpu_flags)) {
  195. s->dct_quantize = dct_quantize_mmx;
  196. s->denoise_dct = denoise_dct_mmx;
  197. }
  198. #endif
  199. #if HAVE_MMXEXT_INLINE
  200. if (INLINE_MMXEXT(cpu_flags))
  201. s->dct_quantize = dct_quantize_mmxext;
  202. #endif
  203. #if HAVE_SSE2_INLINE
  204. if (INLINE_SSE2(cpu_flags)) {
  205. s->dct_quantize = dct_quantize_sse2;
  206. s->denoise_dct = denoise_dct_sse2;
  207. }
  208. #endif
  209. #if HAVE_SSSE3_INLINE
  210. if (INLINE_SSSE3(cpu_flags))
  211. s->dct_quantize = dct_quantize_ssse3;
  212. #endif
  213. }
  214. }