From f2fd167835b6f039a593e46ab3a84e1b9a453660 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Fri, 22 Jun 2012 21:40:28 +0100
Subject: [PATCH 01/11] x86: vc1: fix and enable optimised loop filter

The problem is that the ssse3 psign instruction does the wrong
thing here.  Commit ea60dfe incorrectly removed a macro emulating
this instruction for pre-ssse3 code.  However, the emulation does
not match the real instruction, and the code relies on the behaviour
of the macro.  Specifically, psign sets destination elements to zero
where the corresponding source element is zero, whereas the emulation
only negates destination elements where the source is negative.

Furthermore, the PSIGNW_MMX macro in x86util.asm is totally bogus,
which is why the original VC-1 code had an additional right shift
when using it.  Since the psign instruction cannot be used here,
skip all the macro hell and use the working instruction sequence
directly.

None of this was noticed due to a stray return statement in
ff_vc1dsp_init_mmx() which meant that only the mmx version of the
loop filter was ever used (before being removed in ea60dfe).

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/x86/vc1dsp_mmx.c    | 2 +-
 libavcodec/x86/vc1dsp_yasm.asm | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/libavcodec/x86/vc1dsp_mmx.c b/libavcodec/x86/vc1dsp_mmx.c
index 4e996f1ce6..717f74f287 100644
--- a/libavcodec/x86/vc1dsp_mmx.c
+++ b/libavcodec/x86/vc1dsp_mmx.c
@@ -797,7 +797,7 @@ void ff_vc1dsp_init_mmx(VC1DSPContext *dsp)
     if (mm_flags & AV_CPU_FLAG_MMX) {
         dsp->put_no_rnd_vc1_chroma_pixels_tab[0]= ff_put_vc1_chroma_mc8_mmx_nornd;
     }
-    return;
+
     if (mm_flags & AV_CPU_FLAG_MMX2) {
         ASSIGN_LF(mmx2);
         dsp->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_mmx2_nornd;
diff --git a/libavcodec/x86/vc1dsp_yasm.asm b/libavcodec/x86/vc1dsp_yasm.asm
index 2c5cf22a0a..ced2b5ba88 100644
--- a/libavcodec/x86/vc1dsp_yasm.asm
+++ b/libavcodec/x86/vc1dsp_yasm.asm
@@ -119,7 +119,9 @@ section .text
     pand    m2, m6
     pand    m3, m2  ; d final
 
-    PSIGNW  m3, m7
+    psraw   m7, 15
+    pxor    m3, m7
+    psubw   m3, m7
     psubw   m0, m3
     paddw   m1, m3
     packuswb m0, m0
@@ -284,7 +286,6 @@ cglobal vc1_h_loop_filter8_sse2, 3,6,8
     RET
 
 %define PABSW PABSW_SSSE3
-%define PSIGNW PSIGNW_SSSE3
 
 INIT_MMX
 ; void ff_vc1_v_loop_filter4_ssse3(uint8_t *src, int stride, int pq)

From 5aff37d28d6a140f70f6cae996e6f55b21b1106d Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Sun, 24 Jun 2012 21:29:14 +0300
Subject: [PATCH 02/11] os_support: Include all the necessary headers for the
 win32 open function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

io.h is required for open and _wopen, and fcntl.h is required for
the O_CREAT flag. On mingw, fcntl.h is included by os_support.h (and
the mingw fcntl.h includes io.h), but include it explicitly here
since this implementation requires it.

Also move the #undef open up. open must not be defined to ff_win32_open
while including the headers that declare the open function. On mingw,
this happened in os_support.h before open was redirected.

Signed-off-by: Martin Storsjö <martin@martin.st>
---
 libavformat/os_support.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/libavformat/os_support.c b/libavformat/os_support.c
index 0cbaf453db..8054ba6bbb 100644
--- a/libavformat/os_support.c
+++ b/libavformat/os_support.c
@@ -28,9 +28,11 @@
 #include "os_support.h"
 
 #if defined(_WIN32) && !defined(__MINGW32CE__)
+#undef open
+#include <fcntl.h>
+#include <io.h>
 #include <windows.h>
 
-#undef open
 int ff_win32_open(const char *filename_utf8, int oflag, int pmode)
 {
     int fd;

From 71078ad3338d850a24071e93b69d2109a943f73e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Sun, 24 Jun 2012 22:36:37 +0300
Subject: [PATCH 03/11] os_support: Don't compare a negative number against
 socket descriptors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The fds are unsigned integers in the Windows definition of struct
pollfd. Due to this, the comparison if (fds[i].fd > n), with n
initialized to -1, was always false.

Signed-off-by: Martin Storsjö <martin@martin.st>
---
 libavformat/os_support.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/libavformat/os_support.c b/libavformat/os_support.c
index 8054ba6bbb..49ec0c60d1 100644
--- a/libavformat/os_support.c
+++ b/libavformat/os_support.c
@@ -286,7 +286,7 @@ int poll(struct pollfd *fds, nfds_t numfds, int timeout)
     FD_ZERO(&write_set);
     FD_ZERO(&exception_set);
 
-    n = -1;
+    n = 0;
     for(i = 0; i < numfds; i++) {
         if (fds[i].fd < 0)
             continue;
@@ -301,22 +301,22 @@ int poll(struct pollfd *fds, nfds_t numfds, int timeout)
         if (fds[i].events & POLLOUT) FD_SET(fds[i].fd, &write_set);
         if (fds[i].events & POLLERR) FD_SET(fds[i].fd, &exception_set);
 
-        if (fds[i].fd > n)
-            n = fds[i].fd;
+        if (fds[i].fd >= n)
+            n = fds[i].fd + 1;
     };
 
-    if (n == -1)
+    if (n == 0)
         /* Hey!? Nothing to poll, in fact!!! */
         return 0;
 
     if (timeout < 0)
-        rc = select(n+1, &read_set, &write_set, &exception_set, NULL);
+        rc = select(n, &read_set, &write_set, &exception_set, NULL);
     else {
         struct timeval    tv;
 
         tv.tv_sec = timeout / 1000;
         tv.tv_usec = 1000 * (timeout % 1000);
-        rc = select(n+1, &read_set, &write_set, &exception_set, &tv);
+        rc = select(n, &read_set, &write_set, &exception_set, &tv);
     };
 
     if (rc < 0)

From cdee08e36582e443ff8a9bed17ec409551c9f93b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Mon, 25 Jun 2012 12:08:44 +0300
Subject: [PATCH 04/11] network: Check for struct pollfd
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We need to include winsock2.h here, to make sure we have the
real pollfd struct definition, if one exists, before defining the
fallback poll function.

Signed-off-by: Martin Storsjö <martin@martin.st>
---
 configure                | 3 +++
 libavformat/os_support.h | 5 +++++
 2 files changed, 8 insertions(+)

diff --git a/configure b/configure
index 1290369f15..a7ac43fa07 100755
--- a/configure
+++ b/configure
@@ -1140,6 +1140,7 @@ HAVE_LIST="
     struct_group_source_req
     struct_ip_mreq_source
     struct_ipv6_mreq
+    struct_pollfd
     struct_rusage_ru_maxrss
     struct_sockaddr_in6
     struct_sockaddr_sa_len
@@ -2818,6 +2819,7 @@ if enabled network; then
     check_type netinet/in.h "struct ip_mreq_source" -D_BSD_SOURCE
     check_type netinet/in.h "struct ipv6_mreq" -D_DARWIN_C_SOURCE
     check_type netinet/in.h "struct sockaddr_in6"
+    check_type poll.h "struct pollfd"
     check_type "sys/types.h sys/socket.h" "struct sockaddr_storage"
     check_struct "sys/types.h sys/socket.h" "struct sockaddr" sa_len
     check_header netinet/sctp.h
@@ -2834,6 +2836,7 @@ if enabled network; then
         check_type ws2tcpip.h "struct group_source_req"
         check_type ws2tcpip.h "struct ip_mreq_source"
         check_type ws2tcpip.h "struct ipv6_mreq"
+        check_type winsock2.h "struct pollfd"
         check_type ws2tcpip.h "struct sockaddr_in6"
         check_type ws2tcpip.h "struct sockaddr_storage"
         check_struct winsock2.h "struct sockaddr" sa_len
diff --git a/libavformat/os_support.h b/libavformat/os_support.h
index 3db20a9aa8..cda84b0296 100644
--- a/libavformat/os_support.h
+++ b/libavformat/os_support.h
@@ -75,6 +75,10 @@ typedef int socklen_t;
 #if !HAVE_POLL_H
 typedef unsigned long nfds_t;
 
+#if HAVE_WINSOCK2_H
+#include <winsock2.h>
+#endif
+#if !HAVE_STRUCT_POLLFD
 struct pollfd {
     int fd;
     short events;  /* events to look for */
@@ -94,6 +98,7 @@ struct pollfd {
 #define POLLERR    0x0004  /* errors pending */
 #define POLLHUP    0x0080  /* disconnected */
 #define POLLNVAL   0x1000  /* invalid file descriptor */
+#endif
 
 
 int poll(struct pollfd *fds, nfds_t numfds, int timeout);

From cab2eb87f9d692f543d11057dbfac4e590570b18 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Mon, 25 Jun 2012 12:27:37 +0300
Subject: [PATCH 05/11] os_support: Rename the poll fallback function to
 ff_poll
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The fallback function is a non-static function, and we shouldn't be
defining non-static functions outside of the proper ff/av prefix
namespaces.

This is especially important for a function like poll, for which
other parties (other libraries, or executables linking these
libraries) might also provide similar but incompatible fallbacks.

Signed-off-by: Martin Storsjö <martin@martin.st>
---
 libavformat/os_support.c | 2 +-
 libavformat/os_support.h | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/libavformat/os_support.c b/libavformat/os_support.c
index 49ec0c60d1..6d8c8ac6a3 100644
--- a/libavformat/os_support.c
+++ b/libavformat/os_support.c
@@ -266,7 +266,7 @@ int ff_socket_nonblock(int socket, int enable)
 }
 
 #if !HAVE_POLL_H
-int poll(struct pollfd *fds, nfds_t numfds, int timeout)
+int ff_poll(struct pollfd *fds, nfds_t numfds, int timeout)
 {
     fd_set read_set;
     fd_set write_set;
diff --git a/libavformat/os_support.h b/libavformat/os_support.h
index cda84b0296..dfb87ef43a 100644
--- a/libavformat/os_support.h
+++ b/libavformat/os_support.h
@@ -101,7 +101,8 @@ struct pollfd {
 #endif
 
 
-int poll(struct pollfd *fds, nfds_t numfds, int timeout);
+int ff_poll(struct pollfd *fds, nfds_t numfds, int timeout);
+#define poll ff_poll
 #endif /* HAVE_POLL_H */
 #endif /* CONFIG_NETWORK */
 

From 5a608a239b8193a420df7b1579999b71aa12f4a8 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Sun, 24 Jun 2012 21:21:07 +0300
Subject: [PATCH 06/11] os_support: Add fallback definitions for stat flags
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Mingw headers provide similar defines already (unconditional #defines,
without any #undef or #ifdef around them), while MSVC doesn't have
them.

Signed-off-by: Martin Storsjö <martin@martin.st>
---
 libavformat/os_support.h | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/libavformat/os_support.h b/libavformat/os_support.h
index dfb87ef43a..8e2eb83ccd 100644
--- a/libavformat/os_support.h
+++ b/libavformat/os_support.h
@@ -29,6 +29,8 @@
 
 #include "config.h"
 
+#include <sys/stat.h>
+
 #if defined(__MINGW32__) && !defined(__MINGW32CE__)
 #  include <fcntl.h>
 #  define lseek(f,p,w) _lseeki64((f), (p), (w))
@@ -55,6 +57,13 @@ static inline int is_dos_path(const char *path)
 #define SHUT_RD SD_RECEIVE
 #define SHUT_WR SD_SEND
 #define SHUT_RDWR SD_BOTH
+
+#ifndef S_IRUSR
+#define S_IRUSR S_IREAD
+#endif
+#ifndef S_IWUSR
+#define S_IWUSR S_IWRITE
+#endif
 #endif
 
 #if defined(_WIN32) && !defined(__MINGW32CE__)

From d3ed1c9571c24d33fc485fba01bc2d6ed4de18b1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Sat, 30 Jun 2012 01:28:02 +0300
Subject: [PATCH 07/11] cmdutils: Pass the actual chosen encoder to
 filter_codec_opts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This allows passing the right options to encoders when there's more
than one encoder for a certain codec id.

Signed-off-by: Martin Storsjö <martin@martin.st>
---
 avconv.c   | 4 ++--
 avplay.c   | 2 +-
 cmdutils.c | 9 +++++----
 cmdutils.h | 4 +++-
 4 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/avconv.c b/avconv.c
index 3eb249dcee..3a7cebfc95 100644
--- a/avconv.c
+++ b/avconv.c
@@ -3441,7 +3441,7 @@ static void add_input_streams(OptionsContext *o, AVFormatContext *ic)
         ist->file_index = nb_input_files;
         ist->discard = 1;
         st->discard  = AVDISCARD_ALL;
-        ist->opts = filter_codec_opts(codec_opts, ist->st->codec->codec_id, ic, st);
+        ist->opts = filter_codec_opts(codec_opts, ist->st->codec->codec_id, ic, st, NULL);
 
         ist->ts_scale = 1.0;
         MATCH_PER_STREAM_OPT(ts_scale, dbl, ist->ts_scale, ic, st);
@@ -3768,7 +3768,7 @@ static OutputStream *new_output_stream(OptionsContext *o, AVFormatContext *oc, e
     st->codec->codec_type = type;
     choose_encoder(o, oc, ost);
     if (ost->enc) {
-        ost->opts  = filter_codec_opts(codec_opts, ost->enc->id, oc, st);
+        ost->opts  = filter_codec_opts(codec_opts, ost->enc->id, oc, st, ost->enc);
     }
 
     avcodec_get_context_defaults3(st->codec, ost->enc);
diff --git a/avplay.c b/avplay.c
index 6acb6c10e8..71844c274d 100644
--- a/avplay.c
+++ b/avplay.c
@@ -2081,7 +2081,7 @@ static int stream_component_open(VideoState *is, int stream_index)
         return -1;
     avctx = ic->streams[stream_index]->codec;
 
-    opts = filter_codec_opts(codec_opts, avctx->codec_id, ic, ic->streams[stream_index]);
+    opts = filter_codec_opts(codec_opts, avctx->codec_id, ic, ic->streams[stream_index], NULL);
 
     codec = avcodec_find_decoder(avctx->codec_id);
     avctx->debug_mv          = debug_mv;
diff --git a/cmdutils.c b/cmdutils.c
index abf89a3e43..6d13bd6b4d 100644
--- a/cmdutils.c
+++ b/cmdutils.c
@@ -948,17 +948,18 @@ int check_stream_specifier(AVFormatContext *s, AVStream *st, const char *spec)
 }
 
 AVDictionary *filter_codec_opts(AVDictionary *opts, enum CodecID codec_id,
-                                AVFormatContext *s, AVStream *st)
+                                AVFormatContext *s, AVStream *st, AVCodec *codec)
 {
     AVDictionary    *ret = NULL;
     AVDictionaryEntry *t = NULL;
-    AVCodec       *codec = s->oformat ? avcodec_find_encoder(codec_id)
-                                      : avcodec_find_decoder(codec_id);
     int            flags = s->oformat ? AV_OPT_FLAG_ENCODING_PARAM
                                       : AV_OPT_FLAG_DECODING_PARAM;
     char          prefix = 0;
     const AVClass    *cc = avcodec_get_class();
 
+    if (!codec)
+        codec            = s->oformat ? avcodec_find_encoder(codec_id)
+                                      : avcodec_find_decoder(codec_id);
     if (!codec)
         return NULL;
 
@@ -1020,7 +1021,7 @@ AVDictionary **setup_find_stream_info_opts(AVFormatContext *s,
     }
     for (i = 0; i < s->nb_streams; i++)
         opts[i] = filter_codec_opts(codec_opts, s->streams[i]->codec->codec_id,
-                                    s, s->streams[i]);
+                                    s, s->streams[i], NULL);
     return opts;
 }
 
diff --git a/cmdutils.h b/cmdutils.h
index 5dac13037a..793a1e83c4 100644
--- a/cmdutils.h
+++ b/cmdutils.h
@@ -214,10 +214,12 @@ int check_stream_specifier(AVFormatContext *s, AVStream *st, const char *spec);
  *
  * @param s Corresponding format context.
  * @param st A stream from s for which the options should be filtered.
+ * @param codec The particular codec for which the options should be filtered.
+ *              If null, the default one is looked up according to the codec id.
  * @return a pointer to the created dictionary
  */
 AVDictionary *filter_codec_opts(AVDictionary *opts, enum CodecID codec_id,
-                                AVFormatContext *s, AVStream *st);
+                                AVFormatContext *s, AVStream *st, AVCodec *codec);
 
 /**
  * Setup AVCodecContext options for avformat_find_stream_info().

From 4f2c846d9644640cd881b7a7a48d1785a52f5c25 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Sun, 24 Jun 2012 20:57:14 +0300
Subject: [PATCH 08/11] attributes: Add a definition of av_always_inline for
 MSVC
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Martin Storsjö <martin@martin.st>
---
 libavutil/attributes.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libavutil/attributes.h b/libavutil/attributes.h
index ef990a1d4f..c0bac3f309 100644
--- a/libavutil/attributes.h
+++ b/libavutil/attributes.h
@@ -35,6 +35,8 @@
 #ifndef av_always_inline
 #if AV_GCC_VERSION_AT_LEAST(3,1)
 #    define av_always_inline __attribute__((always_inline)) inline
+#elif defined(_MSC_VER)
+#    define av_always_inline __forceinline
 #else
 #    define av_always_inline inline
 #endif

From 66a02159ea9a09965dfa3e06ea55f41e5f615f90 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Thu, 14 Jun 2012 15:03:08 +0100
Subject: [PATCH 09/11] x86: fmtconvert: add special asm for
 float_to_int16_interleave_misc_*
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This gets rid of a variable-length array and a for loop in C code.

Signed-off-by: Martin Storsjö <martin@martin.st>
---
 libavcodec/x86/fmtconvert.asm   | 78 +++++++++++++++++++++++++++++++++
 libavcodec/x86/fmtconvert_mmx.c | 12 ++---
 2 files changed, 85 insertions(+), 5 deletions(-)

diff --git a/libavcodec/x86/fmtconvert.asm b/libavcodec/x86/fmtconvert.asm
index 63befc94f6..4916e7af33 100644
--- a/libavcodec/x86/fmtconvert.asm
+++ b/libavcodec/x86/fmtconvert.asm
@@ -115,6 +115,84 @@ FLOAT_TO_INT16 sse, 0
 FLOAT_TO_INT16 3dnow, 0
 %undef cvtps2pi
 
+;------------------------------------------------------------------------------
+; void ff_float_to_int16_step(int16_t *dst, const float *src, long len, long step);
+;------------------------------------------------------------------------------
+%macro FLOAT_TO_INT16_STEP 2
+cglobal float_to_int16_step_%1, 4,7,%2, dst, src, len, step, step3, v1, v2
+    add       lenq, lenq                 ; lenq = 2*len
+    lea       srcq, [srcq+2*lenq]        ; srcq += 4*len bytes, i.e. just past len floats
+    lea     step3q, [stepq*3]            ; step3 = 3*step, used for the 4th store of each group
+    neg       lenq                       ; lenq = -2*len; counts up towards zero
+.loop:
+%ifidn %1, sse2
+    cvtps2dq    m0, [srcq+2*lenq   ]     ; floats 0-3 -> int32 (2*lenq is a negative byte offset)
+    cvtps2dq    m1, [srcq+2*lenq+16]     ; floats 4-7 -> int32
+    packssdw    m0, m1                   ; m0 = 8 int16 samples, signed saturation
+    movd       v1d, m0                   ; v1 = samples 0,1
+    psrldq      m0, 4                    ; drop the low 4 bytes
+    movd       v2d, m0                   ; v2 = samples 2,3
+    psrldq      m0, 4                    ; low dword of m0 now holds samples 4,5
+    mov     [dstq], v1w                  ; sample 0 -> dst[0]
+    mov  [dstq+stepq*4], v2w             ; sample 2 -> dst[2*step] (int16 elements)
+    shr        v1d, 16                   ; bring sample 1 into the low word
+    shr        v2d, 16                   ; bring sample 3 into the low word
+    mov  [dstq+stepq*2], v1w             ; sample 1 -> dst[step]
+    mov  [dstq+step3q*2], v2w            ; sample 3 -> dst[3*step]
+    lea       dstq, [dstq+stepq*8]       ; dst += 4*step int16 elements
+    movd       v1d, m0                   ; v1 = samples 4,5
+    psrldq      m0, 4                    ; drop the low 4 bytes
+    movd       v2d, m0                   ; v2 = samples 6,7
+    mov     [dstq], v1w                  ; sample 4 -> dst[0]
+    mov  [dstq+stepq*4], v2w             ; sample 6 -> dst[2*step]
+    shr        v1d, 16                   ; bring sample 5 into the low word
+    shr        v2d, 16                   ; bring sample 7 into the low word
+    mov  [dstq+stepq*2], v1w             ; sample 5 -> dst[step]
+    mov  [dstq+step3q*2], v2w            ; sample 7 -> dst[3*step]
+    lea       dstq, [dstq+stepq*8]       ; dst += 4*step int16 elements
+%else
+    cvtps2pi    m0, [srcq+2*lenq   ]     ; floats 0,1 -> int32 (cvtps2pi is %defined to pf2id for the 3dnow build)
+    cvtps2pi    m1, [srcq+2*lenq+ 8]     ; floats 2,3 -> int32
+    cvtps2pi    m2, [srcq+2*lenq+16]     ; floats 4,5 -> int32
+    cvtps2pi    m3, [srcq+2*lenq+24]     ; floats 6,7 -> int32
+    packssdw    m0, m1                   ; m0 = int16 samples 0-3
+    packssdw    m2, m3                   ; m2 = int16 samples 4-7
+    movd       v1d, m0                   ; v1 = samples 0,1
+    psrlq       m0, 32                   ; move the high dword down
+    movd       v2d, m0                   ; v2 = samples 2,3
+    mov     [dstq], v1w                  ; sample 0 -> dst[0]
+    mov  [dstq+stepq*4], v2w             ; sample 2 -> dst[2*step] (int16 elements)
+    shr        v1d, 16                   ; bring sample 1 into the low word
+    shr        v2d, 16                   ; bring sample 3 into the low word
+    mov  [dstq+stepq*2], v1w             ; sample 1 -> dst[step]
+    mov  [dstq+step3q*2], v2w            ; sample 3 -> dst[3*step]
+    lea       dstq, [dstq+stepq*8]       ; dst += 4*step int16 elements
+    movd       v1d, m2                   ; v1 = samples 4,5
+    psrlq       m2, 32                   ; move the high dword down
+    movd       v2d, m2                   ; v2 = samples 6,7
+    mov     [dstq], v1w                  ; sample 4 -> dst[0]
+    mov  [dstq+stepq*4], v2w             ; sample 6 -> dst[2*step]
+    shr        v1d, 16                   ; bring sample 5 into the low word
+    shr        v2d, 16                   ; bring sample 7 into the low word
+    mov  [dstq+stepq*2], v1w             ; sample 5 -> dst[step]
+    mov  [dstq+step3q*2], v2w            ; sample 7 -> dst[3*step]
+    lea       dstq, [dstq+stepq*8]       ; dst += 4*step int16 elements
+%endif
+    add       lenq, 16                   ; 8 samples done (lenq counts 2 per sample); NOTE(review): presumably len must be a multiple of 8 - confirm with callers
+    js .loop                             ; repeat while lenq is still negative
+%ifnidn %1, sse2
+    emms                                 ; non-sse2 variants used MMX registers; reset MMX state
+%endif
+    REP_RET
+%endmacro
+
+INIT_XMM
+FLOAT_TO_INT16_STEP sse2, 2
+INIT_MMX
+FLOAT_TO_INT16_STEP sse, 0
+%define cvtps2pi pf2id
+FLOAT_TO_INT16_STEP 3dnow, 0
+%undef cvtps2pi
 
 ;-------------------------------------------------------------------------------
 ; void ff_float_to_int16_interleave2(int16_t *dst, const float **src, long len);
diff --git a/libavcodec/x86/fmtconvert_mmx.c b/libavcodec/x86/fmtconvert_mmx.c
index 42cb0bc85b..aaf634d37f 100644
--- a/libavcodec/x86/fmtconvert_mmx.c
+++ b/libavcodec/x86/fmtconvert_mmx.c
@@ -25,6 +25,7 @@
 #include "libavutil/cpu.h"
 #include "libavutil/x86_cpu.h"
 #include "libavcodec/fmtconvert.h"
+#include "libavcodec/dsputil.h"
 
 #if HAVE_YASM
 
@@ -35,6 +36,10 @@ void ff_float_to_int16_3dnow(int16_t *dst, const float *src, long len);
 void ff_float_to_int16_sse  (int16_t *dst, const float *src, long len);
 void ff_float_to_int16_sse2 (int16_t *dst, const float *src, long len);
 
+void ff_float_to_int16_step_3dnow(int16_t *dst, const float *src, long len, long step);
+void ff_float_to_int16_step_sse  (int16_t *dst, const float *src, long len, long step);
+void ff_float_to_int16_step_sse2 (int16_t *dst, const float *src, long len, long step);
+
 void ff_float_to_int16_interleave2_3dnow(int16_t *dst, const float **src, long len);
 void ff_float_to_int16_interleave2_sse  (int16_t *dst, const float **src, long len);
 void ff_float_to_int16_interleave2_sse2 (int16_t *dst, const float **src, long len);
@@ -48,12 +53,9 @@ void ff_float_to_int16_interleave6_3dn2(int16_t *dst, const float **src, int len
 #define FLOAT_TO_INT16_INTERLEAVE(cpu) \
 /* gcc pessimizes register allocation if this is in the same function as float_to_int16_interleave_sse2*/\
 static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const float **src, long len, int channels){\
-    DECLARE_ALIGNED(16, int16_t, tmp)[len];\
-    int i,j,c;\
+    int c;\
     for(c=0; c<channels; c++){\
-        ff_float_to_int16_##cpu(tmp, src[c], len);\
-        for(i=0, j=c; i<len; i++, j+=channels)\
-            dst[j] = tmp[i];\
+        ff_float_to_int16_step_##cpu(dst+c, src[c], len, channels);\
     }\
 }\
 \

From e9156741a763ac7ce48933f489f3e0a13e9a4df7 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Thu, 14 Jun 2012 15:57:26 -0700
Subject: [PATCH 10/11] dnxhdenc: add space between function argument type and
 comment.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Martin Storsjö <martin@martin.st>
---
 libavcodec/dnxhdenc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/dnxhdenc.h b/libavcodec/dnxhdenc.h
index 861546a54c..7e2f96f9d2 100644
--- a/libavcodec/dnxhdenc.h
+++ b/libavcodec/dnxhdenc.h
@@ -92,7 +92,7 @@ typedef struct DNXHDEncContext {
     RCCMPEntry *mb_cmp;
     RCEntry   (*mb_rc)[8160];
 
-    void (*get_pixels_8x4_sym)(DCTELEM */*align 16*/, const uint8_t *, int);
+    void (*get_pixels_8x4_sym)(DCTELEM * /*align 16*/, const uint8_t *, int);
 } DNXHDEncContext;
 
 void ff_dnxhd_init_mmx(DNXHDEncContext *ctx);

From ceabc13f129cd6344b1eebdbe10119083fe5520e Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Thu, 14 Jun 2012 22:05:10 +0000
Subject: [PATCH 11/11] dsputilenc_mmx: split assignment of ff_sse16_sse2 to
 SSE2 section.

---
 libavcodec/x86/dsputilenc_mmx.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/libavcodec/x86/dsputilenc_mmx.c b/libavcodec/x86/dsputilenc_mmx.c
index 2a403ba019..d8a60e106b 100644
--- a/libavcodec/x86/dsputilenc_mmx.c
+++ b/libavcodec/x86/dsputilenc_mmx.c
@@ -1127,8 +1127,8 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx)
 #endif
 
         c->pix_norm1 = pix_norm1_mmx;
-        c->sse[0] = (HAVE_YASM && mm_flags & AV_CPU_FLAG_SSE2) ? ff_sse16_sse2 : sse16_mmx;
-          c->sse[1] = sse8_mmx;
+        c->sse[0] = sse16_mmx;
+        c->sse[1] = sse8_mmx;
         c->vsad[4]= vsad_intra16_mmx;
 
         c->nsse[0] = nsse16_mmx;
@@ -1164,9 +1164,12 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx)
             if (bit_depth <= 8)
                 c->get_pixels = get_pixels_sse2;
             c->sum_abs_dctelem= sum_abs_dctelem_sse2;
-#if HAVE_YASM && HAVE_ALIGNED_STACK
+#if HAVE_YASM
+            c->sse[0] = ff_sse16_sse2;
+#if HAVE_ALIGNED_STACK
             c->hadamard8_diff[0]= ff_hadamard8_diff16_sse2;
             c->hadamard8_diff[1]= ff_hadamard8_diff_sse2;
+#endif
 #endif
         }