Use pshufw for the horizontal reductions in the MMX2 code paths.

* dsputil_mmx.c, hadamard8_diff_mmx2: each "movq mm0 -> mm1; psrlq mm0"
  pair in the final fold is replaced by one pshufw from mm0 into mm1
  (0x0E copies words 2,3 into words 0,1; 0x01 copies word 1 into word 0).
  The following paddusw leaves the same sum in the low word of mm0, which
  is what the movd afterwards reads.
* mpegvideo_mmx_template.c: the open-coded reduction that appears twice in
  dct_quantize is factored into PMAX(a, b). It folds the four words of a
  down to a's low word with PMAXW, using b as scratch. The HAVE_MMX2
  variant uses pshufw; the fallback keeps the previous movq/psrlq sequence.
  Both call sites pass (%%mm3, %%mm0), so the result stays in mm3.

NOTE(review): the line structure of this patch was rebuilt from the hunk
counts; the indentation of context lines is conventional rather than
original, so apply with whitespace tolerance if a strict apply fails.

diff --git a/libavcodec/i386/dsputil_mmx.c b/libavcodec/i386/dsputil_mmx.c
index 5d9d4995c4..51ed07cd1a 100644
--- a/libavcodec/i386/dsputil_mmx.c
+++ b/libavcodec/i386/dsputil_mmx.c
@@ -1621,11 +1621,9 @@ static int hadamard8_diff_mmx2(void *s, uint8_t *src1, uint8_t *src2, int stride
         "movq 64(%1), %%mm1 \n\t"
         MMABS_SUM_MMX2(%%mm1, %%mm7, %%mm0)
 
-        "movq %%mm0, %%mm1 \n\t"
-        "psrlq $32, %%mm0 \n\t"
+        "pshufw $0x0E, %%mm0, %%mm1 \n\t"
         "paddusw %%mm1, %%mm0 \n\t"
-        "movq %%mm0, %%mm1 \n\t"
-        "psrlq $16, %%mm0 \n\t"
+        "pshufw $0x01, %%mm0, %%mm1 \n\t"
         "paddusw %%mm1, %%mm0 \n\t"
         "movd %%mm0, %0 \n\t"
 
diff --git a/libavcodec/i386/mpegvideo_mmx_template.c b/libavcodec/i386/mpegvideo_mmx_template.c
index c9354dc1bb..93f156ee55 100644
--- a/libavcodec/i386/mpegvideo_mmx_template.c
+++ b/libavcodec/i386/mpegvideo_mmx_template.c
@@ -22,7 +22,11 @@
 #ifdef HAVE_MMX2
 #define SPREADW(a) "pshufw $0, " #a ", " #a " \n\t"
 #define PMAXW(a,b) "pmaxsw " #a ", " #b " \n\t"
-
+#define PMAX(a,b) \
+    "pshufw $0x0E," #a ", " #b " \n\t"\
+    PMAXW(b, a)\
+    "pshufw $0x01," #a ", " #b " \n\t"\
+    PMAXW(b, a)
 #else
 #define SPREADW(a) \
     "punpcklwd " #a ", " #a " \n\t"\
@@ -30,6 +34,14 @@
 #define PMAXW(a,b) \
     "psubusw " #a ", " #b " \n\t"\
     "paddw " #a ", " #b " \n\t"
+#define PMAX(a,b) \
+    "movq " #a ", " #b " \n\t"\
+    "psrlq $32, " #a " \n\t"\
+    PMAXW(b, a)\
+    "movq " #a ", " #b " \n\t"\
+    "psrlq $16, " #a " \n\t"\
+    PMAXW(b, a)
+
 #endif
 
 static int RENAME(dct_quantize)(MpegEncContext *s,
@@ -119,12 +131,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
             PMAXW(%%mm0, %%mm3)
             "add $8, %%"REG_a" \n\t"
             " js 1b \n\t"
-            "movq %%mm3, %%mm0 \n\t"
-            "psrlq $32, %%mm3 \n\t"
-            PMAXW(%%mm0, %%mm3)
-            "movq %%mm3, %%mm0 \n\t"
-            "psrlq $16, %%mm3 \n\t"
-            PMAXW(%%mm0, %%mm3)
+            PMAX(%%mm3, %%mm0)
             "movd %%mm3, %%"REG_a" \n\t"
             "movzb %%al, %%"REG_a" \n\t" // last_non_zero_p1
             : "+a" (last_non_zero_p1)
@@ -170,12 +177,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
             PMAXW(%%mm0, %%mm3)
             "add $8, %%"REG_a" \n\t"
             " js 1b \n\t"
-            "movq %%mm3, %%mm0 \n\t"
-            "psrlq $32, %%mm3 \n\t"
-            PMAXW(%%mm0, %%mm3)
-            "movq %%mm3, %%mm0 \n\t"
-            "psrlq $16, %%mm3 \n\t"
-            PMAXW(%%mm0, %%mm3)
+            PMAX(%%mm3, %%mm0)
             "movd %%mm3, %%"REG_a" \n\t"
             "movzb %%al, %%"REG_a" \n\t" // last_non_zero_p1
             : "+a" (last_non_zero_p1)