diff --git a/libavcodec/i386/dsputil_mmx.c b/libavcodec/i386/dsputil_mmx.c index 3ebf1488f0..191688ce23 100644 --- a/libavcodec/i386/dsputil_mmx.c +++ b/libavcodec/i386/dsputil_mmx.c @@ -3627,8 +3627,10 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->inner_add_yblock = ff_snow_inner_add_yblock_sse2; } else{ + if(mm_flags & MM_MMXEXT){ c->horizontal_compose97i = ff_snow_horizontal_compose97i_mmx; c->vertical_compose97i = ff_snow_vertical_compose97i_mmx; + } c->inner_add_yblock = ff_snow_inner_add_yblock_mmx; } #endif diff --git a/libavcodec/i386/snowdsp_mmx.c b/libavcodec/i386/snowdsp_mmx.c index 6deaad2336..e7f4b2953b 100644 --- a/libavcodec/i386/snowdsp_mmx.c +++ b/libavcodec/i386/snowdsp_mmx.c @@ -111,22 +111,29 @@ void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){ i = 0; asm volatile( - "psllw $2, %%xmm7 \n\t" + "psllw $13, %%xmm7 \n\t" + "pcmpeqw %%xmm6, %%xmm6 \n\t" + "psrlw $13, %%xmm6 \n\t" + "paddw %%xmm7, %%xmm6 \n\t" ::); for(; i> W_BS); asm volatile( - "psllw $2, %%mm7 \n\t" + "psllw $13, %%mm7 \n\t" + "pcmpeqw %%mm6, %%mm6 \n\t" + "psrlw $13, %%mm6 \n\t" + "paddw %%mm7, %%mm6 \n\t" ::); for(; i