From 79687079a97a039c325ab79d7a95920d800b791f Mon Sep 17 00:00:00 2001
From: Justin Ruggles
Date: Mon, 18 Jun 2012 23:39:14 -0400
Subject: [PATCH] x86: add support for fmaddps fma4 instruction with
 abstraction to avx/sse

---
 configure                |  5 +++++
 libavutil/x86/x86inc.asm | 16 +++++++++++-----
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/configure b/configure
index fd90369212..715e49b020 100755
--- a/configure
+++ b/configure
@@ -242,6 +242,7 @@ Optimization options (experts only):
   --disable-sse            disable SSE optimizations
   --disable-ssse3          disable SSSE3 optimizations
   --disable-avx            disable AVX optimizations
+  --disable-fma4           disable FMA4 optimizations
   --disable-armv5te        disable armv5te optimizations
   --disable-armv6          disable armv6 optimizations
   --disable-armv6t2        disable armv6t2 optimizations
@@ -1047,6 +1048,7 @@ ARCH_EXT_LIST='
     armv6t2
     armvfp
     avx
+    fma4
     mmi
     mmx
     mmx2
@@ -1295,6 +1297,7 @@ mmx2_deps="mmx"
 sse_deps="mmx"
 ssse3_deps="sse"
 avx_deps="ssse3"
+fma4_deps="avx"
 
 aligned_stack_if_any="ppc x86"
 fast_64bit_if_any="alpha ia64 mips64 parisc64 ppc64 sparc64 x86_64"
@@ -2865,6 +2868,7 @@ EOF
     check_yasm "pextrd [eax], xmm0, 1" && enable yasm ||
         die "yasm not found, use --disable-yasm for a crippled build"
     check_yasm "vextractf128 xmm0, ymm0, 0" || disable avx
+    check_yasm "vfmaddps ymm0, ymm1, ymm2, ymm3" || disable fma4
 fi
 
 case "$cpu" in
@@ -3292,6 +3296,7 @@ if enabled x86; then
     echo "SSE enabled               ${sse-no}"
     echo "SSSE3 enabled             ${ssse3-no}"
     echo "AVX enabled               ${avx-no}"
+    echo "FMA4 enabled              ${fma4-no}"
     echo "CMOV enabled              ${cmov-no}"
     echo "CMOV is fast              ${fast_cmov-no}"
     echo "EBX available             ${ebx_available-no}"
diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm
index b76a10ca5e..4b4a19b208 100644
--- a/libavutil/x86/x86inc.asm
+++ b/libavutil/x86/x86inc.asm
@@ -1093,16 +1093,22 @@ AVX_INSTR pfmul, 1, 0, 1
 %undef j
 
 %macro FMA_INSTR 3
-    %macro %1 4-7 %1, %2, %3
-        %if cpuflag(xop)
-            v%5 %1, %2, %3, %4
+    %macro %1 5-8 %1, %2, %3
+        %if cpuflag(xop) || cpuflag(fma4)
+            v%6 %1, %2, %3, %4
         %else
-            %6 %1, %2, %3
-            %7 %1, %4
+            %ifidn %1, %4
+                %7 %5, %2, %3
+                %8 %1, %4, %5
+            %else
+                %7 %1, %2, %3
+                %8 %1, %4
+            %endif
         %endif
     %endmacro
 %endmacro
 
+FMA_INSTR fmaddps, mulps, addps
 FMA_INSTR pmacsdd, pmulld, paddd
 FMA_INSTR pmacsww, pmullw, paddw
 FMA_INSTR pmadcswd, pmaddwd, paddd
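
Note (not part of the patch): a minimal sketch of how the new fmaddps
macro would be called, assuming the cpuflags/INIT_XMM machinery already
present in x86inc.asm; the snippet would sit inside a cglobal function
body, and the register numbers are illustrative:

    ; built with INIT_XMM fma4 (or xop), each call assembles to a
    ; single 4-operand vfmaddps instruction
    INIT_XMM fma4
    fmaddps m0, m1, m2, m3, m4   ; m0 = m1 * m2 + m3 (m4 ignored)

    ; built for sse or avx, the macro falls back to the mulps/addps
    ; pair; the fifth (scratch) register is written only when the
    ; destination aliases the addend
    INIT_XMM sse
    fmaddps m0, m1, m2, m3, m4   ; m0 = m1 * m2, then m0 += m3
    fmaddps m0, m1, m2, m0, m4   ; m4 = m1 * m2, then m0 += m4

The scratch operand exists because the SSE/AVX fallback cannot
overwrite the destination while it still holds the addend; the
FMA4/XOP path simply ignores it.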