[llvm] r337223 - [X86] Add a missing FMA3 scalar intrinsic pattern.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 16 16:10:58 PDT 2018


Author: ctopper
Date: Mon Jul 16 16:10:58 2018
New Revision: 337223

URL: http://llvm.org/viewvc/llvm-project?rev=337223&view=rev
Log:
[X86] Add a missing FMA3 scalar intrinsic pattern.

This allows us to use 231 form to fold an insertelement on the add input to the fma. There is technically no software intrinsic that can use this until AVX512F, but it can be manually built up from other intrinsics.

Modified:
    llvm/trunk/lib/Target/X86/X86InstrFMA.td
    llvm/trunk/test/CodeGen/X86/fma-intrinsics-x86.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrFMA.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFMA.td?rev=337223&r1=337222&r2=337223&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFMA.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFMA.td Mon Jul 16 16:10:58 2018
@@ -341,6 +341,13 @@ multiclass scalar_fma_patterns<SDNode Op
                (VT (COPY_TO_REGCLASS RC:$src3, VR128)))>;
 
     def : Pat<(VT (Move (VT VR128:$src1), (VT (scalar_to_vector
+                (Op RC:$src2, RC:$src3,
+                    (EltVT (extractelt (VT VR128:$src1), (iPTR 0)))))))),
+              (!cast<Instruction>(Prefix#"231"#Suffix#"r_Int")
+               VR128:$src1, (VT (COPY_TO_REGCLASS RC:$src2, VR128)),
+               (VT (COPY_TO_REGCLASS RC:$src3, VR128)))>;
+
+    def : Pat<(VT (Move (VT VR128:$src1), (VT (scalar_to_vector
                 (Op RC:$src2,
                     (EltVT (extractelt (VT VR128:$src1), (iPTR 0))),
                     (mem_frag addr:$src3)))))),

Modified: llvm/trunk/test/CodeGen/X86/fma-intrinsics-x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fma-intrinsics-x86.ll?rev=337223&r1=337222&r2=337223&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fma-intrinsics-x86.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fma-intrinsics-x86.ll Mon Jul 16 16:10:58 2018
@@ -64,6 +64,37 @@ define <4 x float> @test_x86_fma_vfmadd_
   ret <4 x float> %5
 }
 
+define <4 x float> @test_x86_fma_vfmadd_ss_231(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
+; CHECK-FMA-LABEL: test_x86_fma_vfmadd_ss_231:
+; CHECK-FMA:       # %bb.0:
+; CHECK-FMA-NEXT:    vfmadd231ss %xmm1, %xmm0, %xmm2 # encoding: [0xc4,0xe2,0x79,0xb9,0xd1]
+; CHECK-FMA-NEXT:    # xmm2 = (xmm0 * xmm1) + xmm2
+; CHECK-FMA-NEXT:    vmovaps %xmm2, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc2]
+; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
+;
+; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_ss_231:
+; CHECK-AVX512VL:       # %bb.0:
+; CHECK-AVX512VL-NEXT:    vfmadd231ss %xmm1, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xb9,0xd1]
+; CHECK-AVX512VL-NEXT:    # xmm2 = (xmm0 * xmm1) + xmm2
+; CHECK-AVX512VL-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
+; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
+;
+; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_ss_231:
+; CHECK-FMA-WIN:       # %bb.0:
+; CHECK-FMA-WIN-NEXT:    vmovaps (%r8), %xmm0 # encoding: [0xc4,0xc1,0x78,0x28,0x00]
+; CHECK-FMA-WIN-NEXT:    vmovss (%rcx), %xmm1 # encoding: [0xc5,0xfa,0x10,0x09]
+; CHECK-FMA-WIN-NEXT:    # xmm1 = mem[0],zero,zero,zero
+; CHECK-FMA-WIN-NEXT:    vfmadd231ss (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xb9,0x02]
+; CHECK-FMA-WIN-NEXT:    # xmm0 = (xmm1 * mem) + xmm0
+; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
+  %1 = extractelement <4 x float> %a0, i64 0
+  %2 = extractelement <4 x float> %a1, i64 0
+  %3 = extractelement <4 x float> %a2, i64 0
+  %4 = call float @llvm.fma.f32(float %1, float %2, float %3)
+  %5 = insertelement <4 x float> %a2, float %4, i64 0
+  ret <4 x float> %5
+}
+
 define <2 x double> @test_x86_fma_vfmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
 ; CHECK-FMA-LABEL: test_x86_fma_vfmadd_sd:
 ; CHECK-FMA:       # %bb.0:




More information about the llvm-commits mailing list