[llvm] f765824 - [AArch64] Consider instruction-level contract FMFs in combiner patterns.

Tue Aug 4 02:29:55 PDT 2020

Author: Florian Hahn
Date: 2020-08-04T10:25:16+01:00
New Revision: f7658241cb27491b4160a1f7060ef883bc535d09

URL: https://github.com/llvm/llvm-project/commit/f7658241cb27491b4160a1f7060ef883bc535d09
DIFF: https://github.com/llvm/llvm-project/commit/f7658241cb27491b4160a1f7060ef883bc535d09.diff

LOG: [AArch64] Consider instruction-level contract FMFs in combiner patterns.

Currently, instruction-level fast-math flags are not considered when
generating patterns for the machine combiner.

This leads to some missed opportunities to generate FMAs in
combination with `#pragma clang fp contract (fast)`.

For example, when the code below is built with -O3 for AArch64, no
FMADD is generated. If it is built with -O2, where the DAGCombiner is used
instead of the MachineCombiner for FMAs, an FMADD is generated.

With this patch, the same code is generated in both cases.

    float madd_contract(float a, float b, float c) {
    #pragma clang fp contract (fast)
      return (a * b) + c;
    }

Reviewed By: dmgreen

Differential Revision: https://reviews.llvm.org/D84930

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
    llvm/test/CodeGen/AArch64/machine-combiner-instr-fmf.mir
    llvm/test/CodeGen/AArch64/neon-fma-FMF.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 323ac76e903fd..b6fda6b367bf2 100644

--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -3861,7 +3861,7 @@ static bool isCombineInstrCandidate64(unsigned Opc) {
   return false;
 }
 
-// FP Opcodes that can be combined with a FMUL
+// FP Opcodes that can be combined with a FMUL.
 static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
   switch (Inst.getOpcode()) {
   default:
@@ -3883,8 +3883,12 @@ static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
   case AArch64::FSUBv2f64:
   case AArch64::FSUBv4f32:
     TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
-    return (Options.UnsafeFPMath ||
-            Options.AllowFPOpFusion == FPOpFusion::Fast);
+    // We can fuse FADD/FSUB with FMUL, if fusion is either allowed globally by
+    // the target options or if FADD/FSUB has the contract fast-math flag.
+    return Options.UnsafeFPMath ||
+           Options.AllowFPOpFusion == FPOpFusion::Fast ||
+           Inst.getFlag(MachineInstr::FmContract);
+    return true;
   }
   return false;
 }

diff  --git a/llvm/test/CodeGen/AArch64/machine-combiner-instr-fmf.mir b/llvm/test/CodeGen/AArch64/machine-combiner-instr-fmf.mir
index 23ed96a42b513..992e636011b6b 100644
--- a/llvm/test/CodeGen/AArch64/machine-combiner-instr-fmf.mir
+++ b/llvm/test/CodeGen/AArch64/machine-combiner-instr-fmf.mir
@@ -6,8 +6,7 @@
 # CHECK:        [[C:%.*]]:fpr32 = COPY $s2
 # CHECK-NEXT:   [[B:%.*]]:fpr32 = COPY $s1
 # CHECK-NEXT:   [[A:%.*]]:fpr32 = COPY $s0
-# CHECK-NEXT:   [[MUL:%.*]]:fpr32 = nnan ninf nsz arcp contract afn reassoc FMULSrr [[B]], [[A]]
-# CHECK-NEXT:   fpr32 = nnan ninf nsz arcp contract afn reassoc FADDSrr killed [[MUL]], [[C]]
+# CHECK-NEXT:   :fpr32 = FMADDSrrr [[B]], [[A]], [[C]]
 ---
 name:            scalar_fmadd_fast
 alignment:       4
@@ -46,8 +45,7 @@ body:             |
 # CHECK:        [[C:%.*]]:fpr32 = COPY $s2
 # CHECK-NEXT:   [[B:%.*]]:fpr32 = COPY $s1
 # CHECK-NEXT:   [[A:%.*]]:fpr32 = COPY $s0
-# CHECK-NEXT:   [[MUL:%.*]]:fpr32 = contract FMULSrr [[B]], [[A]]
-# CHECK-NEXT:   fpr32 = contract FADDSrr killed [[MUL]], [[C]]
+# CHECK-NEXT:   :fpr32 = FMADDSrrr [[B]], [[A]], [[C]]
 
 ---
 name:            scalar_fmadd_contract
@@ -81,7 +79,7 @@ body:             |
 
 ...
 
-# Do not create FMADD, because we don't have the contract flag on moth instructions.
+# Do not create FMADD, because we don't have the contract flag on the FADD.
 
 # CHECK-LABEL: name: scalar_fmadd_contract_op0
 # CHECK:        [[C:%.*]]:fpr32 = COPY $s2
@@ -121,14 +119,13 @@ body:             |
 
 ...
 
-# Do not create FMADD, because we don't have the contract flag on moth instructions.
+# Do create FMADD, because we have the contract flag on the FADD.
 #
 # CHECK-LABEL: name: scalar_fmadd_contract_op1
 # CHECK:        [[C:%.*]]:fpr32 = COPY $s2
 # CHECK-NEXT:   [[B:%.*]]:fpr32 = COPY $s1
 # CHECK-NEXT:   [[A:%.*]]:fpr32 = COPY $s0
-# CHECK-NEXT:   [[MUL:%.*]]:fpr32 = FMULSrr [[B]], [[A]]
-# CHECK-NEXT:   fpr32 = contract FADDSrr killed [[MUL]], [[C]]
+# CHECK-NEXT:   :fpr32 = FMADDSrrr [[B]], [[A]], [[C]]
 
 ---
 name:            scalar_fmadd_contract_op1
@@ -203,14 +200,13 @@ body:             |
 
 ...
 
-# Can create FMADD, because both the fmul and fadd have all fast-math flags.
+# Can create FMLA, because both the fmul and fadd have all fast-math flags.
 #
 # CHECK-LABEL: name: vector_fmadd_fast
 # CHECK:       [[C:%.*]]:fpr128 = COPY $q2
 # CHECK-NEXT:  [[B:%.*]]:fpr128 = COPY $q1
 # CHECK-NEXT:  [[A:%.*]]:fpr128 = COPY $q0
-# CHECK-NEXT:  [[MUL:%.*]]:fpr128 = nnan ninf nsz arcp contract afn reassoc FMULv2f64 [[B]], [[A]]
-# CHECK-NEXT:  fpr128 = nnan ninf nsz arcp contract afn reassoc FADDv2f64 killed [[MUL]], [[C]]
+# CHECK-NEXT:  fpr128 = FMLAv2f64 [[C]], [[B]], [[A]]
 ---
 name:            vector_fmadd_fast
 alignment:       4
@@ -243,14 +239,13 @@ body:             |
 
 ...
 
-# Can create FMADD, because both the fmul and fadd have the contract fast-math flag.
+# Can create FMLA, because both the fmul and fadd have the contract fast-math flag.
 #
 # CHECK-LABEL: name: vector_fmadd_contract
 # CHECK:       [[C:%.*]]:fpr128 = COPY $q2
 # CHECK-NEXT:  [[B:%.*]]:fpr128 = COPY $q1
 # CHECK-NEXT:  [[A:%.*]]:fpr128 = COPY $q0
-# CHECK-NEXT:  [[MUL:%.*]]:fpr128 = contract FMULv2f64 [[B]], [[A]]
-# CHECK-NEXT:  fpr128 = contract FADDv2f64 killed [[MUL]], [[C]]
+# CHECK-NEXT:  fpr128 = FMLAv2f64 [[C]], [[B]], [[A]]
 ---
 name:            vector_fmadd_contract
 alignment:       4
@@ -283,7 +278,7 @@ body:             |
 
 ...
 
-# Do not create FMADD, because we don't have the contract flag on moth instructions.
+# Do not create FMLA, because we don't have the contract flag on the FADD.
 #
 # CHECK-LABEL: name: vector_fmadd_contract_op0
 # CHECK:       [[C:%.*]]:fpr128 = COPY $q2
@@ -323,14 +318,13 @@ body:             |
 
 ...
 
-# Do not create FMADD, because we don't have the contract flag on moth instructions.
+# Do create FMLA, because we have the contract flag on the FADD.
 #
 # CHECK-LABEL: name: vector_fmadd_contract_op1
 # CHECK:       [[C:%.*]]:fpr128 = COPY $q2
 # CHECK-NEXT:  [[B:%.*]]:fpr128 = COPY $q1
 # CHECK-NEXT:  [[A:%.*]]:fpr128 = COPY $q0
-# CHECK-NEXT:  [[MUL:%.*]]:fpr128 = FMULv2f64 [[B]], [[A]]
-# CHECK-NEXT:  fpr128 = contract FADDv2f64 killed [[MUL]], [[C]]
+# CHECK-NEXT:  fpr128 = FMLAv2f64 [[C]], [[B]], [[A]]
 
 ---
 name:            vector_fmadd_contract_op1
@@ -364,7 +358,7 @@ body:             |
 
 ...
 
-# Do not create FMADD, as nsz flag does not allow it.
+# Do not create FMLA, as nsz flag does not allow it.
 #
 # CHECK-LABEL: name: vector_fmadd_nsz
 # CHECK:       [[C:%.*]]:fpr128 = COPY $q2

diff  --git a/llvm/test/CodeGen/AArch64/neon-fma-FMF.ll b/llvm/test/CodeGen/AArch64/neon-fma-FMF.ll
index 893d153801a71..0eb173396ce97 100644
--- a/llvm/test/CodeGen/AArch64/neon-fma-FMF.ll
+++ b/llvm/test/CodeGen/AArch64/neon-fma-FMF.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+; RUN: llc < %s -O3 -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 
 define <2 x float> @fma_1(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
 ; CHECK-LABEL: fma_1: