[llvm] c650793 - [Reassociate] Enable FP reassociation via 'reassoc' and 'nsz'

Fri Jul 15 11:45:46 PDT 2022

Author: Warren Ristow
Date: 2022-07-15T11:44:35-07:00
New Revision: c6507930493bf57d88bcb1f7d83da1b3c08ab02a

URL: https://github.com/llvm/llvm-project/commit/c6507930493bf57d88bcb1f7d83da1b3c08ab02a
DIFF: https://github.com/llvm/llvm-project/commit/c6507930493bf57d88bcb1f7d83da1b3c08ab02a.diff

LOG: [Reassociate] Enable FP reassociation via 'reassoc' and 'nsz'

Compiling with '-ffast-math' turns on all the FastMathFlags (FMF), as
expected, and that enables FP reassociation. Only the two FMF flags
'reassoc' and 'nsz' are technically required to perform reassociation,
but disabling any of the other, unrelated FMF bits needlessly suppresses
the optimization.

This patch fixes that needless suppression, and makes appropriate
adjustments to test-cases, fixing some outstanding TODOs in the process.

Fixes: #56483

Reviewed By: spatel

Differential Revision: https://reviews.llvm.org/D129523

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/Reassociate.cpp
    llvm/test/Transforms/PhaseOrdering/fast-basictest.ll
    llvm/test/Transforms/Reassociate/fast-basictest.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/Reassociate.cpp b/llvm/lib/Transforms/Scalar/Reassociate.cpp
index 63351dd58a291..240fb5e60687d 100644

--- a/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -142,12 +142,21 @@ XorOpnd::XorOpnd(Value *V) {
   isOr = true;
 }
 
+/// Return true if I is an instruction with the FastMathFlags that are needed
+/// for general reassociation set.  This is not the same as testing
+/// Instruction::isAssociative() because it includes operations like fsub.
+/// (This routine is only intended to be called for floating-point operations.)
+static bool hasFPAssociativeFlags(Instruction *I) {
+  assert(I && I->getType()->isFPOrFPVectorTy() && "Should only check FP ops");
+  return I->hasAllowReassoc() && I->hasNoSignedZeros();
+}
+
 /// Return true if V is an instruction of the specified opcode and if it
 /// only has one use.
 static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
   auto *I = dyn_cast<Instruction>(V);
   if (I && I->hasOneUse() && I->getOpcode() == Opcode)
-    if (!isa<FPMathOperator>(I) || I->isFast())
+    if (!isa<FPMathOperator>(I) || hasFPAssociativeFlags(I))
       return cast<BinaryOperator>(I);
   return nullptr;
 }
@@ -157,7 +166,7 @@ static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode1,
   auto *I = dyn_cast<Instruction>(V);
   if (I && I->hasOneUse() &&
       (I->getOpcode() == Opcode1 || I->getOpcode() == Opcode2))
-    if (!isa<FPMathOperator>(I) || I->isFast())
+    if (!isa<FPMathOperator>(I) || hasFPAssociativeFlags(I))
       return cast<BinaryOperator>(I);
   return nullptr;
 }
@@ -573,7 +582,7 @@ static bool LinearizeExprTree(Instruction *I,
       assert((!isa<Instruction>(Op) ||
               cast<Instruction>(Op)->getOpcode() != Opcode
               || (isa<FPMathOperator>(Op) &&
-                  !cast<Instruction>(Op)->isFast())) &&
+                  !hasFPAssociativeFlags(cast<Instruction>(Op)))) &&
              "Should have been handled above!");
       assert(Op->hasOneUse() && "Has uses outside the expression tree!");
 
@@ -2216,8 +2225,9 @@ void ReassociatePass::OptimizeInst(Instruction *I) {
   if (Instruction *Res = canonicalizeNegFPConstants(I))
     I = Res;
 
-  // Don't optimize floating-point instructions unless they are 'fast'.
-  if (I->getType()->isFPOrFPVectorTy() && !I->isFast())
+  // Don't optimize floating-point instructions unless they have the
+  // appropriate FastMathFlags for reassociation enabled.
+  if (I->getType()->isFPOrFPVectorTy() && !hasFPAssociativeFlags(I))
     return;
 
   // Do not reassociate boolean (i1) expressions.  We want to preserve the

diff  --git a/llvm/test/Transforms/PhaseOrdering/fast-basictest.ll b/llvm/test/Transforms/PhaseOrdering/fast-basictest.ll
index f44ee73652a56..a5780ddc11605 100644
--- a/llvm/test/Transforms/PhaseOrdering/fast-basictest.ll
+++ b/llvm/test/Transforms/PhaseOrdering/fast-basictest.ll
@@ -122,14 +122,10 @@ define float @test15_unary_fneg(float %b, float %a) {
   ret float %4
 }
 
-; TODO: check if it is possible to perform the optimization without 'fast'
-; with 'reassoc' and 'nsz' only.
 define float @test15_reassoc_nsz(float %b, float %a) {
 ; CHECK-LABEL: @test15_reassoc_nsz(
-; CHECK-NEXT:    [[TMP1:%.*]] = fadd reassoc nsz float [[A:%.*]], 1.234000e+03
-; CHECK-NEXT:    [[TMP2:%.*]] = fadd reassoc nsz float [[TMP1]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = fsub reassoc nsz float [[TMP2]], [[A]]
-; CHECK-NEXT:    ret float [[TMP3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd reassoc nsz float [[B:%.*]], 1.234000e+03
+; CHECK-NEXT:    ret float [[TMP1]]
 ;
   %1 = fadd reassoc nsz float %a, 1234.0
   %2 = fadd reassoc nsz float %b, %1
@@ -197,15 +193,18 @@ define float @test16_unary_fneg(float %a, float %b, float %z) {
   ret float %g
 }
 
-; TODO: check if it is possible to perform the optimization without 'fast'
-; with 'reassoc' and 'nsz' only.
 define float @test16_reassoc_nsz(float %a, float %b, float %z) {
-; CHECK-LABEL: @test16_reassoc_nsz(
-; CHECK-NEXT:    [[C:%.*]] = fneg reassoc nsz float [[Z:%.*]]
-; CHECK-NEXT:    [[D:%.*]] = fmul reassoc nsz float [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[E:%.*]] = fmul reassoc nsz float [[D]], [[C]]
-; CHECK-NEXT:    [[G:%.*]] = fmul reassoc nsz float [[E]], -1.234500e+04
-; CHECK-NEXT:    ret float [[G]]
+; REASSOC_AND_IC-LABEL: @test16_reassoc_nsz(
+; REASSOC_AND_IC-NEXT:    [[C:%.*]] = fmul reassoc nsz float [[A:%.*]], 1.234500e+04
+; REASSOC_AND_IC-NEXT:    [[E:%.*]] = fmul reassoc nsz float [[C]], [[B:%.*]]
+; REASSOC_AND_IC-NEXT:    [[F:%.*]] = fmul reassoc nsz float [[E]], [[Z:%.*]]
+; REASSOC_AND_IC-NEXT:    ret float [[F]]
+;
+; O2-LABEL: @test16_reassoc_nsz(
+; O2-NEXT:    [[D:%.*]] = fmul reassoc nsz float [[A:%.*]], 1.234500e+04
+; O2-NEXT:    [[E:%.*]] = fmul reassoc nsz float [[D]], [[B:%.*]]
+; O2-NEXT:    [[G:%.*]] = fmul reassoc nsz float [[E]], [[Z:%.*]]
+; O2-NEXT:    ret float [[G]]
 ;
   %c = fsub reassoc nsz float 0.000000e+00, %z
   %d = fmul reassoc nsz float %a, %b
@@ -282,7 +281,7 @@ define float @test19(float %a, float %b, float %c) nounwind  {
 
 define float @test19_reassoc_nsz(float %a, float %b, float %c) nounwind  {
 ; CHECK-LABEL: @test19_reassoc_nsz(
-; CHECK-NEXT:    [[TMP1:%.*]] = fadd reassoc nsz float [[B:%.*]], [[C:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd reassoc nsz float [[C:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[T7:%.*]] = fneg reassoc nsz float [[TMP1]]
 ; CHECK-NEXT:    ret float [[T7]]
 ;

diff  --git a/llvm/test/Transforms/Reassociate/fast-basictest.ll b/llvm/test/Transforms/Reassociate/fast-basictest.ll
index c6cb63d287dc1..189613c204c55 100644
--- a/llvm/test/Transforms/Reassociate/fast-basictest.ll
+++ b/llvm/test/Transforms/Reassociate/fast-basictest.ll
@@ -181,7 +181,6 @@ define float @test6_reassoc(float %A, float %B, float %C) {
 }
 
 ; (-X)*Y + Z -> Z-X*Y
-
 define float @test7(float %X, float %Y, float %Z) {
 ; CHECK-LABEL: @test7(
 ; CHECK-NEXT:    [[B:%.*]] = fmul fast float [[Y:%.*]], [[X:%.*]]
@@ -208,10 +207,9 @@ define float @test7_unary_fneg(float %X, float %Y, float %Z) {
 
 define float @test7_reassoc_nsz(float %X, float %Y, float %Z) {
 ; CHECK-LABEL: @test7_reassoc_nsz(
-; CHECK-NEXT:    [[A:%.*]] = fsub reassoc nsz float 0.000000e+00, [[X:%.*]]
-; CHECK-NEXT:    [[B:%.*]] = fmul reassoc nsz float [[A]], [[Y:%.*]]
-; CHECK-NEXT:    [[C:%.*]] = fadd reassoc nsz float [[B]], [[Z:%.*]]
-; CHECK-NEXT:    ret float [[C]]
+; CHECK-NEXT:    [[B:%.*]] = fmul reassoc nsz float [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fsub reassoc nsz float [[Z:%.*]], [[B]]
+; CHECK-NEXT:    ret float [[TMP1]]
 ;
   %A = fsub reassoc nsz float 0.0, %X
   %B = fmul reassoc nsz float %A, %Y
@@ -328,11 +326,10 @@ define float @test12_unary_fneg(float %X1, float %X2, float %X3) {
 
 define float @test12_reassoc_nsz(float %X1, float %X2, float %X3) {
 ; CHECK-LABEL: @test12_reassoc_nsz(
-; CHECK-NEXT:    [[A:%.*]] = fsub reassoc nsz float 0.000000e+00, [[X1:%.*]]
-; CHECK-NEXT:    [[B:%.*]] = fmul reassoc nsz float [[A]], [[X2:%.*]]
-; CHECK-NEXT:    [[C:%.*]] = fmul reassoc nsz float [[X1]], [[X3:%.*]]
-; CHECK-NEXT:    [[D:%.*]] = fadd reassoc nsz float [[B]], [[C]]
-; CHECK-NEXT:    ret float [[D]]
+; CHECK-NEXT:    [[B:%.*]] = fmul reassoc nsz float [[X2:%.*]], [[X1:%.*]]
+; CHECK-NEXT:    [[C:%.*]] = fmul reassoc nsz float [[X3:%.*]], [[X1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fsub reassoc nsz float [[C]], [[B]]
+; CHECK-NEXT:    ret float [[TMP1]]
 ;
   %A = fsub reassoc nsz float 0.000000e+00, %X1
   %B = fmul reassoc nsz float %A, %X2   ; -X1*X2
@@ -456,13 +453,12 @@ define float @test15_unary_fneg(float %b, float %a) {
   ret float %4
 }
 
+; TODO: check if we can remove dead fsub.
 define float @test15_reassoc_nsz(float %b, float %a) {
 ; CHECK-LABEL: @test15_reassoc_nsz(
-; CHECK-NEXT:    [[TMP1:%.*]] = fadd reassoc nsz float [[A:%.*]], 1.234000e+03
-; CHECK-NEXT:    [[TMP2:%.*]] = fadd reassoc nsz float [[B:%.*]], [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = fsub reassoc nsz float 0.000000e+00, [[A]]
-; CHECK-NEXT:    [[TMP4:%.*]] = fadd reassoc nsz float [[TMP3]], [[TMP2]]
-; CHECK-NEXT:    ret float [[TMP4]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fsub reassoc nsz float 0.000000e+00, [[A:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = fadd reassoc nsz float [[B:%.*]], 1.234000e+03
+; CHECK-NEXT:    ret float [[TMP2]]
 ;
   %1 = fadd reassoc nsz float %a, 1234.0
   %2 = fadd reassoc nsz float %b, %1