[llvm] [InstCombine] Do not require nsz for reassociating fadd (PR #93782)

Jay Foad via llvm-commits llvm-commits at lists.llvm.org
Thu May 30 01:16:46 PDT 2024


https://github.com/jayfoad created https://github.com/llvm/llvm-project/pull/93782

None
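The effect, as the updated tests below show, is that folds which previously required both 'reassoc' and 'nsz' on an fadd now fire with 'reassoc' alone. A minimal sketch (illustrative IR, not copied verbatim from the tests):

    %y = fadd reassoc float %x, 4.0
    %z = fadd reassoc float %y, 5.0

  now folds to

    %z = fadd reassoc float %x, 9.0

'nsz' is still needed to drop a trailing zero term: with only 'reassoc', "(A + C1) + (B + -C1)" folds to "(A + B) + 0.0" rather than all the way to "A + B".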

>From d7517d51e57ec3e8e3dd54c4386c5272b2d57c2a Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Thu, 30 May 2024 08:51:48 +0100
Subject: [PATCH] [InstCombine] Do not require nsz for reassociating fadd

---
 llvm/lib/IR/Instruction.cpp                   |  3 ++-
 .../InstCombine/2006-10-26-VectorReassoc.ll   | 18 +++++++--------
 llvm/test/Transforms/InstCombine/fast-math.ll | 17 +++++---------
 .../LoopVectorize/X86/float-induction-x86.ll  |  6 ++---
 .../PhaseOrdering/fast-basictest.ll           | 15 ++++++++-----
 .../Transforms/Reassociate/fast-MissedTree.ll | 22 +++++++++++++++----
 6 files changed, 47 insertions(+), 34 deletions(-)

diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp
index 29272e627a1d1..7557909e1e40f 100644
--- a/llvm/lib/IR/Instruction.cpp
+++ b/llvm/lib/IR/Instruction.cpp
@@ -1204,9 +1204,10 @@ bool Instruction::isAssociative() const {
 
   switch (Opcode) {
   case FMul:
-  case FAdd:
     return cast<FPMathOperator>(this)->hasAllowReassoc() &&
            cast<FPMathOperator>(this)->hasNoSignedZeros();
+  case FAdd:
+    return cast<FPMathOperator>(this)->hasAllowReassoc();
   default:
     return false;
   }
diff --git a/llvm/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll b/llvm/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll
index fb860a5e7bdf3..672473f9a9ca5 100644
--- a/llvm/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll
+++ b/llvm/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll
@@ -81,12 +81,11 @@ define <4 x float> @test_fadd_reassoc_nsz(<4 x float> %V) {
 }
 
 ; (V + C1) + C2 => V + (C1 + C2)
-; TODO: This doesn't require 'nsz'.  It should fold to V + { 2.0, 4.0, 0.0, 8.0 }
+; Verify this folds with 'reassoc'
 define <4 x float> @test_fadd_reassoc(<4 x float> %V) {
 ; CHECK-LABEL: @test_fadd_reassoc(
-; CHECK-NEXT:     [[TMP1:%.*]] = fadd reassoc <4 x float> [[V:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
-; CHECK-NEXT:     [[TMP2:%.*]] = fadd reassoc <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00>
-; CHECK-NEXT:     ret <4 x float> [[TMP2]]
+; CHECK-NEXT:     [[TMP1:%.*]] = fadd reassoc <4 x float> [[V:%.*]], <float 2.000000e+00, float 4.000000e+00, float 0.000000e+00, float 8.000000e+00>
+; CHECK-NEXT:     ret <4 x float> [[TMP1]]
         %Y = fadd reassoc <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
         %Z = fadd reassoc <4 x float> %Y, < float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00 >
         ret <4 x float> %Z
@@ -119,7 +118,7 @@ define <4 x float> @test_fadds_cancel_fast(<4 x float> %A, <4 x float> %B) {
 }
 
 ; ( A + C1 ) + ( B + -C1 )
-; Verify this folds to 'A + B' with 'reassoc' and 'nsz' ('nsz' is required)
+; Verify this folds to 'A + B' with 'reassoc' and 'nsz'
 define <4 x float> @test_fadds_cancel_reassoc_nsz(<4 x float> %A, <4 x float> %B) {
 ; CHECK-LABEL: @test_fadds_cancel_reassoc_nsz(
 ; CHECK-NEXT:     [[TMP1:%.*]] = fadd reassoc nsz <4 x float> [[A:%.*]], [[B:%.*]]
@@ -131,13 +130,12 @@ define <4 x float> @test_fadds_cancel_reassoc_nsz(<4 x float> %A, <4 x float> %B
 }
 
 ; ( A + C1 ) + ( B + -C1 )
-; Verify the fold is not done with only 'reassoc' ('nsz' is required).
+; Verify this folds to 'A + B + 0' with 'reassoc'
 define <4 x float> @test_fadds_cancel_reassoc(<4 x float> %A, <4 x float> %B) {
 ; CHECK-LABEL: @test_fadds_cancel_reassoc(
-; CHECK-NEXT:     [[TMP1:%.*]] = fadd reassoc <4 x float> [[A:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
-; CHECK-NEXT:     [[TMP2:%.*]] = fadd reassoc <4 x float> [[B:%.*]], <float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00>
-; CHECK-NEXT:     [[TMP3:%.*]] = fadd reassoc <4 x float> [[TMP1]], [[TMP2]]
-; CHECK-NEXT:     ret <4 x float> [[TMP3]]
+; CHECK-NEXT:     [[TMP1:%.*]] = fadd reassoc <4 x float> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:     [[TMP2:%.*]] = fadd reassoc <4 x float> [[TMP1]], zeroinitializer
+; CHECK-NEXT:     ret <4 x float> [[TMP2]]
         %X = fadd reassoc <4 x float> %A, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
         %Y = fadd reassoc <4 x float> %B, < float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00 >
         %Z = fadd reassoc <4 x float> %X, %Y
diff --git a/llvm/test/Transforms/InstCombine/fast-math.ll b/llvm/test/Transforms/InstCombine/fast-math.ll
index da403555ebe24..febf6293cc0e3 100644
--- a/llvm/test/Transforms/InstCombine/fast-math.ll
+++ b/llvm/test/Transforms/InstCombine/fast-math.ll
@@ -135,11 +135,10 @@ define float @fold5_reassoc_nsz(float %f1) {
   ret float %add1
 }
 
-; TODO: This doesn't require 'nsz'.  It should fold to f1 + 9.0
+; Check again with 'reassoc'.
 define float @fold5_reassoc(float %f1) {
 ; CHECK-LABEL: @fold5_reassoc(
-; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[F1:%.*]], 4.000000e+00
-; CHECK-NEXT:    [[ADD1:%.*]] = fadd reassoc float [[ADD]], 5.000000e+00
+; CHECK-NEXT:    [[ADD1:%.*]] = fadd reassoc float [[F1:%.*]], 9.000000e+00
 ; CHECK-NEXT:    ret float [[ADD1]]
 ;
   %add = fadd float %f1, 4.000000e+00
@@ -382,8 +381,7 @@ define float @fold10(float %f1, float %f2) {
   ret float %t3
 }
 
-; Check again with 'reassoc' and 'nsz'.
-; TODO: We may be able to remove the 'nsz' requirement.
+; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
 define float @fold10_reassoc_nsz(float %f1, float %f2) {
 ; CHECK-LABEL: @fold10_reassoc_nsz(
 ; CHECK-NEXT:    [[T2:%.*]] = fadd reassoc nsz float [[F1:%.*]], [[F2:%.*]]
@@ -396,14 +394,11 @@ define float @fold10_reassoc_nsz(float %f1, float %f2) {
   ret float %t3
 }
 
-; Observe that the fold is not done with only reassoc (the instructions are
-; canonicalized, but not folded).
-; TODO: As noted above, 'nsz' may not be required for this to be fully folded.
+; Check again with 'reassoc'.
 define float @fold10_reassoc(float %f1, float %f2) {
 ; CHECK-LABEL: @fold10_reassoc(
-; CHECK-NEXT:    [[T1:%.*]] = fadd reassoc float [[F1:%.*]], 2.000000e+00
-; CHECK-NEXT:    [[T2:%.*]] = fadd reassoc float [[F2:%.*]], -3.000000e+00
-; CHECK-NEXT:    [[T3:%.*]] = fadd reassoc float [[T1]], [[T2]]
+; CHECK-NEXT:    [[T2:%.*]] = fadd reassoc float [[F1:%.*]], [[F2:%.*]]
+; CHECK-NEXT:    [[T3:%.*]] = fadd reassoc float [[T2]], -1.000000e+00
 ; CHECK-NEXT:    ret float [[T3]]
 ;
   %t1 = fadd reassoc float 2.000000e+00, %f1
diff --git a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
index 59b8ce42380d9..5344897cf51e1 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
@@ -374,8 +374,8 @@ define void @fadd_reassoc_FMF(ptr nocapture %p, i32 %N) {
 ; AUTO_VEC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; AUTO_VEC-NEXT:    [[VEC_IND:%.*]] = phi <8 x float> [ <float 1.000000e+00, float 4.300000e+01, float 8.500000e+01, float 1.270000e+02, float 1.690000e+02, float 2.110000e+02, float 2.530000e+02, float 2.950000e+02>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; AUTO_VEC-NEXT:    [[STEP_ADD:%.*]] = fadd reassoc <8 x float> [[VEC_IND]], <float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02>
-; AUTO_VEC-NEXT:    [[STEP_ADD2:%.*]] = fadd reassoc <8 x float> [[STEP_ADD]], <float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02>
-; AUTO_VEC-NEXT:    [[STEP_ADD3:%.*]] = fadd reassoc <8 x float> [[STEP_ADD2]], <float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02>
+; AUTO_VEC-NEXT:    [[STEP_ADD2:%.*]] = fadd reassoc <8 x float> [[VEC_IND]], <float 6.720000e+02, float 6.720000e+02, float 6.720000e+02, float 6.720000e+02, float 6.720000e+02, float 6.720000e+02, float 6.720000e+02, float 6.720000e+02>
+; AUTO_VEC-NEXT:    [[STEP_ADD3:%.*]] = fadd reassoc <8 x float> [[VEC_IND]], <float 1.008000e+03, float 1.008000e+03, float 1.008000e+03, float 1.008000e+03, float 1.008000e+03, float 1.008000e+03, float 1.008000e+03, float 1.008000e+03>
 ; AUTO_VEC-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[P:%.*]], i64 [[INDEX]]
 ; AUTO_VEC-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 32
 ; AUTO_VEC-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 64
@@ -393,7 +393,7 @@ define void @fadd_reassoc_FMF(ptr nocapture %p, i32 %N) {
 ; AUTO_VEC-NEXT:    store <8 x float> [[TMP8]], ptr [[TMP4]], align 4
 ; AUTO_VEC-NEXT:    store <8 x float> [[TMP9]], ptr [[TMP5]], align 4
 ; AUTO_VEC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
-; AUTO_VEC-NEXT:    [[VEC_IND_NEXT]] = fadd reassoc <8 x float> [[STEP_ADD3]], <float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02>
+; AUTO_VEC-NEXT:    [[VEC_IND_NEXT]] = fadd reassoc <8 x float> [[VEC_IND]], <float 1.344000e+03, float 1.344000e+03, float 1.344000e+03, float 1.344000e+03, float 1.344000e+03, float 1.344000e+03, float 1.344000e+03, float 1.344000e+03>
 ; AUTO_VEC-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; AUTO_VEC-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
 ; AUTO_VEC:       middle.block:
diff --git a/llvm/test/Transforms/PhaseOrdering/fast-basictest.ll b/llvm/test/Transforms/PhaseOrdering/fast-basictest.ll
index 0127f05022d71..7b9299b1316d1 100644
--- a/llvm/test/Transforms/PhaseOrdering/fast-basictest.ll
+++ b/llvm/test/Transforms/PhaseOrdering/fast-basictest.ll
@@ -45,11 +45,16 @@ define float @test2_no_FMF(float %reg109, float %reg1111) {
 }
 
 define float @test2_reassoc(float %reg109, float %reg1111) {
-; CHECK-LABEL: @test2_reassoc(
-; CHECK-NEXT:    [[REG115:%.*]] = fadd reassoc float [[REG109:%.*]], -3.000000e+01
-; CHECK-NEXT:    [[REG116:%.*]] = fadd reassoc float [[REG115]], [[REG1111:%.*]]
-; CHECK-NEXT:    [[REG117:%.*]] = fadd reassoc float [[REG116]], 3.000000e+01
-; CHECK-NEXT:    ret float [[REG117]]
+; REASSOC_AND_IC-LABEL: @test2_reassoc(
+; REASSOC_AND_IC-NEXT:    [[REG115:%.*]] = fadd reassoc float [[REG109:%.*]], -3.000000e+01
+; REASSOC_AND_IC-NEXT:    [[REG116:%.*]] = fadd reassoc float [[REG115]], [[REG1111:%.*]]
+; REASSOC_AND_IC-NEXT:    [[REG117:%.*]] = fadd reassoc float [[REG116]], 3.000000e+01
+; REASSOC_AND_IC-NEXT:    ret float [[REG117]]
+;
+; O2-LABEL: @test2_reassoc(
+; O2-NEXT:    [[OP_RDX:%.*]] = fadd reassoc float [[REG109:%.*]], 0.000000e+00
+; O2-NEXT:    [[OP_RDX1:%.*]] = fadd reassoc float [[OP_RDX]], [[REG1111:%.*]]
+; O2-NEXT:    ret float [[OP_RDX1]]
 ;
   %reg115 = fadd reassoc float %reg109, -3.000000e+01
   %reg116 = fadd reassoc float %reg115, %reg1111
diff --git a/llvm/test/Transforms/Reassociate/fast-MissedTree.ll b/llvm/test/Transforms/Reassociate/fast-MissedTree.ll
index 11bde8642e5d6..bdb9c64b0db63 100644
--- a/llvm/test/Transforms/Reassociate/fast-MissedTree.ll
+++ b/llvm/test/Transforms/Reassociate/fast-MissedTree.ll
@@ -24,12 +24,12 @@ define float @test1_reassoc_nsz(float %A, float %B) {
   ret float %Z
 }
 
-; Verify the fold is not done with only 'reassoc' ('nsz' is required).
+; Check again with only 'reassoc'.
+; 'nsz' is required to remove the fadd 0.0.
 define float @test1_reassoc(float %A, float %B) {
 ; CHECK-LABEL: @test1_reassoc(
-; CHECK-NEXT:    [[W:%.*]] = fadd reassoc float %B, -5.000000e+00
-; CHECK-NEXT:    [[Y:%.*]] = fadd reassoc float %A, 5.000000e+00
-; CHECK-NEXT:    [[Z:%.*]] = fadd reassoc float [[Y]], [[W]]
+; CHECK-NEXT:    [[W:%.*]] = fadd reassoc float %A, %B
+; CHECK-NEXT:    [[Z:%.*]] = fadd reassoc float [[W]], 0.000000e+00
 ; CHECK-NEXT:    ret float [[Z]]
 ;
   %W = fadd reassoc float %B, -5.0
@@ -37,3 +37,17 @@ define float @test1_reassoc(float %A, float %B) {
   %Z = fadd reassoc float %W, %Y
   ret float %Z
 }
+
+; Verify the fold is not done without FMF.
+define float @test1_none(float %A, float %B) {
+; CHECK-LABEL: @test1_none(
+; CHECK-NEXT:    [[W:%.*]] = fadd float %B, -5.000000e+00
+; CHECK-NEXT:    [[Y:%.*]] = fadd float %A, 5.000000e+00
+; CHECK-NEXT:    [[Z:%.*]] = fadd float [[Y]], [[W]]
+; CHECK-NEXT:    ret float [[Z]]
+;
+  %W = fadd float %B, -5.0
+  %Y = fadd float %A, 5.0
+  %Z = fadd float %W, %Y
+  ret float %Z
+}


