[llvm] [InstCombine] Do not require nsz for reassociating fadd (PR #93782)
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Thu May 30 01:16:46 PDT 2024
https://github.com/jayfoad created https://github.com/llvm/llvm-project/pull/93782
(No pull request description was provided.)
>From d7517d51e57ec3e8e3dd54c4386c5272b2d57c2a Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Thu, 30 May 2024 08:51:48 +0100
Subject: [PATCH] [InstCombine] Do not require nsz for reassociating fadd
---
llvm/lib/IR/Instruction.cpp | 3 ++-
.../InstCombine/2006-10-26-VectorReassoc.ll | 18 +++++++--------
llvm/test/Transforms/InstCombine/fast-math.ll | 17 +++++---------
.../LoopVectorize/X86/float-induction-x86.ll | 6 ++---
.../PhaseOrdering/fast-basictest.ll | 15 ++++++++-----
.../Transforms/Reassociate/fast-MissedTree.ll | 22 +++++++++++++++----
6 files changed, 47 insertions(+), 34 deletions(-)
diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp
index 29272e627a1d1..7557909e1e40f 100644
--- a/llvm/lib/IR/Instruction.cpp
+++ b/llvm/lib/IR/Instruction.cpp
@@ -1204,9 +1204,10 @@ bool Instruction::isAssociative() const {
switch (Opcode) {
case FMul:
- case FAdd:
return cast<FPMathOperator>(this)->hasAllowReassoc() &&
cast<FPMathOperator>(this)->hasNoSignedZeros();
+ case FAdd:
+ return cast<FPMathOperator>(this)->hasAllowReassoc();
default:
return false;
}
diff --git a/llvm/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll b/llvm/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll
index fb860a5e7bdf3..672473f9a9ca5 100644
--- a/llvm/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll
+++ b/llvm/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll
@@ -81,12 +81,11 @@ define <4 x float> @test_fadd_reassoc_nsz(<4 x float> %V) {
}
; (V + C1) + C2 => V + (C1 + C2)
-; TODO: This doesn't require 'nsz'. It should fold to V + { 2.0, 4.0, 0.0, 8.0 }
+; Verify this folds with 'reassoc'
define <4 x float> @test_fadd_reassoc(<4 x float> %V) {
; CHECK-LABEL: @test_fadd_reassoc(
-; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc <4 x float> [[V:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
-; CHECK-NEXT: [[TMP2:%.*]] = fadd reassoc <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00>
-; CHECK-NEXT: ret <4 x float> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc <4 x float> [[V:%.*]], <float 2.000000e+00, float 4.000000e+00, float 0.000000e+00, float 8.000000e+00>
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
%Y = fadd reassoc <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
%Z = fadd reassoc <4 x float> %Y, < float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00 >
ret <4 x float> %Z
@@ -119,7 +118,7 @@ define <4 x float> @test_fadds_cancel_fast(<4 x float> %A, <4 x float> %B) {
}
; ( A + C1 ) + ( B + -C1 )
-; Verify this folds to 'A + B' with 'reassoc' and 'nsz' ('nsz' is required)
+; Verify this folds to 'A + B' with 'reassoc' and 'nsz'
define <4 x float> @test_fadds_cancel_reassoc_nsz(<4 x float> %A, <4 x float> %B) {
; CHECK-LABEL: @test_fadds_cancel_reassoc_nsz(
; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc nsz <4 x float> [[A:%.*]], [[B:%.*]]
@@ -131,13 +130,12 @@ define <4 x float> @test_fadds_cancel_reassoc_nsz(<4 x float> %A, <4 x float> %B
}
; ( A + C1 ) + ( B + -C1 )
-; Verify the fold is not done with only 'reassoc' ('nsz' is required).
+; Verify this folds to 'A + B + 0' with 'reassoc'
define <4 x float> @test_fadds_cancel_reassoc(<4 x float> %A, <4 x float> %B) {
; CHECK-LABEL: @test_fadds_cancel_reassoc(
-; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc <4 x float> [[A:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
-; CHECK-NEXT: [[TMP2:%.*]] = fadd reassoc <4 x float> [[B:%.*]], <float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00>
-; CHECK-NEXT: [[TMP3:%.*]] = fadd reassoc <4 x float> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: ret <4 x float> [[TMP3]]
+; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc <4 x float> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = fadd reassoc <4 x float> [[TMP1]], zeroinitializer
+; CHECK-NEXT: ret <4 x float> [[TMP2]]
%X = fadd reassoc <4 x float> %A, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
%Y = fadd reassoc <4 x float> %B, < float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00 >
%Z = fadd reassoc <4 x float> %X, %Y
diff --git a/llvm/test/Transforms/InstCombine/fast-math.ll b/llvm/test/Transforms/InstCombine/fast-math.ll
index da403555ebe24..febf6293cc0e3 100644
--- a/llvm/test/Transforms/InstCombine/fast-math.ll
+++ b/llvm/test/Transforms/InstCombine/fast-math.ll
@@ -135,11 +135,10 @@ define float @fold5_reassoc_nsz(float %f1) {
ret float %add1
}
-; TODO: This doesn't require 'nsz'. It should fold to f1 + 9.0
+; Check again with 'reassoc'.
define float @fold5_reassoc(float %f1) {
; CHECK-LABEL: @fold5_reassoc(
-; CHECK-NEXT: [[ADD:%.*]] = fadd float [[F1:%.*]], 4.000000e+00
-; CHECK-NEXT: [[ADD1:%.*]] = fadd reassoc float [[ADD]], 5.000000e+00
+; CHECK-NEXT: [[ADD1:%.*]] = fadd reassoc float [[F1:%.*]], 9.000000e+00
; CHECK-NEXT: ret float [[ADD1]]
;
%add = fadd float %f1, 4.000000e+00
@@ -382,8 +381,7 @@ define float @fold10(float %f1, float %f2) {
ret float %t3
}
-; Check again with 'reassoc' and 'nsz'.
-; TODO: We may be able to remove the 'nsz' requirement.
+; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
define float @fold10_reassoc_nsz(float %f1, float %f2) {
; CHECK-LABEL: @fold10_reassoc_nsz(
; CHECK-NEXT: [[T2:%.*]] = fadd reassoc nsz float [[F1:%.*]], [[F2:%.*]]
@@ -396,14 +394,11 @@ define float @fold10_reassoc_nsz(float %f1, float %f2) {
ret float %t3
}
-; Observe that the fold is not done with only reassoc (the instructions are
-; canonicalized, but not folded).
-; TODO: As noted above, 'nsz' may not be required for this to be fully folded.
+; Check again with 'reassoc'.
define float @fold10_reassoc(float %f1, float %f2) {
; CHECK-LABEL: @fold10_reassoc(
-; CHECK-NEXT: [[T1:%.*]] = fadd reassoc float [[F1:%.*]], 2.000000e+00
-; CHECK-NEXT: [[T2:%.*]] = fadd reassoc float [[F2:%.*]], -3.000000e+00
-; CHECK-NEXT: [[T3:%.*]] = fadd reassoc float [[T1]], [[T2]]
+; CHECK-NEXT: [[T2:%.*]] = fadd reassoc float [[F1:%.*]], [[F2:%.*]]
+; CHECK-NEXT: [[T3:%.*]] = fadd reassoc float [[T2]], -1.000000e+00
; CHECK-NEXT: ret float [[T3]]
;
%t1 = fadd reassoc float 2.000000e+00, %f1
diff --git a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
index 59b8ce42380d9..5344897cf51e1 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
@@ -374,8 +374,8 @@ define void @fadd_reassoc_FMF(ptr nocapture %p, i32 %N) {
; AUTO_VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AUTO_VEC-NEXT: [[VEC_IND:%.*]] = phi <8 x float> [ <float 1.000000e+00, float 4.300000e+01, float 8.500000e+01, float 1.270000e+02, float 1.690000e+02, float 2.110000e+02, float 2.530000e+02, float 2.950000e+02>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; AUTO_VEC-NEXT: [[STEP_ADD:%.*]] = fadd reassoc <8 x float> [[VEC_IND]], <float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02>
-; AUTO_VEC-NEXT: [[STEP_ADD2:%.*]] = fadd reassoc <8 x float> [[STEP_ADD]], <float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02>
-; AUTO_VEC-NEXT: [[STEP_ADD3:%.*]] = fadd reassoc <8 x float> [[STEP_ADD2]], <float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02>
+; AUTO_VEC-NEXT: [[STEP_ADD2:%.*]] = fadd reassoc <8 x float> [[VEC_IND]], <float 6.720000e+02, float 6.720000e+02, float 6.720000e+02, float 6.720000e+02, float 6.720000e+02, float 6.720000e+02, float 6.720000e+02, float 6.720000e+02>
+; AUTO_VEC-NEXT: [[STEP_ADD3:%.*]] = fadd reassoc <8 x float> [[VEC_IND]], <float 1.008000e+03, float 1.008000e+03, float 1.008000e+03, float 1.008000e+03, float 1.008000e+03, float 1.008000e+03, float 1.008000e+03, float 1.008000e+03>
; AUTO_VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[P:%.*]], i64 [[INDEX]]
; AUTO_VEC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 32
; AUTO_VEC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 64
@@ -393,7 +393,7 @@ define void @fadd_reassoc_FMF(ptr nocapture %p, i32 %N) {
; AUTO_VEC-NEXT: store <8 x float> [[TMP8]], ptr [[TMP4]], align 4
; AUTO_VEC-NEXT: store <8 x float> [[TMP9]], ptr [[TMP5]], align 4
; AUTO_VEC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
-; AUTO_VEC-NEXT: [[VEC_IND_NEXT]] = fadd reassoc <8 x float> [[STEP_ADD3]], <float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02>
+; AUTO_VEC-NEXT: [[VEC_IND_NEXT]] = fadd reassoc <8 x float> [[VEC_IND]], <float 1.344000e+03, float 1.344000e+03, float 1.344000e+03, float 1.344000e+03, float 1.344000e+03, float 1.344000e+03, float 1.344000e+03, float 1.344000e+03>
; AUTO_VEC-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; AUTO_VEC-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; AUTO_VEC: middle.block:
diff --git a/llvm/test/Transforms/PhaseOrdering/fast-basictest.ll b/llvm/test/Transforms/PhaseOrdering/fast-basictest.ll
index 0127f05022d71..7b9299b1316d1 100644
--- a/llvm/test/Transforms/PhaseOrdering/fast-basictest.ll
+++ b/llvm/test/Transforms/PhaseOrdering/fast-basictest.ll
@@ -45,11 +45,16 @@ define float @test2_no_FMF(float %reg109, float %reg1111) {
}
define float @test2_reassoc(float %reg109, float %reg1111) {
-; CHECK-LABEL: @test2_reassoc(
-; CHECK-NEXT: [[REG115:%.*]] = fadd reassoc float [[REG109:%.*]], -3.000000e+01
-; CHECK-NEXT: [[REG116:%.*]] = fadd reassoc float [[REG115]], [[REG1111:%.*]]
-; CHECK-NEXT: [[REG117:%.*]] = fadd reassoc float [[REG116]], 3.000000e+01
-; CHECK-NEXT: ret float [[REG117]]
+; REASSOC_AND_IC-LABEL: @test2_reassoc(
+; REASSOC_AND_IC-NEXT: [[REG115:%.*]] = fadd reassoc float [[REG109:%.*]], -3.000000e+01
+; REASSOC_AND_IC-NEXT: [[REG116:%.*]] = fadd reassoc float [[REG115]], [[REG1111:%.*]]
+; REASSOC_AND_IC-NEXT: [[REG117:%.*]] = fadd reassoc float [[REG116]], 3.000000e+01
+; REASSOC_AND_IC-NEXT: ret float [[REG117]]
+;
+; O2-LABEL: @test2_reassoc(
+; O2-NEXT: [[OP_RDX:%.*]] = fadd reassoc float [[REG109:%.*]], 0.000000e+00
+; O2-NEXT: [[OP_RDX1:%.*]] = fadd reassoc float [[OP_RDX]], [[REG1111:%.*]]
+; O2-NEXT: ret float [[OP_RDX1]]
;
%reg115 = fadd reassoc float %reg109, -3.000000e+01
%reg116 = fadd reassoc float %reg115, %reg1111
diff --git a/llvm/test/Transforms/Reassociate/fast-MissedTree.ll b/llvm/test/Transforms/Reassociate/fast-MissedTree.ll
index 11bde8642e5d6..bdb9c64b0db63 100644
--- a/llvm/test/Transforms/Reassociate/fast-MissedTree.ll
+++ b/llvm/test/Transforms/Reassociate/fast-MissedTree.ll
@@ -24,12 +24,12 @@ define float @test1_reassoc_nsz(float %A, float %B) {
ret float %Z
}
-; Verify the fold is not done with only 'reassoc' ('nsz' is required).
+; Check again with only 'reassoc'.
+; 'nsz' is required to remove the fadd 0.0.
define float @test1_reassoc(float %A, float %B) {
; CHECK-LABEL: @test1_reassoc(
-; CHECK-NEXT: [[W:%.*]] = fadd reassoc float %B, -5.000000e+00
-; CHECK-NEXT: [[Y:%.*]] = fadd reassoc float %A, 5.000000e+00
-; CHECK-NEXT: [[Z:%.*]] = fadd reassoc float [[Y]], [[W]]
+; CHECK-NEXT: [[W:%.*]] = fadd reassoc float %A, %B
+; CHECK-NEXT: [[Z:%.*]] = fadd reassoc float [[W]], 0.000000e+00
; CHECK-NEXT: ret float [[Z]]
;
%W = fadd reassoc float %B, -5.0
@@ -37,3 +37,17 @@ define float @test1_reassoc(float %A, float %B) {
%Z = fadd reassoc float %W, %Y
ret float %Z
}
+
+; Verify the fold is not done without FMF.
+define float @test1_none(float %A, float %B) {
+; CHECK-LABEL: @test1_none(
+; CHECK-NEXT: [[W:%.*]] = fadd float %B, -5.000000e+00
+; CHECK-NEXT: [[Y:%.*]] = fadd float %A, 5.000000e+00
+; CHECK-NEXT: [[Z:%.*]] = fadd float [[Y]], [[W]]
+; CHECK-NEXT: ret float [[Z]]
+;
+ %W = fadd float %B, -5.0
+ %Y = fadd float %A, 5.0
+ %Z = fadd float %W, %Y
+ ret float %Z
+}
More information about the llvm-commits mailing list