[llvm] [VectorCombine] Preserves the maximal legal FPMathFlags during foldShuffleToIdentity (PR #94295)
Henry Jiang via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 4 10:21:32 PDT 2024
https://github.com/mustartt updated https://github.com/llvm/llvm-project/pull/94295
From 5ced37af58d5715ca715c74bd55e6e1d7711dcf8 Mon Sep 17 00:00:00 2001
From: Henry Jiang <henry.jiang1 at ibm.com>
Date: Mon, 3 Jun 2024 15:46:28 -0400
Subject: [PATCH 1/3] Preserves the maximal legal FPMathFlags during foldShuffleToIdentity
---
.../Transforms/Vectorize/VectorCombine.cpp | 70 +++++++++++++++----
.../AArch64/shuffletoidentity.ll | 47 ++++++++++++-
2 files changed, 102 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 7ecfe5218ef67..8eb23e1112bcd 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -14,6 +14,7 @@
#include "llvm/Transforms/Vectorize/VectorCombine.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
@@ -26,6 +27,8 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -1736,23 +1739,64 @@ static Value *generateNewInstTree(ArrayRef<InstLane> Item, FixedVectorType *Ty,
Ops[Idx] = generateNewInstTree(generateInstLaneVectorFromOperand(Item, Idx),
Ty, IdentityLeafs, SplatLeafs, Builder);
}
+
+ FastMathFlags FMF;
+ FMF.setFast();
+ for_each(Item, [&FMF, Lane = FrontLane](const InstLane &E) {
+ if (E.second != Lane)
+ return;
+ auto *I = cast<Instruction>(E.first);
+ if (isa<FPMathOperator>(I))
+ FMF &= I->getFastMathFlags();
+ });
+
Builder.SetInsertPoint(I);
Type *DstTy =
FixedVectorType::get(I->getType()->getScalarType(), Ty->getNumElements());
- if (auto *BI = dyn_cast<BinaryOperator>(I))
- return Builder.CreateBinOp((Instruction::BinaryOps)BI->getOpcode(), Ops[0],
- Ops[1]);
- if (auto *CI = dyn_cast<CmpInst>(I))
- return Builder.CreateCmp(CI->getPredicate(), Ops[0], Ops[1]);
- if (auto *SI = dyn_cast<SelectInst>(I))
- return Builder.CreateSelect(Ops[0], Ops[1], Ops[2], "", SI);
- if (auto *CI = dyn_cast<CastInst>(I))
- return Builder.CreateCast((Instruction::CastOps)CI->getOpcode(), Ops[0],
- DstTy);
- if (II)
- return Builder.CreateIntrinsic(DstTy, II->getIntrinsicID(), Ops);
+ if (auto *BI = dyn_cast<BinaryOperator>(I)) {
+ auto *Value = Builder.CreateBinOp((Instruction::BinaryOps)BI->getOpcode(),
+ Ops[0], Ops[1]);
+ if (auto *Inst = dyn_cast<Instruction>(Value); isa<FPMathOperator>(Inst)) {
+ Inst->setFastMathFlags(FMF);
+ }
+ return Value;
+ }
+ if (auto *CI = dyn_cast<CmpInst>(I)) {
+ auto *Value = Builder.CreateCmp(CI->getPredicate(), Ops[0], Ops[1]);
+ if (auto *Inst = dyn_cast<Instruction>(Value); isa<FPMathOperator>(Inst)) {
+ Inst->setFastMathFlags(FMF);
+ }
+ return Value;
+ }
+ if (auto *SI = dyn_cast<SelectInst>(I)) {
+ auto *Value = Builder.CreateSelect(Ops[0], Ops[1], Ops[2], "", SI);
+ if (auto *Inst = dyn_cast<Instruction>(Value); isa<FPMathOperator>(Inst)) {
+ Inst->setFastMathFlags(FMF);
+ }
+ return Value;
+ }
+ if (auto *CI = dyn_cast<CastInst>(I)) {
+ auto *Value = Builder.CreateCast((Instruction::CastOps)CI->getOpcode(),
+ Ops[0], DstTy);
+ if (auto *Inst = dyn_cast<Instruction>(Value); isa<FPMathOperator>(Inst)) {
+ Inst->setFastMathFlags(FMF);
+ }
+ return Value;
+ }
+ if (II) {
+ auto *Value = Builder.CreateIntrinsic(DstTy, II->getIntrinsicID(), Ops);
+ if (auto *Inst = dyn_cast<Instruction>(Value); isa<FPMathOperator>(Inst)) {
+ Inst->setFastMathFlags(FMF);
+ }
+ return Value;
+ }
assert(isa<UnaryInstruction>(I) && "Unexpected instruction type in Generate");
- return Builder.CreateUnOp((Instruction::UnaryOps)I->getOpcode(), Ops[0]);
+ auto *Value =
+ Builder.CreateUnOp((Instruction::UnaryOps)I->getOpcode(), Ops[0]);
+ if (auto *Inst = dyn_cast<Instruction>(Value); isa<FPMathOperator>(Inst)) {
+ Inst->setFastMathFlags(FMF);
+ }
+ return Value;
}
// Starting from a shuffle, look up through operands tracking the shuffled index
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
index c2e9be5688967..3e54099fc0970 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
@@ -828,10 +828,10 @@ define void @v8f64interleave(i64 %0, ptr %1, ptr %x, double %z) {
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[Z:%.*]], i64 0
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x double>, ptr [[TMP1:%.*]], align 8
-; CHECK-NEXT: [[TMP3:%.*]] = fmul <16 x double> [[WIDE_VEC]], [[TMP2]]
+; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <16 x double> [[WIDE_VEC]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[X:%.*]], i64 [[TMP0:%.*]]
; CHECK-NEXT: [[WIDE_VEC34:%.*]] = load <16 x double>, ptr [[TMP4]], align 8
-; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = fadd <16 x double> [[WIDE_VEC34]], [[TMP3]]
+; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = fadd fast <16 x double> [[WIDE_VEC34]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = or disjoint i64 [[TMP0]], 7
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 -56
@@ -937,5 +937,48 @@ define <4 x float> @fadd_mismatched_types(<4 x float> %x, <4 x float> %y) {
ret <4 x float> %extshuf
}
+define void @maximal_legal_fpmath(ptr %addr1, ptr %addr2, ptr %result, float %val) {
+; CHECK-LABEL: define void @maximal_legal_fpmath(
+; CHECK-SAME: ptr [[ADDR1:%.*]], ptr [[ADDR2:%.*]], ptr [[RESULT:%.*]], float [[VAL:%.*]]) {
+; CHECK-NEXT: [[SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[VAL]], i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[SPLATINSERT]], <4 x float> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[VEC1:%.*]] = load <16 x float>, ptr [[ADDR1]], align 4
+; CHECK-NEXT: [[VEC2:%.*]] = load <16 x float>, ptr [[ADDR2]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = fmul contract <16 x float> [[TMP1]], [[VEC2]]
+; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = fadd reassoc contract <16 x float> [[VEC1]], [[TMP2]]
+; CHECK-NEXT: store <16 x float> [[INTERLEAVED_VEC]], ptr [[RESULT]], align 4
+; CHECK-NEXT: ret void
+;
+ %splatinsert = insertelement <4 x float> poison, float %val, i64 0
+ %incoming.vec = shufflevector <4 x float> %splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
+
+ %vec1 = load <16 x float>, ptr %addr1, align 4
+ %strided.vec1 = shufflevector <16 x float> %vec1, <16 x float> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
+ %strided.vec2 = shufflevector <16 x float> %vec1, <16 x float> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
+ %strided.vec3 = shufflevector <16 x float> %vec1, <16 x float> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
+ %strided.vec4 = shufflevector <16 x float> %vec1, <16 x float> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
+
+ %vec2 = load <16 x float>, ptr %addr2, align 4
+ %strided.vec6 = shufflevector <16 x float> %vec2, <16 x float> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
+ %strided.vec7 = shufflevector <16 x float> %vec2, <16 x float> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
+ %strided.vec8 = shufflevector <16 x float> %vec2, <16 x float> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
+ %strided.vec9 = shufflevector <16 x float> %vec2, <16 x float> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
+
+ %1 = fmul fast <4 x float> %incoming.vec, %strided.vec6
+ %2 = fadd fast <4 x float> %strided.vec1, %1
+ %3 = fmul contract <4 x float> %incoming.vec, %strided.vec7
+ %4 = fadd fast <4 x float> %strided.vec2, %3
+ %5 = fmul contract reassoc <4 x float> %incoming.vec, %strided.vec8
+ %6 = fadd fast <4 x float> %strided.vec3, %5
+ %7 = fmul contract reassoc <4 x float> %incoming.vec, %strided.vec9
+ %8 = fadd contract reassoc <4 x float> %strided.vec4, %7
+
+ %9 = shufflevector <4 x float> %2, <4 x float> %4, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %10 = shufflevector <4 x float> %6, <4 x float> %8, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %interleaved.vec = shufflevector <8 x float> %9, <8 x float> %10, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+ store <16 x float> %interleaved.vec, ptr %result, align 4
+
+ ret void
+}
declare void @use(<4 x i8>)
From ed622ce9e1a257e8c9fd99d418a02a35ec1b364d Mon Sep 17 00:00:00 2001
From: Henry Jiang <henry.jiang1 at ibm.com>
Date: Tue, 4 Jun 2024 12:23:25 -0400
Subject: [PATCH 2/3] Propagate all IR Flags when folding shuffles to identity.
---
.../Transforms/Vectorize/VectorCombine.cpp | 41 +++++++------------
1 file changed, 14 insertions(+), 27 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 8eb23e1112bcd..4ed611be69e8c 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -30,9 +30,11 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
+#include <iterator>
#include <numeric>
#include <queue>
@@ -1740,15 +1742,12 @@ static Value *generateNewInstTree(ArrayRef<InstLane> Item, FixedVectorType *Ty,
Ty, IdentityLeafs, SplatLeafs, Builder);
}
- FastMathFlags FMF;
- FMF.setFast();
- for_each(Item, [&FMF, Lane = FrontLane](const InstLane &E) {
- if (E.second != Lane)
- return;
- auto *I = cast<Instruction>(E.first);
- if (isa<FPMathOperator>(I))
- FMF &= I->getFastMathFlags();
- });
+ SmallVector<Value *, 8> ValueList;
+ for (const auto &Lane : Item) {
+ if (Lane.second != FrontLane || !Lane.first)
+ continue;
+ ValueList.push_back(Lane.first);
+ }
Builder.SetInsertPoint(I);
Type *DstTy =
@@ -1756,46 +1755,34 @@ static Value *generateNewInstTree(ArrayRef<InstLane> Item, FixedVectorType *Ty,
if (auto *BI = dyn_cast<BinaryOperator>(I)) {
auto *Value = Builder.CreateBinOp((Instruction::BinaryOps)BI->getOpcode(),
Ops[0], Ops[1]);
- if (auto *Inst = dyn_cast<Instruction>(Value); isa<FPMathOperator>(Inst)) {
- Inst->setFastMathFlags(FMF);
- }
+ propagateIRFlags(Value, ValueList);
return Value;
}
if (auto *CI = dyn_cast<CmpInst>(I)) {
auto *Value = Builder.CreateCmp(CI->getPredicate(), Ops[0], Ops[1]);
- if (auto *Inst = dyn_cast<Instruction>(Value); isa<FPMathOperator>(Inst)) {
- Inst->setFastMathFlags(FMF);
- }
+ propagateIRFlags(Value, ValueList);
return Value;
}
if (auto *SI = dyn_cast<SelectInst>(I)) {
auto *Value = Builder.CreateSelect(Ops[0], Ops[1], Ops[2], "", SI);
- if (auto *Inst = dyn_cast<Instruction>(Value); isa<FPMathOperator>(Inst)) {
- Inst->setFastMathFlags(FMF);
- }
+ propagateIRFlags(Value, ValueList);
return Value;
}
if (auto *CI = dyn_cast<CastInst>(I)) {
auto *Value = Builder.CreateCast((Instruction::CastOps)CI->getOpcode(),
Ops[0], DstTy);
- if (auto *Inst = dyn_cast<Instruction>(Value); isa<FPMathOperator>(Inst)) {
- Inst->setFastMathFlags(FMF);
- }
+ propagateIRFlags(Value, ValueList);
return Value;
}
if (II) {
auto *Value = Builder.CreateIntrinsic(DstTy, II->getIntrinsicID(), Ops);
- if (auto *Inst = dyn_cast<Instruction>(Value); isa<FPMathOperator>(Inst)) {
- Inst->setFastMathFlags(FMF);
- }
+ propagateIRFlags(Value, ValueList);
return Value;
}
assert(isa<UnaryInstruction>(I) && "Unexpected instruction type in Generate");
auto *Value =
Builder.CreateUnOp((Instruction::UnaryOps)I->getOpcode(), Ops[0]);
- if (auto *Inst = dyn_cast<Instruction>(Value); isa<FPMathOperator>(Inst)) {
- Inst->setFastMathFlags(FMF);
- }
+ propagateIRFlags(Value, ValueList);
return Value;
}
From e382ba14160520667f5c7e64f36c092ab4dcf02f Mon Sep 17 00:00:00 2001
From: Henry Jiang <henry.jiang1 at ibm.com>
Date: Tue, 4 Jun 2024 13:20:36 -0400
Subject: [PATCH 3/3] Removed unused headers and propagate IR Flags using all lanes
---
llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 12 +++---------
1 file changed, 3 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 4ed611be69e8c..e608c7fb60468 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -27,14 +27,10 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
-#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
-#include <iterator>
#include <numeric>
#include <queue>
@@ -1743,11 +1739,9 @@ static Value *generateNewInstTree(ArrayRef<InstLane> Item, FixedVectorType *Ty,
}
SmallVector<Value *, 8> ValueList;
- for (const auto &Lane : Item) {
- if (Lane.second != FrontLane || !Lane.first)
- continue;
- ValueList.push_back(Lane.first);
- }
+ for (const auto &Lane : Item)
+ if (Lane.first)
+ ValueList.push_back(Lane.first);
Builder.SetInsertPoint(I);
Type *DstTy =
More information about the llvm-commits mailing list