[llvm] 6b85fb1 - [SLP]Consider (f)sub, being operand of llvm.(f)abs/icmp eq/ne 0, commutative.
via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 11 11:56:43 PDT 2024
Author: Alexey Bataev
Date: 2024-04-11T14:56:39-04:00
New Revision: 6b85fb1ef8586181fbdb02ca11a3bb0e9006aabd
URL: https://github.com/llvm/llvm-project/commit/6b85fb1ef8586181fbdb02ca11a3bb0e9006aabd
DIFF: https://github.com/llvm/llvm-project/commit/6b85fb1ef8586181fbdb02ca11a3bb0e9006aabd.diff
LOG: [SLP]Consider (f)sub, being operand of llvm.(f)abs/icmp eq/ne 0, commutative.
If an (f)sub is used only as the operand of llvm.(f)abs or of an icmp eq/ne with 0 (integer case only), it can be treated as a commutative operation; the wrapping flags just need to be dropped for the integer
operation.
See https://alive2.llvm.org/ce/z/GxvxjB for the correctness of abs with dropped
flags.
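For intuition, a small LLVM IR sketch (value names are illustrative, not taken from the patch) of the two patterns involved: an integer sub whose only user is an icmp eq/ne against zero, and a sub whose only user is llvm.abs. In both cases swapping the sub operands preserves the result, provided any nuw/nsw flags are dropped on the swapped form.

  declare i32 @llvm.abs.i32(i32, i1 immarg)

  ; The sub feeds only an icmp eq against zero: %a - %b == 0 iff %b - %a == 0,
  ; so the operands may be swapped once the nuw/nsw flags are dropped.
  define i1 @icmp_case(i32 %a, i32 %b) {
    %d = sub nuw nsw i32 %a, %b
    %c = icmp eq i32 %d, 0
    ret i1 %c
  }

  ; The sub feeds only llvm.abs: for the plain (wrapping) sub,
  ; abs(%a - %b) == abs(%b - %a), so swapping is fine once nsw is dropped.
  define i32 @abs_case(i32 %a, i32 %b) {
    %d = sub i32 %a, %b
    %r = call i32 @llvm.abs.i32(i32 %d, i1 false)
    ret i32 %r
  }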
Reviewers: RKSimon
Reviewed By: RKSimon
Pull Request: https://github.com/llvm/llvm-project/pull/86196
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/X86/store-abs-minbitwidth.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 2995cb8435f769..55dc0867e2ea9b 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -305,7 +305,32 @@ static bool isCommutative(Instruction *I) {
if (auto *Cmp = dyn_cast<CmpInst>(I))
return Cmp->isCommutative();
if (auto *BO = dyn_cast<BinaryOperator>(I))
- return BO->isCommutative();
+ return BO->isCommutative() ||
+ (BO->getOpcode() == Instruction::Sub &&
+ !BO->hasNUsesOrMore(UsesLimit) &&
+ all_of(
+ BO->uses(),
+ [](const Use &U) {
+ // Commutative, if icmp eq/ne sub, 0
+ ICmpInst::Predicate Pred;
+ if (match(U.getUser(),
+ m_ICmp(Pred, m_Specific(U.get()), m_Zero())) &&
+ (Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE))
+ return true;
+ // Commutative, if abs(sub nsw, true) or abs(sub, false).
+ ConstantInt *Flag;
+ return match(U.getUser(),
+ m_Intrinsic<Intrinsic::abs>(
+ m_Specific(U.get()), m_ConstantInt(Flag))) &&
+ (!cast<Instruction>(U.get())->hasNoSignedWrap() ||
+ Flag->isOne());
+ })) ||
+ (BO->getOpcode() == Instruction::FSub &&
+ !BO->hasNUsesOrMore(UsesLimit) &&
+ all_of(BO->uses(), [](const Use &U) {
+ return match(U.getUser(),
+ m_Intrinsic<Intrinsic::fabs>(m_Specific(U.get())));
+ }));
return I->isCommutative();
}
@@ -6838,7 +6863,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// Sort operands of the instructions so that each side is more likely to
// have the same opcode.
- if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) {
+ if (isa<BinaryOperator>(VL0) && isCommutative(VL0)) {
ValueList Left, Right;
reorderInputsAccordingToOpcode(VL, Left, Right, *this);
TE->setOperand(0, Left);
@@ -12566,8 +12591,15 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
static_cast<Instruction::BinaryOps>(E->getOpcode()), LHS,
RHS);
propagateIRFlags(V, E->Scalars, VL0, It == MinBWs.end());
- if (auto *I = dyn_cast<Instruction>(V))
+ if (auto *I = dyn_cast<Instruction>(V)) {
V = propagateMetadata(I, E->Scalars);
+ // Drop nuw flags for abs(sub(commutative), true).
+ if (!MinBWs.contains(E) && ShuffleOrOp == Instruction::Sub &&
+ any_of(E->Scalars, [](Value *V) {
+ return isCommutative(cast<Instruction>(V));
+ }))
+ I->setHasNoUnsignedWrap(/*b=*/false);
+ }
V = FinalShuffle(V, E, VecTy);
@@ -12893,6 +12925,19 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
propagateIRFlags(V0, OpScalars, E->getMainOp(), It == MinBWs.end());
propagateIRFlags(V1, AltScalars, E->getAltOp(), It == MinBWs.end());
+ auto DropNuwFlag = [&](Value *Vec, unsigned Opcode) {
+ // Drop nuw flags for abs(sub(commutative), true).
+ if (auto *I = dyn_cast<Instruction>(Vec);
+ I && Opcode == Instruction::Sub && !MinBWs.contains(E) &&
+ any_of(E->Scalars, [](Value *V) {
+ auto *I = cast<Instruction>(V);
+ return I->getOpcode() == Instruction::Sub &&
+ isCommutative(cast<Instruction>(V));
+ }))
+ I->setHasNoUnsignedWrap(/*b=*/false);
+ };
+ DropNuwFlag(V0, E->getOpcode());
+ DropNuwFlag(V1, E->getAltOpcode());
Value *V = Builder.CreateShuffleVector(V0, V1, Mask);
if (auto *I = dyn_cast<Instruction>(V)) {
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/store-abs-minbitwidth.ll b/llvm/test/Transforms/SLPVectorizer/X86/store-abs-minbitwidth.ll
index df7312e3d2b56c..64bfb242752c10 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/store-abs-minbitwidth.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/store-abs-minbitwidth.ll
@@ -4,19 +4,9 @@
define i32 @test(ptr noalias %in, ptr noalias %inn, ptr %out) {
; CHECK-LABEL: @test(
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i8>, ptr [[IN:%.*]], align 1
-; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i8, ptr [[IN]], i64 2
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i8>, ptr [[GEP_2]], align 1
-; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i8>, ptr [[INN:%.*]], align 1
-; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i8, ptr [[INN]], i64 2
-; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i8>, ptr [[GEP_5]], align 1
-; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i8> [[TMP3]], <2 x i8> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i8> [[TMP2]], <2 x i8> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i8> [[TMP5]], <4 x i8> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT: [[TMP7:%.*]] = load <4 x i8>, ptr [[IN:%.*]], align 1
+; CHECK-NEXT: [[TMP11:%.*]] = load <4 x i8>, ptr [[INN:%.*]], align 1
; CHECK-NEXT: [[TMP8:%.*]] = sext <4 x i8> [[TMP7]] to <4 x i16>
-; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i8> [[TMP4]], <2 x i8> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> [[TMP10]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: [[TMP12:%.*]] = sext <4 x i8> [[TMP11]] to <4 x i16>
; CHECK-NEXT: [[TMP13:%.*]] = sub <4 x i16> [[TMP12]], [[TMP8]]
; CHECK-NEXT: [[TMP15:%.*]] = call <4 x i16> @llvm.abs.v4i16(<4 x i16> [[TMP13]], i1 false)
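As a rough sketch of the nuw handling in the vectorizeTree changes above (invented names, not the actual test output): if operand reordering swapped the sub operands in some lanes, the wide sub must be emitted without nuw, while the abs call itself is unchanged.

  declare <4 x i16> @llvm.abs.v4i16(<4 x i16>, i1 immarg)

  define <4 x i16> @vectorized_sketch(<4 x i16> %x, <4 x i16> %y) {
    ; Even if every scalar sub carried nuw, swapped lanes no longer guarantee
    ; no unsigned wrap, so the vector sub is emitted without the flag.
    %d = sub <4 x i16> %x, %y
    %r = call <4 x i16> @llvm.abs.v4i16(<4 x i16> %d, i1 false)
    ret <4 x i16> %r
  }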