[llvm] a8f13db - [InstCombine] fold shuffle of fabs
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 3 11:27:17 PST 2023
Author: Sanjay Patel
Date: 2023-02-03T14:23:17-05:00
New Revision: a8f13dbdeb31be37ee15b5febb7cc2137bbece67
URL: https://github.com/llvm/llvm-project/commit/a8f13dbdeb31be37ee15b5febb7cc2137bbece67
DIFF: https://github.com/llvm/llvm-project/commit/a8f13dbdeb31be37ee15b5febb7cc2137bbece67.diff
LOG: [InstCombine] fold shuffle of fabs
shuffle (fabs X), Mask --> fabs (shuffle X, Mask)
shuffle (fabs X), (fabs Y), Mask --> fabs (shuffle X, Y, Mask)
https://alive2.llvm.org/ce/z/JH2nkf
This generalizes the existing fneg transforms to also work with fabs.
A likely follow-up would generalize this further to move any unary
intrinsic op.
Added:
Modified:
llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
llvm/test/Transforms/InstCombine/vec_shuffle.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 61e62adbe3273..401bcf2ef7157 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -2407,37 +2407,51 @@ static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf,
return SelectInst::Create(NarrowCond, NarrowX, NarrowY);
}
-/// Canonicalize FP negate after shuffle.
-static Instruction *foldFNegShuffle(ShuffleVectorInst &Shuf,
- InstCombiner::BuilderTy &Builder) {
- Instruction *FNeg0;
+/// Canonicalize FP negate/abs after shuffle.
+static Instruction *foldShuffleOfUnaryOps(ShuffleVectorInst &Shuf,
+ InstCombiner::BuilderTy &Builder) {
+ auto *S0 = dyn_cast<Instruction>(Shuf.getOperand(0));
Value *X;
- if (!match(Shuf.getOperand(0), m_CombineAnd(m_Instruction(FNeg0),
- m_FNeg(m_Value(X)))))
+ if (!S0 || !match(S0, m_CombineOr(m_FNeg(m_Value(X)), m_FAbs(m_Value(X)))))
return nullptr;
- // shuffle (fneg X), Mask --> fneg (shuffle X, Mask)
- if (FNeg0->hasOneUse() && match(Shuf.getOperand(1), m_Undef())) {
+ bool IsFNeg = S0->getOpcode() == Instruction::FNeg;
+
+ // Match 1-input (unary) shuffle.
+ // shuffle (fneg/fabs X), Mask --> fneg/fabs (shuffle X, Mask)
+ if (S0->hasOneUse() && match(Shuf.getOperand(1), m_Undef())) {
Value *NewShuf = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());
- return UnaryOperator::CreateFNegFMF(NewShuf, FNeg0);
+ if (IsFNeg)
+ return UnaryOperator::CreateFNegFMF(NewShuf, S0);
+
+ Function *FAbs = Intrinsic::getDeclaration(Shuf.getModule(),
+ Intrinsic::fabs, Shuf.getType());
+ CallInst *NewF = CallInst::Create(FAbs, {NewShuf});
+ NewF->setFastMathFlags(S0->getFastMathFlags());
+ return NewF;
}
- Instruction *FNeg1;
+ // Match 2-input (binary) shuffle.
+ auto *S1 = dyn_cast<Instruction>(Shuf.getOperand(1));
Value *Y;
- if (!match(Shuf.getOperand(1), m_CombineAnd(m_Instruction(FNeg1),
- m_FNeg(m_Value(Y)))))
+ if (!S1 || !match(S1, m_CombineOr(m_FNeg(m_Value(Y)), m_FAbs(m_Value(Y)))) ||
+ S0->getOpcode() != S1->getOpcode() ||
+ (!S0->hasOneUse() && !S1->hasOneUse()))
return nullptr;
- // shuffle (fneg X), (fneg Y), Mask --> fneg (shuffle X, Y, Mask)
- if (FNeg0->hasOneUse() || FNeg1->hasOneUse()) {
- Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
- Instruction *NewFNeg = UnaryOperator::CreateFNeg(NewShuf);
- NewFNeg->copyIRFlags(FNeg0);
- NewFNeg->andIRFlags(FNeg1);
- return NewFNeg;
+ // shuf (fneg/fabs X), (fneg/fabs Y), Mask --> fneg/fabs (shuf X, Y, Mask)
+ Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
+ Instruction *NewF;
+ if (IsFNeg) {
+ NewF = UnaryOperator::CreateFNeg(NewShuf);
+ } else {
+ Function *FAbs = Intrinsic::getDeclaration(Shuf.getModule(),
+ Intrinsic::fabs, Shuf.getType());
+ NewF = CallInst::Create(FAbs, {NewShuf});
}
-
- return nullptr;
+ NewF->copyIRFlags(S0);
+ NewF->andIRFlags(S1);
+ return NewF;
}
/// Canonicalize casts after shuffle.
@@ -2815,7 +2829,7 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
if (Instruction *I = narrowVectorSelect(SVI, Builder))
return I;
- if (Instruction *I = foldFNegShuffle(SVI, Builder))
+ if (Instruction *I = foldShuffleOfUnaryOps(SVI, Builder))
return I;
if (Instruction *I = foldCastShuffle(SVI, Builder))
diff --git a/llvm/test/Transforms/InstCombine/vec_shuffle.ll b/llvm/test/Transforms/InstCombine/vec_shuffle.ll
index f632f8b96810f..516c99c428818 100644
--- a/llvm/test/Transforms/InstCombine/vec_shuffle.ll
+++ b/llvm/test/Transforms/InstCombine/vec_shuffle.ll
@@ -1797,8 +1797,8 @@ define <4 x i32> @PR46872(<4 x i32> %x) {
define <2 x float> @fabs_unary_shuf(<2 x float> %x) {
; CHECK-LABEL: @fabs_unary_shuf(
-; CHECK-NEXT: [[NX:%.*]] = call nnan nsz <2 x float> @llvm.fabs.v2f32(<2 x float> [[X:%.*]])
-; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[NX]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: [[R:%.*]] = call nnan nsz <2 x float> @llvm.fabs.v2f32(<2 x float> [[TMP1]])
; CHECK-NEXT: ret <2 x float> [[R]]
;
%nx = call nsz nnan <2 x float> @llvm.fabs.v2f32(<2 x float> %x)
@@ -1808,8 +1808,8 @@ define <2 x float> @fabs_unary_shuf(<2 x float> %x) {
define <4 x half> @fabs_unary_shuf_widen(<2 x half> %x) {
; CHECK-LABEL: @fabs_unary_shuf_widen(
-; CHECK-NEXT: [[NX:%.*]] = call ninf <2 x half> @llvm.fabs.v2f16(<2 x half> [[X:%.*]])
-; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x half> [[NX]], <2 x half> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 undef>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x half> [[X:%.*]], <2 x half> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 undef>
+; CHECK-NEXT: [[R:%.*]] = call ninf <4 x half> @llvm.fabs.v4f16(<4 x half> [[TMP1]])
; CHECK-NEXT: ret <4 x half> [[R]]
;
%nx = call ninf <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
@@ -1819,8 +1819,8 @@ define <4 x half> @fabs_unary_shuf_widen(<2 x half> %x) {
define <2 x double> @fabs_unary_shuf_narrow(<4 x double> %x) {
; CHECK-LABEL: @fabs_unary_shuf_narrow(
-; CHECK-NEXT: [[NX:%.*]] = call nsz <4 x double> @llvm.fabs.v4f64(<4 x double> [[X:%.*]])
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x double> [[NX]], <4 x double> poison, <2 x i32> <i32 3, i32 0>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[X:%.*]], <4 x double> poison, <2 x i32> <i32 3, i32 0>
+; CHECK-NEXT: [[R:%.*]] = call nsz <2 x double> @llvm.fabs.v2f64(<2 x double> [[TMP1]])
; CHECK-NEXT: ret <2 x double> [[R]]
;
%nx = call nsz <4 x double> @llvm.fabs.v4f64(<4 x double> %x)
@@ -1828,6 +1828,8 @@ define <2 x double> @fabs_unary_shuf_narrow(<4 x double> %x) {
ret <2 x double> %r
}
+; negative test - extra use prevents canonicalization
+
define <2 x float> @fabs_unary_shuf_use(<2 x float> %x) {
; CHECK-LABEL: @fabs_unary_shuf_use(
; CHECK-NEXT: [[NX:%.*]] = call nsz <2 x float> @llvm.fabs.v2f32(<2 x float> [[X:%.*]])
@@ -1841,11 +1843,12 @@ define <2 x float> @fabs_unary_shuf_use(<2 x float> %x) {
ret <2 x float> %r
}
+; intersect FMF
+
define <4 x float> @fabs_shuf(<4 x float> %x, <4 x float> %y) {
; CHECK-LABEL: @fabs_shuf(
-; CHECK-NEXT: [[NX:%.*]] = call ninf nsz <4 x float> @llvm.fabs.v4f32(<4 x float> [[X:%.*]])
-; CHECK-NEXT: [[NY:%.*]] = call nnan ninf <4 x float> @llvm.fabs.v4f32(<4 x float> [[Y:%.*]])
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[NX]], <4 x float> [[NY]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT: [[R:%.*]] = call ninf <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP1]])
; CHECK-NEXT: ret <4 x float> [[R]]
;
%nx = call nsz ninf <4 x float> @llvm.fabs.v4f32(<4 x float> %x)
@@ -1854,12 +1857,14 @@ define <4 x float> @fabs_shuf(<4 x float> %x, <4 x float> %y) {
ret <4 x float> %r
}
+; length-changing shuffle and extra use are ok
+
define <4 x float> @fabs_shuf_widen_use1(<2 x float> %x, <2 x float> %y) {
; CHECK-LABEL: @fabs_shuf_widen_use1(
; CHECK-NEXT: [[NX:%.*]] = call nnan <2 x float> @llvm.fabs.v2f32(<2 x float> [[X:%.*]])
; CHECK-NEXT: call void @use(<2 x float> [[NX]])
-; CHECK-NEXT: [[NY:%.*]] = call nnan <2 x float> @llvm.fabs.v2f32(<2 x float> [[Y:%.*]])
-; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[NX]], <2 x float> [[NY]], <4 x i32> <i32 undef, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y:%.*]], <4 x i32> <i32 undef, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[R:%.*]] = call nnan <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP1]])
; CHECK-NEXT: ret <4 x float> [[R]]
;
%nx = call nnan <2 x float> @llvm.fabs.v2f32(<2 x float> %x)
@@ -1869,12 +1874,14 @@ define <4 x float> @fabs_shuf_widen_use1(<2 x float> %x, <2 x float> %y) {
ret <4 x float> %r
}
+; length-changing shuffle and extra use are ok
+
define <2 x float> @fabs_shuf_narrow_use2(<4 x float> %x, <4 x float> %y) {
; CHECK-LABEL: @fabs_shuf_narrow_use2(
-; CHECK-NEXT: [[NX:%.*]] = call nnan nsz <4 x float> @llvm.fabs.v4f32(<4 x float> [[X:%.*]])
; CHECK-NEXT: [[NY:%.*]] = call nnan nsz <4 x float> @llvm.fabs.v4f32(<4 x float> [[Y:%.*]])
; CHECK-NEXT: call void @use4(<4 x float> [[NY]])
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[NX]], <4 x float> [[NY]], <2 x i32> <i32 3, i32 5>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> [[Y]], <2 x i32> <i32 3, i32 5>
+; CHECK-NEXT: [[R:%.*]] = call nnan nsz <2 x float> @llvm.fabs.v2f32(<2 x float> [[TMP1]])
; CHECK-NEXT: ret <2 x float> [[R]]
;
%nx = call nsz nnan <4 x float> @llvm.fabs.v4f32(<4 x float> %x)
@@ -1884,6 +1891,8 @@ define <2 x float> @fabs_shuf_narrow_use2(<4 x float> %x, <4 x float> %y) {
ret <2 x float> %r
}
+; negative test - too many extra uses
+
define <2 x float> @fabs_shuf_use3(<2 x float> %x, <2 x float> %y) {
; CHECK-LABEL: @fabs_shuf_use3(
; CHECK-NEXT: [[NX:%.*]] = call nnan <2 x float> @llvm.fabs.v2f32(<2 x float> [[X:%.*]])
@@ -2016,6 +2025,8 @@ define <2 x float> @fneg_shuf_use3(<2 x float> %x, <2 x float> %y) {
ret <2 x float> %r
}
+; negative test - mixed opcodes
+
define <4 x float> @fabs_fneg_shuf(<4 x float> %x, <4 x float> %y) {
; CHECK-LABEL: @fabs_fneg_shuf(
; CHECK-NEXT: [[NX:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[X:%.*]])
More information about the llvm-commits
mailing list