[llvm] 49950cb - [SLP] restrict matching of load combine candidates
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Tue May 11 05:53:49 PDT 2021
Author: Sanjay Patel
Date: 2021-05-11T08:46:40-04:00
New Revision: 49950cb1f6f699cbb9d8f141c0c043d4795c3417
URL: https://github.com/llvm/llvm-project/commit/49950cb1f6f699cbb9d8f141c0c043d4795c3417
DIFF: https://github.com/llvm/llvm-project/commit/49950cb1f6f699cbb9d8f141c0c043d4795c3417.diff
LOG: [SLP] restrict matching of load combine candidates
The test example from https://llvm.org/PR50256 (reduced here) shows
that we could match a load combine candidate even when there are no
"or" instructions in the pattern. We can avoid that by confirming
that we actually see an "or" while walking the chain of operations.
This requirement doesn't apply when matching an or-reduction because
that match begins from the operands of the reduction.
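
For illustration, here is one scalar lane of that pattern (a sketch
distilled from the test diff below; the value names are invented).
The chain is just zext -> shl -> store, with no "or" anywhere, so it
should not be treated as a bswap-style load combine:

  %x = load i8, i8* %a, align 1
  %z = zext i8 %x to i16          ; widen the loaded byte
  %s = shl nuw i16 %z, 8          ; shift by a multiple of 8 bits
  store i16 %s, i16* %b, align 2  ; stored without ever forming an "or"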
Differential Revision: https://reviews.llvm.org/D102074
Added:

Modified:
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
    llvm/test/Transforms/SLPVectorizer/AArch64/widen.ll

Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index e656b189c7793..222062da13fca 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -4066,21 +4066,27 @@ bool BoUpSLP::isFullyVectorizableTinyTree() const {
 }
 
 static bool isLoadCombineCandidateImpl(Value *Root, unsigned NumElts,
-                                       TargetTransformInfo *TTI) {
+                                       TargetTransformInfo *TTI,
+                                       bool MustMatchOrInst) {
   // Look past the root to find a source value. Arbitrarily follow the
   // path through operand 0 of any 'or'. Also, peek through optional
   // shift-left-by-multiple-of-8-bits.
   Value *ZextLoad = Root;
   const APInt *ShAmtC;
+  bool FoundOr = false;
   while (!isa<ConstantExpr>(ZextLoad) &&
          (match(ZextLoad, m_Or(m_Value(), m_Value())) ||
           (match(ZextLoad, m_Shl(m_Value(), m_APInt(ShAmtC))) &&
-           ShAmtC->urem(8) == 0)))
-    ZextLoad = cast<BinaryOperator>(ZextLoad)->getOperand(0);
-
+           ShAmtC->urem(8) == 0))) {
+    auto *BinOp = cast<BinaryOperator>(ZextLoad);
+    ZextLoad = BinOp->getOperand(0);
+    if (BinOp->getOpcode() == Instruction::Or)
+      FoundOr = true;
+  }
   // Check if the input is an extended load of the required or/shift expression.
   Value *LoadPtr;
-  if (ZextLoad == Root || !match(ZextLoad, m_ZExt(m_Load(m_Value(LoadPtr)))))
+  if ((MustMatchOrInst && !FoundOr) || ZextLoad == Root ||
+      !match(ZextLoad, m_ZExt(m_Load(m_Value(LoadPtr)))))
     return false;
 
   // Require that the total load bit width is a legal integer type.
@@ -4105,7 +4111,8 @@ bool BoUpSLP::isLoadCombineReductionCandidate(RecurKind RdxKind) const {
 
   unsigned NumElts = VectorizableTree[0]->Scalars.size();
   Value *FirstReduced = VectorizableTree[0]->Scalars[0];
-  return isLoadCombineCandidateImpl(FirstReduced, NumElts, TTI);
+  return isLoadCombineCandidateImpl(FirstReduced, NumElts, TTI,
+                                    /* MatchOr */ false);
 }
 
 bool BoUpSLP::isLoadCombineCandidate() const {
@@ -4115,7 +4122,7 @@ bool BoUpSLP::isLoadCombineCandidate() const {
   for (Value *Scalar : VectorizableTree[0]->Scalars) {
     Value *X;
     if (!match(Scalar, m_Store(m_Value(X), m_Value())) ||
-        !isLoadCombineCandidateImpl(X, NumElts, TTI))
+        !isLoadCombineCandidateImpl(X, NumElts, TTI, /* MatchOr */ true))
       return false;
   }
   return true;
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/widen.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/widen.ll
index 89a01b2fba3b2..3129ba79dafaa 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/widen.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/widen.ll
@@ -4,7 +4,6 @@
 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64"
 
-; FIXME:
 ; This should not be matched as a load combining candidate.
 ; There are no 'or' operations, so it can't be a bswap or
 ; other pattern that we are expecting the backend to handle.
@@ -26,54 +25,14 @@ define void @PR50256(i8* %a, i16* %b, i32 %n) {
 ; CHECK-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 13
 ; CHECK-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 14
 ; CHECK-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 15
-; CHECK-NEXT: [[I:%.*]] = load i8, i8* [[A]], align 1
-; CHECK-NEXT: [[I1:%.*]] = load i8, i8* [[ARRAYIDX_1]], align 1
-; CHECK-NEXT: [[I2:%.*]] = load i8, i8* [[ARRAYIDX_2]], align 1
-; CHECK-NEXT: [[I3:%.*]] = load i8, i8* [[ARRAYIDX_3]], align 1
-; CHECK-NEXT: [[I4:%.*]] = load i8, i8* [[ARRAYIDX_4]], align 1
-; CHECK-NEXT: [[I5:%.*]] = load i8, i8* [[ARRAYIDX_5]], align 1
-; CHECK-NEXT: [[I6:%.*]] = load i8, i8* [[ARRAYIDX_6]], align 1
-; CHECK-NEXT: [[I7:%.*]] = load i8, i8* [[ARRAYIDX_7]], align 1
-; CHECK-NEXT: [[I8:%.*]] = load i8, i8* [[ARRAYIDX_8]], align 1
-; CHECK-NEXT: [[I9:%.*]] = load i8, i8* [[ARRAYIDX_9]], align 1
-; CHECK-NEXT: [[I10:%.*]] = load i8, i8* [[ARRAYIDX_10]], align 1
-; CHECK-NEXT: [[I11:%.*]] = load i8, i8* [[ARRAYIDX_11]], align 1
-; CHECK-NEXT: [[I12:%.*]] = load i8, i8* [[ARRAYIDX_12]], align 1
-; CHECK-NEXT: [[I13:%.*]] = load i8, i8* [[ARRAYIDX_13]], align 1
-; CHECK-NEXT: [[I14:%.*]] = load i8, i8* [[ARRAYIDX_14]], align 1
-; CHECK-NEXT: [[I15:%.*]] = load i8, i8* [[ARRAYIDX_15]], align 1
-; CHECK-NEXT: [[CONV5:%.*]] = zext i8 [[I]] to i16
-; CHECK-NEXT: [[CONV5_1:%.*]] = zext i8 [[I1]] to i16
-; CHECK-NEXT: [[CONV5_2:%.*]] = zext i8 [[I2]] to i16
-; CHECK-NEXT: [[CONV5_3:%.*]] = zext i8 [[I3]] to i16
-; CHECK-NEXT: [[CONV5_4:%.*]] = zext i8 [[I4]] to i16
-; CHECK-NEXT: [[CONV5_5:%.*]] = zext i8 [[I5]] to i16
-; CHECK-NEXT: [[CONV5_6:%.*]] = zext i8 [[I6]] to i16
-; CHECK-NEXT: [[CONV5_7:%.*]] = zext i8 [[I7]] to i16
-; CHECK-NEXT: [[CONV5_8:%.*]] = zext i8 [[I8]] to i16
-; CHECK-NEXT: [[CONV5_9:%.*]] = zext i8 [[I9]] to i16
-; CHECK-NEXT: [[CONV5_10:%.*]] = zext i8 [[I10]] to i16
-; CHECK-NEXT: [[CONV5_11:%.*]] = zext i8 [[I11]] to i16
-; CHECK-NEXT: [[CONV5_12:%.*]] = zext i8 [[I12]] to i16
-; CHECK-NEXT: [[CONV5_13:%.*]] = zext i8 [[I13]] to i16
-; CHECK-NEXT: [[CONV5_14:%.*]] = zext i8 [[I14]] to i16
-; CHECK-NEXT: [[CONV5_15:%.*]] = zext i8 [[I15]] to i16
-; CHECK-NEXT: [[SHL:%.*]] = shl nuw i16 [[CONV5]], 8
-; CHECK-NEXT: [[SHL_1:%.*]] = shl nuw i16 [[CONV5_1]], 8
-; CHECK-NEXT: [[SHL_2:%.*]] = shl nuw i16 [[CONV5_2]], 8
-; CHECK-NEXT: [[SHL_3:%.*]] = shl nuw i16 [[CONV5_3]], 8
-; CHECK-NEXT: [[SHL_4:%.*]] = shl nuw i16 [[CONV5_4]], 8
-; CHECK-NEXT: [[SHL_5:%.*]] = shl nuw i16 [[CONV5_5]], 8
-; CHECK-NEXT: [[SHL_6:%.*]] = shl nuw i16 [[CONV5_6]], 8
-; CHECK-NEXT: [[SHL_7:%.*]] = shl nuw i16 [[CONV5_7]], 8
-; CHECK-NEXT: [[SHL_8:%.*]] = shl nuw i16 [[CONV5_8]], 8
-; CHECK-NEXT: [[SHL_9:%.*]] = shl nuw i16 [[CONV5_9]], 8
-; CHECK-NEXT: [[SHL_10:%.*]] = shl nuw i16 [[CONV5_10]], 8
-; CHECK-NEXT: [[SHL_11:%.*]] = shl nuw i16 [[CONV5_11]], 8
-; CHECK-NEXT: [[SHL_12:%.*]] = shl nuw i16 [[CONV5_12]], 8
-; CHECK-NEXT: [[SHL_13:%.*]] = shl nuw i16 [[CONV5_13]], 8
-; CHECK-NEXT: [[SHL_14:%.*]] = shl nuw i16 [[CONV5_14]], 8
-; CHECK-NEXT: [[SHL_15:%.*]] = shl nuw i16 [[CONV5_15]], 8
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[A]] to <8 x i8>*
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[ARRAYIDX_8]] to <8 x i8>*
+; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[TMP3]], align 1
+; CHECK-NEXT: [[TMP5:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i16>
+; CHECK-NEXT: [[TMP6:%.*]] = zext <8 x i8> [[TMP4]] to <8 x i16>
+; CHECK-NEXT: [[TMP7:%.*]] = shl nuw <8 x i16> [[TMP5]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+; CHECK-NEXT: [[TMP8:%.*]] = shl nuw <8 x i16> [[TMP6]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
 ; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds i16, i16* [[B:%.*]], i64 1
 ; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds i16, i16* [[B]], i64 2
 ; CHECK-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds i16, i16* [[B]], i64 3
@@ -89,22 +48,10 @@ define void @PR50256(i8* %a, i16* %b, i32 %n) {
 ; CHECK-NEXT: [[ARRAYIDX3_13:%.*]] = getelementptr inbounds i16, i16* [[B]], i64 13
 ; CHECK-NEXT: [[ARRAYIDX3_14:%.*]] = getelementptr inbounds i16, i16* [[B]], i64 14
 ; CHECK-NEXT: [[ARRAYIDX3_15:%.*]] = getelementptr inbounds i16, i16* [[B]], i64 15
-; CHECK-NEXT: store i16 [[SHL]], i16* [[B]], align 2
-; CHECK-NEXT: store i16 [[SHL_1]], i16* [[ARRAYIDX3_1]], align 2
-; CHECK-NEXT: store i16 [[SHL_2]], i16* [[ARRAYIDX3_2]], align 2
-; CHECK-NEXT: store i16 [[SHL_3]], i16* [[ARRAYIDX3_3]], align 2
-; CHECK-NEXT: store i16 [[SHL_4]], i16* [[ARRAYIDX3_4]], align 2
-; CHECK-NEXT: store i16 [[SHL_5]], i16* [[ARRAYIDX3_5]], align 2
-; CHECK-NEXT: store i16 [[SHL_6]], i16* [[ARRAYIDX3_6]], align 2
-; CHECK-NEXT: store i16 [[SHL_7]], i16* [[ARRAYIDX3_7]], align 2
-; CHECK-NEXT: store i16 [[SHL_8]], i16* [[ARRAYIDX3_8]], align 2
-; CHECK-NEXT: store i16 [[SHL_9]], i16* [[ARRAYIDX3_9]], align 2
-; CHECK-NEXT: store i16 [[SHL_10]], i16* [[ARRAYIDX3_10]], align 2
-; CHECK-NEXT: store i16 [[SHL_11]], i16* [[ARRAYIDX3_11]], align 2
-; CHECK-NEXT: store i16 [[SHL_12]], i16* [[ARRAYIDX3_12]], align 2
-; CHECK-NEXT: store i16 [[SHL_13]], i16* [[ARRAYIDX3_13]], align 2
-; CHECK-NEXT: store i16 [[SHL_14]], i16* [[ARRAYIDX3_14]], align 2
-; CHECK-NEXT: store i16 [[SHL_15]], i16* [[ARRAYIDX3_15]], align 2
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16* [[B]] to <8 x i16>*
+; CHECK-NEXT: store <8 x i16> [[TMP7]], <8 x i16>* [[TMP9]], align 2
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16* [[ARRAYIDX3_8]] to <8 x i16>*
+; CHECK-NEXT: store <8 x i16> [[TMP8]], <8 x i16>* [[TMP10]], align 2
 ; CHECK-NEXT: ret void
 ;
   %arrayidx.1 = getelementptr inbounds i8, i8* %a, i64 1
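
For contrast with the test above, a chain that isLoadCombineCandidateImpl
is still meant to accept has an "or" on the operand-0 path from the stored
value back to the extended load. A hypothetical 2-byte example (value names
invented for illustration; not taken from the commit):

  %b0 = load i8, i8* %p, align 1
  %b1 = load i8, i8* %q, align 1
  %z0 = zext i8 %b0 to i16
  %z1 = zext i8 %b1 to i16
  %hi = shl nuw i16 %z1, 8        ; shift amount is a multiple of 8
  %w = or i16 %hi, %z0            ; the walk passes through this "or",
  store i16 %w, i16* %d, align 2  ; so FoundOr is set and matching proceeds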