[llvm] [SLP]Remove LoadCombine workaround after handling of the copyables (PR #174205)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 2 04:45:50 PST 2026
llvmbot wrote:
@llvm/pr-subscribers-vectorizers
Author: Alexey Bataev (alexey-bataev)
Changes
LoadCombine pattern handling was added as a workaround for cases where the SLP vectorizer could not vectorize the code effectively. With copyables support, the vectorizer can now handle these patterns directly.
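For illustration, adapted from the CHECK lines in the updated llvm/test/Transforms/PhaseOrdering/X86/loadcombine.ll: the scalar byte-gather pattern (four i8 loads, zext to i32, shl by 0/8/16/24, or-reduced) is now vectorized by SLP itself instead of being skipped for the backend to load-combine:

```llvm
; Vectorized form emitted after this patch (value names are illustrative).
%v   = load <4 x i8>, ptr %p, align 1
%ext = zext <4 x i8> %v to <4 x i32>
%shl = shl nuw <4 x i32> %ext, <i32 0, i32 8, i32 16, i32 24>
%o3  = tail call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %shl)
ret i32 %o3
```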
---
Patch is 45.82 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/174205.diff
5 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (-87)
- (modified) llvm/test/Transforms/PhaseOrdering/X86/loadcombine.ll (+48-204)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/bad-reduction.ll (+32-228)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll (+4-17)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll (+4-17)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 98b64f52457d5..3900acbc8f223 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2254,23 +2254,6 @@ class slpvectorizer::BoUpSLP {
/// effectively than the base graph.
bool isTreeNotExtendable() const;
- /// Assume that a legal-sized 'or'-reduction of shifted/zexted loaded values
- /// can be load combined in the backend. Load combining may not be allowed in
- /// the IR optimizer, so we do not want to alter the pattern. For example,
- /// partially transforming a scalar bswap() pattern into vector code is
- /// effectively impossible for the backend to undo.
- /// TODO: If load combining is allowed in the IR optimizer, this analysis
- /// may not be necessary.
- bool isLoadCombineReductionCandidate(RecurKind RdxKind) const;
-
- /// Assume that a vector of stores of bitwise-or/shifted/zexted loaded values
- /// can be load combined in the backend. Load combining may not be allowed in
- /// the IR optimizer, so we do not want to alter the pattern. For example,
- /// partially transforming a scalar bswap() pattern into vector code is
- /// effectively impossible for the backend to undo.
- /// TODO: If load combining is allowed in the IR optimizer, this analysis
- /// may not be necessary.
- bool isLoadCombineCandidate(ArrayRef<Value *> Stores) const;
bool isStridedLoad(ArrayRef<Value *> PointerOps, Type *ScalarTy,
Align Alignment, const int64_t Diff,
const size_t Sz) const;
@@ -15608,69 +15591,6 @@ bool BoUpSLP::isFullyVectorizableTinyTree(bool ForReduction) const {
return true;
}
-static bool isLoadCombineCandidateImpl(Value *Root, unsigned NumElts,
- TargetTransformInfo *TTI,
- bool MustMatchOrInst) {
- // Look past the root to find a source value. Arbitrarily follow the
- // path through operand 0 of any 'or'. Also, peek through optional
- // shift-left-by-multiple-of-8-bits.
- Value *ZextLoad = Root;
- const APInt *ShAmtC;
- bool FoundOr = false;
- while (!isa<ConstantExpr>(ZextLoad) &&
- (match(ZextLoad, m_Or(m_Value(), m_Value())) ||
- (match(ZextLoad, m_Shl(m_Value(), m_APInt(ShAmtC))) &&
- ShAmtC->urem(8) == 0))) {
- auto *BinOp = cast<BinaryOperator>(ZextLoad);
- ZextLoad = BinOp->getOperand(0);
- if (BinOp->getOpcode() == Instruction::Or)
- FoundOr = true;
- }
- // Check if the input is an extended load of the required or/shift expression.
- Value *Load;
- if ((MustMatchOrInst && !FoundOr) || ZextLoad == Root ||
- !match(ZextLoad, m_ZExt(m_Value(Load))) || !isa<LoadInst>(Load))
- return false;
-
- // Require that the total load bit width is a legal integer type.
- // For example, <8 x i8> --> i64 is a legal integer on a 64-bit target.
- // But <16 x i8> --> i128 is not, so the backend probably can't reduce it.
- Type *SrcTy = Load->getType();
- unsigned LoadBitWidth = SrcTy->getIntegerBitWidth() * NumElts;
- if (!TTI->isTypeLegal(IntegerType::get(Root->getContext(), LoadBitWidth)))
- return false;
-
- // Everything matched - assume that we can fold the whole sequence using
- // load combining.
- LLVM_DEBUG(dbgs() << "SLP: Assume load combining for tree starting at "
- << *(cast<Instruction>(Root)) << "\n");
-
- return true;
-}
-
-bool BoUpSLP::isLoadCombineReductionCandidate(RecurKind RdxKind) const {
- if (RdxKind != RecurKind::Or)
- return false;
-
- unsigned NumElts = VectorizableTree[0]->Scalars.size();
- Value *FirstReduced = VectorizableTree[0]->Scalars[0];
- return isLoadCombineCandidateImpl(FirstReduced, NumElts, TTI,
- /* MatchOr */ false);
-}
-
-bool BoUpSLP::isLoadCombineCandidate(ArrayRef<Value *> Stores) const {
- // Peek through a final sequence of stores and check if all operations are
- // likely to be load-combined.
- unsigned NumElts = Stores.size();
- for (Value *Scalar : Stores) {
- Value *X;
- if (!match(Scalar, m_Store(m_Value(X), m_Value())) ||
- !isLoadCombineCandidateImpl(X, NumElts, TTI, /* MatchOr */ true))
- return false;
- }
- return true;
-}
-
bool BoUpSLP::isTreeTinyAndNotFullyVectorizable(bool ForReduction) const {
if (!DebugCounter::shouldExecute(VectorizedGraphs))
return true;
@@ -23497,8 +23417,6 @@ SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R,
return false;
}
}
- if (R.isLoadCombineCandidate(Chain))
- return true;
R.buildTree(Chain);
// Check if tree tiny and store itself or its value is not vectorized.
if (R.isTreeTinyAndNotFullyVectorizable()) {
@@ -25112,11 +25030,6 @@ class HorizontalReduction {
V.analyzedReductionVals(VL);
continue;
}
- if (V.isLoadCombineReductionCandidate(RdxKind)) {
- if (!AdjustReducedVals())
- V.analyzedReductionVals(VL);
- continue;
- }
V.reorderTopToBottom();
// No need to reorder the root node at all for reassociative reduction.
V.reorderBottomToTop(/*IgnoreReorder=*/RdxFMF.allowReassoc() ||
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/loadcombine.ll b/llvm/test/Transforms/PhaseOrdering/X86/loadcombine.ll
index fe49ba9d61d98..d44ae86484316 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/loadcombine.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/loadcombine.ll
@@ -70,23 +70,10 @@ define i32 @loadCombine_4consecutive_1243(ptr %p) {
define i32 @loadCombine_4consecutive_1324(ptr %p) {
; CHECK-LABEL: @loadCombine_4consecutive_1324(
-; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1
-; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i64 2
-; CHECK-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 3
-; CHECK-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
-; CHECK-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; CHECK-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
-; CHECK-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
-; CHECK-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
-; CHECK-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
-; CHECK-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
-; CHECK-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
-; CHECK-NEXT: [[S2:%.*]] = shl nuw nsw i32 [[E2]], 8
-; CHECK-NEXT: [[S3:%.*]] = shl nuw nsw i32 [[E3]], 16
-; CHECK-NEXT: [[S4:%.*]] = shl nuw i32 [[E4]], 24
-; CHECK-NEXT: [[O1:%.*]] = or disjoint i32 [[S2]], [[E1]]
-; CHECK-NEXT: [[O2:%.*]] = or disjoint i32 [[O1]], [[S3]]
-; CHECK-NEXT: [[O3:%.*]] = or disjoint i32 [[O2]], [[S4]]
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[P:%.*]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw <4 x i32> [[TMP2]], <i32 0, i32 8, i32 16, i32 24>
+; CHECK-NEXT: [[O3:%.*]] = tail call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP3]])
; CHECK-NEXT: ret i32 [[O3]]
;
%p1 = getelementptr i8, ptr %p, i32 1
@@ -114,23 +101,10 @@ define i32 @loadCombine_4consecutive_1324(ptr %p) {
define i32 @loadCombine_4consecutive_1342(ptr %p) {
; CHECK-LABEL: @loadCombine_4consecutive_1342(
-; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1
-; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i64 2
-; CHECK-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 3
-; CHECK-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
-; CHECK-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; CHECK-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
-; CHECK-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
-; CHECK-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
-; CHECK-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
-; CHECK-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
-; CHECK-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
-; CHECK-NEXT: [[S2:%.*]] = shl nuw nsw i32 [[E2]], 8
-; CHECK-NEXT: [[S3:%.*]] = shl nuw nsw i32 [[E3]], 16
-; CHECK-NEXT: [[S4:%.*]] = shl nuw i32 [[E4]], 24
-; CHECK-NEXT: [[O1:%.*]] = or disjoint i32 [[S2]], [[E1]]
-; CHECK-NEXT: [[O2:%.*]] = or disjoint i32 [[O1]], [[S3]]
-; CHECK-NEXT: [[O3:%.*]] = or disjoint i32 [[O2]], [[S4]]
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[P:%.*]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw <4 x i32> [[TMP2]], <i32 0, i32 8, i32 16, i32 24>
+; CHECK-NEXT: [[O3:%.*]] = tail call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP3]])
; CHECK-NEXT: ret i32 [[O3]]
;
%p1 = getelementptr i8, ptr %p, i32 1
@@ -158,23 +132,10 @@ define i32 @loadCombine_4consecutive_1342(ptr %p) {
define i32 @loadCombine_4consecutive_1423(ptr %p) {
; CHECK-LABEL: @loadCombine_4consecutive_1423(
-; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1
-; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i64 2
-; CHECK-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 3
-; CHECK-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
-; CHECK-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; CHECK-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
-; CHECK-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
-; CHECK-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
-; CHECK-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
-; CHECK-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
-; CHECK-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
-; CHECK-NEXT: [[S2:%.*]] = shl nuw nsw i32 [[E2]], 8
-; CHECK-NEXT: [[S3:%.*]] = shl nuw nsw i32 [[E3]], 16
-; CHECK-NEXT: [[S4:%.*]] = shl nuw i32 [[E4]], 24
-; CHECK-NEXT: [[O1:%.*]] = or disjoint i32 [[S2]], [[E1]]
-; CHECK-NEXT: [[O2:%.*]] = or disjoint i32 [[O1]], [[S3]]
-; CHECK-NEXT: [[O3:%.*]] = or disjoint i32 [[O2]], [[S4]]
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[P:%.*]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw <4 x i32> [[TMP2]], <i32 0, i32 8, i32 16, i32 24>
+; CHECK-NEXT: [[O3:%.*]] = tail call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP3]])
; CHECK-NEXT: ret i32 [[O3]]
;
%p1 = getelementptr i8, ptr %p, i32 1
@@ -202,23 +163,10 @@ define i32 @loadCombine_4consecutive_1423(ptr %p) {
define i32 @loadCombine_4consecutive_1432(ptr %p) {
; CHECK-LABEL: @loadCombine_4consecutive_1432(
-; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1
-; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i64 2
-; CHECK-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 3
-; CHECK-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
-; CHECK-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; CHECK-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
-; CHECK-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
-; CHECK-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
-; CHECK-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
-; CHECK-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
-; CHECK-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
-; CHECK-NEXT: [[S2:%.*]] = shl nuw nsw i32 [[E2]], 8
-; CHECK-NEXT: [[S3:%.*]] = shl nuw nsw i32 [[E3]], 16
-; CHECK-NEXT: [[S4:%.*]] = shl nuw i32 [[E4]], 24
-; CHECK-NEXT: [[O1:%.*]] = or disjoint i32 [[S2]], [[E1]]
-; CHECK-NEXT: [[O2:%.*]] = or disjoint i32 [[O1]], [[S3]]
-; CHECK-NEXT: [[O3:%.*]] = or disjoint i32 [[O2]], [[S4]]
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[P:%.*]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw <4 x i32> [[TMP2]], <i32 0, i32 8, i32 16, i32 24>
+; CHECK-NEXT: [[O3:%.*]] = tail call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP3]])
; CHECK-NEXT: ret i32 [[O3]]
;
%p1 = getelementptr i8, ptr %p, i32 1
@@ -369,23 +317,10 @@ define i32 @loadCombine_4consecutive_2341(ptr %p) {
define i32 @loadCombine_4consecutive_2413(ptr %p) {
; CHECK-LABEL: @loadCombine_4consecutive_2413(
-; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1
-; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i64 2
-; CHECK-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 3
-; CHECK-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
-; CHECK-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; CHECK-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
-; CHECK-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
-; CHECK-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
-; CHECK-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
-; CHECK-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
-; CHECK-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
-; CHECK-NEXT: [[S2:%.*]] = shl nuw nsw i32 [[E2]], 8
-; CHECK-NEXT: [[S3:%.*]] = shl nuw nsw i32 [[E3]], 16
-; CHECK-NEXT: [[S4:%.*]] = shl nuw i32 [[E4]], 24
-; CHECK-NEXT: [[O1:%.*]] = or disjoint i32 [[S2]], [[E1]]
-; CHECK-NEXT: [[O2:%.*]] = or disjoint i32 [[O1]], [[S3]]
-; CHECK-NEXT: [[O3:%.*]] = or disjoint i32 [[O2]], [[S4]]
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[P:%.*]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw <4 x i32> [[TMP2]], <i32 0, i32 8, i32 16, i32 24>
+; CHECK-NEXT: [[O3:%.*]] = tail call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP3]])
; CHECK-NEXT: ret i32 [[O3]]
;
%p1 = getelementptr i8, ptr %p, i32 1
@@ -413,23 +348,10 @@ define i32 @loadCombine_4consecutive_2413(ptr %p) {
define i32 @loadCombine_4consecutive_2431(ptr %p) {
; CHECK-LABEL: @loadCombine_4consecutive_2431(
-; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1
-; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i64 2
-; CHECK-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 3
-; CHECK-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
-; CHECK-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; CHECK-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
-; CHECK-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
-; CHECK-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
-; CHECK-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
-; CHECK-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
-; CHECK-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
-; CHECK-NEXT: [[S2:%.*]] = shl nuw nsw i32 [[E2]], 8
-; CHECK-NEXT: [[S3:%.*]] = shl nuw nsw i32 [[E3]], 16
-; CHECK-NEXT: [[S4:%.*]] = shl nuw i32 [[E4]], 24
-; CHECK-NEXT: [[O1:%.*]] = or disjoint i32 [[S2]], [[E1]]
-; CHECK-NEXT: [[O2:%.*]] = or disjoint i32 [[O1]], [[S3]]
-; CHECK-NEXT: [[O3:%.*]] = or disjoint i32 [[O2]], [[S4]]
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[P:%.*]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw <4 x i32> [[TMP2]], <i32 0, i32 8, i32 16, i32 24>
+; CHECK-NEXT: [[O3:%.*]] = tail call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP3]])
; CHECK-NEXT: ret i32 [[O3]]
;
%p1 = getelementptr i8, ptr %p, i32 1
@@ -457,23 +379,10 @@ define i32 @loadCombine_4consecutive_2431(ptr %p) {
define i32 @loadCombine_4consecutive_3124(ptr %p) {
; CHECK-LABEL: @loadCombine_4consecutive_3124(
-; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1
-; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i64 2
-; CHECK-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 3
-; CHECK-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
-; CHECK-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; CHECK-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
-; CHECK-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
-; CHECK-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
-; CHECK-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
-; CHECK-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
-; CHECK-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
-; CHECK-NEXT: [[S2:%.*]] = shl nuw nsw i32 [[E2]], 8
-; CHECK-NEXT: [[S3:%.*]] = shl nuw nsw i32 [[E3]], 16
-; CHECK-NEXT: [[S4:%.*]] = shl nuw i32 [[E4]], 24
-; CHECK-NEXT: [[O1:%.*]] = or disjoint i32 [[S2]], [[E1]]
-; CHECK-NEXT: [[O2:%.*]] = or disjoint i32 [[O1]], [[S3]]
-; CHECK-NEXT: [[O3:%.*]] = or disjoint i32 [[O2]], [[S4]]
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[P:%.*]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw <4 x i32> [[TMP2]], <i32 0, i32 8, i32 16, i32 24>
+; CHECK-NEXT: [[O3:%.*]] = tail call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP3]])
; CHECK-NEXT: ret i32 [[O3]]
;
%p1 = getelementptr i8, ptr %p, i32 1
@@ -501,23 +410,10 @@ define i32 @loadCombine_4consecutive_3124(ptr %p) {
define i32 @loadCombine_4consecutive_3142(ptr %p) {
; CHECK-LABEL: @loadCombine_4consecutive_3142(
-; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1
-; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i64 2
-; CHECK-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 3
-; CHECK-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
-; CHECK-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; CHECK-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
-; CHECK-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
-; CHECK-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
-; CHECK-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
-; CHECK-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
-; CHECK-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
-; CHECK-NEXT: [[S2:%.*]] = shl nuw nsw i32 [[E2]], 8
-; CHECK-NEXT: [[S3:%.*]] = shl nuw nsw i32 [[E3]], 16
-; CHECK-NEXT: [[S4:%.*]] = shl nuw i32 [[E4]], 24
-; CHECK-NEXT: [[O1:%.*]] = or disjoint i32 [[S2]], [[E1]]
-; CHECK-NEXT: [[O2:%.*]] = or disjoint i32 [[O1]], [[S3]]
-; CHECK-NEXT: [[O3:%.*]] = or disjoint i32 [[O2]], [[S4]]
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[P:%.*]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw <4 x i32> [[TMP2]], <i32 0, i32 8, i32 16, i32 24>
+; CHECK-NEXT: [[O3:%.*]] = tail call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP3]])
; CHECK-NEXT: ret i32 [[O3]]
;
%p1 = getelementptr i8, ptr %p, i32 1
@@ -668,23 +564,10 @@ define i32 @loadCombine_4consecutive_3421(ptr %p) {
define i32 @loadCombine_4consecutive_4123(ptr %p) {
; CHECK-LABEL: @loadCombine_4consecutive_4123(
-; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1
-; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i64 2
-; CHECK-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 3
-; CHECK-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
-; CHECK-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; CHECK-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
-; CHECK-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
-; CHECK-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
-; CHECK-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
-; CHECK-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
-; CHECK-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
-; CHECK-NEXT: [[S2:%.*]] = shl nuw nsw i32 [[E2]], 8
-; CHECK-NEXT: [[S3:%.*]] = shl nuw nsw i32 [[E3]], 16
-; CHECK-NEXT: [[S4:%.*]] = shl nuw i32 [[E4]], 24
-; CHECK-NEXT: [[O1:%.*]] = or disjoint i32 [[S2]], [[E1]]
-; CHECK-NEXT: [[O2:%.*]] = or disjoint i32 [[O1]], [[S3]]
-; CHECK-NEXT: [[O3:%.*]] = or disjoint i32 [[O2]], [[S4]]
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[P:%.*]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw <4 x i32> [[TMP2]], <i32 0, i32 8, i32 16, i32 24>
+; CHECK-NEXT: [[O3:%.*]] = tail call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP3]])
; CHECK-NEXT: ret i32 [[O3]]
;
%p1 = getelementptr i8, ptr %p, i32 1
@@ -712,23 +595,10 @@ define i32 @loadCombine_4consecutive_4123(ptr %p) {
define i32 @loadCombine_4consecutive_4132(ptr %p) {
; CHECK-LABEL: @loadCombine_4consecutive_4132(
-; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1
-; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i64 2
-; CHECK-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 3
-; CHECK-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
-; CHECK-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
-; CHECK-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
-; CHECK-NEXT: [[L4:%.*...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/174205
More information about the llvm-commits mailing list