[llvm] [InstCombine] Added pattern for recognising the construction of packed integers. (PR #147414)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 22 07:53:46 PDT 2025
https://github.com/zGoldthorpe updated https://github.com/llvm/llvm-project/pull/147414
From d98ec0176f0425173c9ad4769513ccb72d313d0f Mon Sep 17 00:00:00 2001
From: Zach Goldthorpe <zgoldtho at ualberta.ca>
Date: Mon, 7 Jul 2025 17:09:29 -0500
Subject: [PATCH 1/4] Added pattern for folding packed integer constructions.
---
.../Transforms/Vectorize/VectorCombine.cpp | 125 ++++++++++++++++++
.../VectorCombine/packed-integers.ll | 108 +++++++++++++++
2 files changed, 233 insertions(+)
create mode 100644 llvm/test/Transforms/VectorCombine/packed-integers.ll
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index fe8d74c43dfdc..ce73a383d2555 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
@@ -125,6 +126,7 @@ class VectorCombine {
bool scalarizeLoadExtract(Instruction &I);
bool scalarizeExtExtract(Instruction &I);
bool foldConcatOfBoolMasks(Instruction &I);
+ bool foldIntegerPackFromVector(Instruction &I);
bool foldPermuteOfBinops(Instruction &I);
bool foldShuffleOfBinops(Instruction &I);
bool foldShuffleOfSelects(Instruction &I);
@@ -1957,6 +1959,126 @@ bool VectorCombine::foldConcatOfBoolMasks(Instruction &I) {
return true;
}
+/// Match "shufflevector -> bitcast" or "extractelement -> zext -> shl" patterns
+/// which extract vector elements and pack them in the same relative positions.
+static bool matchSubIntegerPackFromVector(Value *V, Value *&Vec,
+ uint64_t &VecOffset,
+ SmallBitVector &Mask) {
+ static const auto m_ConstShlOrSelf = [](const auto &Base, uint64_t &ShlAmt) {
+ ShlAmt = 0;
+ return m_CombineOr(m_Shl(Base, m_ConstantInt(ShlAmt)), Base);
+ };
+
+ // First try to match extractelement -> zext -> shl
+ uint64_t VecIdx, ShlAmt;
+ if (match(V, m_ConstShlOrSelf(m_ZExtOrSelf(m_ExtractElt(
+ m_Value(Vec), m_ConstantInt(VecIdx))),
+ ShlAmt))) {
+ auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
+ if (!VecTy)
+ return false;
+ auto *EltTy = dyn_cast<IntegerType>(VecTy->getElementType());
+ if (!EltTy)
+ return false;
+
+ const unsigned EltBitWidth = EltTy->getBitWidth();
+ const unsigned TargetBitWidth = V->getType()->getIntegerBitWidth();
+ if (TargetBitWidth % EltBitWidth != 0 || ShlAmt % EltBitWidth != 0)
+ return false;
+ const unsigned ShlEltAmt = ShlAmt / EltBitWidth;
+
+ if (ShlEltAmt > VecIdx)
+ return false;
+ VecOffset = VecIdx - ShlEltAmt;
+ Mask.resize(V->getType()->getIntegerBitWidth() / EltBitWidth);
+ Mask.set(ShlEltAmt);
+ return true;
+ }
+
+ // Now try to match shufflevector -> bitcast
+ Value *Lhs, *Rhs;
+ ArrayRef<int> ShuffleMask;
+ if (!match(V, m_BitCast(m_Shuffle(m_Value(Lhs), m_Value(Rhs),
+ m_Mask(ShuffleMask)))))
+ return false;
+ Mask.resize(ShuffleMask.size());
+
+ if (isa<Constant>(Lhs))
+ std::swap(Lhs, Rhs);
+
+ auto *RhsConst = dyn_cast<Constant>(Rhs);
+ if (!RhsConst)
+ return false;
+
+ auto *LhsTy = dyn_cast<FixedVectorType>(Lhs->getType());
+ if (!LhsTy)
+ return false;
+
+ Vec = Lhs;
+ const unsigned NumLhsElts = LhsTy->getNumElements();
+ bool FoundVecOffset = false;
+ for (unsigned Idx = 0; Idx < ShuffleMask.size(); ++Idx) {
+ if (ShuffleMask[Idx] == PoisonMaskElem)
+ return false;
+ const unsigned ShuffleIdx = ShuffleMask[Idx];
+ if (ShuffleIdx >= NumLhsElts) {
+ const unsigned RhsIdx = ShuffleIdx - NumLhsElts;
+ auto *RhsElt =
+ dyn_cast<ConstantInt>(RhsConst->getAggregateElement(RhsIdx));
+ if (!RhsElt || RhsElt->getZExtValue() != 0)
+ return false;
+ continue;
+ }
+
+ if (FoundVecOffset) {
+ if (VecOffset + Idx != ShuffleIdx)
+ return false;
+ } else {
+ if (ShuffleIdx < Idx)
+ return false;
+ VecOffset = ShuffleIdx - Idx;
+ FoundVecOffset = true;
+ }
+ Mask.set(Idx);
+ }
+ return FoundVecOffset;
+}
+/// Try to fold the or of two scalar integers whose contents are packed elements
+/// of the same vector.
+bool VectorCombine::foldIntegerPackFromVector(Instruction &I) {
+ assert(I.getOpcode() == Instruction::Or);
+ Value *LhsVec, *RhsVec;
+ uint64_t LhsVecOffset, RhsVecOffset;
+ SmallBitVector Mask;
+ if (!matchSubIntegerPackFromVector(I.getOperand(0), LhsVec, LhsVecOffset,
+ Mask))
+ return false;
+ if (!matchSubIntegerPackFromVector(I.getOperand(1), RhsVec, RhsVecOffset,
+ Mask))
+ return false;
+ if (LhsVec != RhsVec || LhsVecOffset != RhsVecOffset)
+ return false;
+
+ // Convert into shufflevector -> bitcast
+ SmallVector<int> ShuffleMask;
+ ShuffleMask.reserve(Mask.size());
+ const unsigned ZeroVecIdx =
+ cast<FixedVectorType>(LhsVec->getType())->getNumElements();
+ for (unsigned Idx = 0; Idx < Mask.size(); ++Idx) {
+ if (Mask.test(Idx))
+ ShuffleMask.push_back(LhsVecOffset + Idx);
+ else
+ ShuffleMask.push_back(ZeroVecIdx);
+ }
+
+ Value *MaskedVec = Builder.CreateShuffleVector(
+ LhsVec, Constant::getNullValue(LhsVec->getType()), ShuffleMask,
+ LhsVec->getName() + ".extract");
+ Value *CastedVec = Builder.CreateBitCast(MaskedVec, I.getType(), I.getName());
+ replaceValue(I, *CastedVec);
+ return true;
+}
+
/// Try to convert "shuffle (binop (shuffle, shuffle)), undef"
/// --> "binop (shuffle), (shuffle)".
bool VectorCombine::foldPermuteOfBinops(Instruction &I) {
@@ -3742,6 +3864,9 @@ bool VectorCombine::run() {
if (Opcode == Instruction::Store)
MadeChange |= foldSingleElementStore(I);
+ if (isa<IntegerType>(I.getType()) && Opcode == Instruction::Or)
+ MadeChange |= foldIntegerPackFromVector(I);
+
// If this is an early pipeline invocation of this pass, we are done.
if (TryEarlyFoldsOnly)
return;
diff --git a/llvm/test/Transforms/VectorCombine/packed-integers.ll b/llvm/test/Transforms/VectorCombine/packed-integers.ll
new file mode 100644
index 0000000000000..f01179bbde13c
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/packed-integers.ll
@@ -0,0 +1,108 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=vector-combine %s | FileCheck %s
+
+define i32 @bitcast.v2i(<4 x i8> %v) {
+; CHECK-LABEL: define i32 @bitcast.v2i(
+; CHECK-SAME: <4 x i8> [[V:%.*]]) {
+; CHECK-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[V]] to i32
+; CHECK-NEXT: ret i32 [[X_3]]
+;
+ %v.0 = extractelement <4 x i8> %v, i32 0
+ %z.0 = zext i8 %v.0 to i32
+
+ %v.1 = extractelement <4 x i8> %v, i32 1
+ %z.1 = zext i8 %v.1 to i32
+ %s.1 = shl i32 %z.1, 8
+ %x.1 = or i32 %z.0, %s.1
+
+ %v.2 = extractelement <4 x i8> %v, i32 2
+ %z.2 = zext i8 %v.2 to i32
+ %s.2 = shl i32 %z.2, 16
+ %x.2 = or i32 %x.1, %s.2
+
+ %v.3 = extractelement <4 x i8> %v, i32 3
+ %z.3 = zext i8 %v.3 to i32
+ %s.3 = shl i32 %z.3, 24
+ %x.3 = or i32 %x.2, %s.3
+
+ ret i32 %x.3
+}
+
+define i32 @bitcast.v2i.tree(<4 x i8> %v) {
+; CHECK-LABEL: define i32 @bitcast.v2i.tree(
+; CHECK-SAME: <4 x i8> [[V:%.*]]) {
+; CHECK-NEXT: [[X:%.*]] = bitcast <4 x i8> [[V]] to i32
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %v.0 = extractelement <4 x i8> %v, i32 0
+ %z.0 = zext i8 %v.0 to i32
+
+ %v.1 = extractelement <4 x i8> %v, i32 1
+ %z.1 = zext i8 %v.1 to i32
+ %s.1 = shl i32 %z.1, 8
+ %x.1 = or i32 %z.0, %s.1
+
+ %v.2 = extractelement <4 x i8> %v, i32 2
+ %z.2 = zext i8 %v.2 to i32
+ %s.2 = shl i32 %z.2, 16
+
+ %v.3 = extractelement <4 x i8> %v, i32 3
+ %z.3 = zext i8 %v.3 to i32
+ %s.3 = shl i32 %z.3, 24
+ %x.3 = or i32 %s.2, %s.3
+
+ %x = or i32 %x.1, %x.3
+
+ ret i32 %x
+}
+
+define i32 @extract.i32(<8 x i8> %v) {
+; CHECK-LABEL: define i32 @extract.i32(
+; CHECK-SAME: <8 x i8> [[V:%.*]]) {
+; CHECK-NEXT: [[V_EXTRACT4:%.*]] = shufflevector <8 x i8> [[V]], <8 x i8> zeroinitializer, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; CHECK-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[V_EXTRACT4]] to i32
+; CHECK-NEXT: ret i32 [[X_3]]
+;
+ %v.0 = extractelement <8 x i8> %v, i32 3
+ %z.0 = zext i8 %v.0 to i32
+
+ %v.1 = extractelement <8 x i8> %v, i32 4
+ %z.1 = zext i8 %v.1 to i32
+ %s.1 = shl i32 %z.1, 8
+ %x.1 = or i32 %z.0, %s.1
+
+ %v.2 = extractelement <8 x i8> %v, i32 5
+ %z.2 = zext i8 %v.2 to i32
+ %s.2 = shl i32 %z.2, 16
+ %x.2 = or i32 %x.1, %s.2
+
+ %v.3 = extractelement <8 x i8> %v, i32 6
+ %z.3 = zext i8 %v.3 to i32
+ %s.3 = shl i32 %z.3, 24
+ %x.3 = or i32 %x.2, %s.3
+
+ ret i32 %x.3
+}
+
+define i32 @partial(<4 x i8> %v) {
+; CHECK-LABEL: define i32 @partial(
+; CHECK-SAME: <4 x i8> [[V:%.*]]) {
+; CHECK-NEXT: [[V_EXTRACT2:%.*]] = shufflevector <4 x i8> [[V]], <4 x i8> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
+; CHECK-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[V_EXTRACT2]] to i32
+; CHECK-NEXT: ret i32 [[X_3]]
+;
+ %v.0 = extractelement <4 x i8> %v, i32 0
+ %z.0 = zext i8 %v.0 to i32
+
+ %v.1 = extractelement <4 x i8> %v, i32 1
+ %z.1 = zext i8 %v.1 to i32
+ %s.1 = shl i32 %z.1, 8
+ %x.1 = or i32 %z.0, %s.1
+
+ %v.3 = extractelement <4 x i8> %v, i32 3
+ %z.3 = zext i8 %v.3 to i32
+ %s.3 = shl i32 %z.3, 24
+ %x.3 = or i32 %x.1, %s.3
+
+ ret i32 %x.3
+}
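For reviewers skimming the patch, here is a minimal sketch (illustrative IR, not taken from the tests) of what matchSubIntegerPackFromVector computes for the extractelement -> zext -> shl form when two adjacent lanes of %v are packed starting at lane 2:

  %e0 = extractelement <4 x i8> %v, i32 2
  %z0 = zext i8 %e0 to i16       ; ShlAmt 0          -> VecOffset = 2 - 0 = 2, Mask bit 0
  %e1 = extractelement <4 x i8> %v, i32 3
  %z1 = zext i8 %e1 to i16
  %s1 = shl i16 %z1, 8           ; ShlAmt 8 (1 lane) -> VecOffset = 3 - 1 = 2, Mask bit 1
  %r  = or i16 %z0, %s1

Both or-operands report the same vector and offset, so (on a little-endian layout) the fold should produce the equivalent

  %pack = shufflevector <4 x i8> %v, <4 x i8> zeroinitializer, <2 x i32> <i32 2, i32 3>
  %r    = bitcast <2 x i8> %pack to i16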
From 95e74dc814749b20434cf910f4103d0b1a8f3dc2 Mon Sep 17 00:00:00 2001
From: Zach Goldthorpe <zgoldtho at ualberta.ca>
Date: Fri, 11 Jul 2025 09:02:16 -0500
Subject: [PATCH 2/4] Incorporated reviewer feedback.
---
llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 15 +++++----------
1 file changed, 5 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index ce73a383d2555..4d67af7003de8 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -2025,7 +2025,7 @@ static bool matchSubIntegerPackFromVector(Value *V, Value *&Vec,
const unsigned RhsIdx = ShuffleIdx - NumLhsElts;
auto *RhsElt =
dyn_cast<ConstantInt>(RhsConst->getAggregateElement(RhsIdx));
- if (!RhsElt || RhsElt->getZExtValue() != 0)
+ if (!RhsElt || !RhsElt->isNullValue())
return false;
continue;
}
@@ -2059,17 +2059,12 @@ bool VectorCombine::foldIntegerPackFromVector(Instruction &I) {
if (LhsVec != RhsVec || LhsVecOffset != RhsVecOffset)
return false;
- // Convert into shufflevector -> bitcast
- SmallVector<int> ShuffleMask;
- ShuffleMask.reserve(Mask.size());
+ // Convert into shufflevector -> bitcast;
const unsigned ZeroVecIdx =
cast<FixedVectorType>(LhsVec->getType())->getNumElements();
- for (unsigned Idx = 0; Idx < Mask.size(); ++Idx) {
- if (Mask.test(Idx))
- ShuffleMask.push_back(LhsVecOffset + Idx);
- else
- ShuffleMask.push_back(ZeroVecIdx);
- }
+ SmallVector<int> ShuffleMask(Mask.size(), ZeroVecIdx);
+ for (unsigned Idx : Mask.set_bits())
+ ShuffleMask[Idx] = LhsVecOffset + Idx;
Value *MaskedVec = Builder.CreateShuffleVector(
LhsVec, Constant::getNullValue(LhsVec->getType()), ShuffleMask,
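To make the rewritten mask-construction loop concrete, this is what it produces for the @partial test above (names simplified): Mask has bits {0, 1, 3} set with LhsVecOffset == 0, so ShuffleMask is initialized to {4, 4, 4, 4} (ZeroVecIdx) and the set_bits() loop rewrites it to {0, 1, 4, 3}; lane 2 is therefore taken from the all-zero second shuffle operand:

  %v.extract = shufflevector <4 x i8> %v, <4 x i8> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
  %x.3       = bitcast <4 x i8> %v.extract to i32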
From 7abb1534cd5ce01023aca95c748852e344d8beb4 Mon Sep 17 00:00:00 2001
From: Zach Goldthorpe <Zach.Goldthorpe at amd.com>
Date: Mon, 21 Jul 2025 15:13:37 -0500
Subject: [PATCH 3/4] Moved pattern to `instcombine`
---
.../InstCombine/InstCombineAndOrXor.cpp | 141 ++++++++++++++++++
.../Transforms/Vectorize/VectorCombine.cpp | 120 ---------------
.../or-packed-int-vecs.ll} | 13 +-
3 files changed, 148 insertions(+), 126 deletions(-)
rename llvm/test/Transforms/{VectorCombine/packed-integers.ll => InstCombine/or-packed-int-vecs.ll} (81%)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index dd16cfaeecd45..49cd60baf9fd5 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -11,10 +11,13 @@
//===----------------------------------------------------------------------===//
#include "InstCombineInternal.h"
+#include "llvm/ADT/SmallBitVector.h"
#include "llvm/Analysis/CmpInstAnalysis.h"
#include "llvm/Analysis/FloatingPointPredicateUtils.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/ConstantRange.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
@@ -3592,6 +3595,141 @@ static Value *foldOrOfInversions(BinaryOperator &I,
return nullptr;
}
+/// Match "shufflevector -> bitcast" or "extractelement -> zext -> shl" patterns
+/// which extract vector elements and pack them in the same relative positions.
+static bool matchSubIntegerPackFromVector(Value *V, Value *&Vec,
+ uint64_t &VecOffset,
+ SmallBitVector &Mask) {
+ static const auto m_ConstShlOrSelf = [](const auto &Base, uint64_t &ShlAmt) {
+ ShlAmt = 0;
+ return m_CombineOr(m_Shl(Base, m_ConstantInt(ShlAmt)), Base);
+ };
+
+ // First try to match extractelement -> zext -> shl
+ uint64_t VecIdx, ShlAmt;
+ if (match(V, m_ConstShlOrSelf(m_ZExtOrSelf(m_ExtractElt(
+ m_Value(Vec), m_ConstantInt(VecIdx))),
+ ShlAmt))) {
+ auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
+ if (!VecTy)
+ return false;
+ auto *EltTy = dyn_cast<IntegerType>(VecTy->getElementType());
+ if (!EltTy)
+ return false;
+
+ const unsigned EltBitWidth = EltTy->getBitWidth();
+ const unsigned TargetBitWidth = V->getType()->getIntegerBitWidth();
+ if (TargetBitWidth % EltBitWidth != 0 || ShlAmt % EltBitWidth != 0)
+ return false;
+ const unsigned ShlEltAmt = ShlAmt / EltBitWidth;
+
+ if (ShlEltAmt > VecIdx)
+ return false;
+ VecOffset = VecIdx - ShlEltAmt;
+ Mask.resize(V->getType()->getIntegerBitWidth() / EltBitWidth);
+ Mask.set(ShlEltAmt);
+ return true;
+ }
+
+ // Now try to match a bitcasted subvector.
+ Instruction *DstVecI;
+ if (!match(V, m_BitCast(m_Instruction(DstVecI))))
+ return false;
+
+ auto *DstTy = dyn_cast<FixedVectorType>(DstVecI->getType());
+ if (!DstTy)
+ return false;
+
+ Mask.resize(DstTy->getNumElements());
+
+ // First check for a subvector obtained from a shufflevector.
+ if (isa<ShuffleVectorInst>(DstVecI)) {
+ Constant *ConstVec;
+ ArrayRef<int> ShuffleMask;
+ if (!match(DstVecI, m_Shuffle(m_Value(Vec), m_Constant(ConstVec),
+ m_Mask(ShuffleMask))))
+ return false;
+
+ auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
+ if (!VecTy)
+ return false;
+
+ const unsigned NumVecElts = VecTy->getNumElements();
+ bool FoundVecOffset = false;
+ for (unsigned Idx = 0; Idx < ShuffleMask.size(); ++Idx) {
+ if (ShuffleMask[Idx] == PoisonMaskElem)
+ return false;
+ const unsigned ShuffleIdx = ShuffleMask[Idx];
+ if (ShuffleIdx >= NumVecElts) {
+ const unsigned ConstIdx = ShuffleIdx - NumVecElts;
+ auto *ConstElt =
+ dyn_cast<ConstantInt>(ConstVec->getAggregateElement(ConstIdx));
+ if (!ConstElt || !ConstElt->isNullValue())
+ return false;
+ continue;
+ }
+
+ if (FoundVecOffset) {
+ if (VecOffset + Idx != ShuffleIdx)
+ return false;
+ } else {
+ if (ShuffleIdx < Idx)
+ return false;
+ VecOffset = ShuffleIdx - Idx;
+ FoundVecOffset = true;
+ }
+ Mask.set(Idx);
+ }
+ return FoundVecOffset;
+ }
+
+ // Check for a subvector obtained as an (insertelement V, 0, idx)
+ uint64_t InsertIdx;
+ if (!match(DstVecI,
+ m_InsertElt(m_Value(Vec), m_Zero(), m_ConstantInt(InsertIdx))))
+ return false;
+
+ auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
+ if (!VecTy)
+ return false;
+ VecOffset = 0;
+ bool AlreadyInsertedMaskedElt = Mask.test(InsertIdx);
+ Mask.set();
+ if (!AlreadyInsertedMaskedElt)
+ Mask.reset(InsertIdx);
+ return true;
+}
+
+/// Try to fold the or of two scalar integers whose contents are packed elements
+/// of the same vector.
+Instruction *foldIntegerPackFromVector(Instruction &I,
+ InstCombiner::BuilderTy &Builder) {
+ assert(I.getOpcode() == Instruction::Or);
+ Value *LhsVec, *RhsVec;
+ uint64_t LhsVecOffset, RhsVecOffset;
+ SmallBitVector Mask;
+ if (!matchSubIntegerPackFromVector(I.getOperand(0), LhsVec, LhsVecOffset,
+ Mask))
+ return nullptr;
+ if (!matchSubIntegerPackFromVector(I.getOperand(1), RhsVec, RhsVecOffset,
+ Mask))
+ return nullptr;
+ if (LhsVec != RhsVec || LhsVecOffset != RhsVecOffset)
+ return nullptr;
+
+ // Convert into shufflevector -> bitcast;
+ const unsigned ZeroVecIdx =
+ cast<FixedVectorType>(LhsVec->getType())->getNumElements();
+ SmallVector<int> ShuffleMask(Mask.size(), ZeroVecIdx);
+ for (unsigned Idx : Mask.set_bits())
+ ShuffleMask[Idx] = LhsVecOffset + Idx;
+
+ Value *MaskedVec = Builder.CreateShuffleVector(
+ LhsVec, Constant::getNullValue(LhsVec->getType()), ShuffleMask,
+ I.getName() + ".v");
+ return CastInst::Create(Instruction::BitCast, MaskedVec, I.getType());
+}
+
// A decomposition of ((X & Mask) * Factor). The NUW / NSW bools
// track these properities for preservation. Note that we can decompose
// equivalent select form of this expression (e.g. (!(X & Mask) ? 0 : Mask *
@@ -3688,6 +3826,9 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
if (Instruction *X = foldComplexAndOrPatterns(I, Builder))
return X;
+ if (Instruction *X = foldIntegerPackFromVector(I, Builder))
+ return X;
+
// (A & B) | (C & D) -> A ^ D where A == ~C && B == ~D
// (A & B) | (C & D) -> A ^ C where A == ~D && B == ~C
if (Value *V = foldOrOfInversions(I, Builder))
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 4d67af7003de8..fe8d74c43dfdc 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -16,7 +16,6 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
-#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
@@ -126,7 +125,6 @@ class VectorCombine {
bool scalarizeLoadExtract(Instruction &I);
bool scalarizeExtExtract(Instruction &I);
bool foldConcatOfBoolMasks(Instruction &I);
- bool foldIntegerPackFromVector(Instruction &I);
bool foldPermuteOfBinops(Instruction &I);
bool foldShuffleOfBinops(Instruction &I);
bool foldShuffleOfSelects(Instruction &I);
@@ -1959,121 +1957,6 @@ bool VectorCombine::foldConcatOfBoolMasks(Instruction &I) {
return true;
}
-/// Match "shufflevector -> bitcast" or "extractelement -> zext -> shl" patterns
-/// which extract vector elements and pack them in the same relative positions.
-static bool matchSubIntegerPackFromVector(Value *V, Value *&Vec,
- uint64_t &VecOffset,
- SmallBitVector &Mask) {
- static const auto m_ConstShlOrSelf = [](const auto &Base, uint64_t &ShlAmt) {
- ShlAmt = 0;
- return m_CombineOr(m_Shl(Base, m_ConstantInt(ShlAmt)), Base);
- };
-
- // First try to match extractelement -> zext -> shl
- uint64_t VecIdx, ShlAmt;
- if (match(V, m_ConstShlOrSelf(m_ZExtOrSelf(m_ExtractElt(
- m_Value(Vec), m_ConstantInt(VecIdx))),
- ShlAmt))) {
- auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
- if (!VecTy)
- return false;
- auto *EltTy = dyn_cast<IntegerType>(VecTy->getElementType());
- if (!EltTy)
- return false;
-
- const unsigned EltBitWidth = EltTy->getBitWidth();
- const unsigned TargetBitWidth = V->getType()->getIntegerBitWidth();
- if (TargetBitWidth % EltBitWidth != 0 || ShlAmt % EltBitWidth != 0)
- return false;
- const unsigned ShlEltAmt = ShlAmt / EltBitWidth;
-
- if (ShlEltAmt > VecIdx)
- return false;
- VecOffset = VecIdx - ShlEltAmt;
- Mask.resize(V->getType()->getIntegerBitWidth() / EltBitWidth);
- Mask.set(ShlEltAmt);
- return true;
- }
-
- // Now try to match shufflevector -> bitcast
- Value *Lhs, *Rhs;
- ArrayRef<int> ShuffleMask;
- if (!match(V, m_BitCast(m_Shuffle(m_Value(Lhs), m_Value(Rhs),
- m_Mask(ShuffleMask)))))
- return false;
- Mask.resize(ShuffleMask.size());
-
- if (isa<Constant>(Lhs))
- std::swap(Lhs, Rhs);
-
- auto *RhsConst = dyn_cast<Constant>(Rhs);
- if (!RhsConst)
- return false;
-
- auto *LhsTy = dyn_cast<FixedVectorType>(Lhs->getType());
- if (!LhsTy)
- return false;
-
- Vec = Lhs;
- const unsigned NumLhsElts = LhsTy->getNumElements();
- bool FoundVecOffset = false;
- for (unsigned Idx = 0; Idx < ShuffleMask.size(); ++Idx) {
- if (ShuffleMask[Idx] == PoisonMaskElem)
- return false;
- const unsigned ShuffleIdx = ShuffleMask[Idx];
- if (ShuffleIdx >= NumLhsElts) {
- const unsigned RhsIdx = ShuffleIdx - NumLhsElts;
- auto *RhsElt =
- dyn_cast<ConstantInt>(RhsConst->getAggregateElement(RhsIdx));
- if (!RhsElt || !RhsElt->isNullValue())
- return false;
- continue;
- }
-
- if (FoundVecOffset) {
- if (VecOffset + Idx != ShuffleIdx)
- return false;
- } else {
- if (ShuffleIdx < Idx)
- return false;
- VecOffset = ShuffleIdx - Idx;
- FoundVecOffset = true;
- }
- Mask.set(Idx);
- }
- return FoundVecOffset;
-}
-/// Try to fold the or of two scalar integers whose contents are packed elements
-/// of the same vector.
-bool VectorCombine::foldIntegerPackFromVector(Instruction &I) {
- assert(I.getOpcode() == Instruction::Or);
- Value *LhsVec, *RhsVec;
- uint64_t LhsVecOffset, RhsVecOffset;
- SmallBitVector Mask;
- if (!matchSubIntegerPackFromVector(I.getOperand(0), LhsVec, LhsVecOffset,
- Mask))
- return false;
- if (!matchSubIntegerPackFromVector(I.getOperand(1), RhsVec, RhsVecOffset,
- Mask))
- return false;
- if (LhsVec != RhsVec || LhsVecOffset != RhsVecOffset)
- return false;
-
- // Convert into shufflevector -> bitcast;
- const unsigned ZeroVecIdx =
- cast<FixedVectorType>(LhsVec->getType())->getNumElements();
- SmallVector<int> ShuffleMask(Mask.size(), ZeroVecIdx);
- for (unsigned Idx : Mask.set_bits())
- ShuffleMask[Idx] = LhsVecOffset + Idx;
-
- Value *MaskedVec = Builder.CreateShuffleVector(
- LhsVec, Constant::getNullValue(LhsVec->getType()), ShuffleMask,
- LhsVec->getName() + ".extract");
- Value *CastedVec = Builder.CreateBitCast(MaskedVec, I.getType(), I.getName());
- replaceValue(I, *CastedVec);
- return true;
-}
-
/// Try to convert "shuffle (binop (shuffle, shuffle)), undef"
/// --> "binop (shuffle), (shuffle)".
bool VectorCombine::foldPermuteOfBinops(Instruction &I) {
@@ -3859,9 +3742,6 @@ bool VectorCombine::run() {
if (Opcode == Instruction::Store)
MadeChange |= foldSingleElementStore(I);
- if (isa<IntegerType>(I.getType()) && Opcode == Instruction::Or)
- MadeChange |= foldIntegerPackFromVector(I);
-
// If this is an early pipeline invocation of this pass, we are done.
if (TryEarlyFoldsOnly)
return;
diff --git a/llvm/test/Transforms/VectorCombine/packed-integers.ll b/llvm/test/Transforms/InstCombine/or-packed-int-vecs.ll
similarity index 81%
rename from llvm/test/Transforms/VectorCombine/packed-integers.ll
rename to llvm/test/Transforms/InstCombine/or-packed-int-vecs.ll
index f01179bbde13c..1bb520a323474 100644
--- a/llvm/test/Transforms/VectorCombine/packed-integers.ll
+++ b/llvm/test/Transforms/InstCombine/or-packed-int-vecs.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -S -passes=vector-combine %s | FileCheck %s
+; RUN: opt -S -passes=instcombine %s | FileCheck %s
define i32 @bitcast.v2i(<4 x i8> %v) {
; CHECK-LABEL: define i32 @bitcast.v2i(
@@ -59,9 +59,10 @@ define i32 @bitcast.v2i.tree(<4 x i8> %v) {
define i32 @extract.i32(<8 x i8> %v) {
; CHECK-LABEL: define i32 @extract.i32(
; CHECK-SAME: <8 x i8> [[V:%.*]]) {
-; CHECK-NEXT: [[V_EXTRACT4:%.*]] = shufflevector <8 x i8> [[V]], <8 x i8> zeroinitializer, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-; CHECK-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[V_EXTRACT4]] to i32
-; CHECK-NEXT: ret i32 [[X_3]]
+; CHECK-NEXT: [[X_3_V_EXTRACT:%.*]] = shufflevector <8 x i8> [[V]], <8 x i8> poison, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[X_3_V_BC:%.*]] = bitcast <8 x i8> [[X_3_V_EXTRACT]] to <2 x i32>
+; CHECK-NEXT: [[X_3_V_EXTRACT1:%.*]] = extractelement <2 x i32> [[X_3_V_BC]], i64 0
+; CHECK-NEXT: ret i32 [[X_3_V_EXTRACT1]]
;
%v.0 = extractelement <8 x i8> %v, i32 3
%z.0 = zext i8 %v.0 to i32
@@ -87,8 +88,8 @@ define i32 @extract.i32(<8 x i8> %v) {
define i32 @partial(<4 x i8> %v) {
; CHECK-LABEL: define i32 @partial(
; CHECK-SAME: <4 x i8> [[V:%.*]]) {
-; CHECK-NEXT: [[V_EXTRACT2:%.*]] = shufflevector <4 x i8> [[V]], <4 x i8> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
-; CHECK-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[V_EXTRACT2]] to i32
+; CHECK-NEXT: [[X_3_V1:%.*]] = insertelement <4 x i8> [[V]], i8 0, i64 2
+; CHECK-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[X_3_V1]] to i32
; CHECK-NEXT: ret i32 [[X_3]]
;
%v.0 = extractelement <4 x i8> %v, i32 0
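The new insertelement case in this revision covers inputs like the one instcombine now emits for @partial above, presumably because a shuffle that only zeroes a single lane gets canonicalized to an insertelement before this fold sees it. A sketch of the additional form that matchSubIntegerPackFromVector accepts (illustrative IR):

  %masked = insertelement <4 x i8> %v, i8 0, i64 2
  %x      = bitcast <4 x i8> %masked to i32

Here Vec = %v and VecOffset = 0, and Mask is set for every lane except the inserted-zero lane 2, unless the other or-operand already claimed that lane, which is what the AlreadyInsertedMaskedElt check preserves.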
From e3638d56a52c81d479f6ec32005e0c066f561a52 Mon Sep 17 00:00:00 2001
From: Zach Goldthorpe <Zach.Goldthorpe at amd.com>
Date: Tue, 22 Jul 2025 09:53:10 -0500
Subject: [PATCH 4/4] Added endian coverage to tests.
---
llvm/test/Transforms/InstCombine/or-packed-int-vecs.ll | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/test/Transforms/InstCombine/or-packed-int-vecs.ll b/llvm/test/Transforms/InstCombine/or-packed-int-vecs.ll
index 1bb520a323474..1d6442a28eb59 100644
--- a/llvm/test/Transforms/InstCombine/or-packed-int-vecs.ll
+++ b/llvm/test/Transforms/InstCombine/or-packed-int-vecs.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -S -passes=instcombine %s | FileCheck %s
+; RUN: opt < %s -passes=instcombine -S -data-layout="E" | FileCheck %s --check-prefixes=CHECK
+; RUN: opt < %s -passes=instcombine -S -data-layout="e" | FileCheck %s --check-prefixes=CHECK
define i32 @bitcast.v2i(<4 x i8> %v) {
; CHECK-LABEL: define i32 @bitcast.v2i(
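The added big-endian RUN line seems worthwhile because the lane-to-bit mapping of the final scalar bitcast is endian-dependent. My reading of the IR semantics (not spelled out in the patch):

  ; bitcast <4 x i8> <i8 1, i8 2, i8 3, i8 4> to i32
  ;   data-layout "e": 0x04030201  (element 0 lands in the least significant byte)
  ;   data-layout "E": 0x01020304  (element 0 lands in the most significant byte)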