[llvm] [VectorCombine] Added pattern for recognising the construction of packed integers. (PR #147414)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 11 07:06:04 PDT 2025
https://github.com/zGoldthorpe updated https://github.com/llvm/llvm-project/pull/147414
>From d98ec0176f0425173c9ad4769513ccb72d313d0f Mon Sep 17 00:00:00 2001
From: Zach Goldthorpe <zgoldtho at ualberta.ca>
Date: Mon, 7 Jul 2025 17:09:29 -0500
Subject: [PATCH 1/2] Added pattern for folding packed integer constructions.
---
.../Transforms/Vectorize/VectorCombine.cpp | 125 ++++++++++++++++++
.../VectorCombine/packed-integers.ll | 108 +++++++++++++++
2 files changed, 233 insertions(+)
create mode 100644 llvm/test/Transforms/VectorCombine/packed-integers.ll
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index fe8d74c43dfdc..ce73a383d2555 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
@@ -125,6 +126,7 @@ class VectorCombine {
bool scalarizeLoadExtract(Instruction &I);
bool scalarizeExtExtract(Instruction &I);
bool foldConcatOfBoolMasks(Instruction &I);
+ bool foldIntegerPackFromVector(Instruction &I);
bool foldPermuteOfBinops(Instruction &I);
bool foldShuffleOfBinops(Instruction &I);
bool foldShuffleOfSelects(Instruction &I);
@@ -1957,6 +1959,126 @@ bool VectorCombine::foldConcatOfBoolMasks(Instruction &I) {
return true;
}
+/// Match "shufflevector -> bitcast" or "extractelement -> zext -> shl" patterns
+/// which extract vector elements and pack them in the same relative positions.
+static bool matchSubIntegerPackFromVector(Value *V, Value *&Vec,
+ uint64_t &VecOffset,
+ SmallBitVector &Mask) {
+ static const auto m_ConstShlOrSelf = [](const auto &Base, uint64_t &ShlAmt) {
+ ShlAmt = 0;
+ return m_CombineOr(m_Shl(Base, m_ConstantInt(ShlAmt)), Base);
+ };
+
+ // First try to match extractelement -> zext -> shl
+ uint64_t VecIdx, ShlAmt;
+ if (match(V, m_ConstShlOrSelf(m_ZExtOrSelf(m_ExtractElt(
+ m_Value(Vec), m_ConstantInt(VecIdx))),
+ ShlAmt))) {
+ auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
+ if (!VecTy)
+ return false;
+ auto *EltTy = dyn_cast<IntegerType>(VecTy->getElementType());
+ if (!EltTy)
+ return false;
+
+ const unsigned EltBitWidth = EltTy->getBitWidth();
+ const unsigned TargetBitWidth = V->getType()->getIntegerBitWidth();
+ if (TargetBitWidth % EltBitWidth != 0 || ShlAmt % EltBitWidth != 0)
+ return false;
+ const unsigned ShlEltAmt = ShlAmt / EltBitWidth;
+
+ if (ShlEltAmt > VecIdx)
+ return false;
+ VecOffset = VecIdx - ShlEltAmt;
+ Mask.resize(V->getType()->getIntegerBitWidth() / EltBitWidth);
+ Mask.set(ShlEltAmt);
+ return true;
+ }
+
+ // Now try to match shufflevector -> bitcast
+ Value *Lhs, *Rhs;
+ ArrayRef<int> ShuffleMask;
+ if (!match(V, m_BitCast(m_Shuffle(m_Value(Lhs), m_Value(Rhs),
+ m_Mask(ShuffleMask)))))
+ return false;
+ Mask.resize(ShuffleMask.size());
+
+ if (isa<Constant>(Lhs))
+ std::swap(Lhs, Rhs);
+
+ auto *RhsConst = dyn_cast<Constant>(Rhs);
+ if (!RhsConst)
+ return false;
+
+ auto *LhsTy = dyn_cast<FixedVectorType>(Lhs->getType());
+ if (!LhsTy)
+ return false;
+
+ Vec = Lhs;
+ const unsigned NumLhsElts = LhsTy->getNumElements();
+ bool FoundVecOffset = false;
+ for (unsigned Idx = 0; Idx < ShuffleMask.size(); ++Idx) {
+ if (ShuffleMask[Idx] == PoisonMaskElem)
+ return false;
+ const unsigned ShuffleIdx = ShuffleMask[Idx];
+ if (ShuffleIdx >= NumLhsElts) {
+ const unsigned RhsIdx = ShuffleIdx - NumLhsElts;
+ auto *RhsElt =
+ dyn_cast<ConstantInt>(RhsConst->getAggregateElement(RhsIdx));
+ if (!RhsElt || RhsElt->getZExtValue() != 0)
+ return false;
+ continue;
+ }
+
+ if (FoundVecOffset) {
+ if (VecOffset + Idx != ShuffleIdx)
+ return false;
+ } else {
+ if (ShuffleIdx < Idx)
+ return false;
+ VecOffset = ShuffleIdx - Idx;
+ FoundVecOffset = true;
+ }
+ Mask.set(Idx);
+ }
+ return FoundVecOffset;
+}
+/// Try to fold the or of two scalar integers whose contents are packed elements
+/// of the same vector.
+bool VectorCombine::foldIntegerPackFromVector(Instruction &I) {
+ assert(I.getOpcode() == Instruction::Or);
+ Value *LhsVec, *RhsVec;
+ uint64_t LhsVecOffset, RhsVecOffset;
+ SmallBitVector Mask;
+ if (!matchSubIntegerPackFromVector(I.getOperand(0), LhsVec, LhsVecOffset,
+ Mask))
+ return false;
+ if (!matchSubIntegerPackFromVector(I.getOperand(1), RhsVec, RhsVecOffset,
+ Mask))
+ return false;
+ if (LhsVec != RhsVec || LhsVecOffset != RhsVecOffset)
+ return false;
+
+ // Convert into shufflevector -> bitcast
+ SmallVector<int> ShuffleMask;
+ ShuffleMask.reserve(Mask.size());
+ const unsigned ZeroVecIdx =
+ cast<FixedVectorType>(LhsVec->getType())->getNumElements();
+ for (unsigned Idx = 0; Idx < Mask.size(); ++Idx) {
+ if (Mask.test(Idx))
+ ShuffleMask.push_back(LhsVecOffset + Idx);
+ else
+ ShuffleMask.push_back(ZeroVecIdx);
+ }
+
+ Value *MaskedVec = Builder.CreateShuffleVector(
+ LhsVec, Constant::getNullValue(LhsVec->getType()), ShuffleMask,
+ LhsVec->getName() + ".extract");
+ Value *CastedVec = Builder.CreateBitCast(MaskedVec, I.getType(), I.getName());
+ replaceValue(I, *CastedVec);
+ return true;
+}
+
/// Try to convert "shuffle (binop (shuffle, shuffle)), undef"
/// --> "binop (shuffle), (shuffle)".
bool VectorCombine::foldPermuteOfBinops(Instruction &I) {
@@ -3742,6 +3864,9 @@ bool VectorCombine::run() {
if (Opcode == Instruction::Store)
MadeChange |= foldSingleElementStore(I);
+ if (isa<IntegerType>(I.getType()) && Opcode == Instruction::Or)
+ MadeChange |= foldIntegerPackFromVector(I);
+
// If this is an early pipeline invocation of this pass, we are done.
if (TryEarlyFoldsOnly)
return;
diff --git a/llvm/test/Transforms/VectorCombine/packed-integers.ll b/llvm/test/Transforms/VectorCombine/packed-integers.ll
new file mode 100644
index 0000000000000..f01179bbde13c
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/packed-integers.ll
@@ -0,0 +1,108 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=vector-combine %s | FileCheck %s
+
+define i32 @bitcast.v2i(<4 x i8> %v) {
+; CHECK-LABEL: define i32 @bitcast.v2i(
+; CHECK-SAME: <4 x i8> [[V:%.*]]) {
+; CHECK-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[V]] to i32
+; CHECK-NEXT: ret i32 [[X_3]]
+;
+ %v.0 = extractelement <4 x i8> %v, i32 0
+ %z.0 = zext i8 %v.0 to i32
+
+ %v.1 = extractelement <4 x i8> %v, i32 1
+ %z.1 = zext i8 %v.1 to i32
+ %s.1 = shl i32 %z.1, 8
+ %x.1 = or i32 %z.0, %s.1
+
+ %v.2 = extractelement <4 x i8> %v, i32 2
+ %z.2 = zext i8 %v.2 to i32
+ %s.2 = shl i32 %z.2, 16
+ %x.2 = or i32 %x.1, %s.2
+
+ %v.3 = extractelement <4 x i8> %v, i32 3
+ %z.3 = zext i8 %v.3 to i32
+ %s.3 = shl i32 %z.3, 24
+ %x.3 = or i32 %x.2, %s.3
+
+ ret i32 %x.3
+}
+
+define i32 @bitcast.v2i.tree(<4 x i8> %v) {
+; CHECK-LABEL: define i32 @bitcast.v2i.tree(
+; CHECK-SAME: <4 x i8> [[V:%.*]]) {
+; CHECK-NEXT: [[X:%.*]] = bitcast <4 x i8> [[V]] to i32
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %v.0 = extractelement <4 x i8> %v, i32 0
+ %z.0 = zext i8 %v.0 to i32
+
+ %v.1 = extractelement <4 x i8> %v, i32 1
+ %z.1 = zext i8 %v.1 to i32
+ %s.1 = shl i32 %z.1, 8
+ %x.1 = or i32 %z.0, %s.1
+
+ %v.2 = extractelement <4 x i8> %v, i32 2
+ %z.2 = zext i8 %v.2 to i32
+ %s.2 = shl i32 %z.2, 16
+
+ %v.3 = extractelement <4 x i8> %v, i32 3
+ %z.3 = zext i8 %v.3 to i32
+ %s.3 = shl i32 %z.3, 24
+ %x.3 = or i32 %s.2, %s.3
+
+ %x = or i32 %x.1, %x.3
+
+ ret i32 %x
+}
+
+define i32 @extract.i32(<8 x i8> %v) {
+; CHECK-LABEL: define i32 @extract.i32(
+; CHECK-SAME: <8 x i8> [[V:%.*]]) {
+; CHECK-NEXT: [[V_EXTRACT4:%.*]] = shufflevector <8 x i8> [[V]], <8 x i8> zeroinitializer, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; CHECK-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[V_EXTRACT4]] to i32
+; CHECK-NEXT: ret i32 [[X_3]]
+;
+ %v.0 = extractelement <8 x i8> %v, i32 3
+ %z.0 = zext i8 %v.0 to i32
+
+ %v.1 = extractelement <8 x i8> %v, i32 4
+ %z.1 = zext i8 %v.1 to i32
+ %s.1 = shl i32 %z.1, 8
+ %x.1 = or i32 %z.0, %s.1
+
+ %v.2 = extractelement <8 x i8> %v, i32 5
+ %z.2 = zext i8 %v.2 to i32
+ %s.2 = shl i32 %z.2, 16
+ %x.2 = or i32 %x.1, %s.2
+
+ %v.3 = extractelement <8 x i8> %v, i32 6
+ %z.3 = zext i8 %v.3 to i32
+ %s.3 = shl i32 %z.3, 24
+ %x.3 = or i32 %x.2, %s.3
+
+ ret i32 %x.3
+}
+
+define i32 @partial(<4 x i8> %v) {
+; CHECK-LABEL: define i32 @partial(
+; CHECK-SAME: <4 x i8> [[V:%.*]]) {
+; CHECK-NEXT: [[V_EXTRACT2:%.*]] = shufflevector <4 x i8> [[V]], <4 x i8> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
+; CHECK-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[V_EXTRACT2]] to i32
+; CHECK-NEXT: ret i32 [[X_3]]
+;
+ %v.0 = extractelement <4 x i8> %v, i32 0
+ %z.0 = zext i8 %v.0 to i32
+
+ %v.1 = extractelement <4 x i8> %v, i32 1
+ %z.1 = zext i8 %v.1 to i32
+ %s.1 = shl i32 %z.1, 8
+ %x.1 = or i32 %z.0, %s.1
+
+ %v.3 = extractelement <4 x i8> %v, i32 3
+ %z.3 = zext i8 %v.3 to i32
+ %s.3 = shl i32 %z.3, 24
+ %x.3 = or i32 %x.1, %s.3
+
+ ret i32 %x.3
+}
>From 95e74dc814749b20434cf910f4103d0b1a8f3dc2 Mon Sep 17 00:00:00 2001
From: Zach Goldthorpe <zgoldtho at ualberta.ca>
Date: Fri, 11 Jul 2025 09:02:16 -0500
Subject: [PATCH 2/2] Incorporated reviewer feedback.
---
llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 15 +++++----------
1 file changed, 5 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index ce73a383d2555..4d67af7003de8 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -2025,7 +2025,7 @@ static bool matchSubIntegerPackFromVector(Value *V, Value *&Vec,
const unsigned RhsIdx = ShuffleIdx - NumLhsElts;
auto *RhsElt =
dyn_cast<ConstantInt>(RhsConst->getAggregateElement(RhsIdx));
- if (!RhsElt || RhsElt->getZExtValue() != 0)
+ if (!RhsElt || !RhsElt->isNullValue())
return false;
continue;
}
@@ -2059,17 +2059,12 @@ bool VectorCombine::foldIntegerPackFromVector(Instruction &I) {
if (LhsVec != RhsVec || LhsVecOffset != RhsVecOffset)
return false;
- // Convert into shufflevector -> bitcast
- SmallVector<int> ShuffleMask;
- ShuffleMask.reserve(Mask.size());
+ // Convert into shufflevector -> bitcast.
const unsigned ZeroVecIdx =
cast<FixedVectorType>(LhsVec->getType())->getNumElements();
- for (unsigned Idx = 0; Idx < Mask.size(); ++Idx) {
- if (Mask.test(Idx))
- ShuffleMask.push_back(LhsVecOffset + Idx);
- else
- ShuffleMask.push_back(ZeroVecIdx);
- }
+ SmallVector<int> ShuffleMask(Mask.size(), ZeroVecIdx);
+ for (unsigned Idx : Mask.set_bits())
+ ShuffleMask[Idx] = LhsVecOffset + Idx;
Value *MaskedVec = Builder.CreateShuffleVector(
LhsVec, Constant::getNullValue(LhsVec->getType()), ShuffleMask,
More information about the llvm-commits mailing list