[llvm] [InstCombine] Added pattern for recognising the construction of packed integers. (PR #147414)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 30 08:33:01 PDT 2025
https://github.com/zGoldthorpe updated https://github.com/llvm/llvm-project/pull/147414
>From d98ec0176f0425173c9ad4769513ccb72d313d0f Mon Sep 17 00:00:00 2001
From: Zach Goldthorpe <zgoldtho at ualberta.ca>
Date: Mon, 7 Jul 2025 17:09:29 -0500
Subject: [PATCH 1/6] Added pattern for folding packed integer constructions.
---
.../Transforms/Vectorize/VectorCombine.cpp | 125 ++++++++++++++++++
.../VectorCombine/packed-integers.ll | 108 +++++++++++++++
2 files changed, 233 insertions(+)
create mode 100644 llvm/test/Transforms/VectorCombine/packed-integers.ll
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index fe8d74c43dfdc..ce73a383d2555 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
@@ -125,6 +126,7 @@ class VectorCombine {
bool scalarizeLoadExtract(Instruction &I);
bool scalarizeExtExtract(Instruction &I);
bool foldConcatOfBoolMasks(Instruction &I);
+ bool foldIntegerPackFromVector(Instruction &I);
bool foldPermuteOfBinops(Instruction &I);
bool foldShuffleOfBinops(Instruction &I);
bool foldShuffleOfSelects(Instruction &I);
@@ -1957,6 +1959,126 @@ bool VectorCombine::foldConcatOfBoolMasks(Instruction &I) {
return true;
}
+/// Match "shufflevector -> bitcast" or "extractelement -> zext -> shl" patterns
+/// which extract vector elements and pack them in the same relative positions.
+static bool matchSubIntegerPackFromVector(Value *V, Value *&Vec,
+ uint64_t &VecOffset,
+ SmallBitVector &Mask) {
+ static const auto m_ConstShlOrSelf = [](const auto &Base, uint64_t &ShlAmt) {
+ ShlAmt = 0;
+ return m_CombineOr(m_Shl(Base, m_ConstantInt(ShlAmt)), Base);
+ };
+
+ // First try to match extractelement -> zext -> shl
+ uint64_t VecIdx, ShlAmt;
+ if (match(V, m_ConstShlOrSelf(m_ZExtOrSelf(m_ExtractElt(
+ m_Value(Vec), m_ConstantInt(VecIdx))),
+ ShlAmt))) {
+ auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
+ if (!VecTy)
+ return false;
+ auto *EltTy = dyn_cast<IntegerType>(VecTy->getElementType());
+ if (!EltTy)
+ return false;
+
+ const unsigned EltBitWidth = EltTy->getBitWidth();
+ const unsigned TargetBitWidth = V->getType()->getIntegerBitWidth();
+ if (TargetBitWidth % EltBitWidth != 0 || ShlAmt % EltBitWidth != 0)
+ return false;
+ const unsigned ShlEltAmt = ShlAmt / EltBitWidth;
+
+ if (ShlEltAmt > VecIdx)
+ return false;
+ VecOffset = VecIdx - ShlEltAmt;
+ Mask.resize(V->getType()->getIntegerBitWidth() / EltBitWidth);
+ Mask.set(ShlEltAmt);
+ return true;
+ }
+
+ // Now try to match shufflevector -> bitcast
+ Value *Lhs, *Rhs;
+ ArrayRef<int> ShuffleMask;
+ if (!match(V, m_BitCast(m_Shuffle(m_Value(Lhs), m_Value(Rhs),
+ m_Mask(ShuffleMask)))))
+ return false;
+ Mask.resize(ShuffleMask.size());
+
+ if (isa<Constant>(Lhs))
+ std::swap(Lhs, Rhs);
+
+ auto *RhsConst = dyn_cast<Constant>(Rhs);
+ if (!RhsConst)
+ return false;
+
+ auto *LhsTy = dyn_cast<FixedVectorType>(Lhs->getType());
+ if (!LhsTy)
+ return false;
+
+ Vec = Lhs;
+ const unsigned NumLhsElts = LhsTy->getNumElements();
+ bool FoundVecOffset = false;
+ for (unsigned Idx = 0; Idx < ShuffleMask.size(); ++Idx) {
+ if (ShuffleMask[Idx] == PoisonMaskElem)
+ return false;
+ const unsigned ShuffleIdx = ShuffleMask[Idx];
+ if (ShuffleIdx >= NumLhsElts) {
+ const unsigned RhsIdx = ShuffleIdx - NumLhsElts;
+ auto *RhsElt =
+ dyn_cast<ConstantInt>(RhsConst->getAggregateElement(RhsIdx));
+ if (!RhsElt || RhsElt->getZExtValue() != 0)
+ return false;
+ continue;
+ }
+
+ if (FoundVecOffset) {
+ if (VecOffset + Idx != ShuffleIdx)
+ return false;
+ } else {
+ if (ShuffleIdx < Idx)
+ return false;
+ VecOffset = ShuffleIdx - Idx;
+ FoundVecOffset = true;
+ }
+ Mask.set(Idx);
+ }
+ return FoundVecOffset;
+}
+/// Try to fold the or of two scalar integers whose contents are packed elements
+/// of the same vector.
+bool VectorCombine::foldIntegerPackFromVector(Instruction &I) {
+ assert(I.getOpcode() == Instruction::Or);
+ Value *LhsVec, *RhsVec;
+ uint64_t LhsVecOffset, RhsVecOffset;
+ SmallBitVector Mask;
+ if (!matchSubIntegerPackFromVector(I.getOperand(0), LhsVec, LhsVecOffset,
+ Mask))
+ return false;
+ if (!matchSubIntegerPackFromVector(I.getOperand(1), RhsVec, RhsVecOffset,
+ Mask))
+ return false;
+ if (LhsVec != RhsVec || LhsVecOffset != RhsVecOffset)
+ return false;
+
+ // Convert into shufflevector -> bitcast
+ SmallVector<int> ShuffleMask;
+ ShuffleMask.reserve(Mask.size());
+ const unsigned ZeroVecIdx =
+ cast<FixedVectorType>(LhsVec->getType())->getNumElements();
+ for (unsigned Idx = 0; Idx < Mask.size(); ++Idx) {
+ if (Mask.test(Idx))
+ ShuffleMask.push_back(LhsVecOffset + Idx);
+ else
+ ShuffleMask.push_back(ZeroVecIdx);
+ }
+
+ Value *MaskedVec = Builder.CreateShuffleVector(
+ LhsVec, Constant::getNullValue(LhsVec->getType()), ShuffleMask,
+ LhsVec->getName() + ".extract");
+ Value *CastedVec = Builder.CreateBitCast(MaskedVec, I.getType(), I.getName());
+ replaceValue(I, *CastedVec);
+ return true;
+}
+
/// Try to convert "shuffle (binop (shuffle, shuffle)), undef"
/// --> "binop (shuffle), (shuffle)".
bool VectorCombine::foldPermuteOfBinops(Instruction &I) {
@@ -3742,6 +3864,9 @@ bool VectorCombine::run() {
if (Opcode == Instruction::Store)
MadeChange |= foldSingleElementStore(I);
+ if (isa<IntegerType>(I.getType()) && Opcode == Instruction::Or)
+ MadeChange |= foldIntegerPackFromVector(I);
+
// If this is an early pipeline invocation of this pass, we are done.
if (TryEarlyFoldsOnly)
return;
diff --git a/llvm/test/Transforms/VectorCombine/packed-integers.ll b/llvm/test/Transforms/VectorCombine/packed-integers.ll
new file mode 100644
index 0000000000000..f01179bbde13c
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/packed-integers.ll
@@ -0,0 +1,108 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=vector-combine %s | FileCheck %s
+
+define i32 @bitcast.v2i(<4 x i8> %v) {
+; CHECK-LABEL: define i32 @bitcast.v2i(
+; CHECK-SAME: <4 x i8> [[V:%.*]]) {
+; CHECK-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[V]] to i32
+; CHECK-NEXT: ret i32 [[X_3]]
+;
+ %v.0 = extractelement <4 x i8> %v, i32 0
+ %z.0 = zext i8 %v.0 to i32
+
+ %v.1 = extractelement <4 x i8> %v, i32 1
+ %z.1 = zext i8 %v.1 to i32
+ %s.1 = shl i32 %z.1, 8
+ %x.1 = or i32 %z.0, %s.1
+
+ %v.2 = extractelement <4 x i8> %v, i32 2
+ %z.2 = zext i8 %v.2 to i32
+ %s.2 = shl i32 %z.2, 16
+ %x.2 = or i32 %x.1, %s.2
+
+ %v.3 = extractelement <4 x i8> %v, i32 3
+ %z.3 = zext i8 %v.3 to i32
+ %s.3 = shl i32 %z.3, 24
+ %x.3 = or i32 %x.2, %s.3
+
+ ret i32 %x.3
+}
+
+define i32 @bitcast.v2i.tree(<4 x i8> %v) {
+; CHECK-LABEL: define i32 @bitcast.v2i.tree(
+; CHECK-SAME: <4 x i8> [[V:%.*]]) {
+; CHECK-NEXT: [[X:%.*]] = bitcast <4 x i8> [[V]] to i32
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %v.0 = extractelement <4 x i8> %v, i32 0
+ %z.0 = zext i8 %v.0 to i32
+
+ %v.1 = extractelement <4 x i8> %v, i32 1
+ %z.1 = zext i8 %v.1 to i32
+ %s.1 = shl i32 %z.1, 8
+ %x.1 = or i32 %z.0, %s.1
+
+ %v.2 = extractelement <4 x i8> %v, i32 2
+ %z.2 = zext i8 %v.2 to i32
+ %s.2 = shl i32 %z.2, 16
+
+ %v.3 = extractelement <4 x i8> %v, i32 3
+ %z.3 = zext i8 %v.3 to i32
+ %s.3 = shl i32 %z.3, 24
+ %x.3 = or i32 %s.2, %s.3
+
+ %x = or i32 %x.1, %x.3
+
+ ret i32 %x
+}
+
+define i32 @extract.i32(<8 x i8> %v) {
+; CHECK-LABEL: define i32 @extract.i32(
+; CHECK-SAME: <8 x i8> [[V:%.*]]) {
+; CHECK-NEXT: [[V_EXTRACT4:%.*]] = shufflevector <8 x i8> [[V]], <8 x i8> zeroinitializer, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; CHECK-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[V_EXTRACT4]] to i32
+; CHECK-NEXT: ret i32 [[X_3]]
+;
+ %v.0 = extractelement <8 x i8> %v, i32 3
+ %z.0 = zext i8 %v.0 to i32
+
+ %v.1 = extractelement <8 x i8> %v, i32 4
+ %z.1 = zext i8 %v.1 to i32
+ %s.1 = shl i32 %z.1, 8
+ %x.1 = or i32 %z.0, %s.1
+
+ %v.2 = extractelement <8 x i8> %v, i32 5
+ %z.2 = zext i8 %v.2 to i32
+ %s.2 = shl i32 %z.2, 16
+ %x.2 = or i32 %x.1, %s.2
+
+ %v.3 = extractelement <8 x i8> %v, i32 6
+ %z.3 = zext i8 %v.3 to i32
+ %s.3 = shl i32 %z.3, 24
+ %x.3 = or i32 %x.2, %s.3
+
+ ret i32 %x.3
+}
+
+define i32 @partial(<4 x i8> %v) {
+; CHECK-LABEL: define i32 @partial(
+; CHECK-SAME: <4 x i8> [[V:%.*]]) {
+; CHECK-NEXT: [[V_EXTRACT2:%.*]] = shufflevector <4 x i8> [[V]], <4 x i8> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
+; CHECK-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[V_EXTRACT2]] to i32
+; CHECK-NEXT: ret i32 [[X_3]]
+;
+ %v.0 = extractelement <4 x i8> %v, i32 0
+ %z.0 = zext i8 %v.0 to i32
+
+ %v.1 = extractelement <4 x i8> %v, i32 1
+ %z.1 = zext i8 %v.1 to i32
+ %s.1 = shl i32 %z.1, 8
+ %x.1 = or i32 %z.0, %s.1
+
+ %v.3 = extractelement <4 x i8> %v, i32 3
+ %z.3 = zext i8 %v.3 to i32
+ %s.3 = shl i32 %z.3, 24
+ %x.3 = or i32 %x.1, %s.3
+
+ ret i32 %x.3
+}
>From 95e74dc814749b20434cf910f4103d0b1a8f3dc2 Mon Sep 17 00:00:00 2001
From: Zach Goldthorpe <zgoldtho at ualberta.ca>
Date: Fri, 11 Jul 2025 09:02:16 -0500
Subject: [PATCH 2/6] Incorporated reviewer feedback.
---
llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 15 +++++----------
1 file changed, 5 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index ce73a383d2555..4d67af7003de8 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -2025,7 +2025,7 @@ static bool matchSubIntegerPackFromVector(Value *V, Value *&Vec,
const unsigned RhsIdx = ShuffleIdx - NumLhsElts;
auto *RhsElt =
dyn_cast<ConstantInt>(RhsConst->getAggregateElement(RhsIdx));
- if (!RhsElt || RhsElt->getZExtValue() != 0)
+ if (!RhsElt || !RhsElt->isNullValue())
return false;
continue;
}
@@ -2059,17 +2059,12 @@ bool VectorCombine::foldIntegerPackFromVector(Instruction &I) {
if (LhsVec != RhsVec || LhsVecOffset != RhsVecOffset)
return false;
- // Convert into shufflevector -> bitcast
- SmallVector<int> ShuffleMask;
- ShuffleMask.reserve(Mask.size());
+ // Convert into shufflevector -> bitcast;
const unsigned ZeroVecIdx =
cast<FixedVectorType>(LhsVec->getType())->getNumElements();
- for (unsigned Idx = 0; Idx < Mask.size(); ++Idx) {
- if (Mask.test(Idx))
- ShuffleMask.push_back(LhsVecOffset + Idx);
- else
- ShuffleMask.push_back(ZeroVecIdx);
- }
+ SmallVector<int> ShuffleMask(Mask.size(), ZeroVecIdx);
+ for (unsigned Idx : Mask.set_bits())
+ ShuffleMask[Idx] = LhsVecOffset + Idx;
Value *MaskedVec = Builder.CreateShuffleVector(
LhsVec, Constant::getNullValue(LhsVec->getType()), ShuffleMask,
>From 7abb1534cd5ce01023aca95c748852e344d8beb4 Mon Sep 17 00:00:00 2001
From: Zach Goldthorpe <Zach.Goldthorpe at amd.com>
Date: Mon, 21 Jul 2025 15:13:37 -0500
Subject: [PATCH 3/6] Moved pattern to `instcombine`
---
.../InstCombine/InstCombineAndOrXor.cpp | 141 ++++++++++++++++++
.../Transforms/Vectorize/VectorCombine.cpp | 120 ---------------
.../or-packed-int-vecs.ll} | 13 +-
3 files changed, 148 insertions(+), 126 deletions(-)
rename llvm/test/Transforms/{VectorCombine/packed-integers.ll => InstCombine/or-packed-int-vecs.ll} (81%)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index dd16cfaeecd45..49cd60baf9fd5 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -11,10 +11,13 @@
//===----------------------------------------------------------------------===//
#include "InstCombineInternal.h"
+#include "llvm/ADT/SmallBitVector.h"
#include "llvm/Analysis/CmpInstAnalysis.h"
#include "llvm/Analysis/FloatingPointPredicateUtils.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/ConstantRange.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
@@ -3592,6 +3595,141 @@ static Value *foldOrOfInversions(BinaryOperator &I,
return nullptr;
}
+/// Match "shufflevector -> bitcast" or "extractelement -> zext -> shl" patterns
+/// which extract vector elements and pack them in the same relative positions.
+static bool matchSubIntegerPackFromVector(Value *V, Value *&Vec,
+ uint64_t &VecOffset,
+ SmallBitVector &Mask) {
+ static const auto m_ConstShlOrSelf = [](const auto &Base, uint64_t &ShlAmt) {
+ ShlAmt = 0;
+ return m_CombineOr(m_Shl(Base, m_ConstantInt(ShlAmt)), Base);
+ };
+
+ // First try to match extractelement -> zext -> shl
+ uint64_t VecIdx, ShlAmt;
+ if (match(V, m_ConstShlOrSelf(m_ZExtOrSelf(m_ExtractElt(
+ m_Value(Vec), m_ConstantInt(VecIdx))),
+ ShlAmt))) {
+ auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
+ if (!VecTy)
+ return false;
+ auto *EltTy = dyn_cast<IntegerType>(VecTy->getElementType());
+ if (!EltTy)
+ return false;
+
+ const unsigned EltBitWidth = EltTy->getBitWidth();
+ const unsigned TargetBitWidth = V->getType()->getIntegerBitWidth();
+ if (TargetBitWidth % EltBitWidth != 0 || ShlAmt % EltBitWidth != 0)
+ return false;
+ const unsigned ShlEltAmt = ShlAmt / EltBitWidth;
+
+ if (ShlEltAmt > VecIdx)
+ return false;
+ VecOffset = VecIdx - ShlEltAmt;
+ Mask.resize(V->getType()->getIntegerBitWidth() / EltBitWidth);
+ Mask.set(ShlEltAmt);
+ return true;
+ }
+
+ // Now try to match a bitcasted subvector.
+ Instruction *DstVecI;
+ if (!match(V, m_BitCast(m_Instruction(DstVecI))))
+ return false;
+
+ auto *DstTy = dyn_cast<FixedVectorType>(DstVecI->getType());
+ if (!DstTy)
+ return false;
+
+ Mask.resize(DstTy->getNumElements());
+
+ // First check for a subvector obtained from a shufflevector.
+ if (isa<ShuffleVectorInst>(DstVecI)) {
+ Constant *ConstVec;
+ ArrayRef<int> ShuffleMask;
+ if (!match(DstVecI, m_Shuffle(m_Value(Vec), m_Constant(ConstVec),
+ m_Mask(ShuffleMask))))
+ return false;
+
+ auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
+ if (!VecTy)
+ return false;
+
+ const unsigned NumVecElts = VecTy->getNumElements();
+ bool FoundVecOffset = false;
+ for (unsigned Idx = 0; Idx < ShuffleMask.size(); ++Idx) {
+ if (ShuffleMask[Idx] == PoisonMaskElem)
+ return false;
+ const unsigned ShuffleIdx = ShuffleMask[Idx];
+ if (ShuffleIdx >= NumVecElts) {
+ const unsigned ConstIdx = ShuffleIdx - NumVecElts;
+ auto *ConstElt =
+ dyn_cast<ConstantInt>(ConstVec->getAggregateElement(ConstIdx));
+ if (!ConstElt || !ConstElt->isNullValue())
+ return false;
+ continue;
+ }
+
+ if (FoundVecOffset) {
+ if (VecOffset + Idx != ShuffleIdx)
+ return false;
+ } else {
+ if (ShuffleIdx < Idx)
+ return false;
+ VecOffset = ShuffleIdx - Idx;
+ FoundVecOffset = true;
+ }
+ Mask.set(Idx);
+ }
+ return FoundVecOffset;
+ }
+
+ // Check for a subvector obtained as an (insertelement V, 0, idx)
+ uint64_t InsertIdx;
+ if (!match(DstVecI,
+ m_InsertElt(m_Value(Vec), m_Zero(), m_ConstantInt(InsertIdx))))
+ return false;
+
+ auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
+ if (!VecTy)
+ return false;
+ VecOffset = 0;
+ bool AlreadyInsertedMaskedElt = Mask.test(InsertIdx);
+ Mask.set();
+ if (!AlreadyInsertedMaskedElt)
+ Mask.reset(InsertIdx);
+ return true;
+}
+
+/// Try to fold the or of two scalar integers whose contents are packed elements
+/// of the same vector.
+Instruction *foldIntegerPackFromVector(Instruction &I,
+ InstCombiner::BuilderTy &Builder) {
+ assert(I.getOpcode() == Instruction::Or);
+ Value *LhsVec, *RhsVec;
+ uint64_t LhsVecOffset, RhsVecOffset;
+ SmallBitVector Mask;
+ if (!matchSubIntegerPackFromVector(I.getOperand(0), LhsVec, LhsVecOffset,
+ Mask))
+ return nullptr;
+ if (!matchSubIntegerPackFromVector(I.getOperand(1), RhsVec, RhsVecOffset,
+ Mask))
+ return nullptr;
+ if (LhsVec != RhsVec || LhsVecOffset != RhsVecOffset)
+ return nullptr;
+
+  // Convert into shufflevector -> bitcast.
+ const unsigned ZeroVecIdx =
+ cast<FixedVectorType>(LhsVec->getType())->getNumElements();
+ SmallVector<int> ShuffleMask(Mask.size(), ZeroVecIdx);
+ for (unsigned Idx : Mask.set_bits())
+ ShuffleMask[Idx] = LhsVecOffset + Idx;
+
+ Value *MaskedVec = Builder.CreateShuffleVector(
+ LhsVec, Constant::getNullValue(LhsVec->getType()), ShuffleMask,
+ I.getName() + ".v");
+ return CastInst::Create(Instruction::BitCast, MaskedVec, I.getType());
+}
+
// A decomposition of ((X & Mask) * Factor). The NUW / NSW bools
// track these properities for preservation. Note that we can decompose
// equivalent select form of this expression (e.g. (!(X & Mask) ? 0 : Mask *
@@ -3688,6 +3826,9 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
if (Instruction *X = foldComplexAndOrPatterns(I, Builder))
return X;
+ if (Instruction *X = foldIntegerPackFromVector(I, Builder))
+ return X;
+
// (A & B) | (C & D) -> A ^ D where A == ~C && B == ~D
// (A & B) | (C & D) -> A ^ C where A == ~D && B == ~C
if (Value *V = foldOrOfInversions(I, Builder))
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 4d67af7003de8..fe8d74c43dfdc 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -16,7 +16,6 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
-#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
@@ -126,7 +125,6 @@ class VectorCombine {
bool scalarizeLoadExtract(Instruction &I);
bool scalarizeExtExtract(Instruction &I);
bool foldConcatOfBoolMasks(Instruction &I);
- bool foldIntegerPackFromVector(Instruction &I);
bool foldPermuteOfBinops(Instruction &I);
bool foldShuffleOfBinops(Instruction &I);
bool foldShuffleOfSelects(Instruction &I);
@@ -1959,121 +1957,6 @@ bool VectorCombine::foldConcatOfBoolMasks(Instruction &I) {
return true;
}
-/// Match "shufflevector -> bitcast" or "extractelement -> zext -> shl" patterns
-/// which extract vector elements and pack them in the same relative positions.
-static bool matchSubIntegerPackFromVector(Value *V, Value *&Vec,
- uint64_t &VecOffset,
- SmallBitVector &Mask) {
- static const auto m_ConstShlOrSelf = [](const auto &Base, uint64_t &ShlAmt) {
- ShlAmt = 0;
- return m_CombineOr(m_Shl(Base, m_ConstantInt(ShlAmt)), Base);
- };
-
- // First try to match extractelement -> zext -> shl
- uint64_t VecIdx, ShlAmt;
- if (match(V, m_ConstShlOrSelf(m_ZExtOrSelf(m_ExtractElt(
- m_Value(Vec), m_ConstantInt(VecIdx))),
- ShlAmt))) {
- auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
- if (!VecTy)
- return false;
- auto *EltTy = dyn_cast<IntegerType>(VecTy->getElementType());
- if (!EltTy)
- return false;
-
- const unsigned EltBitWidth = EltTy->getBitWidth();
- const unsigned TargetBitWidth = V->getType()->getIntegerBitWidth();
- if (TargetBitWidth % EltBitWidth != 0 || ShlAmt % EltBitWidth != 0)
- return false;
- const unsigned ShlEltAmt = ShlAmt / EltBitWidth;
-
- if (ShlEltAmt > VecIdx)
- return false;
- VecOffset = VecIdx - ShlEltAmt;
- Mask.resize(V->getType()->getIntegerBitWidth() / EltBitWidth);
- Mask.set(ShlEltAmt);
- return true;
- }
-
- // Now try to match shufflevector -> bitcast
- Value *Lhs, *Rhs;
- ArrayRef<int> ShuffleMask;
- if (!match(V, m_BitCast(m_Shuffle(m_Value(Lhs), m_Value(Rhs),
- m_Mask(ShuffleMask)))))
- return false;
- Mask.resize(ShuffleMask.size());
-
- if (isa<Constant>(Lhs))
- std::swap(Lhs, Rhs);
-
- auto *RhsConst = dyn_cast<Constant>(Rhs);
- if (!RhsConst)
- return false;
-
- auto *LhsTy = dyn_cast<FixedVectorType>(Lhs->getType());
- if (!LhsTy)
- return false;
-
- Vec = Lhs;
- const unsigned NumLhsElts = LhsTy->getNumElements();
- bool FoundVecOffset = false;
- for (unsigned Idx = 0; Idx < ShuffleMask.size(); ++Idx) {
- if (ShuffleMask[Idx] == PoisonMaskElem)
- return false;
- const unsigned ShuffleIdx = ShuffleMask[Idx];
- if (ShuffleIdx >= NumLhsElts) {
- const unsigned RhsIdx = ShuffleIdx - NumLhsElts;
- auto *RhsElt =
- dyn_cast<ConstantInt>(RhsConst->getAggregateElement(RhsIdx));
- if (!RhsElt || !RhsElt->isNullValue())
- return false;
- continue;
- }
-
- if (FoundVecOffset) {
- if (VecOffset + Idx != ShuffleIdx)
- return false;
- } else {
- if (ShuffleIdx < Idx)
- return false;
- VecOffset = ShuffleIdx - Idx;
- FoundVecOffset = true;
- }
- Mask.set(Idx);
- }
- return FoundVecOffset;
-}
-/// Try to fold the or of two scalar integers whose contents are packed elements
-/// of the same vector.
-bool VectorCombine::foldIntegerPackFromVector(Instruction &I) {
- assert(I.getOpcode() == Instruction::Or);
- Value *LhsVec, *RhsVec;
- uint64_t LhsVecOffset, RhsVecOffset;
- SmallBitVector Mask;
- if (!matchSubIntegerPackFromVector(I.getOperand(0), LhsVec, LhsVecOffset,
- Mask))
- return false;
- if (!matchSubIntegerPackFromVector(I.getOperand(1), RhsVec, RhsVecOffset,
- Mask))
- return false;
- if (LhsVec != RhsVec || LhsVecOffset != RhsVecOffset)
- return false;
-
- // Convert into shufflevector -> bitcast;
- const unsigned ZeroVecIdx =
- cast<FixedVectorType>(LhsVec->getType())->getNumElements();
- SmallVector<int> ShuffleMask(Mask.size(), ZeroVecIdx);
- for (unsigned Idx : Mask.set_bits())
- ShuffleMask[Idx] = LhsVecOffset + Idx;
-
- Value *MaskedVec = Builder.CreateShuffleVector(
- LhsVec, Constant::getNullValue(LhsVec->getType()), ShuffleMask,
- LhsVec->getName() + ".extract");
- Value *CastedVec = Builder.CreateBitCast(MaskedVec, I.getType(), I.getName());
- replaceValue(I, *CastedVec);
- return true;
-}
-
/// Try to convert "shuffle (binop (shuffle, shuffle)), undef"
/// --> "binop (shuffle), (shuffle)".
bool VectorCombine::foldPermuteOfBinops(Instruction &I) {
@@ -3859,9 +3742,6 @@ bool VectorCombine::run() {
if (Opcode == Instruction::Store)
MadeChange |= foldSingleElementStore(I);
- if (isa<IntegerType>(I.getType()) && Opcode == Instruction::Or)
- MadeChange |= foldIntegerPackFromVector(I);
-
// If this is an early pipeline invocation of this pass, we are done.
if (TryEarlyFoldsOnly)
return;
diff --git a/llvm/test/Transforms/VectorCombine/packed-integers.ll b/llvm/test/Transforms/InstCombine/or-packed-int-vecs.ll
similarity index 81%
rename from llvm/test/Transforms/VectorCombine/packed-integers.ll
rename to llvm/test/Transforms/InstCombine/or-packed-int-vecs.ll
index f01179bbde13c..1bb520a323474 100644
--- a/llvm/test/Transforms/VectorCombine/packed-integers.ll
+++ b/llvm/test/Transforms/InstCombine/or-packed-int-vecs.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -S -passes=vector-combine %s | FileCheck %s
+; RUN: opt -S -passes=instcombine %s | FileCheck %s
define i32 @bitcast.v2i(<4 x i8> %v) {
; CHECK-LABEL: define i32 @bitcast.v2i(
@@ -59,9 +59,10 @@ define i32 @bitcast.v2i.tree(<4 x i8> %v) {
define i32 @extract.i32(<8 x i8> %v) {
; CHECK-LABEL: define i32 @extract.i32(
; CHECK-SAME: <8 x i8> [[V:%.*]]) {
-; CHECK-NEXT: [[V_EXTRACT4:%.*]] = shufflevector <8 x i8> [[V]], <8 x i8> zeroinitializer, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-; CHECK-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[V_EXTRACT4]] to i32
-; CHECK-NEXT: ret i32 [[X_3]]
+; CHECK-NEXT: [[X_3_V_EXTRACT:%.*]] = shufflevector <8 x i8> [[V]], <8 x i8> poison, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[X_3_V_BC:%.*]] = bitcast <8 x i8> [[X_3_V_EXTRACT]] to <2 x i32>
+; CHECK-NEXT: [[X_3_V_EXTRACT1:%.*]] = extractelement <2 x i32> [[X_3_V_BC]], i64 0
+; CHECK-NEXT: ret i32 [[X_3_V_EXTRACT1]]
;
%v.0 = extractelement <8 x i8> %v, i32 3
%z.0 = zext i8 %v.0 to i32
@@ -87,8 +88,8 @@ define i32 @extract.i32(<8 x i8> %v) {
define i32 @partial(<4 x i8> %v) {
; CHECK-LABEL: define i32 @partial(
; CHECK-SAME: <4 x i8> [[V:%.*]]) {
-; CHECK-NEXT: [[V_EXTRACT2:%.*]] = shufflevector <4 x i8> [[V]], <4 x i8> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
-; CHECK-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[V_EXTRACT2]] to i32
+; CHECK-NEXT: [[X_3_V1:%.*]] = insertelement <4 x i8> [[V]], i8 0, i64 2
+; CHECK-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[X_3_V1]] to i32
; CHECK-NEXT: ret i32 [[X_3]]
;
%v.0 = extractelement <4 x i8> %v, i32 0
>From e3638d56a52c81d479f6ec32005e0c066f561a52 Mon Sep 17 00:00:00 2001
From: Zach Goldthorpe <Zach.Goldthorpe at amd.com>
Date: Tue, 22 Jul 2025 09:53:10 -0500
Subject: [PATCH 4/6] Added endian coverage to tests.
---
llvm/test/Transforms/InstCombine/or-packed-int-vecs.ll | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/test/Transforms/InstCombine/or-packed-int-vecs.ll b/llvm/test/Transforms/InstCombine/or-packed-int-vecs.ll
index 1bb520a323474..1d6442a28eb59 100644
--- a/llvm/test/Transforms/InstCombine/or-packed-int-vecs.ll
+++ b/llvm/test/Transforms/InstCombine/or-packed-int-vecs.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -S -passes=instcombine %s | FileCheck %s
+; RUN: opt < %s -passes=instcombine -S -data-layout="E" | FileCheck %s --check-prefixes=CHECK
+; RUN: opt < %s -passes=instcombine -S -data-layout="e" | FileCheck %s --check-prefixes=CHECK
define i32 @bitcast.v2i(<4 x i8> %v) {
; CHECK-LABEL: define i32 @bitcast.v2i(
>From 2d929bd8aafbb3c48f57820fa33ac9eeb6c443a1 Mon Sep 17 00:00:00 2001
From: Zach Goldthorpe <Zach.Goldthorpe at amd.com>
Date: Tue, 22 Jul 2025 16:20:26 -0500
Subject: [PATCH 5/6] Corrected handling of big-endian vectors.
---
.../InstCombine/InstCombineAndOrXor.cpp | 47 +-
.../InstCombine/or-packed-int-vecs.ll | 674 ++++++++++++++++--
2 files changed, 664 insertions(+), 57 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 49cd60baf9fd5..852b5fff53b3d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -3595,11 +3595,19 @@ static Value *foldOrOfInversions(BinaryOperator &I,
return nullptr;
}
-/// Match "shufflevector -> bitcast" or "extractelement -> zext -> shl" patterns
-/// which extract vector elements and pack them in the same relative positions.
+/// Match \p V as "shufflevector -> bitcast" or "extractelement -> zext -> shl"
+/// patterns, which extract vector elements and pack them in the same relative
+/// positions.
+///
+/// \p Vec is the underlying vector being extracted from.
+/// \p Mask is a bitmask identifying which packed elements are obtained from the
+/// vector.
+/// \p VecOffset is the vector element corresponding to index 0 of the
+/// mask.
static bool matchSubIntegerPackFromVector(Value *V, Value *&Vec,
- uint64_t &VecOffset,
- SmallBitVector &Mask) {
+ int64_t &VecOffset,
+ SmallBitVector &Mask,
+ const DataLayout &DL) {
static const auto m_ConstShlOrSelf = [](const auto &Base, uint64_t &ShlAmt) {
ShlAmt = 0;
return m_CombineOr(m_Shl(Base, m_ConstantInt(ShlAmt)), Base);
@@ -3621,13 +3629,15 @@ static bool matchSubIntegerPackFromVector(Value *V, Value *&Vec,
const unsigned TargetBitWidth = V->getType()->getIntegerBitWidth();
if (TargetBitWidth % EltBitWidth != 0 || ShlAmt % EltBitWidth != 0)
return false;
+ const unsigned TargetEltWidth = TargetBitWidth / EltBitWidth;
const unsigned ShlEltAmt = ShlAmt / EltBitWidth;
- if (ShlEltAmt > VecIdx)
- return false;
- VecOffset = VecIdx - ShlEltAmt;
- Mask.resize(V->getType()->getIntegerBitWidth() / EltBitWidth);
- Mask.set(ShlEltAmt);
+ const unsigned MaskIdx =
+ DL.isLittleEndian() ? ShlEltAmt : TargetEltWidth - ShlEltAmt - 1;
+
+ VecOffset = static_cast<int64_t>(VecIdx) - static_cast<int64_t>(MaskIdx);
+ Mask.resize(TargetEltWidth);
+ Mask.set(MaskIdx);
return true;
}
@@ -3700,19 +3710,20 @@ static bool matchSubIntegerPackFromVector(Value *V, Value *&Vec,
return true;
}
-/// Try to fold the or of two scalar integers whose contents are packed elements
-/// of the same vector.
+/// Try to fold the join of two scalar integers whose contents are packed
+/// elements of the same vector.
Instruction *foldIntegerPackFromVector(Instruction &I,
- InstCombiner::BuilderTy &Builder) {
+ InstCombiner::BuilderTy &Builder,
+ const DataLayout &DL) {
assert(I.getOpcode() == Instruction::Or);
Value *LhsVec, *RhsVec;
- uint64_t LhsVecOffset, RhsVecOffset;
+ int64_t LhsVecOffset, RhsVecOffset;
SmallBitVector Mask;
if (!matchSubIntegerPackFromVector(I.getOperand(0), LhsVec, LhsVecOffset,
- Mask))
+ Mask, DL))
return nullptr;
if (!matchSubIntegerPackFromVector(I.getOperand(1), RhsVec, RhsVecOffset,
- Mask))
+ Mask, DL))
return nullptr;
if (LhsVec != RhsVec || LhsVecOffset != RhsVecOffset)
return nullptr;
@@ -3721,8 +3732,10 @@ Instruction *foldIntegerPackFromVector(Instruction &I,
const unsigned ZeroVecIdx =
cast<FixedVectorType>(LhsVec->getType())->getNumElements();
SmallVector<int> ShuffleMask(Mask.size(), ZeroVecIdx);
- for (unsigned Idx : Mask.set_bits())
+ for (unsigned Idx : Mask.set_bits()) {
+ assert(LhsVecOffset + Idx >= 0);
ShuffleMask[Idx] = LhsVecOffset + Idx;
+ }
Value *MaskedVec = Builder.CreateShuffleVector(
LhsVec, Constant::getNullValue(LhsVec->getType()), ShuffleMask,
@@ -3826,7 +3839,7 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
if (Instruction *X = foldComplexAndOrPatterns(I, Builder))
return X;
- if (Instruction *X = foldIntegerPackFromVector(I, Builder))
+ if (Instruction *X = foldIntegerPackFromVector(I, Builder, DL))
return X;
// (A & B) | (C & D) -> A ^ D where A == ~C && B == ~D
diff --git a/llvm/test/Transforms/InstCombine/or-packed-int-vecs.ll b/llvm/test/Transforms/InstCombine/or-packed-int-vecs.ll
index 1d6442a28eb59..24d16553b7792 100644
--- a/llvm/test/Transforms/InstCombine/or-packed-int-vecs.ll
+++ b/llvm/test/Transforms/InstCombine/or-packed-int-vecs.ll
@@ -1,27 +1,45 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt < %s -passes=instcombine -S -data-layout="E" | FileCheck %s --check-prefixes=CHECK
-; RUN: opt < %s -passes=instcombine -S -data-layout="e" | FileCheck %s --check-prefixes=CHECK
+; RUN: opt < %s -passes=instcombine -data-layout="E" -S | FileCheck %s --check-prefixes=CHECK-BE
+; RUN: opt < %s -passes=instcombine -data-layout="e" -S | FileCheck %s --check-prefixes=CHECK-LE
-define i32 @bitcast.v2i(<4 x i8> %v) {
-; CHECK-LABEL: define i32 @bitcast.v2i(
-; CHECK-SAME: <4 x i8> [[V:%.*]]) {
-; CHECK-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[V]] to i32
-; CHECK-NEXT: ret i32 [[X_3]]
+define i32 @bitcast.v2i.le(<4 x i8> %v) {
+; CHECK-BE-LABEL: define i32 @bitcast.v2i.le(
+; CHECK-BE-SAME: <4 x i8> [[V:%.*]]) {
+; CHECK-BE-NEXT: [[V_0:%.*]] = extractelement <4 x i8> [[V]], i64 0
+; CHECK-BE-NEXT: [[Z_0:%.*]] = zext i8 [[V_0]] to i32
+; CHECK-BE-NEXT: [[V_1:%.*]] = extractelement <4 x i8> [[V]], i64 1
+; CHECK-BE-NEXT: [[Z_1:%.*]] = zext i8 [[V_1]] to i32
+; CHECK-BE-NEXT: [[S_1:%.*]] = shl nuw nsw i32 [[Z_1]], 8
+; CHECK-BE-NEXT: [[X_1:%.*]] = or disjoint i32 [[S_1]], [[Z_0]]
+; CHECK-BE-NEXT: [[V_2:%.*]] = extractelement <4 x i8> [[V]], i64 2
+; CHECK-BE-NEXT: [[Z_2:%.*]] = zext i8 [[V_2]] to i32
+; CHECK-BE-NEXT: [[S_2:%.*]] = shl nuw nsw i32 [[Z_2]], 16
+; CHECK-BE-NEXT: [[X_2:%.*]] = or disjoint i32 [[X_1]], [[S_2]]
+; CHECK-BE-NEXT: [[V_3:%.*]] = extractelement <4 x i8> [[V]], i64 3
+; CHECK-BE-NEXT: [[Z_3:%.*]] = zext i8 [[V_3]] to i32
+; CHECK-BE-NEXT: [[S_3:%.*]] = shl nuw i32 [[Z_3]], 24
+; CHECK-BE-NEXT: [[X_3:%.*]] = or disjoint i32 [[X_2]], [[S_3]]
+; CHECK-BE-NEXT: ret i32 [[X_3]]
;
- %v.0 = extractelement <4 x i8> %v, i32 0
+; CHECK-LE-LABEL: define i32 @bitcast.v2i.le(
+; CHECK-LE-SAME: <4 x i8> [[V:%.*]]) {
+; CHECK-LE-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[V]] to i32
+; CHECK-LE-NEXT: ret i32 [[X_3]]
+;
+ %v.0 = extractelement <4 x i8> %v, i64 0
%z.0 = zext i8 %v.0 to i32
- %v.1 = extractelement <4 x i8> %v, i32 1
+ %v.1 = extractelement <4 x i8> %v, i64 1
%z.1 = zext i8 %v.1 to i32
%s.1 = shl i32 %z.1, 8
%x.1 = or i32 %z.0, %s.1
- %v.2 = extractelement <4 x i8> %v, i32 2
+ %v.2 = extractelement <4 x i8> %v, i64 2
%z.2 = zext i8 %v.2 to i32
%s.2 = shl i32 %z.2, 16
%x.2 = or i32 %x.1, %s.2
- %v.3 = extractelement <4 x i8> %v, i32 3
+ %v.3 = extractelement <4 x i8> %v, i64 3
%z.3 = zext i8 %v.3 to i32
%s.3 = shl i32 %z.3, 24
%x.3 = or i32 %x.2, %s.3
@@ -29,25 +47,178 @@ define i32 @bitcast.v2i(<4 x i8> %v) {
ret i32 %x.3
}
-define i32 @bitcast.v2i.tree(<4 x i8> %v) {
-; CHECK-LABEL: define i32 @bitcast.v2i.tree(
-; CHECK-SAME: <4 x i8> [[V:%.*]]) {
-; CHECK-NEXT: [[X:%.*]] = bitcast <4 x i8> [[V]] to i32
-; CHECK-NEXT: ret i32 [[X]]
+define i32 @bitcast.v2i.be(<4 x i8> %v) {
+; CHECK-BE-LABEL: define i32 @bitcast.v2i.be(
+; CHECK-BE-SAME: <4 x i8> [[V:%.*]]) {
+; CHECK-BE-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[V]] to i32
+; CHECK-BE-NEXT: ret i32 [[X_3]]
+;
+; CHECK-LE-LABEL: define i32 @bitcast.v2i.be(
+; CHECK-LE-SAME: <4 x i8> [[V:%.*]]) {
+; CHECK-LE-NEXT: [[V_0:%.*]] = extractelement <4 x i8> [[V]], i64 3
+; CHECK-LE-NEXT: [[Z_0:%.*]] = zext i8 [[V_0]] to i32
+; CHECK-LE-NEXT: [[V_1:%.*]] = extractelement <4 x i8> [[V]], i64 2
+; CHECK-LE-NEXT: [[Z_1:%.*]] = zext i8 [[V_1]] to i32
+; CHECK-LE-NEXT: [[S_1:%.*]] = shl nuw nsw i32 [[Z_1]], 8
+; CHECK-LE-NEXT: [[X_1:%.*]] = or disjoint i32 [[S_1]], [[Z_0]]
+; CHECK-LE-NEXT: [[V_2:%.*]] = extractelement <4 x i8> [[V]], i64 1
+; CHECK-LE-NEXT: [[Z_2:%.*]] = zext i8 [[V_2]] to i32
+; CHECK-LE-NEXT: [[S_2:%.*]] = shl nuw nsw i32 [[Z_2]], 16
+; CHECK-LE-NEXT: [[X_2:%.*]] = or disjoint i32 [[X_1]], [[S_2]]
+; CHECK-LE-NEXT: [[V_3:%.*]] = extractelement <4 x i8> [[V]], i64 0
+; CHECK-LE-NEXT: [[Z_3:%.*]] = zext i8 [[V_3]] to i32
+; CHECK-LE-NEXT: [[S_3:%.*]] = shl nuw i32 [[Z_3]], 24
+; CHECK-LE-NEXT: [[X_3:%.*]] = or disjoint i32 [[X_2]], [[S_3]]
+; CHECK-LE-NEXT: ret i32 [[X_3]]
;
- %v.0 = extractelement <4 x i8> %v, i32 0
+ %v.0 = extractelement <4 x i8> %v, i64 3
%z.0 = zext i8 %v.0 to i32
- %v.1 = extractelement <4 x i8> %v, i32 1
+ %v.1 = extractelement <4 x i8> %v, i64 2
%z.1 = zext i8 %v.1 to i32
%s.1 = shl i32 %z.1, 8
%x.1 = or i32 %z.0, %s.1
- %v.2 = extractelement <4 x i8> %v, i32 2
+ %v.2 = extractelement <4 x i8> %v, i64 1
%z.2 = zext i8 %v.2 to i32
%s.2 = shl i32 %z.2, 16
+ %x.2 = or i32 %x.1, %s.2
- %v.3 = extractelement <4 x i8> %v, i32 3
+ %v.3 = extractelement <4 x i8> %v, i64 0
+ %z.3 = zext i8 %v.3 to i32
+ %s.3 = shl i32 %z.3, 24
+ %x.3 = or i32 %x.2, %s.3
+
+ ret i32 %x.3
+}
+
+define i64 @bitcast.v2i.le.i16(<4 x i16> %v) {
+; CHECK-BE-LABEL: define i64 @bitcast.v2i.le.i16(
+; CHECK-BE-SAME: <4 x i16> [[V:%.*]]) {
+; CHECK-BE-NEXT: [[V_0:%.*]] = extractelement <4 x i16> [[V]], i64 0
+; CHECK-BE-NEXT: [[Z_0:%.*]] = zext i16 [[V_0]] to i64
+; CHECK-BE-NEXT: [[V_1:%.*]] = extractelement <4 x i16> [[V]], i64 1
+; CHECK-BE-NEXT: [[Z_1:%.*]] = zext i16 [[V_1]] to i64
+; CHECK-BE-NEXT: [[S_1:%.*]] = shl nuw nsw i64 [[Z_1]], 16
+; CHECK-BE-NEXT: [[X_1:%.*]] = or disjoint i64 [[S_1]], [[Z_0]]
+; CHECK-BE-NEXT: [[V_2:%.*]] = extractelement <4 x i16> [[V]], i64 2
+; CHECK-BE-NEXT: [[Z_2:%.*]] = zext i16 [[V_2]] to i64
+; CHECK-BE-NEXT: [[S_2:%.*]] = shl nuw nsw i64 [[Z_2]], 32
+; CHECK-BE-NEXT: [[X_2:%.*]] = or disjoint i64 [[X_1]], [[S_2]]
+; CHECK-BE-NEXT: [[V_3:%.*]] = extractelement <4 x i16> [[V]], i64 3
+; CHECK-BE-NEXT: [[Z_3:%.*]] = zext i16 [[V_3]] to i64
+; CHECK-BE-NEXT: [[S_3:%.*]] = shl nuw i64 [[Z_3]], 48
+; CHECK-BE-NEXT: [[X_3:%.*]] = or disjoint i64 [[X_2]], [[S_3]]
+; CHECK-BE-NEXT: ret i64 [[X_3]]
+;
+; CHECK-LE-LABEL: define i64 @bitcast.v2i.le.i16(
+; CHECK-LE-SAME: <4 x i16> [[V:%.*]]) {
+; CHECK-LE-NEXT: [[X_3:%.*]] = bitcast <4 x i16> [[V]] to i64
+; CHECK-LE-NEXT: ret i64 [[X_3]]
+;
+ %v.0 = extractelement <4 x i16> %v, i64 0
+ %z.0 = zext i16 %v.0 to i64
+
+ %v.1 = extractelement <4 x i16> %v, i64 1
+ %z.1 = zext i16 %v.1 to i64
+ %s.1 = shl i64 %z.1, 16
+ %x.1 = or i64 %z.0, %s.1
+
+ %v.2 = extractelement <4 x i16> %v, i64 2
+ %z.2 = zext i16 %v.2 to i64
+ %s.2 = shl i64 %z.2, 32
+ %x.2 = or i64 %x.1, %s.2
+
+ %v.3 = extractelement <4 x i16> %v, i64 3
+ %z.3 = zext i16 %v.3 to i64
+ %s.3 = shl i64 %z.3, 48
+ %x.3 = or i64 %x.2, %s.3
+
+ ret i64 %x.3
+}
+
+define i64 @bitcast.v2i.be.i16(<4 x i16> %v) {
+; CHECK-BE-LABEL: define i64 @bitcast.v2i.be.i16(
+; CHECK-BE-SAME: <4 x i16> [[V:%.*]]) {
+; CHECK-BE-NEXT: [[X_3:%.*]] = bitcast <4 x i16> [[V]] to i64
+; CHECK-BE-NEXT: ret i64 [[X_3]]
+;
+; CHECK-LE-LABEL: define i64 @bitcast.v2i.be.i16(
+; CHECK-LE-SAME: <4 x i16> [[V:%.*]]) {
+; CHECK-LE-NEXT: [[V_0:%.*]] = extractelement <4 x i16> [[V]], i64 3
+; CHECK-LE-NEXT: [[Z_0:%.*]] = zext i16 [[V_0]] to i64
+; CHECK-LE-NEXT: [[V_1:%.*]] = extractelement <4 x i16> [[V]], i64 2
+; CHECK-LE-NEXT: [[Z_1:%.*]] = zext i16 [[V_1]] to i64
+; CHECK-LE-NEXT: [[S_1:%.*]] = shl nuw nsw i64 [[Z_1]], 16
+; CHECK-LE-NEXT: [[X_1:%.*]] = or disjoint i64 [[S_1]], [[Z_0]]
+; CHECK-LE-NEXT: [[V_2:%.*]] = extractelement <4 x i16> [[V]], i64 1
+; CHECK-LE-NEXT: [[Z_2:%.*]] = zext i16 [[V_2]] to i64
+; CHECK-LE-NEXT: [[S_2:%.*]] = shl nuw nsw i64 [[Z_2]], 32
+; CHECK-LE-NEXT: [[X_2:%.*]] = or disjoint i64 [[X_1]], [[S_2]]
+; CHECK-LE-NEXT: [[V_3:%.*]] = extractelement <4 x i16> [[V]], i64 0
+; CHECK-LE-NEXT: [[Z_3:%.*]] = zext i16 [[V_3]] to i64
+; CHECK-LE-NEXT: [[S_3:%.*]] = shl nuw i64 [[Z_3]], 48
+; CHECK-LE-NEXT: [[X_3:%.*]] = or disjoint i64 [[X_2]], [[S_3]]
+; CHECK-LE-NEXT: ret i64 [[X_3]]
+;
+ %v.0 = extractelement <4 x i16> %v, i64 3
+ %z.0 = zext i16 %v.0 to i64
+
+ %v.1 = extractelement <4 x i16> %v, i64 2
+ %z.1 = zext i16 %v.1 to i64
+ %s.1 = shl i64 %z.1, 16
+ %x.1 = or i64 %z.0, %s.1
+
+ %v.2 = extractelement <4 x i16> %v, i64 1
+ %z.2 = zext i16 %v.2 to i64
+ %s.2 = shl i64 %z.2, 32
+ %x.2 = or i64 %x.1, %s.2
+
+ %v.3 = extractelement <4 x i16> %v, i64 0
+ %z.3 = zext i16 %v.3 to i64
+ %s.3 = shl i64 %z.3, 48
+ %x.3 = or i64 %x.2, %s.3
+
+ ret i64 %x.3
+}
+
+define i32 @bitcast.v2i.le.tree(<4 x i8> %v) {
+; CHECK-BE-LABEL: define i32 @bitcast.v2i.le.tree(
+; CHECK-BE-SAME: <4 x i8> [[V:%.*]]) {
+; CHECK-BE-NEXT: [[V_0:%.*]] = extractelement <4 x i8> [[V]], i64 0
+; CHECK-BE-NEXT: [[Z_0:%.*]] = zext i8 [[V_0]] to i32
+; CHECK-BE-NEXT: [[V_1:%.*]] = extractelement <4 x i8> [[V]], i64 1
+; CHECK-BE-NEXT: [[Z_1:%.*]] = zext i8 [[V_1]] to i32
+; CHECK-BE-NEXT: [[S_1:%.*]] = shl nuw nsw i32 [[Z_1]], 8
+; CHECK-BE-NEXT: [[X_1:%.*]] = or disjoint i32 [[S_1]], [[Z_0]]
+; CHECK-BE-NEXT: [[V_2:%.*]] = extractelement <4 x i8> [[V]], i64 2
+; CHECK-BE-NEXT: [[Z_2:%.*]] = zext i8 [[V_2]] to i32
+; CHECK-BE-NEXT: [[S_2:%.*]] = shl nuw nsw i32 [[Z_2]], 16
+; CHECK-BE-NEXT: [[V_3:%.*]] = extractelement <4 x i8> [[V]], i64 3
+; CHECK-BE-NEXT: [[Z_3:%.*]] = zext i8 [[V_3]] to i32
+; CHECK-BE-NEXT: [[S_3:%.*]] = shl nuw i32 [[Z_3]], 24
+; CHECK-BE-NEXT: [[X_3:%.*]] = or disjoint i32 [[S_2]], [[S_3]]
+; CHECK-BE-NEXT: [[X:%.*]] = or disjoint i32 [[X_1]], [[X_3]]
+; CHECK-BE-NEXT: ret i32 [[X]]
+;
+; CHECK-LE-LABEL: define i32 @bitcast.v2i.le.tree(
+; CHECK-LE-SAME: <4 x i8> [[V:%.*]]) {
+; CHECK-LE-NEXT: [[X:%.*]] = bitcast <4 x i8> [[V]] to i32
+; CHECK-LE-NEXT: ret i32 [[X]]
+;
+ %v.0 = extractelement <4 x i8> %v, i64 0
+ %z.0 = zext i8 %v.0 to i32
+
+ %v.1 = extractelement <4 x i8> %v, i64 1
+ %z.1 = zext i8 %v.1 to i32
+ %s.1 = shl i32 %z.1, 8
+ %x.1 = or i32 %z.0, %s.1
+
+ %v.2 = extractelement <4 x i8> %v, i64 2
+ %z.2 = zext i8 %v.2 to i32
+ %s.2 = shl i32 %z.2, 16
+
+ %v.3 = extractelement <4 x i8> %v, i64 3
%z.3 = zext i8 %v.3 to i32
%s.3 = shl i32 %z.3, 24
%x.3 = or i32 %s.2, %s.3
@@ -57,28 +228,184 @@ define i32 @bitcast.v2i.tree(<4 x i8> %v) {
ret i32 %x
}
-define i32 @extract.i32(<8 x i8> %v) {
-; CHECK-LABEL: define i32 @extract.i32(
-; CHECK-SAME: <8 x i8> [[V:%.*]]) {
-; CHECK-NEXT: [[X_3_V_EXTRACT:%.*]] = shufflevector <8 x i8> [[V]], <8 x i8> poison, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[X_3_V_BC:%.*]] = bitcast <8 x i8> [[X_3_V_EXTRACT]] to <2 x i32>
-; CHECK-NEXT: [[X_3_V_EXTRACT1:%.*]] = extractelement <2 x i32> [[X_3_V_BC]], i64 0
-; CHECK-NEXT: ret i32 [[X_3_V_EXTRACT1]]
+define i32 @bitcast.v2i.be.tree(<4 x i8> %v) {
+; CHECK-BE-LABEL: define i32 @bitcast.v2i.be.tree(
+; CHECK-BE-SAME: <4 x i8> [[V:%.*]]) {
+; CHECK-BE-NEXT: [[X:%.*]] = bitcast <4 x i8> [[V]] to i32
+; CHECK-BE-NEXT: ret i32 [[X]]
+;
+; CHECK-LE-LABEL: define i32 @bitcast.v2i.be.tree(
+; CHECK-LE-SAME: <4 x i8> [[V:%.*]]) {
+; CHECK-LE-NEXT: [[V_0:%.*]] = extractelement <4 x i8> [[V]], i64 3
+; CHECK-LE-NEXT: [[Z_0:%.*]] = zext i8 [[V_0]] to i32
+; CHECK-LE-NEXT: [[V_1:%.*]] = extractelement <4 x i8> [[V]], i64 2
+; CHECK-LE-NEXT: [[Z_1:%.*]] = zext i8 [[V_1]] to i32
+; CHECK-LE-NEXT: [[S_1:%.*]] = shl nuw nsw i32 [[Z_1]], 8
+; CHECK-LE-NEXT: [[X_1:%.*]] = or disjoint i32 [[S_1]], [[Z_0]]
+; CHECK-LE-NEXT: [[V_2:%.*]] = extractelement <4 x i8> [[V]], i64 1
+; CHECK-LE-NEXT: [[Z_2:%.*]] = zext i8 [[V_2]] to i32
+; CHECK-LE-NEXT: [[S_2:%.*]] = shl nuw nsw i32 [[Z_2]], 16
+; CHECK-LE-NEXT: [[V_3:%.*]] = extractelement <4 x i8> [[V]], i64 0
+; CHECK-LE-NEXT: [[Z_3:%.*]] = zext i8 [[V_3]] to i32
+; CHECK-LE-NEXT: [[S_3:%.*]] = shl nuw i32 [[Z_3]], 24
+; CHECK-LE-NEXT: [[X_3:%.*]] = or disjoint i32 [[S_2]], [[S_3]]
+; CHECK-LE-NEXT: [[X:%.*]] = or disjoint i32 [[X_1]], [[X_3]]
+; CHECK-LE-NEXT: ret i32 [[X]]
;
- %v.0 = extractelement <8 x i8> %v, i32 3
+ %v.0 = extractelement <4 x i8> %v, i64 3
%z.0 = zext i8 %v.0 to i32
- %v.1 = extractelement <8 x i8> %v, i32 4
+ %v.1 = extractelement <4 x i8> %v, i64 2
%z.1 = zext i8 %v.1 to i32
%s.1 = shl i32 %z.1, 8
%x.1 = or i32 %z.0, %s.1
- %v.2 = extractelement <8 x i8> %v, i32 5
+ %v.2 = extractelement <4 x i8> %v, i64 1
+ %z.2 = zext i8 %v.2 to i32
+ %s.2 = shl i32 %z.2, 16
+
+ %v.3 = extractelement <4 x i8> %v, i64 0
+ %z.3 = zext i8 %v.3 to i32
+ %s.3 = shl i32 %z.3, 24
+ %x.3 = or i32 %s.2, %s.3
+
+ %x = or i32 %x.1, %x.3
+
+ ret i32 %x
+}
+
+define i64 @bitcast.v2i.le.tree.i16(<4 x i16> %v) {
+; CHECK-BE-LABEL: define i64 @bitcast.v2i.le.tree.i16(
+; CHECK-BE-SAME: <4 x i16> [[V:%.*]]) {
+; CHECK-BE-NEXT: [[V_0:%.*]] = extractelement <4 x i16> [[V]], i64 0
+; CHECK-BE-NEXT: [[Z_0:%.*]] = zext i16 [[V_0]] to i64
+; CHECK-BE-NEXT: [[V_1:%.*]] = extractelement <4 x i16> [[V]], i64 1
+; CHECK-BE-NEXT: [[Z_1:%.*]] = zext i16 [[V_1]] to i64
+; CHECK-BE-NEXT: [[S_1:%.*]] = shl nuw nsw i64 [[Z_1]], 16
+; CHECK-BE-NEXT: [[X_1:%.*]] = or disjoint i64 [[S_1]], [[Z_0]]
+; CHECK-BE-NEXT: [[V_2:%.*]] = extractelement <4 x i16> [[V]], i64 2
+; CHECK-BE-NEXT: [[Z_2:%.*]] = zext i16 [[V_2]] to i64
+; CHECK-BE-NEXT: [[S_2:%.*]] = shl nuw nsw i64 [[Z_2]], 32
+; CHECK-BE-NEXT: [[V_3:%.*]] = extractelement <4 x i16> [[V]], i64 3
+; CHECK-BE-NEXT: [[Z_3:%.*]] = zext i16 [[V_3]] to i64
+; CHECK-BE-NEXT: [[S_3:%.*]] = shl nuw i64 [[Z_3]], 48
+; CHECK-BE-NEXT: [[X_3:%.*]] = or disjoint i64 [[S_2]], [[S_3]]
+; CHECK-BE-NEXT: [[X:%.*]] = or disjoint i64 [[X_1]], [[X_3]]
+; CHECK-BE-NEXT: ret i64 [[X]]
+;
+; CHECK-LE-LABEL: define i64 @bitcast.v2i.le.tree.i16(
+; CHECK-LE-SAME: <4 x i16> [[V:%.*]]) {
+; CHECK-LE-NEXT: [[X:%.*]] = bitcast <4 x i16> [[V]] to i64
+; CHECK-LE-NEXT: ret i64 [[X]]
+;
+ %v.0 = extractelement <4 x i16> %v, i64 0
+ %z.0 = zext i16 %v.0 to i64
+
+ %v.1 = extractelement <4 x i16> %v, i64 1
+ %z.1 = zext i16 %v.1 to i64
+ %s.1 = shl i64 %z.1, 16
+ %x.1 = or i64 %z.0, %s.1
+
+ %v.2 = extractelement <4 x i16> %v, i64 2
+ %z.2 = zext i16 %v.2 to i64
+ %s.2 = shl i64 %z.2, 32
+
+ %v.3 = extractelement <4 x i16> %v, i64 3
+ %z.3 = zext i16 %v.3 to i64
+ %s.3 = shl i64 %z.3, 48
+ %x.3 = or i64 %s.2, %s.3
+
+ %x = or i64 %x.1, %x.3
+
+ ret i64 %x
+}
+
+define i64 @bitcast.v2i.be.tree.i16(<4 x i16> %v) {
+; CHECK-BE-LABEL: define i64 @bitcast.v2i.be.tree.i16(
+; CHECK-BE-SAME: <4 x i16> [[V:%.*]]) {
+; CHECK-BE-NEXT: [[X:%.*]] = bitcast <4 x i16> [[V]] to i64
+; CHECK-BE-NEXT: ret i64 [[X]]
+;
+; CHECK-LE-LABEL: define i64 @bitcast.v2i.be.tree.i16(
+; CHECK-LE-SAME: <4 x i16> [[V:%.*]]) {
+; CHECK-LE-NEXT: [[V_0:%.*]] = extractelement <4 x i16> [[V]], i64 3
+; CHECK-LE-NEXT: [[Z_0:%.*]] = zext i16 [[V_0]] to i64
+; CHECK-LE-NEXT: [[V_1:%.*]] = extractelement <4 x i16> [[V]], i64 2
+; CHECK-LE-NEXT: [[Z_1:%.*]] = zext i16 [[V_1]] to i64
+; CHECK-LE-NEXT: [[S_1:%.*]] = shl nuw nsw i64 [[Z_1]], 16
+; CHECK-LE-NEXT: [[X_1:%.*]] = or disjoint i64 [[S_1]], [[Z_0]]
+; CHECK-LE-NEXT: [[V_2:%.*]] = extractelement <4 x i16> [[V]], i64 1
+; CHECK-LE-NEXT: [[Z_2:%.*]] = zext i16 [[V_2]] to i64
+; CHECK-LE-NEXT: [[S_2:%.*]] = shl nuw nsw i64 [[Z_2]], 32
+; CHECK-LE-NEXT: [[V_3:%.*]] = extractelement <4 x i16> [[V]], i64 0
+; CHECK-LE-NEXT: [[Z_3:%.*]] = zext i16 [[V_3]] to i64
+; CHECK-LE-NEXT: [[S_3:%.*]] = shl nuw i64 [[Z_3]], 48
+; CHECK-LE-NEXT: [[X_3:%.*]] = or disjoint i64 [[S_2]], [[S_3]]
+; CHECK-LE-NEXT: [[X:%.*]] = or disjoint i64 [[X_1]], [[X_3]]
+; CHECK-LE-NEXT: ret i64 [[X]]
+;
+ %v.0 = extractelement <4 x i16> %v, i64 3
+ %z.0 = zext i16 %v.0 to i64
+
+ %v.1 = extractelement <4 x i16> %v, i64 2
+ %z.1 = zext i16 %v.1 to i64
+ %s.1 = shl i64 %z.1, 16
+ %x.1 = or i64 %z.0, %s.1
+
+ %v.2 = extractelement <4 x i16> %v, i64 1
+ %z.2 = zext i16 %v.2 to i64
+ %s.2 = shl i64 %z.2, 32
+
+ %v.3 = extractelement <4 x i16> %v, i64 0
+ %z.3 = zext i16 %v.3 to i64
+ %s.3 = shl i64 %z.3, 48
+ %x.3 = or i64 %s.2, %s.3
+
+ %x = or i64 %x.1, %x.3
+
+ ret i64 %x
+}
+
+define i32 @extract.le.i32(<8 x i8> %v) {
+; CHECK-BE-LABEL: define i32 @extract.le.i32(
+; CHECK-BE-SAME: <8 x i8> [[V:%.*]]) {
+; CHECK-BE-NEXT: [[V_0:%.*]] = extractelement <8 x i8> [[V]], i64 3
+; CHECK-BE-NEXT: [[Z_0:%.*]] = zext i8 [[V_0]] to i32
+; CHECK-BE-NEXT: [[V_1:%.*]] = extractelement <8 x i8> [[V]], i64 4
+; CHECK-BE-NEXT: [[Z_1:%.*]] = zext i8 [[V_1]] to i32
+; CHECK-BE-NEXT: [[S_1:%.*]] = shl nuw nsw i32 [[Z_1]], 8
+; CHECK-BE-NEXT: [[X_1:%.*]] = or disjoint i32 [[S_1]], [[Z_0]]
+; CHECK-BE-NEXT: [[V_2:%.*]] = extractelement <8 x i8> [[V]], i64 5
+; CHECK-BE-NEXT: [[Z_2:%.*]] = zext i8 [[V_2]] to i32
+; CHECK-BE-NEXT: [[S_2:%.*]] = shl nuw nsw i32 [[Z_2]], 16
+; CHECK-BE-NEXT: [[X_2:%.*]] = or disjoint i32 [[X_1]], [[S_2]]
+; CHECK-BE-NEXT: [[V_3:%.*]] = extractelement <8 x i8> [[V]], i64 6
+; CHECK-BE-NEXT: [[Z_3:%.*]] = zext i8 [[V_3]] to i32
+; CHECK-BE-NEXT: [[S_3:%.*]] = shl nuw i32 [[Z_3]], 24
+; CHECK-BE-NEXT: [[X_3:%.*]] = or disjoint i32 [[X_2]], [[S_3]]
+; CHECK-BE-NEXT: ret i32 [[X_3]]
+;
+; CHECK-LE-LABEL: define i32 @extract.le.i32(
+; CHECK-LE-SAME: <8 x i8> [[V:%.*]]) {
+; CHECK-LE-NEXT: [[X_3_V_EXTRACT:%.*]] = shufflevector <8 x i8> [[V]], <8 x i8> poison, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-LE-NEXT: [[X_3_V_BC:%.*]] = bitcast <8 x i8> [[X_3_V_EXTRACT]] to <2 x i32>
+; CHECK-LE-NEXT: [[X_3_V_EXTRACT1:%.*]] = extractelement <2 x i32> [[X_3_V_BC]], i64 0
+; CHECK-LE-NEXT: ret i32 [[X_3_V_EXTRACT1]]
+;
+ %v.0 = extractelement <8 x i8> %v, i64 3
+ %z.0 = zext i8 %v.0 to i32
+
+ %v.1 = extractelement <8 x i8> %v, i64 4
+ %z.1 = zext i8 %v.1 to i32
+ %s.1 = shl i32 %z.1, 8
+ %x.1 = or i32 %z.0, %s.1
+
+ %v.2 = extractelement <8 x i8> %v, i64 5
%z.2 = zext i8 %v.2 to i32
%s.2 = shl i32 %z.2, 16
%x.2 = or i32 %x.1, %s.2
- %v.3 = extractelement <8 x i8> %v, i32 6
+ %v.3 = extractelement <8 x i8> %v, i64 6
%z.3 = zext i8 %v.3 to i32
%s.3 = shl i32 %z.3, 24
%x.3 = or i32 %x.2, %s.3
@@ -86,25 +413,292 @@ define i32 @extract.i32(<8 x i8> %v) {
ret i32 %x.3
}
-define i32 @partial(<4 x i8> %v) {
-; CHECK-LABEL: define i32 @partial(
-; CHECK-SAME: <4 x i8> [[V:%.*]]) {
-; CHECK-NEXT: [[X_3_V1:%.*]] = insertelement <4 x i8> [[V]], i8 0, i64 2
-; CHECK-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[X_3_V1]] to i32
-; CHECK-NEXT: ret i32 [[X_3]]
+define i32 @extract.be.i32(<8 x i8> %v) {
+; CHECK-BE-LABEL: define i32 @extract.be.i32(
+; CHECK-BE-SAME: <8 x i8> [[V:%.*]]) {
+; CHECK-BE-NEXT: [[X_3_V_EXTRACT:%.*]] = shufflevector <8 x i8> [[V]], <8 x i8> poison, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-BE-NEXT: [[X_3_V_BC:%.*]] = bitcast <8 x i8> [[X_3_V_EXTRACT]] to <2 x i32>
+; CHECK-BE-NEXT: [[X_3_V_EXTRACT1:%.*]] = extractelement <2 x i32> [[X_3_V_BC]], i64 0
+; CHECK-BE-NEXT: ret i32 [[X_3_V_EXTRACT1]]
+;
+; CHECK-LE-LABEL: define i32 @extract.be.i32(
+; CHECK-LE-SAME: <8 x i8> [[V:%.*]]) {
+; CHECK-LE-NEXT: [[V_0:%.*]] = extractelement <8 x i8> [[V]], i64 6
+; CHECK-LE-NEXT: [[Z_0:%.*]] = zext i8 [[V_0]] to i32
+; CHECK-LE-NEXT: [[V_1:%.*]] = extractelement <8 x i8> [[V]], i64 5
+; CHECK-LE-NEXT: [[Z_1:%.*]] = zext i8 [[V_1]] to i32
+; CHECK-LE-NEXT: [[S_1:%.*]] = shl nuw nsw i32 [[Z_1]], 8
+; CHECK-LE-NEXT: [[X_1:%.*]] = or disjoint i32 [[S_1]], [[Z_0]]
+; CHECK-LE-NEXT: [[V_2:%.*]] = extractelement <8 x i8> [[V]], i64 4
+; CHECK-LE-NEXT: [[Z_2:%.*]] = zext i8 [[V_2]] to i32
+; CHECK-LE-NEXT: [[S_2:%.*]] = shl nuw nsw i32 [[Z_2]], 16
+; CHECK-LE-NEXT: [[X_2:%.*]] = or disjoint i32 [[X_1]], [[S_2]]
+; CHECK-LE-NEXT: [[V_3:%.*]] = extractelement <8 x i8> [[V]], i64 3
+; CHECK-LE-NEXT: [[Z_3:%.*]] = zext i8 [[V_3]] to i32
+; CHECK-LE-NEXT: [[S_3:%.*]] = shl nuw i32 [[Z_3]], 24
+; CHECK-LE-NEXT: [[X_3:%.*]] = or disjoint i32 [[X_2]], [[S_3]]
+; CHECK-LE-NEXT: ret i32 [[X_3]]
+;
+ %v.0 = extractelement <8 x i8> %v, i64 6
+ %z.0 = zext i8 %v.0 to i32
+
+ %v.1 = extractelement <8 x i8> %v, i64 5
+ %z.1 = zext i8 %v.1 to i32
+ %s.1 = shl i32 %z.1, 8
+ %x.1 = or i32 %z.0, %s.1
+
+ %v.2 = extractelement <8 x i8> %v, i64 4
+ %z.2 = zext i8 %v.2 to i32
+ %s.2 = shl i32 %z.2, 16
+ %x.2 = or i32 %x.1, %s.2
+
+ %v.3 = extractelement <8 x i8> %v, i64 3
+ %z.3 = zext i8 %v.3 to i32
+ %s.3 = shl i32 %z.3, 24
+ %x.3 = or i32 %x.2, %s.3
+
+ ret i32 %x.3
+}
+
+define i64 @extract.le.i64(<8 x i16> %v) {
+; CHECK-BE-LABEL: define i64 @extract.le.i64(
+; CHECK-BE-SAME: <8 x i16> [[V:%.*]]) {
+; CHECK-BE-NEXT: [[V_0:%.*]] = extractelement <8 x i16> [[V]], i64 3
+; CHECK-BE-NEXT: [[Z_0:%.*]] = zext i16 [[V_0]] to i64
+; CHECK-BE-NEXT: [[V_1:%.*]] = extractelement <8 x i16> [[V]], i64 4
+; CHECK-BE-NEXT: [[Z_1:%.*]] = zext i16 [[V_1]] to i64
+; CHECK-BE-NEXT: [[S_1:%.*]] = shl nuw nsw i64 [[Z_1]], 16
+; CHECK-BE-NEXT: [[X_1:%.*]] = or disjoint i64 [[S_1]], [[Z_0]]
+; CHECK-BE-NEXT: [[V_2:%.*]] = extractelement <8 x i16> [[V]], i64 5
+; CHECK-BE-NEXT: [[Z_2:%.*]] = zext i16 [[V_2]] to i64
+; CHECK-BE-NEXT: [[S_2:%.*]] = shl nuw nsw i64 [[Z_2]], 32
+; CHECK-BE-NEXT: [[X_2:%.*]] = or disjoint i64 [[X_1]], [[S_2]]
+; CHECK-BE-NEXT: [[V_3:%.*]] = extractelement <8 x i16> [[V]], i64 6
+; CHECK-BE-NEXT: [[Z_3:%.*]] = zext i16 [[V_3]] to i64
+; CHECK-BE-NEXT: [[S_3:%.*]] = shl nuw i64 [[Z_3]], 48
+; CHECK-BE-NEXT: [[X_3:%.*]] = or disjoint i64 [[X_2]], [[S_3]]
+; CHECK-BE-NEXT: ret i64 [[X_3]]
+;
+; CHECK-LE-LABEL: define i64 @extract.le.i64(
+; CHECK-LE-SAME: <8 x i16> [[V:%.*]]) {
+; CHECK-LE-NEXT: [[X_3_V_EXTRACT:%.*]] = shufflevector <8 x i16> [[V]], <8 x i16> poison, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-LE-NEXT: [[X_3_V_BC:%.*]] = bitcast <8 x i16> [[X_3_V_EXTRACT]] to <2 x i64>
+; CHECK-LE-NEXT: [[X_3_V_EXTRACT1:%.*]] = extractelement <2 x i64> [[X_3_V_BC]], i64 0
+; CHECK-LE-NEXT: ret i64 [[X_3_V_EXTRACT1]]
+;
+ %v.0 = extractelement <8 x i16> %v, i64 3
+ %z.0 = zext i16 %v.0 to i64
+
+ %v.1 = extractelement <8 x i16> %v, i64 4
+ %z.1 = zext i16 %v.1 to i64
+ %s.1 = shl i64 %z.1, 16
+ %x.1 = or i64 %z.0, %s.1
+
+ %v.2 = extractelement <8 x i16> %v, i64 5
+ %z.2 = zext i16 %v.2 to i64
+ %s.2 = shl i64 %z.2, 32
+ %x.2 = or i64 %x.1, %s.2
+
+ %v.3 = extractelement <8 x i16> %v, i64 6
+ %z.3 = zext i16 %v.3 to i64
+ %s.3 = shl i64 %z.3, 48
+ %x.3 = or i64 %x.2, %s.3
+
+ ret i64 %x.3
+}
+
+define i64 @extract.be.i64(<8 x i16> %v) {
+; CHECK-BE-LABEL: define i64 @extract.be.i64(
+; CHECK-BE-SAME: <8 x i16> [[V:%.*]]) {
+; CHECK-BE-NEXT: [[X_3_V_EXTRACT:%.*]] = shufflevector <8 x i16> [[V]], <8 x i16> poison, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-BE-NEXT: [[X_3_V_BC:%.*]] = bitcast <8 x i16> [[X_3_V_EXTRACT]] to <2 x i64>
+; CHECK-BE-NEXT: [[X_3_V_EXTRACT1:%.*]] = extractelement <2 x i64> [[X_3_V_BC]], i64 0
+; CHECK-BE-NEXT: ret i64 [[X_3_V_EXTRACT1]]
+;
+; CHECK-LE-LABEL: define i64 @extract.be.i64(
+; CHECK-LE-SAME: <8 x i16> [[V:%.*]]) {
+; CHECK-LE-NEXT: [[V_0:%.*]] = extractelement <8 x i16> [[V]], i64 6
+; CHECK-LE-NEXT: [[Z_0:%.*]] = zext i16 [[V_0]] to i64
+; CHECK-LE-NEXT: [[V_1:%.*]] = extractelement <8 x i16> [[V]], i64 5
+; CHECK-LE-NEXT: [[Z_1:%.*]] = zext i16 [[V_1]] to i64
+; CHECK-LE-NEXT: [[S_1:%.*]] = shl nuw nsw i64 [[Z_1]], 16
+; CHECK-LE-NEXT: [[X_1:%.*]] = or disjoint i64 [[S_1]], [[Z_0]]
+; CHECK-LE-NEXT: [[V_2:%.*]] = extractelement <8 x i16> [[V]], i64 4
+; CHECK-LE-NEXT: [[Z_2:%.*]] = zext i16 [[V_2]] to i64
+; CHECK-LE-NEXT: [[S_2:%.*]] = shl nuw nsw i64 [[Z_2]], 32
+; CHECK-LE-NEXT: [[X_2:%.*]] = or disjoint i64 [[X_1]], [[S_2]]
+; CHECK-LE-NEXT: [[V_3:%.*]] = extractelement <8 x i16> [[V]], i64 3
+; CHECK-LE-NEXT: [[Z_3:%.*]] = zext i16 [[V_3]] to i64
+; CHECK-LE-NEXT: [[S_3:%.*]] = shl nuw i64 [[Z_3]], 48
+; CHECK-LE-NEXT: [[X_3:%.*]] = or disjoint i64 [[X_2]], [[S_3]]
+; CHECK-LE-NEXT: ret i64 [[X_3]]
+;
+ %v.0 = extractelement <8 x i16> %v, i64 6
+ %z.0 = zext i16 %v.0 to i64
+
+ %v.1 = extractelement <8 x i16> %v, i64 5
+ %z.1 = zext i16 %v.1 to i64
+ %s.1 = shl i64 %z.1, 16
+ %x.1 = or i64 %z.0, %s.1
+
+ %v.2 = extractelement <8 x i16> %v, i64 4
+ %z.2 = zext i16 %v.2 to i64
+ %s.2 = shl i64 %z.2, 32
+ %x.2 = or i64 %x.1, %s.2
+
+ %v.3 = extractelement <8 x i16> %v, i64 3
+ %z.3 = zext i16 %v.3 to i64
+ %s.3 = shl i64 %z.3, 48
+ %x.3 = or i64 %x.2, %s.3
+
+ ret i64 %x.3
+}
+
+define i32 @partial.le(<4 x i8> %v) {
+; CHECK-BE-LABEL: define i32 @partial.le(
+; CHECK-BE-SAME: <4 x i8> [[V:%.*]]) {
+; CHECK-BE-NEXT: [[V_0:%.*]] = extractelement <4 x i8> [[V]], i64 0
+; CHECK-BE-NEXT: [[Z_0:%.*]] = zext i8 [[V_0]] to i32
+; CHECK-BE-NEXT: [[V_1:%.*]] = extractelement <4 x i8> [[V]], i64 1
+; CHECK-BE-NEXT: [[Z_1:%.*]] = zext i8 [[V_1]] to i32
+; CHECK-BE-NEXT: [[S_1:%.*]] = shl nuw nsw i32 [[Z_1]], 8
+; CHECK-BE-NEXT: [[X_1:%.*]] = or disjoint i32 [[S_1]], [[Z_0]]
+; CHECK-BE-NEXT: [[V_3:%.*]] = extractelement <4 x i8> [[V]], i64 3
+; CHECK-BE-NEXT: [[Z_3:%.*]] = zext i8 [[V_3]] to i32
+; CHECK-BE-NEXT: [[S_3:%.*]] = shl nuw i32 [[Z_3]], 24
+; CHECK-BE-NEXT: [[X_3:%.*]] = or disjoint i32 [[X_1]], [[S_3]]
+; CHECK-BE-NEXT: ret i32 [[X_3]]
;
- %v.0 = extractelement <4 x i8> %v, i32 0
+; CHECK-LE-LABEL: define i32 @partial.le(
+; CHECK-LE-SAME: <4 x i8> [[V:%.*]]) {
+; CHECK-LE-NEXT: [[X_3_V1:%.*]] = insertelement <4 x i8> [[V]], i8 0, i64 2
+; CHECK-LE-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[X_3_V1]] to i32
+; CHECK-LE-NEXT: ret i32 [[X_3]]
+;
+ %v.0 = extractelement <4 x i8> %v, i64 0
%z.0 = zext i8 %v.0 to i32
- %v.1 = extractelement <4 x i8> %v, i32 1
+ %v.1 = extractelement <4 x i8> %v, i64 1
%z.1 = zext i8 %v.1 to i32
%s.1 = shl i32 %z.1, 8
%x.1 = or i32 %z.0, %s.1
- %v.3 = extractelement <4 x i8> %v, i32 3
+ %v.3 = extractelement <4 x i8> %v, i64 3
%z.3 = zext i8 %v.3 to i32
%s.3 = shl i32 %z.3, 24
%x.3 = or i32 %x.1, %s.3
ret i32 %x.3
}
+
+define i32 @partial.be(<4 x i8> %v) {
+; CHECK-BE-LABEL: define i32 @partial.be(
+; CHECK-BE-SAME: <4 x i8> [[V:%.*]]) {
+; CHECK-BE-NEXT: [[X_3_V1:%.*]] = insertelement <4 x i8> [[V]], i8 0, i64 2
+; CHECK-BE-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[X_3_V1]] to i32
+; CHECK-BE-NEXT: ret i32 [[X_3]]
+;
+; CHECK-LE-LABEL: define i32 @partial.be(
+; CHECK-LE-SAME: <4 x i8> [[V:%.*]]) {
+; CHECK-LE-NEXT: [[V_0:%.*]] = extractelement <4 x i8> [[V]], i64 3
+; CHECK-LE-NEXT: [[Z_0:%.*]] = zext i8 [[V_0]] to i32
+; CHECK-LE-NEXT: [[V_1:%.*]] = extractelement <4 x i8> [[V]], i64 1
+; CHECK-LE-NEXT: [[Z_1:%.*]] = zext i8 [[V_1]] to i32
+; CHECK-LE-NEXT: [[S_1:%.*]] = shl nuw nsw i32 [[Z_1]], 16
+; CHECK-LE-NEXT: [[X_1:%.*]] = or disjoint i32 [[S_1]], [[Z_0]]
+; CHECK-LE-NEXT: [[V_3:%.*]] = extractelement <4 x i8> [[V]], i64 0
+; CHECK-LE-NEXT: [[Z_3:%.*]] = zext i8 [[V_3]] to i32
+; CHECK-LE-NEXT: [[S_3:%.*]] = shl nuw i32 [[Z_3]], 24
+; CHECK-LE-NEXT: [[X_3:%.*]] = or disjoint i32 [[X_1]], [[S_3]]
+; CHECK-LE-NEXT: ret i32 [[X_3]]
+;
+ %v.0 = extractelement <4 x i8> %v, i64 3
+ %z.0 = zext i8 %v.0 to i32
+
+ %v.1 = extractelement <4 x i8> %v, i64 1
+ %z.1 = zext i8 %v.1 to i32
+ %s.1 = shl i32 %z.1, 16
+ %x.1 = or i32 %z.0, %s.1
+
+ %v.3 = extractelement <4 x i8> %v, i64 0
+ %z.3 = zext i8 %v.3 to i32
+ %s.3 = shl i32 %z.3, 24
+ %x.3 = or i32 %x.1, %s.3
+
+ ret i32 %x.3
+}
+
+
+define i64 @partial.le.i16(<4 x i16> %v) {
+; CHECK-BE-LABEL: define i64 @partial.le.i16(
+; CHECK-BE-SAME: <4 x i16> [[V:%.*]]) {
+; CHECK-BE-NEXT: [[V_0:%.*]] = extractelement <4 x i16> [[V]], i64 0
+; CHECK-BE-NEXT: [[Z_0:%.*]] = zext i16 [[V_0]] to i64
+; CHECK-BE-NEXT: [[V_1:%.*]] = extractelement <4 x i16> [[V]], i64 1
+; CHECK-BE-NEXT: [[Z_1:%.*]] = zext i16 [[V_1]] to i64
+; CHECK-BE-NEXT: [[S_1:%.*]] = shl nuw nsw i64 [[Z_1]], 16
+; CHECK-BE-NEXT: [[X_1:%.*]] = or disjoint i64 [[S_1]], [[Z_0]]
+; CHECK-BE-NEXT: [[V_3:%.*]] = extractelement <4 x i16> [[V]], i64 3
+; CHECK-BE-NEXT: [[Z_3:%.*]] = zext i16 [[V_3]] to i64
+; CHECK-BE-NEXT: [[S_3:%.*]] = shl nuw i64 [[Z_3]], 48
+; CHECK-BE-NEXT: [[X_3:%.*]] = or disjoint i64 [[X_1]], [[S_3]]
+; CHECK-BE-NEXT: ret i64 [[X_3]]
+;
+; CHECK-LE-LABEL: define i64 @partial.le.i16(
+; CHECK-LE-SAME: <4 x i16> [[V:%.*]]) {
+; CHECK-LE-NEXT: [[X_3_V1:%.*]] = insertelement <4 x i16> [[V]], i16 0, i64 2
+; CHECK-LE-NEXT: [[X_3:%.*]] = bitcast <4 x i16> [[X_3_V1]] to i64
+; CHECK-LE-NEXT: ret i64 [[X_3]]
+;
+ %v.0 = extractelement <4 x i16> %v, i64 0
+ %z.0 = zext i16 %v.0 to i64
+
+ %v.1 = extractelement <4 x i16> %v, i64 1
+ %z.1 = zext i16 %v.1 to i64
+ %s.1 = shl i64 %z.1, 16
+ %x.1 = or i64 %z.0, %s.1
+
+ %v.3 = extractelement <4 x i16> %v, i64 3
+ %z.3 = zext i16 %v.3 to i64
+ %s.3 = shl i64 %z.3, 48
+ %x.3 = or i64 %x.1, %s.3
+
+ ret i64 %x.3
+}
+
+define i64 @partial.be.i16(<4 x i16> %v) {
+; CHECK-BE-LABEL: define i64 @partial.be.i16(
+; CHECK-BE-SAME: <4 x i16> [[V:%.*]]) {
+; CHECK-BE-NEXT: [[X_3_V1:%.*]] = insertelement <4 x i16> [[V]], i16 0, i64 2
+; CHECK-BE-NEXT: [[X_3:%.*]] = bitcast <4 x i16> [[X_3_V1]] to i64
+; CHECK-BE-NEXT: ret i64 [[X_3]]
+;
+; CHECK-LE-LABEL: define i64 @partial.be.i16(
+; CHECK-LE-SAME: <4 x i16> [[V:%.*]]) {
+; CHECK-LE-NEXT: [[V_0:%.*]] = extractelement <4 x i16> [[V]], i64 3
+; CHECK-LE-NEXT: [[Z_0:%.*]] = zext i16 [[V_0]] to i64
+; CHECK-LE-NEXT: [[V_1:%.*]] = extractelement <4 x i16> [[V]], i64 1
+; CHECK-LE-NEXT: [[Z_1:%.*]] = zext i16 [[V_1]] to i64
+; CHECK-LE-NEXT: [[S_1:%.*]] = shl nuw nsw i64 [[Z_1]], 32
+; CHECK-LE-NEXT: [[X_1:%.*]] = or disjoint i64 [[S_1]], [[Z_0]]
+; CHECK-LE-NEXT: [[V_3:%.*]] = extractelement <4 x i16> [[V]], i64 0
+; CHECK-LE-NEXT: [[Z_3:%.*]] = zext i16 [[V_3]] to i64
+; CHECK-LE-NEXT: [[S_3:%.*]] = shl nuw i64 [[Z_3]], 48
+; CHECK-LE-NEXT: [[X_3:%.*]] = or disjoint i64 [[X_1]], [[S_3]]
+; CHECK-LE-NEXT: ret i64 [[X_3]]
+;
+ %v.0 = extractelement <4 x i16> %v, i64 3
+ %z.0 = zext i16 %v.0 to i64
+
+ %v.1 = extractelement <4 x i16> %v, i64 1
+ %z.1 = zext i16 %v.1 to i64
+ %s.1 = shl i64 %z.1, 32
+ %x.1 = or i64 %z.0, %s.1
+
+ %v.3 = extractelement <4 x i16> %v, i64 0
+ %z.3 = zext i16 %v.3 to i64
+ %s.3 = shl i64 %z.3, 48
+ %x.3 = or i64 %x.1, %s.3
+
+ ret i64 %x.3
+}
>From d9477769e8997fbb2f6e0caf7d34fb7bda94f501 Mon Sep 17 00:00:00 2001
From: Zach Goldthorpe <Zach.Goldthorpe at amd.com>
Date: Wed, 30 Jul 2025 10:11:25 -0500
Subject: [PATCH 6/6] Incorporated reviewer feedback
---
.../InstCombine/InstCombineAndOrXor.cpp | 22 +-
.../InstCombine/or-packed-int-vecs.ll | 226 +++++++++++++++++-
2 files changed, 235 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 852b5fff53b3d..7f337249629dd 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -3642,21 +3642,21 @@ static bool matchSubIntegerPackFromVector(Value *V, Value *&Vec,
}
// Now try to match a bitcasted subvector.
- Instruction *DstVecI;
- if (!match(V, m_BitCast(m_Instruction(DstVecI))))
+ Instruction *SrcVecI;
+ if (!match(V, m_BitCast(m_Instruction(SrcVecI))))
return false;
- auto *DstTy = dyn_cast<FixedVectorType>(DstVecI->getType());
- if (!DstTy)
+ auto *SrcTy = dyn_cast<FixedVectorType>(SrcVecI->getType());
+ if (!SrcTy)
return false;
- Mask.resize(DstTy->getNumElements());
+ Mask.resize(SrcTy->getNumElements());
// First check for a subvector obtained from a shufflevector.
- if (isa<ShuffleVectorInst>(DstVecI)) {
+ if (isa<ShuffleVectorInst>(SrcVecI)) {
Constant *ConstVec;
ArrayRef<int> ShuffleMask;
- if (!match(DstVecI, m_Shuffle(m_Value(Vec), m_Constant(ConstVec),
+ if (!match(SrcVecI, m_Shuffle(m_Value(Vec), m_Constant(ConstVec),
m_Mask(ShuffleMask))))
return false;
@@ -3695,7 +3695,7 @@ static bool matchSubIntegerPackFromVector(Value *V, Value *&Vec,
// Check for a subvector obtained as an (insertelement V, 0, idx)
uint64_t InsertIdx;
- if (!match(DstVecI,
+ if (!match(SrcVecI,
m_InsertElt(m_Value(Vec), m_Zero(), m_ConstantInt(InsertIdx))))
return false;
@@ -3712,9 +3712,9 @@ static bool matchSubIntegerPackFromVector(Value *V, Value *&Vec,
/// Try to fold the join of two scalar integers whose contents are packed
/// elements of the same vector.
-Instruction *foldIntegerPackFromVector(Instruction &I,
- InstCombiner::BuilderTy &Builder,
- const DataLayout &DL) {
+static Instruction *foldIntegerPackFromVector(Instruction &I,
+ InstCombiner::BuilderTy &Builder,
+ const DataLayout &DL) {
assert(I.getOpcode() == Instruction::Or);
Value *LhsVec, *RhsVec;
int64_t LhsVecOffset, RhsVecOffset;
diff --git a/llvm/test/Transforms/InstCombine/or-packed-int-vecs.ll b/llvm/test/Transforms/InstCombine/or-packed-int-vecs.ll
index 24d16553b7792..9391fb5ddae97 100644
--- a/llvm/test/Transforms/InstCombine/or-packed-int-vecs.ll
+++ b/llvm/test/Transforms/InstCombine/or-packed-int-vecs.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt < %s -passes=instcombine -data-layout="E" -S | FileCheck %s --check-prefixes=CHECK-BE
-; RUN: opt < %s -passes=instcombine -data-layout="e" -S | FileCheck %s --check-prefixes=CHECK-LE
+; RUN: opt %s -passes=instcombine -data-layout="E" -S | FileCheck %s --check-prefixes=CHECK,CHECK-BE
+; RUN: opt %s -passes=instcombine -data-layout="e" -S | FileCheck %s --check-prefixes=CHECK,CHECK-LE
define i32 @bitcast.v2i.le(<4 x i8> %v) {
; CHECK-BE-LABEL: define i32 @bitcast.v2i.le(
@@ -702,3 +702,225 @@ define i64 @partial.be.i16(<4 x i16> %v) {
ret i64 %x.3
}
+
+define i32 @partial.extract.le.i32(<8 x i8> %v) {
+; CHECK-BE-LABEL: define i32 @partial.extract.le.i32(
+; CHECK-BE-SAME: <8 x i8> [[V:%.*]]) {
+; CHECK-BE-NEXT: [[V_0:%.*]] = extractelement <8 x i8> [[V]], i64 3
+; CHECK-BE-NEXT: [[Z_0:%.*]] = zext i8 [[V_0]] to i32
+; CHECK-BE-NEXT: [[V_1:%.*]] = extractelement <8 x i8> [[V]], i64 4
+; CHECK-BE-NEXT: [[Z_1:%.*]] = zext i8 [[V_1]] to i32
+; CHECK-BE-NEXT: [[S_1:%.*]] = shl nuw nsw i32 [[Z_1]], 8
+; CHECK-BE-NEXT: [[X_1:%.*]] = or disjoint i32 [[S_1]], [[Z_0]]
+; CHECK-BE-NEXT: [[V_3:%.*]] = extractelement <8 x i8> [[V]], i64 6
+; CHECK-BE-NEXT: [[Z_3:%.*]] = zext i8 [[V_3]] to i32
+; CHECK-BE-NEXT: [[S_3:%.*]] = shl nuw i32 [[Z_3]], 24
+; CHECK-BE-NEXT: [[X_3:%.*]] = or disjoint i32 [[X_1]], [[S_3]]
+; CHECK-BE-NEXT: ret i32 [[X_3]]
+;
+; CHECK-LE-LABEL: define i32 @partial.extract.le.i32(
+; CHECK-LE-SAME: <8 x i8> [[V:%.*]]) {
+; CHECK-LE-NEXT: [[X_3_V:%.*]] = shufflevector <8 x i8> [[V]], <8 x i8> <i8 0, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, <4 x i32> <i32 3, i32 4, i32 8, i32 6>
+; CHECK-LE-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[X_3_V]] to i32
+; CHECK-LE-NEXT: ret i32 [[X_3]]
+;
+ %v.0 = extractelement <8 x i8> %v, i64 3
+ %z.0 = zext i8 %v.0 to i32
+
+ %v.1 = extractelement <8 x i8> %v, i64 4
+ %z.1 = zext i8 %v.1 to i32
+ %s.1 = shl i32 %z.1, 8
+ %x.1 = or i32 %z.0, %s.1
+
+ %v.3 = extractelement <8 x i8> %v, i64 6
+ %z.3 = zext i8 %v.3 to i32
+ %s.3 = shl i32 %z.3, 24
+ %x.3 = or i32 %x.1, %s.3
+
+ ret i32 %x.3
+}
+
+define i32 @partial.extract.be.i32(<8 x i8> %v) {
+; CHECK-BE-LABEL: define i32 @partial.extract.be.i32(
+; CHECK-BE-SAME: <8 x i8> [[V:%.*]]) {
+; CHECK-BE-NEXT: [[X_3_V:%.*]] = shufflevector <8 x i8> [[V]], <8 x i8> <i8 0, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, <4 x i32> <i32 3, i32 4, i32 8, i32 6>
+; CHECK-BE-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[X_3_V]] to i32
+; CHECK-BE-NEXT: ret i32 [[X_3]]
+;
+; CHECK-LE-LABEL: define i32 @partial.extract.be.i32(
+; CHECK-LE-SAME: <8 x i8> [[V:%.*]]) {
+; CHECK-LE-NEXT: [[V_0:%.*]] = extractelement <8 x i8> [[V]], i64 6
+; CHECK-LE-NEXT: [[Z_0:%.*]] = zext i8 [[V_0]] to i32
+; CHECK-LE-NEXT: [[V_2:%.*]] = extractelement <8 x i8> [[V]], i64 4
+; CHECK-LE-NEXT: [[Z_2:%.*]] = zext i8 [[V_2]] to i32
+; CHECK-LE-NEXT: [[S_2:%.*]] = shl nuw nsw i32 [[Z_2]], 16
+; CHECK-LE-NEXT: [[X_2:%.*]] = or disjoint i32 [[S_2]], [[Z_0]]
+; CHECK-LE-NEXT: [[V_3:%.*]] = extractelement <8 x i8> [[V]], i64 3
+; CHECK-LE-NEXT: [[Z_3:%.*]] = zext i8 [[V_3]] to i32
+; CHECK-LE-NEXT: [[S_3:%.*]] = shl nuw i32 [[Z_3]], 24
+; CHECK-LE-NEXT: [[X_3:%.*]] = or disjoint i32 [[X_2]], [[S_3]]
+; CHECK-LE-NEXT: ret i32 [[X_3]]
+;
+ %v.0 = extractelement <8 x i8> %v, i64 6
+ %z.0 = zext i8 %v.0 to i32
+
+ %v.2 = extractelement <8 x i8> %v, i64 4
+ %z.2 = zext i8 %v.2 to i32
+ %s.2 = shl i32 %z.2, 16
+ %x.2 = or i32 %z.0, %s.2
+
+ %v.3 = extractelement <8 x i8> %v, i64 3
+ %z.3 = zext i8 %v.3 to i32
+ %s.3 = shl i32 %z.3, 24
+ %x.3 = or i32 %x.2, %s.3
+
+ ret i32 %x.3
+}
+
+define i64 @partial.extract.le.i64(<8 x i16> %v) {
+; CHECK-BE-LABEL: define i64 @partial.extract.le.i64(
+; CHECK-BE-SAME: <8 x i16> [[V:%.*]]) {
+; CHECK-BE-NEXT: [[V_0:%.*]] = extractelement <8 x i16> [[V]], i64 3
+; CHECK-BE-NEXT: [[Z_0:%.*]] = zext i16 [[V_0]] to i64
+; CHECK-BE-NEXT: [[V_1:%.*]] = extractelement <8 x i16> [[V]], i64 4
+; CHECK-BE-NEXT: [[Z_1:%.*]] = zext i16 [[V_1]] to i64
+; CHECK-BE-NEXT: [[S_1:%.*]] = shl nuw nsw i64 [[Z_1]], 16
+; CHECK-BE-NEXT: [[X_1:%.*]] = or disjoint i64 [[S_1]], [[Z_0]]
+; CHECK-BE-NEXT: [[V_2:%.*]] = extractelement <8 x i16> [[V]], i64 5
+; CHECK-BE-NEXT: [[Z_2:%.*]] = zext i16 [[V_2]] to i64
+; CHECK-BE-NEXT: [[S_2:%.*]] = shl nuw nsw i64 [[Z_2]], 32
+; CHECK-BE-NEXT: [[X_2:%.*]] = or disjoint i64 [[X_1]], [[S_2]]
+; CHECK-BE-NEXT: [[V_3:%.*]] = extractelement <8 x i16> [[V]], i64 6
+; CHECK-BE-NEXT: [[Z_3:%.*]] = zext i16 [[V_3]] to i64
+; CHECK-BE-NEXT: [[S_3:%.*]] = shl nuw i64 [[Z_3]], 48
+; CHECK-BE-NEXT: [[X_3:%.*]] = or disjoint i64 [[X_2]], [[S_3]]
+; CHECK-BE-NEXT: ret i64 [[X_3]]
+;
+; CHECK-LE-LABEL: define i64 @partial.extract.le.i64(
+; CHECK-LE-SAME: <8 x i16> [[V:%.*]]) {
+; CHECK-LE-NEXT: [[X_3_V_EXTRACT:%.*]] = shufflevector <8 x i16> [[V]], <8 x i16> poison, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-LE-NEXT: [[X_3_V_BC:%.*]] = bitcast <8 x i16> [[X_3_V_EXTRACT]] to <2 x i64>
+; CHECK-LE-NEXT: [[X_3_V_EXTRACT1:%.*]] = extractelement <2 x i64> [[X_3_V_BC]], i64 0
+; CHECK-LE-NEXT: ret i64 [[X_3_V_EXTRACT1]]
+;
+ %v.0 = extractelement <8 x i16> %v, i64 3
+ %z.0 = zext i16 %v.0 to i64
+
+ %v.1 = extractelement <8 x i16> %v, i64 4
+ %z.1 = zext i16 %v.1 to i64
+ %s.1 = shl i64 %z.1, 16
+ %x.1 = or i64 %z.0, %s.1
+
+ %v.2 = extractelement <8 x i16> %v, i64 5
+ %z.2 = zext i16 %v.2 to i64
+ %s.2 = shl i64 %z.2, 32
+ %x.2 = or i64 %x.1, %s.2
+
+ %v.3 = extractelement <8 x i16> %v, i64 6
+ %z.3 = zext i16 %v.3 to i64
+ %s.3 = shl i64 %z.3, 48
+ %x.3 = or i64 %x.2, %s.3
+
+ ret i64 %x.3
+}
+
+define i64 @partial.extract.be.i64(<8 x i16> %v) {
+; CHECK-BE-LABEL: define i64 @partial.extract.be.i64(
+; CHECK-BE-SAME: <8 x i16> [[V:%.*]]) {
+; CHECK-BE-NEXT: [[X_3_V:%.*]] = shufflevector <8 x i16> [[V]], <8 x i16> <i16 0, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison>, <4 x i32> <i32 3, i32 4, i32 8, i32 6>
+; CHECK-BE-NEXT: [[X_3:%.*]] = bitcast <4 x i16> [[X_3_V]] to i64
+; CHECK-BE-NEXT: ret i64 [[X_3]]
+;
+; CHECK-LE-LABEL: define i64 @partial.extract.be.i64(
+; CHECK-LE-SAME: <8 x i16> [[V:%.*]]) {
+; CHECK-LE-NEXT: [[V_0:%.*]] = extractelement <8 x i16> [[V]], i64 6
+; CHECK-LE-NEXT: [[Z_0:%.*]] = zext i16 [[V_0]] to i64
+; CHECK-LE-NEXT: [[V_2:%.*]] = extractelement <8 x i16> [[V]], i64 4
+; CHECK-LE-NEXT: [[Z_2:%.*]] = zext i16 [[V_2]] to i64
+; CHECK-LE-NEXT: [[S_2:%.*]] = shl nuw nsw i64 [[Z_2]], 32
+; CHECK-LE-NEXT: [[X_2:%.*]] = or disjoint i64 [[S_2]], [[Z_0]]
+; CHECK-LE-NEXT: [[V_3:%.*]] = extractelement <8 x i16> [[V]], i64 3
+; CHECK-LE-NEXT: [[Z_3:%.*]] = zext i16 [[V_3]] to i64
+; CHECK-LE-NEXT: [[S_3:%.*]] = shl nuw i64 [[Z_3]], 48
+; CHECK-LE-NEXT: [[X_3:%.*]] = or disjoint i64 [[X_2]], [[S_3]]
+; CHECK-LE-NEXT: ret i64 [[X_3]]
+;
+ %v.0 = extractelement <8 x i16> %v, i64 6
+ %z.0 = zext i16 %v.0 to i64
+
+ %v.2 = extractelement <8 x i16> %v, i64 4
+ %z.2 = zext i16 %v.2 to i64
+ %s.2 = shl i64 %z.2, 32
+ %x.2 = or i64 %z.0, %s.2
+
+ %v.3 = extractelement <8 x i16> %v, i64 3
+ %z.3 = zext i16 %v.3 to i64
+ %s.3 = shl i64 %z.3, 48
+ %x.3 = or i64 %x.2, %s.3
+
+ ret i64 %x.3
+}
+
+define <2 x i16> @shufflecast.v2v(<4 x i8> %v) {
+; CHECK-LABEL: define <2 x i16> @shufflecast.v2v(
+; CHECK-SAME: <4 x i8> [[V:%.*]]) {
+; CHECK-NEXT: [[W_3:%.*]] = bitcast <4 x i8> [[V]] to <2 x i16>
+; CHECK-NEXT: ret <2 x i16> [[W_3]]
+;
+ %v.0 = shufflevector <4 x i8> %v, <4 x i8> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+ %c.0 = bitcast <4 x i8> %v.0 to <2 x i16>
+
+ %v.1 = shufflevector <4 x i8> %v, <4 x i8> zeroinitializer, <4 x i32> <i32 4, i32 1, i32 4, i32 4>
+ %c.1 = bitcast <4 x i8> %v.1 to <2 x i16>
+ %w.1 = or <2 x i16> %c.0, %c.1
+
+ %v.2 = shufflevector <4 x i8> %v, <4 x i8> zeroinitializer, <4 x i32> <i32 4, i32 4, i32 2, i32 4>
+ %c.2 = bitcast <4 x i8> %v.2 to <2 x i16>
+ %w.2 = or <2 x i16> %w.1, %c.2
+
+ %v.3 = shufflevector <4 x i8> %v, <4 x i8> zeroinitializer, <4 x i32> <i32 4, i32 4, i32 4, i32 3>
+ %c.3 = bitcast <4 x i8> %v.3 to <2 x i16>
+ %w.3 = or <2 x i16> %w.2, %c.3
+
+ ret <2 x i16> %w.3
+}
+
+define <2 x i32> @shufflecast.v2v.i16(<4 x i16> %v) {
+; CHECK-LABEL: define <2 x i32> @shufflecast.v2v.i16(
+; CHECK-SAME: <4 x i16> [[V:%.*]]) {
+; CHECK-NEXT: [[W_3:%.*]] = bitcast <4 x i16> [[V]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[W_3]]
+;
+ %v.0 = shufflevector <4 x i16> %v, <4 x i16> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+ %c.0 = bitcast <4 x i16> %v.0 to <2 x i32>
+
+ %v.1 = shufflevector <4 x i16> %v, <4 x i16> zeroinitializer, <4 x i32> <i32 4, i32 1, i32 4, i32 4>
+ %c.1 = bitcast <4 x i16> %v.1 to <2 x i32>
+ %w.1 = or <2 x i32> %c.0, %c.1
+
+ %v.2 = shufflevector <4 x i16> %v, <4 x i16> zeroinitializer, <4 x i32> <i32 4, i32 4, i32 2, i32 4>
+ %c.2 = bitcast <4 x i16> %v.2 to <2 x i32>
+ %w.2 = or <2 x i32> %w.1, %c.2
+
+ %v.3 = shufflevector <4 x i16> %v, <4 x i16> zeroinitializer, <4 x i32> <i32 4, i32 4, i32 4, i32 3>
+ %c.3 = bitcast <4 x i16> %v.3 to <2 x i32>
+ %w.3 = or <2 x i32> %w.2, %c.3
+
+ ret <2 x i32> %w.3
+}
+
+define i32 @bitcast.v2i.half(<2 x half> %v) {
+; CHECK-LABEL: define i32 @bitcast.v2i.half(
+; CHECK-SAME: <2 x half> [[V:%.*]]) {
+; CHECK-NEXT: [[X:%.*]] = bitcast <2 x half> [[V]] to i32
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %v.0 = insertelement <2 x half> %v, half 0.0, i64 1
+ %x.0 = bitcast <2 x half> %v.0 to i32
+
+ %v.1 = insertelement <2 x half> %v, half 0.0, i64 0
+ %x.1 = bitcast <2 x half> %v.1 to i32
+
+ %x = or i32 %x.0, %x.1
+ ret i32 %x
+}
More information about the llvm-commits
mailing list