[llvm] 71d6762 - [InstCombine] Added pattern for recognising the construction of packed integers. (#147414)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 30 09:58:52 PDT 2025
Author: zGoldthorpe
Date: 2025-07-30T10:58:49-06:00
New Revision: 71d6762309a7db67770bdbd39572ef04e6a1ea59
URL: https://github.com/llvm/llvm-project/commit/71d6762309a7db67770bdbd39572ef04e6a1ea59
DIFF: https://github.com/llvm/llvm-project/commit/71d6762309a7db67770bdbd39572ef04e6a1ea59.diff
LOG: [InstCombine] Added pattern for recognising the construction of packed integers. (#147414)
This patch extends the instruction combiner to simplify the construction
of a packed scalar integer from a vector type, such as:
```llvm
target datalayout = "e"
define i32 @src(<4 x i8> %v) {
%v.0 = extractelement <4 x i8> %v, i32 0
%z.0 = zext i8 %v.0 to i32
%v.1 = extractelement <4 x i8> %v, i32 1
%z.1 = zext i8 %v.1 to i32
%s.1 = shl i32 %z.1, 8
%x.1 = or i32 %z.0, %s.1
%v.2 = extractelement <4 x i8> %v, i32 2
%z.2 = zext i8 %v.2 to i32
%s.2 = shl i32 %z.2, 16
%x.2 = or i32 %x.1, %s.2
%v.3 = extractelement <4 x i8> %v, i32 3
%z.3 = zext i8 %v.3 to i32
%s.3 = shl i32 %z.3, 24
%x.3 = or i32 %x.2, %s.3
ret i32 %x.3
}
; ===============
define i32 @tgt(<4 x i8> %v) {
%x.3 = bitcast <4 x i8> %v to i32
ret i32 %x.3
}
```
Alive2 proofs (little-endian):
[YKdMeg](https://alive2.llvm.org/ce/z/YKdMeg)
Alive2 proofs (big-endian):
[vU6iKc](https://alive2.llvm.org/ce/z/vU6iKc)
Added:
llvm/test/Transforms/InstCombine/or-packed-int-vecs.ll
Modified:
llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index b231c04319106..d7971e8e3caea 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -11,10 +11,13 @@
//===----------------------------------------------------------------------===//
#include "InstCombineInternal.h"
+#include "llvm/ADT/SmallBitVector.h"
#include "llvm/Analysis/CmpInstAnalysis.h"
#include "llvm/Analysis/FloatingPointPredicateUtils.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/ConstantRange.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
@@ -3589,6 +3592,154 @@ static Value *foldOrOfInversions(BinaryOperator &I,
return nullptr;
}
+/// Match \p V as one operand of an integer-packing `or`: either an
+/// "extractelement -> zext -> shl" chain, a "shufflevector -> bitcast", or an
+/// "insertelement zero -> bitcast". All of these extract elements of a vector
+/// and place them in a packed value in the same relative positions.
+///
+/// \p Vec is set to the underlying vector being extracted from.
+/// \p VecOffset is set to the element of \p Vec that corresponds to index 0 of
+/// the mask (it may be negative for the extractelement form).
+/// \p Mask is a bitmask identifying which packed elements are obtained from the
+/// vector. It is resized to the number of packed elements and the matched
+/// elements are *added* to it, so the caller can accumulate the result of
+/// matching several operands into one mask.
+/// \p DL supplies the endianness, which decides which packed element a given
+/// shift amount corresponds to.
+///
+/// \returns true if \p V matched one of the patterns. On failure the output
+/// parameters may have been partially written and must not be used.
+static bool matchSubIntegerPackFromVector(Value *V, Value *&Vec,
+                                          int64_t &VecOffset,
+                                          SmallBitVector &Mask,
+                                          const DataLayout &DL) {
+  // Matches either "shl Base, C" (binding C to ShlAmt) or Base on its own, in
+  // which case ShlAmt keeps the value 0 assigned here.
+  static const auto m_ConstShlOrSelf = [](const auto &Base, uint64_t &ShlAmt) {
+    ShlAmt = 0;
+    return m_CombineOr(m_Shl(Base, m_ConstantInt(ShlAmt)), Base);
+  };
+
+  // First try to match extractelement -> zext -> shl
+  uint64_t VecIdx, ShlAmt;
+  if (match(V, m_ConstShlOrSelf(m_ZExtOrSelf(m_ExtractElt(
+                                    m_Value(Vec), m_ConstantInt(VecIdx))),
+                                ShlAmt))) {
+    auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
+    if (!VecTy)
+      return false;
+    auto *EltTy = dyn_cast<IntegerType>(VecTy->getElementType());
+    if (!EltTy)
+      return false;
+
+    // An out-of-range extract index does not name a real lane; accepting it
+    // would later produce a shuffle index outside the source vector.
+    if (VecIdx >= VecTy->getNumElements())
+      return false;
+
+    const unsigned EltBitWidth = EltTy->getBitWidth();
+    const unsigned TargetBitWidth = V->getType()->getIntegerBitWidth();
+    if (TargetBitWidth % EltBitWidth != 0 || ShlAmt % EltBitWidth != 0)
+      return false;
+    // A shift by the full width or more does not land in any packed element.
+    // Without this check MaskIdx would be past the end of Mask (and would wrap
+    // around in the big-endian computation below).
+    if (ShlAmt >= TargetBitWidth)
+      return false;
+    const unsigned TargetEltWidth = TargetBitWidth / EltBitWidth;
+    const unsigned ShlEltAmt = ShlAmt / EltBitWidth;
+
+    // On little-endian targets packed element 0 occupies the low bits; on
+    // big-endian targets it occupies the high bits.
+    const unsigned MaskIdx =
+        DL.isLittleEndian() ? ShlEltAmt : TargetEltWidth - ShlEltAmt - 1;
+
+    VecOffset = static_cast<int64_t>(VecIdx) - static_cast<int64_t>(MaskIdx);
+    Mask.resize(TargetEltWidth);
+    Mask.set(MaskIdx);
+    return true;
+  }
+
+  // Now try to match a bitcasted subvector.
+  Instruction *SrcVecI;
+  if (!match(V, m_BitCast(m_Instruction(SrcVecI))))
+    return false;
+
+  auto *SrcTy = dyn_cast<FixedVectorType>(SrcVecI->getType());
+  if (!SrcTy)
+    return false;
+
+  Mask.resize(SrcTy->getNumElements());
+
+  // First check for a subvector obtained from a shufflevector.
+  if (isa<ShuffleVectorInst>(SrcVecI)) {
+    Constant *ConstVec;
+    ArrayRef<int> ShuffleMask;
+    if (!match(SrcVecI, m_Shuffle(m_Value(Vec), m_Constant(ConstVec),
+                                  m_Mask(ShuffleMask))))
+      return false;
+
+    auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
+    if (!VecTy)
+      return false;
+
+    const unsigned NumVecElts = VecTy->getNumElements();
+    bool FoundVecOffset = false;
+    for (unsigned Idx = 0; Idx < ShuffleMask.size(); ++Idx) {
+      if (ShuffleMask[Idx] == PoisonMaskElem)
+        return false;
+      const unsigned ShuffleIdx = ShuffleMask[Idx];
+      if (ShuffleIdx >= NumVecElts) {
+        // Lane taken from the constant operand: it must be an integer zero so
+        // that it contributes nothing to the packed value.
+        const unsigned ConstIdx = ShuffleIdx - NumVecElts;
+        // getAggregateElement() may return null (e.g. for a constant
+        // expression), so the cast has to tolerate a null input.
+        auto *ConstElt = dyn_cast_if_present<ConstantInt>(
+            ConstVec->getAggregateElement(ConstIdx));
+        if (!ConstElt || !ConstElt->isNullValue())
+          return false;
+        continue;
+      }
+
+      // Every lane taken from Vec must sit at the same distance from its
+      // position in the result; the first such lane fixes that distance.
+      if (FoundVecOffset) {
+        if (VecOffset + Idx != ShuffleIdx)
+          return false;
+      } else {
+        if (ShuffleIdx < Idx)
+          return false;
+        VecOffset = ShuffleIdx - Idx;
+        FoundVecOffset = true;
+      }
+      Mask.set(Idx);
+    }
+    return FoundVecOffset;
+  }
+
+  // Check for a subvector obtained as an (insertelement V, 0, idx)
+  uint64_t InsertIdx;
+  if (!match(SrcVecI,
+             m_InsertElt(m_Value(Vec), m_Zero(), m_ConstantInt(InsertIdx))))
+    return false;
+
+  auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
+  if (!VecTy)
+    return false;
+  // An out-of-range insert index does not name a lane of the mask; testing or
+  // resetting that bit would be out of bounds.
+  if (InsertIdx >= VecTy->getNumElements())
+    return false;
+  VecOffset = 0;
+  // Every lane except the zeroed one comes from Vec. The zeroed lane stays set
+  // only if an earlier match already recorded it as coming from the vector.
+  bool AlreadyInsertedMaskedElt = Mask.test(InsertIdx);
+  Mask.set();
+  if (!AlreadyInsertedMaskedElt)
+    Mask.reset(InsertIdx);
+  return true;
+}
+
+/// Try to fold the join of two scalar integers whose contents are packed
+/// elements of the same vector.
+///
+/// Both operands of the `or` \p I are matched with
+/// matchSubIntegerPackFromVector into one shared element mask. If they read
+/// from the same vector at the same offset, the `or` is rewritten as a
+/// shufflevector (selecting the used lanes and zero everywhere else) followed
+/// by a bitcast to the type of \p I.
+///
+/// \returns the replacement bitcast instruction (not yet inserted), or nullptr
+/// if the pattern does not apply.
+static Instruction *foldIntegerPackFromVector(Instruction &I,
+                                              InstCombiner::BuilderTy &Builder,
+                                              const DataLayout &DL) {
+  assert(I.getOpcode() == Instruction::Or);
+  Value *LhsVec, *RhsVec;
+  int64_t LhsVecOffset, RhsVecOffset;
+  SmallBitVector Mask;
+  if (!matchSubIntegerPackFromVector(I.getOperand(0), LhsVec, LhsVecOffset,
+                                     Mask, DL))
+    return nullptr;
+  if (!matchSubIntegerPackFromVector(I.getOperand(1), RhsVec, RhsVecOffset,
+                                     Mask, DL))
+    return nullptr;
+  if (LhsVec != RhsVec || LhsVecOffset != RhsVecOffset)
+    return nullptr;
+
+  // Convert into shufflevector -> bitcast;
+  const unsigned NumVecElts =
+      cast<FixedVectorType>(LhsVec->getType())->getNumElements();
+  // Index NumVecElts is the first lane of the second shuffle operand, which is
+  // the all-zero vector created below; unused result lanes read from it.
+  const unsigned ZeroVecIdx = NumVecElts;
+  SmallVector<int> ShuffleMask(Mask.size(), ZeroVecIdx);
+  for (unsigned Idx : Mask.set_bits()) {
+    const int64_t SrcIdx = LhsVecOffset + static_cast<int64_t>(Idx);
+    // Every used lane must name a real element of the source vector. Give up
+    // rather than build an invalid (or silently zero-selecting) shuffle mask;
+    // nothing has been inserted into the function at this point.
+    if (SrcIdx < 0 || SrcIdx >= static_cast<int64_t>(NumVecElts))
+      return nullptr;
+    ShuffleMask[Idx] = static_cast<int>(SrcIdx);
+  }
+
+  Value *MaskedVec = Builder.CreateShuffleVector(
+      LhsVec, Constant::getNullValue(LhsVec->getType()), ShuffleMask,
+      I.getName() + ".v");
+  return CastInst::Create(Instruction::BitCast, MaskedVec, I.getType());
+}
+
// A decomposition of ((X & Mask) * Factor). The NUW / NSW bools
// track these properities for preservation. Note that we can decompose
// equivalent select form of this expression (e.g. (!(X & Mask) ? 0 : Mask *
@@ -3766,6 +3917,9 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
if (Instruction *X = foldComplexAndOrPatterns(I, Builder))
return X;
+ if (Instruction *X = foldIntegerPackFromVector(I, Builder, DL))
+ return X;
+
// (A & B) | (C & D) -> A ^ D where A == ~C && B == ~D
// (A & B) | (C & D) -> A ^ C where A == ~D && B == ~C
if (Value *V = foldOrOfInversions(I, Builder))
diff --git a/llvm/test/Transforms/InstCombine/or-packed-int-vecs.ll b/llvm/test/Transforms/InstCombine/or-packed-int-vecs.ll
new file mode 100644
index 0000000000000..9391fb5ddae97
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/or-packed-int-vecs.ll
@@ -0,0 +1,926 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt %s -passes=instcombine -data-layout="E" -S | FileCheck %s --check-prefixes=CHECK,CHECK-BE
+; RUN: opt %s -passes=instcombine -data-layout="e" -S | FileCheck %s --check-prefixes=CHECK,CHECK-LE
+
+; Packs all four i8 lanes of %v into an i32 with lane N shifted left by 8*N
+; (lane 0 in the low bits), built as a linear chain of ors. The checks expect a
+; single bitcast with the little-endian layout and no change with big-endian.
+define i32 @bitcast.v2i.le(<4 x i8> %v) {
+; CHECK-BE-LABEL: define i32 @bitcast.v2i.le(
+; CHECK-BE-SAME: <4 x i8> [[V:%.*]]) {
+; CHECK-BE-NEXT: [[V_0:%.*]] = extractelement <4 x i8> [[V]], i64 0
+; CHECK-BE-NEXT: [[Z_0:%.*]] = zext i8 [[V_0]] to i32
+; CHECK-BE-NEXT: [[V_1:%.*]] = extractelement <4 x i8> [[V]], i64 1
+; CHECK-BE-NEXT: [[Z_1:%.*]] = zext i8 [[V_1]] to i32
+; CHECK-BE-NEXT: [[S_1:%.*]] = shl nuw nsw i32 [[Z_1]], 8
+; CHECK-BE-NEXT: [[X_1:%.*]] = or disjoint i32 [[S_1]], [[Z_0]]
+; CHECK-BE-NEXT: [[V_2:%.*]] = extractelement <4 x i8> [[V]], i64 2
+; CHECK-BE-NEXT: [[Z_2:%.*]] = zext i8 [[V_2]] to i32
+; CHECK-BE-NEXT: [[S_2:%.*]] = shl nuw nsw i32 [[Z_2]], 16
+; CHECK-BE-NEXT: [[X_2:%.*]] = or disjoint i32 [[X_1]], [[S_2]]
+; CHECK-BE-NEXT: [[V_3:%.*]] = extractelement <4 x i8> [[V]], i64 3
+; CHECK-BE-NEXT: [[Z_3:%.*]] = zext i8 [[V_3]] to i32
+; CHECK-BE-NEXT: [[S_3:%.*]] = shl nuw i32 [[Z_3]], 24
+; CHECK-BE-NEXT: [[X_3:%.*]] = or disjoint i32 [[X_2]], [[S_3]]
+; CHECK-BE-NEXT: ret i32 [[X_3]]
+;
+; CHECK-LE-LABEL: define i32 @bitcast.v2i.le(
+; CHECK-LE-SAME: <4 x i8> [[V:%.*]]) {
+; CHECK-LE-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[V]] to i32
+; CHECK-LE-NEXT: ret i32 [[X_3]]
+;
+ %v.0 = extractelement <4 x i8> %v, i64 0
+ %z.0 = zext i8 %v.0 to i32
+
+ %v.1 = extractelement <4 x i8> %v, i64 1
+ %z.1 = zext i8 %v.1 to i32
+ %s.1 = shl i32 %z.1, 8
+ %x.1 = or i32 %z.0, %s.1
+
+ %v.2 = extractelement <4 x i8> %v, i64 2
+ %z.2 = zext i8 %v.2 to i32
+ %s.2 = shl i32 %z.2, 16
+ %x.2 = or i32 %x.1, %s.2
+
+ %v.3 = extractelement <4 x i8> %v, i64 3
+ %z.3 = zext i8 %v.3 to i32
+ %s.3 = shl i32 %z.3, 24
+ %x.3 = or i32 %x.2, %s.3
+
+ ret i32 %x.3
+}
+
+; Packs all four i8 lanes of %v into an i32 in reverse order (lane 3 in the low
+; bits, lane 0 shifted by 24), built as a linear chain of ors. The checks
+; expect a single bitcast with the big-endian layout and no change with
+; little-endian.
+define i32 @bitcast.v2i.be(<4 x i8> %v) {
+; CHECK-BE-LABEL: define i32 @bitcast.v2i.be(
+; CHECK-BE-SAME: <4 x i8> [[V:%.*]]) {
+; CHECK-BE-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[V]] to i32
+; CHECK-BE-NEXT: ret i32 [[X_3]]
+;
+; CHECK-LE-LABEL: define i32 @bitcast.v2i.be(
+; CHECK-LE-SAME: <4 x i8> [[V:%.*]]) {
+; CHECK-LE-NEXT: [[V_0:%.*]] = extractelement <4 x i8> [[V]], i64 3
+; CHECK-LE-NEXT: [[Z_0:%.*]] = zext i8 [[V_0]] to i32
+; CHECK-LE-NEXT: [[V_1:%.*]] = extractelement <4 x i8> [[V]], i64 2
+; CHECK-LE-NEXT: [[Z_1:%.*]] = zext i8 [[V_1]] to i32
+; CHECK-LE-NEXT: [[S_1:%.*]] = shl nuw nsw i32 [[Z_1]], 8
+; CHECK-LE-NEXT: [[X_1:%.*]] = or disjoint i32 [[S_1]], [[Z_0]]
+; CHECK-LE-NEXT: [[V_2:%.*]] = extractelement <4 x i8> [[V]], i64 1
+; CHECK-LE-NEXT: [[Z_2:%.*]] = zext i8 [[V_2]] to i32
+; CHECK-LE-NEXT: [[S_2:%.*]] = shl nuw nsw i32 [[Z_2]], 16
+; CHECK-LE-NEXT: [[X_2:%.*]] = or disjoint i32 [[X_1]], [[S_2]]
+; CHECK-LE-NEXT: [[V_3:%.*]] = extractelement <4 x i8> [[V]], i64 0
+; CHECK-LE-NEXT: [[Z_3:%.*]] = zext i8 [[V_3]] to i32
+; CHECK-LE-NEXT: [[S_3:%.*]] = shl nuw i32 [[Z_3]], 24
+; CHECK-LE-NEXT: [[X_3:%.*]] = or disjoint i32 [[X_2]], [[S_3]]
+; CHECK-LE-NEXT: ret i32 [[X_3]]
+;
+ %v.0 = extractelement <4 x i8> %v, i64 3
+ %z.0 = zext i8 %v.0 to i32
+
+ %v.1 = extractelement <4 x i8> %v, i64 2
+ %z.1 = zext i8 %v.1 to i32
+ %s.1 = shl i32 %z.1, 8
+ %x.1 = or i32 %z.0, %s.1
+
+ %v.2 = extractelement <4 x i8> %v, i64 1
+ %z.2 = zext i8 %v.2 to i32
+ %s.2 = shl i32 %z.2, 16
+ %x.2 = or i32 %x.1, %s.2
+
+ %v.3 = extractelement <4 x i8> %v, i64 0
+ %z.3 = zext i8 %v.3 to i32
+ %s.3 = shl i32 %z.3, 24
+ %x.3 = or i32 %x.2, %s.3
+
+ ret i32 %x.3
+}
+
+; i16-element variant of the little-endian-order packing: all four i16 lanes of
+; %v are placed in an i64 with lane N shifted left by 16*N, as a linear chain of
+; ors. The checks expect a single bitcast with the little-endian layout only.
+define i64 @bitcast.v2i.le.i16(<4 x i16> %v) {
+; CHECK-BE-LABEL: define i64 @bitcast.v2i.le.i16(
+; CHECK-BE-SAME: <4 x i16> [[V:%.*]]) {
+; CHECK-BE-NEXT: [[V_0:%.*]] = extractelement <4 x i16> [[V]], i64 0
+; CHECK-BE-NEXT: [[Z_0:%.*]] = zext i16 [[V_0]] to i64
+; CHECK-BE-NEXT: [[V_1:%.*]] = extractelement <4 x i16> [[V]], i64 1
+; CHECK-BE-NEXT: [[Z_1:%.*]] = zext i16 [[V_1]] to i64
+; CHECK-BE-NEXT: [[S_1:%.*]] = shl nuw nsw i64 [[Z_1]], 16
+; CHECK-BE-NEXT: [[X_1:%.*]] = or disjoint i64 [[S_1]], [[Z_0]]
+; CHECK-BE-NEXT: [[V_2:%.*]] = extractelement <4 x i16> [[V]], i64 2
+; CHECK-BE-NEXT: [[Z_2:%.*]] = zext i16 [[V_2]] to i64
+; CHECK-BE-NEXT: [[S_2:%.*]] = shl nuw nsw i64 [[Z_2]], 32
+; CHECK-BE-NEXT: [[X_2:%.*]] = or disjoint i64 [[X_1]], [[S_2]]
+; CHECK-BE-NEXT: [[V_3:%.*]] = extractelement <4 x i16> [[V]], i64 3
+; CHECK-BE-NEXT: [[Z_3:%.*]] = zext i16 [[V_3]] to i64
+; CHECK-BE-NEXT: [[S_3:%.*]] = shl nuw i64 [[Z_3]], 48
+; CHECK-BE-NEXT: [[X_3:%.*]] = or disjoint i64 [[X_2]], [[S_3]]
+; CHECK-BE-NEXT: ret i64 [[X_3]]
+;
+; CHECK-LE-LABEL: define i64 @bitcast.v2i.le.i16(
+; CHECK-LE-SAME: <4 x i16> [[V:%.*]]) {
+; CHECK-LE-NEXT: [[X_3:%.*]] = bitcast <4 x i16> [[V]] to i64
+; CHECK-LE-NEXT: ret i64 [[X_3]]
+;
+ %v.0 = extractelement <4 x i16> %v, i64 0
+ %z.0 = zext i16 %v.0 to i64
+
+ %v.1 = extractelement <4 x i16> %v, i64 1
+ %z.1 = zext i16 %v.1 to i64
+ %s.1 = shl i64 %z.1, 16
+ %x.1 = or i64 %z.0, %s.1
+
+ %v.2 = extractelement <4 x i16> %v, i64 2
+ %z.2 = zext i16 %v.2 to i64
+ %s.2 = shl i64 %z.2, 32
+ %x.2 = or i64 %x.1, %s.2
+
+ %v.3 = extractelement <4 x i16> %v, i64 3
+ %z.3 = zext i16 %v.3 to i64
+ %s.3 = shl i64 %z.3, 48
+ %x.3 = or i64 %x.2, %s.3
+
+ ret i64 %x.3
+}
+
+; i16-element variant of the big-endian-order packing: all four i16 lanes of %v
+; are placed in an i64 in reverse order (lane 3 in the low bits, lane 0 shifted
+; by 48), as a linear chain of ors. The checks expect a single bitcast with the
+; big-endian layout only.
+define i64 @bitcast.v2i.be.i16(<4 x i16> %v) {
+; CHECK-BE-LABEL: define i64 @bitcast.v2i.be.i16(
+; CHECK-BE-SAME: <4 x i16> [[V:%.*]]) {
+; CHECK-BE-NEXT: [[X_3:%.*]] = bitcast <4 x i16> [[V]] to i64
+; CHECK-BE-NEXT: ret i64 [[X_3]]
+;
+; CHECK-LE-LABEL: define i64 @bitcast.v2i.be.i16(
+; CHECK-LE-SAME: <4 x i16> [[V:%.*]]) {
+; CHECK-LE-NEXT: [[V_0:%.*]] = extractelement <4 x i16> [[V]], i64 3
+; CHECK-LE-NEXT: [[Z_0:%.*]] = zext i16 [[V_0]] to i64
+; CHECK-LE-NEXT: [[V_1:%.*]] = extractelement <4 x i16> [[V]], i64 2
+; CHECK-LE-NEXT: [[Z_1:%.*]] = zext i16 [[V_1]] to i64
+; CHECK-LE-NEXT: [[S_1:%.*]] = shl nuw nsw i64 [[Z_1]], 16
+; CHECK-LE-NEXT: [[X_1:%.*]] = or disjoint i64 [[S_1]], [[Z_0]]
+; CHECK-LE-NEXT: [[V_2:%.*]] = extractelement <4 x i16> [[V]], i64 1
+; CHECK-LE-NEXT: [[Z_2:%.*]] = zext i16 [[V_2]] to i64
+; CHECK-LE-NEXT: [[S_2:%.*]] = shl nuw nsw i64 [[Z_2]], 32
+; CHECK-LE-NEXT: [[X_2:%.*]] = or disjoint i64 [[X_1]], [[S_2]]
+; CHECK-LE-NEXT: [[V_3:%.*]] = extractelement <4 x i16> [[V]], i64 0
+; CHECK-LE-NEXT: [[Z_3:%.*]] = zext i16 [[V_3]] to i64
+; CHECK-LE-NEXT: [[S_3:%.*]] = shl nuw i64 [[Z_3]], 48
+; CHECK-LE-NEXT: [[X_3:%.*]] = or disjoint i64 [[X_2]], [[S_3]]
+; CHECK-LE-NEXT: ret i64 [[X_3]]
+;
+ %v.0 = extractelement <4 x i16> %v, i64 3
+ %z.0 = zext i16 %v.0 to i64
+
+ %v.1 = extractelement <4 x i16> %v, i64 2
+ %z.1 = zext i16 %v.1 to i64
+ %s.1 = shl i64 %z.1, 16
+ %x.1 = or i64 %z.0, %s.1
+
+ %v.2 = extractelement <4 x i16> %v, i64 1
+ %z.2 = zext i16 %v.2 to i64
+ %s.2 = shl i64 %z.2, 32
+ %x.2 = or i64 %x.1, %s.2
+
+ %v.3 = extractelement <4 x i16> %v, i64 0
+ %z.3 = zext i16 %v.3 to i64
+ %s.3 = shl i64 %z.3, 48
+ %x.3 = or i64 %x.2, %s.3
+
+ ret i64 %x.3
+}
+
+; Same little-endian-order packing of four i8 lanes into an i32, but the ors are
+; arranged as a tree: (lane0 | lane1) and (lane2 | lane3) are formed first and
+; then joined. The checks expect a single bitcast with the little-endian layout
+; only.
+define i32 @bitcast.v2i.le.tree(<4 x i8> %v) {
+; CHECK-BE-LABEL: define i32 @bitcast.v2i.le.tree(
+; CHECK-BE-SAME: <4 x i8> [[V:%.*]]) {
+; CHECK-BE-NEXT: [[V_0:%.*]] = extractelement <4 x i8> [[V]], i64 0
+; CHECK-BE-NEXT: [[Z_0:%.*]] = zext i8 [[V_0]] to i32
+; CHECK-BE-NEXT: [[V_1:%.*]] = extractelement <4 x i8> [[V]], i64 1
+; CHECK-BE-NEXT: [[Z_1:%.*]] = zext i8 [[V_1]] to i32
+; CHECK-BE-NEXT: [[S_1:%.*]] = shl nuw nsw i32 [[Z_1]], 8
+; CHECK-BE-NEXT: [[X_1:%.*]] = or disjoint i32 [[S_1]], [[Z_0]]
+; CHECK-BE-NEXT: [[V_2:%.*]] = extractelement <4 x i8> [[V]], i64 2
+; CHECK-BE-NEXT: [[Z_2:%.*]] = zext i8 [[V_2]] to i32
+; CHECK-BE-NEXT: [[S_2:%.*]] = shl nuw nsw i32 [[Z_2]], 16
+; CHECK-BE-NEXT: [[V_3:%.*]] = extractelement <4 x i8> [[V]], i64 3
+; CHECK-BE-NEXT: [[Z_3:%.*]] = zext i8 [[V_3]] to i32
+; CHECK-BE-NEXT: [[S_3:%.*]] = shl nuw i32 [[Z_3]], 24
+; CHECK-BE-NEXT: [[X_3:%.*]] = or disjoint i32 [[S_2]], [[S_3]]
+; CHECK-BE-NEXT: [[X:%.*]] = or disjoint i32 [[X_1]], [[X_3]]
+; CHECK-BE-NEXT: ret i32 [[X]]
+;
+; CHECK-LE-LABEL: define i32 @bitcast.v2i.le.tree(
+; CHECK-LE-SAME: <4 x i8> [[V:%.*]]) {
+; CHECK-LE-NEXT: [[X:%.*]] = bitcast <4 x i8> [[V]] to i32
+; CHECK-LE-NEXT: ret i32 [[X]]
+;
+ %v.0 = extractelement <4 x i8> %v, i64 0
+ %z.0 = zext i8 %v.0 to i32
+
+ %v.1 = extractelement <4 x i8> %v, i64 1
+ %z.1 = zext i8 %v.1 to i32
+ %s.1 = shl i32 %z.1, 8
+ %x.1 = or i32 %z.0, %s.1
+
+ %v.2 = extractelement <4 x i8> %v, i64 2
+ %z.2 = zext i8 %v.2 to i32
+ %s.2 = shl i32 %z.2, 16
+
+ %v.3 = extractelement <4 x i8> %v, i64 3
+ %z.3 = zext i8 %v.3 to i32
+ %s.3 = shl i32 %z.3, 24
+ %x.3 = or i32 %s.2, %s.3
+
+ %x = or i32 %x.1, %x.3
+
+ ret i32 %x
+}
+
+; Same big-endian-order packing of four i8 lanes into an i32 (lane 3 in the low
+; bits), but the ors are arranged as a tree: two pairs are formed first and then
+; joined. The checks expect a single bitcast with the big-endian layout only.
+define i32 @bitcast.v2i.be.tree(<4 x i8> %v) {
+; CHECK-BE-LABEL: define i32 @bitcast.v2i.be.tree(
+; CHECK-BE-SAME: <4 x i8> [[V:%.*]]) {
+; CHECK-BE-NEXT: [[X:%.*]] = bitcast <4 x i8> [[V]] to i32
+; CHECK-BE-NEXT: ret i32 [[X]]
+;
+; CHECK-LE-LABEL: define i32 @bitcast.v2i.be.tree(
+; CHECK-LE-SAME: <4 x i8> [[V:%.*]]) {
+; CHECK-LE-NEXT: [[V_0:%.*]] = extractelement <4 x i8> [[V]], i64 3
+; CHECK-LE-NEXT: [[Z_0:%.*]] = zext i8 [[V_0]] to i32
+; CHECK-LE-NEXT: [[V_1:%.*]] = extractelement <4 x i8> [[V]], i64 2
+; CHECK-LE-NEXT: [[Z_1:%.*]] = zext i8 [[V_1]] to i32
+; CHECK-LE-NEXT: [[S_1:%.*]] = shl nuw nsw i32 [[Z_1]], 8
+; CHECK-LE-NEXT: [[X_1:%.*]] = or disjoint i32 [[S_1]], [[Z_0]]
+; CHECK-LE-NEXT: [[V_2:%.*]] = extractelement <4 x i8> [[V]], i64 1
+; CHECK-LE-NEXT: [[Z_2:%.*]] = zext i8 [[V_2]] to i32
+; CHECK-LE-NEXT: [[S_2:%.*]] = shl nuw nsw i32 [[Z_2]], 16
+; CHECK-LE-NEXT: [[V_3:%.*]] = extractelement <4 x i8> [[V]], i64 0
+; CHECK-LE-NEXT: [[Z_3:%.*]] = zext i8 [[V_3]] to i32
+; CHECK-LE-NEXT: [[S_3:%.*]] = shl nuw i32 [[Z_3]], 24
+; CHECK-LE-NEXT: [[X_3:%.*]] = or disjoint i32 [[S_2]], [[S_3]]
+; CHECK-LE-NEXT: [[X:%.*]] = or disjoint i32 [[X_1]], [[X_3]]
+; CHECK-LE-NEXT: ret i32 [[X]]
+;
+ %v.0 = extractelement <4 x i8> %v, i64 3
+ %z.0 = zext i8 %v.0 to i32
+
+ %v.1 = extractelement <4 x i8> %v, i64 2
+ %z.1 = zext i8 %v.1 to i32
+ %s.1 = shl i32 %z.1, 8
+ %x.1 = or i32 %z.0, %s.1
+
+ %v.2 = extractelement <4 x i8> %v, i64 1
+ %z.2 = zext i8 %v.2 to i32
+ %s.2 = shl i32 %z.2, 16
+
+ %v.3 = extractelement <4 x i8> %v, i64 0
+ %z.3 = zext i8 %v.3 to i32
+ %s.3 = shl i32 %z.3, 24
+ %x.3 = or i32 %s.2, %s.3
+
+ %x = or i32 %x.1, %x.3
+
+ ret i32 %x
+}
+
+; i16-element variant of the tree-shaped little-endian-order packing: four i16
+; lanes are placed in an i64 with lane N shifted left by 16*N, and the ors form
+; two pairs that are then joined. The checks expect a single bitcast with the
+; little-endian layout only.
+define i64 @bitcast.v2i.le.tree.i16(<4 x i16> %v) {
+; CHECK-BE-LABEL: define i64 @bitcast.v2i.le.tree.i16(
+; CHECK-BE-SAME: <4 x i16> [[V:%.*]]) {
+; CHECK-BE-NEXT: [[V_0:%.*]] = extractelement <4 x i16> [[V]], i64 0
+; CHECK-BE-NEXT: [[Z_0:%.*]] = zext i16 [[V_0]] to i64
+; CHECK-BE-NEXT: [[V_1:%.*]] = extractelement <4 x i16> [[V]], i64 1
+; CHECK-BE-NEXT: [[Z_1:%.*]] = zext i16 [[V_1]] to i64
+; CHECK-BE-NEXT: [[S_1:%.*]] = shl nuw nsw i64 [[Z_1]], 16
+; CHECK-BE-NEXT: [[X_1:%.*]] = or disjoint i64 [[S_1]], [[Z_0]]
+; CHECK-BE-NEXT: [[V_2:%.*]] = extractelement <4 x i16> [[V]], i64 2
+; CHECK-BE-NEXT: [[Z_2:%.*]] = zext i16 [[V_2]] to i64
+; CHECK-BE-NEXT: [[S_2:%.*]] = shl nuw nsw i64 [[Z_2]], 32
+; CHECK-BE-NEXT: [[V_3:%.*]] = extractelement <4 x i16> [[V]], i64 3
+; CHECK-BE-NEXT: [[Z_3:%.*]] = zext i16 [[V_3]] to i64
+; CHECK-BE-NEXT: [[S_3:%.*]] = shl nuw i64 [[Z_3]], 48
+; CHECK-BE-NEXT: [[X_3:%.*]] = or disjoint i64 [[S_2]], [[S_3]]
+; CHECK-BE-NEXT: [[X:%.*]] = or disjoint i64 [[X_1]], [[X_3]]
+; CHECK-BE-NEXT: ret i64 [[X]]
+;
+; CHECK-LE-LABEL: define i64 @bitcast.v2i.le.tree.i16(
+; CHECK-LE-SAME: <4 x i16> [[V:%.*]]) {
+; CHECK-LE-NEXT: [[X:%.*]] = bitcast <4 x i16> [[V]] to i64
+; CHECK-LE-NEXT: ret i64 [[X]]
+;
+ %v.0 = extractelement <4 x i16> %v, i64 0
+ %z.0 = zext i16 %v.0 to i64
+
+ %v.1 = extractelement <4 x i16> %v, i64 1
+ %z.1 = zext i16 %v.1 to i64
+ %s.1 = shl i64 %z.1, 16
+ %x.1 = or i64 %z.0, %s.1
+
+ %v.2 = extractelement <4 x i16> %v, i64 2
+ %z.2 = zext i16 %v.2 to i64
+ %s.2 = shl i64 %z.2, 32
+
+ %v.3 = extractelement <4 x i16> %v, i64 3
+ %z.3 = zext i16 %v.3 to i64
+ %s.3 = shl i64 %z.3, 48
+ %x.3 = or i64 %s.2, %s.3
+
+ %x = or i64 %x.1, %x.3
+
+ ret i64 %x
+}
+
+; i16-element variant of the tree-shaped big-endian-order packing: four i16
+; lanes are placed in an i64 in reverse order (lane 3 in the low bits), and the
+; ors form two pairs that are then joined. The checks expect a single bitcast
+; with the big-endian layout only.
+define i64 @bitcast.v2i.be.tree.i16(<4 x i16> %v) {
+; CHECK-BE-LABEL: define i64 @bitcast.v2i.be.tree.i16(
+; CHECK-BE-SAME: <4 x i16> [[V:%.*]]) {
+; CHECK-BE-NEXT: [[X:%.*]] = bitcast <4 x i16> [[V]] to i64
+; CHECK-BE-NEXT: ret i64 [[X]]
+;
+; CHECK-LE-LABEL: define i64 @bitcast.v2i.be.tree.i16(
+; CHECK-LE-SAME: <4 x i16> [[V:%.*]]) {
+; CHECK-LE-NEXT: [[V_0:%.*]] = extractelement <4 x i16> [[V]], i64 3
+; CHECK-LE-NEXT: [[Z_0:%.*]] = zext i16 [[V_0]] to i64
+; CHECK-LE-NEXT: [[V_1:%.*]] = extractelement <4 x i16> [[V]], i64 2
+; CHECK-LE-NEXT: [[Z_1:%.*]] = zext i16 [[V_1]] to i64
+; CHECK-LE-NEXT: [[S_1:%.*]] = shl nuw nsw i64 [[Z_1]], 16
+; CHECK-LE-NEXT: [[X_1:%.*]] = or disjoint i64 [[S_1]], [[Z_0]]
+; CHECK-LE-NEXT: [[V_2:%.*]] = extractelement <4 x i16> [[V]], i64 1
+; CHECK-LE-NEXT: [[Z_2:%.*]] = zext i16 [[V_2]] to i64
+; CHECK-LE-NEXT: [[S_2:%.*]] = shl nuw nsw i64 [[Z_2]], 32
+; CHECK-LE-NEXT: [[V_3:%.*]] = extractelement <4 x i16> [[V]], i64 0
+; CHECK-LE-NEXT: [[Z_3:%.*]] = zext i16 [[V_3]] to i64
+; CHECK-LE-NEXT: [[S_3:%.*]] = shl nuw i64 [[Z_3]], 48
+; CHECK-LE-NEXT: [[X_3:%.*]] = or disjoint i64 [[S_2]], [[S_3]]
+; CHECK-LE-NEXT: [[X:%.*]] = or disjoint i64 [[X_1]], [[X_3]]
+; CHECK-LE-NEXT: ret i64 [[X]]
+;
+ %v.0 = extractelement <4 x i16> %v, i64 3
+ %z.0 = zext i16 %v.0 to i64
+
+ %v.1 = extractelement <4 x i16> %v, i64 2
+ %z.1 = zext i16 %v.1 to i64
+ %s.1 = shl i64 %z.1, 16
+ %x.1 = or i64 %z.0, %s.1
+
+ %v.2 = extractelement <4 x i16> %v, i64 1
+ %z.2 = zext i16 %v.2 to i64
+ %s.2 = shl i64 %z.2, 32
+
+ %v.3 = extractelement <4 x i16> %v, i64 0
+ %z.3 = zext i16 %v.3 to i64
+ %s.3 = shl i64 %z.3, 48
+ %x.3 = or i64 %s.2, %s.3
+
+ %x = or i64 %x.1, %x.3
+
+ ret i64 %x
+}
+
+; Packs lanes 3..6 of an <8 x i8> vector into an i32 with lane 3 in the low bits
+; and each following lane 8 bits higher, i.e. a sub-range that does not start at
+; lane 0. With the little-endian layout the checks expect a shufflevector of
+; those lanes, a bitcast to <2 x i32> and an extract of element 0; with
+; big-endian the input is expected to stay unchanged.
+define i32 @extract.le.i32(<8 x i8> %v) {
+; CHECK-BE-LABEL: define i32 @extract.le.i32(
+; CHECK-BE-SAME: <8 x i8> [[V:%.*]]) {
+; CHECK-BE-NEXT: [[V_0:%.*]] = extractelement <8 x i8> [[V]], i64 3
+; CHECK-BE-NEXT: [[Z_0:%.*]] = zext i8 [[V_0]] to i32
+; CHECK-BE-NEXT: [[V_1:%.*]] = extractelement <8 x i8> [[V]], i64 4
+; CHECK-BE-NEXT: [[Z_1:%.*]] = zext i8 [[V_1]] to i32
+; CHECK-BE-NEXT: [[S_1:%.*]] = shl nuw nsw i32 [[Z_1]], 8
+; CHECK-BE-NEXT: [[X_1:%.*]] = or disjoint i32 [[S_1]], [[Z_0]]
+; CHECK-BE-NEXT: [[V_2:%.*]] = extractelement <8 x i8> [[V]], i64 5
+; CHECK-BE-NEXT: [[Z_2:%.*]] = zext i8 [[V_2]] to i32
+; CHECK-BE-NEXT: [[S_2:%.*]] = shl nuw nsw i32 [[Z_2]], 16
+; CHECK-BE-NEXT: [[X_2:%.*]] = or disjoint i32 [[X_1]], [[S_2]]
+; CHECK-BE-NEXT: [[V_3:%.*]] = extractelement <8 x i8> [[V]], i64 6
+; CHECK-BE-NEXT: [[Z_3:%.*]] = zext i8 [[V_3]] to i32
+; CHECK-BE-NEXT: [[S_3:%.*]] = shl nuw i32 [[Z_3]], 24
+; CHECK-BE-NEXT: [[X_3:%.*]] = or disjoint i32 [[X_2]], [[S_3]]
+; CHECK-BE-NEXT: ret i32 [[X_3]]
+;
+; CHECK-LE-LABEL: define i32 @extract.le.i32(
+; CHECK-LE-SAME: <8 x i8> [[V:%.*]]) {
+; CHECK-LE-NEXT: [[X_3_V_EXTRACT:%.*]] = shufflevector <8 x i8> [[V]], <8 x i8> poison, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-LE-NEXT: [[X_3_V_BC:%.*]] = bitcast <8 x i8> [[X_3_V_EXTRACT]] to <2 x i32>
+; CHECK-LE-NEXT: [[X_3_V_EXTRACT1:%.*]] = extractelement <2 x i32> [[X_3_V_BC]], i64 0
+; CHECK-LE-NEXT: ret i32 [[X_3_V_EXTRACT1]]
+;
+ %v.0 = extractelement <8 x i8> %v, i64 3
+ %z.0 = zext i8 %v.0 to i32
+
+ %v.1 = extractelement <8 x i8> %v, i64 4
+ %z.1 = zext i8 %v.1 to i32
+ %s.1 = shl i32 %z.1, 8
+ %x.1 = or i32 %z.0, %s.1
+
+ %v.2 = extractelement <8 x i8> %v, i64 5
+ %z.2 = zext i8 %v.2 to i32
+ %s.2 = shl i32 %z.2, 16
+ %x.2 = or i32 %x.1, %s.2
+
+ %v.3 = extractelement <8 x i8> %v, i64 6
+ %z.3 = zext i8 %v.3 to i32
+ %s.3 = shl i32 %z.3, 24
+ %x.3 = or i32 %x.2, %s.3
+
+ ret i32 %x.3
+}
+
+; Packs lanes 3..6 of an <8 x i8> vector into an i32 in reverse order (lane 6 in
+; the low bits, lane 3 shifted by 24). With the big-endian layout the checks
+; expect a shufflevector of lanes 3..6, a bitcast to <2 x i32> and an extract of
+; element 0; with little-endian the input is expected to stay unchanged.
+define i32 @extract.be.i32(<8 x i8> %v) {
+; CHECK-BE-LABEL: define i32 @extract.be.i32(
+; CHECK-BE-SAME: <8 x i8> [[V:%.*]]) {
+; CHECK-BE-NEXT: [[X_3_V_EXTRACT:%.*]] = shufflevector <8 x i8> [[V]], <8 x i8> poison, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-BE-NEXT: [[X_3_V_BC:%.*]] = bitcast <8 x i8> [[X_3_V_EXTRACT]] to <2 x i32>
+; CHECK-BE-NEXT: [[X_3_V_EXTRACT1:%.*]] = extractelement <2 x i32> [[X_3_V_BC]], i64 0
+; CHECK-BE-NEXT: ret i32 [[X_3_V_EXTRACT1]]
+;
+; CHECK-LE-LABEL: define i32 @extract.be.i32(
+; CHECK-LE-SAME: <8 x i8> [[V:%.*]]) {
+; CHECK-LE-NEXT: [[V_0:%.*]] = extractelement <8 x i8> [[V]], i64 6
+; CHECK-LE-NEXT: [[Z_0:%.*]] = zext i8 [[V_0]] to i32
+; CHECK-LE-NEXT: [[V_1:%.*]] = extractelement <8 x i8> [[V]], i64 5
+; CHECK-LE-NEXT: [[Z_1:%.*]] = zext i8 [[V_1]] to i32
+; CHECK-LE-NEXT: [[S_1:%.*]] = shl nuw nsw i32 [[Z_1]], 8
+; CHECK-LE-NEXT: [[X_1:%.*]] = or disjoint i32 [[S_1]], [[Z_0]]
+; CHECK-LE-NEXT: [[V_2:%.*]] = extractelement <8 x i8> [[V]], i64 4
+; CHECK-LE-NEXT: [[Z_2:%.*]] = zext i8 [[V_2]] to i32
+; CHECK-LE-NEXT: [[S_2:%.*]] = shl nuw nsw i32 [[Z_2]], 16
+; CHECK-LE-NEXT: [[X_2:%.*]] = or disjoint i32 [[X_1]], [[S_2]]
+; CHECK-LE-NEXT: [[V_3:%.*]] = extractelement <8 x i8> [[V]], i64 3
+; CHECK-LE-NEXT: [[Z_3:%.*]] = zext i8 [[V_3]] to i32
+; CHECK-LE-NEXT: [[S_3:%.*]] = shl nuw i32 [[Z_3]], 24
+; CHECK-LE-NEXT: [[X_3:%.*]] = or disjoint i32 [[X_2]], [[S_3]]
+; CHECK-LE-NEXT: ret i32 [[X_3]]
+;
+ %v.0 = extractelement <8 x i8> %v, i64 6
+ %z.0 = zext i8 %v.0 to i32
+
+ %v.1 = extractelement <8 x i8> %v, i64 5
+ %z.1 = zext i8 %v.1 to i32
+ %s.1 = shl i32 %z.1, 8
+ %x.1 = or i32 %z.0, %s.1
+
+ %v.2 = extractelement <8 x i8> %v, i64 4
+ %z.2 = zext i8 %v.2 to i32
+ %s.2 = shl i32 %z.2, 16
+ %x.2 = or i32 %x.1, %s.2
+
+ %v.3 = extractelement <8 x i8> %v, i64 3
+ %z.3 = zext i8 %v.3 to i32
+ %s.3 = shl i32 %z.3, 24
+ %x.3 = or i32 %x.2, %s.3
+
+ ret i32 %x.3
+}
+
+; i16-element variant of the sub-range packing: lanes 3..6 of an <8 x i16>
+; vector are placed in an i64 with lane 3 in the low bits and each following
+; lane 16 bits higher. With the little-endian layout the checks expect a
+; shufflevector of those lanes, a bitcast to <2 x i64> and an extract of
+; element 0; with big-endian the input is expected to stay unchanged.
+define i64 @extract.le.i64(<8 x i16> %v) {
+; CHECK-BE-LABEL: define i64 @extract.le.i64(
+; CHECK-BE-SAME: <8 x i16> [[V:%.*]]) {
+; CHECK-BE-NEXT: [[V_0:%.*]] = extractelement <8 x i16> [[V]], i64 3
+; CHECK-BE-NEXT: [[Z_0:%.*]] = zext i16 [[V_0]] to i64
+; CHECK-BE-NEXT: [[V_1:%.*]] = extractelement <8 x i16> [[V]], i64 4
+; CHECK-BE-NEXT: [[Z_1:%.*]] = zext i16 [[V_1]] to i64
+; CHECK-BE-NEXT: [[S_1:%.*]] = shl nuw nsw i64 [[Z_1]], 16
+; CHECK-BE-NEXT: [[X_1:%.*]] = or disjoint i64 [[S_1]], [[Z_0]]
+; CHECK-BE-NEXT: [[V_2:%.*]] = extractelement <8 x i16> [[V]], i64 5
+; CHECK-BE-NEXT: [[Z_2:%.*]] = zext i16 [[V_2]] to i64
+; CHECK-BE-NEXT: [[S_2:%.*]] = shl nuw nsw i64 [[Z_2]], 32
+; CHECK-BE-NEXT: [[X_2:%.*]] = or disjoint i64 [[X_1]], [[S_2]]
+; CHECK-BE-NEXT: [[V_3:%.*]] = extractelement <8 x i16> [[V]], i64 6
+; CHECK-BE-NEXT: [[Z_3:%.*]] = zext i16 [[V_3]] to i64
+; CHECK-BE-NEXT: [[S_3:%.*]] = shl nuw i64 [[Z_3]], 48
+; CHECK-BE-NEXT: [[X_3:%.*]] = or disjoint i64 [[X_2]], [[S_3]]
+; CHECK-BE-NEXT: ret i64 [[X_3]]
+;
+; CHECK-LE-LABEL: define i64 @extract.le.i64(
+; CHECK-LE-SAME: <8 x i16> [[V:%.*]]) {
+; CHECK-LE-NEXT: [[X_3_V_EXTRACT:%.*]] = shufflevector <8 x i16> [[V]], <8 x i16> poison, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-LE-NEXT: [[X_3_V_BC:%.*]] = bitcast <8 x i16> [[X_3_V_EXTRACT]] to <2 x i64>
+; CHECK-LE-NEXT: [[X_3_V_EXTRACT1:%.*]] = extractelement <2 x i64> [[X_3_V_BC]], i64 0
+; CHECK-LE-NEXT: ret i64 [[X_3_V_EXTRACT1]]
+;
+ %v.0 = extractelement <8 x i16> %v, i64 3
+ %z.0 = zext i16 %v.0 to i64
+
+ %v.1 = extractelement <8 x i16> %v, i64 4
+ %z.1 = zext i16 %v.1 to i64
+ %s.1 = shl i64 %z.1, 16
+ %x.1 = or i64 %z.0, %s.1
+
+ %v.2 = extractelement <8 x i16> %v, i64 5
+ %z.2 = zext i16 %v.2 to i64
+ %s.2 = shl i64 %z.2, 32
+ %x.2 = or i64 %x.1, %s.2
+
+ %v.3 = extractelement <8 x i16> %v, i64 6
+ %z.3 = zext i16 %v.3 to i64
+ %s.3 = shl i64 %z.3, 48
+ %x.3 = or i64 %x.2, %s.3
+
+ ret i64 %x.3
+}
+
+; i16-element variant of the reversed sub-range packing: lanes 3..6 of an
+; <8 x i16> vector are placed in an i64 with lane 6 in the low bits and lane 3
+; shifted by 48. With the big-endian layout the checks expect a shufflevector of
+; lanes 3..6, a bitcast to <2 x i64> and an extract of element 0; with
+; little-endian the input is expected to stay unchanged.
+define i64 @extract.be.i64(<8 x i16> %v) {
+; CHECK-BE-LABEL: define i64 @extract.be.i64(
+; CHECK-BE-SAME: <8 x i16> [[V:%.*]]) {
+; CHECK-BE-NEXT: [[X_3_V_EXTRACT:%.*]] = shufflevector <8 x i16> [[V]], <8 x i16> poison, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-BE-NEXT: [[X_3_V_BC:%.*]] = bitcast <8 x i16> [[X_3_V_EXTRACT]] to <2 x i64>
+; CHECK-BE-NEXT: [[X_3_V_EXTRACT1:%.*]] = extractelement <2 x i64> [[X_3_V_BC]], i64 0
+; CHECK-BE-NEXT: ret i64 [[X_3_V_EXTRACT1]]
+;
+; CHECK-LE-LABEL: define i64 @extract.be.i64(
+; CHECK-LE-SAME: <8 x i16> [[V:%.*]]) {
+; CHECK-LE-NEXT: [[V_0:%.*]] = extractelement <8 x i16> [[V]], i64 6
+; CHECK-LE-NEXT: [[Z_0:%.*]] = zext i16 [[V_0]] to i64
+; CHECK-LE-NEXT: [[V_1:%.*]] = extractelement <8 x i16> [[V]], i64 5
+; CHECK-LE-NEXT: [[Z_1:%.*]] = zext i16 [[V_1]] to i64
+; CHECK-LE-NEXT: [[S_1:%.*]] = shl nuw nsw i64 [[Z_1]], 16
+; CHECK-LE-NEXT: [[X_1:%.*]] = or disjoint i64 [[S_1]], [[Z_0]]
+; CHECK-LE-NEXT: [[V_2:%.*]] = extractelement <8 x i16> [[V]], i64 4
+; CHECK-LE-NEXT: [[Z_2:%.*]] = zext i16 [[V_2]] to i64
+; CHECK-LE-NEXT: [[S_2:%.*]] = shl nuw nsw i64 [[Z_2]], 32
+; CHECK-LE-NEXT: [[X_2:%.*]] = or disjoint i64 [[X_1]], [[S_2]]
+; CHECK-LE-NEXT: [[V_3:%.*]] = extractelement <8 x i16> [[V]], i64 3
+; CHECK-LE-NEXT: [[Z_3:%.*]] = zext i16 [[V_3]] to i64
+; CHECK-LE-NEXT: [[S_3:%.*]] = shl nuw i64 [[Z_3]], 48
+; CHECK-LE-NEXT: [[X_3:%.*]] = or disjoint i64 [[X_2]], [[S_3]]
+; CHECK-LE-NEXT: ret i64 [[X_3]]
+;
+ %v.0 = extractelement <8 x i16> %v, i64 6
+ %z.0 = zext i16 %v.0 to i64
+
+ %v.1 = extractelement <8 x i16> %v, i64 5
+ %z.1 = zext i16 %v.1 to i64
+ %s.1 = shl i64 %z.1, 16
+ %x.1 = or i64 %z.0, %s.1
+
+ %v.2 = extractelement <8 x i16> %v, i64 4
+ %z.2 = zext i16 %v.2 to i64
+ %s.2 = shl i64 %z.2, 32
+ %x.2 = or i64 %x.1, %s.2
+
+ %v.3 = extractelement <8 x i16> %v, i64 3
+ %z.3 = zext i16 %v.3 to i64
+ %s.3 = shl i64 %z.3, 48
+ %x.3 = or i64 %x.2, %s.3
+
+ ret i64 %x.3
+}
+
+; Packs only lanes 0, 1 and 3 of a <4 x i8> vector into an i32 (shifts 0, 8 and
+; 24); lane 2 is never extracted, so its byte of the result is zero. With the
+; little-endian layout the checks expect lane 2 to be overwritten with zero by
+; an insertelement followed by a bitcast; with big-endian the input is expected
+; to stay unchanged.
+define i32 @partial.le(<4 x i8> %v) {
+; CHECK-BE-LABEL: define i32 @partial.le(
+; CHECK-BE-SAME: <4 x i8> [[V:%.*]]) {
+; CHECK-BE-NEXT: [[V_0:%.*]] = extractelement <4 x i8> [[V]], i64 0
+; CHECK-BE-NEXT: [[Z_0:%.*]] = zext i8 [[V_0]] to i32
+; CHECK-BE-NEXT: [[V_1:%.*]] = extractelement <4 x i8> [[V]], i64 1
+; CHECK-BE-NEXT: [[Z_1:%.*]] = zext i8 [[V_1]] to i32
+; CHECK-BE-NEXT: [[S_1:%.*]] = shl nuw nsw i32 [[Z_1]], 8
+; CHECK-BE-NEXT: [[X_1:%.*]] = or disjoint i32 [[S_1]], [[Z_0]]
+; CHECK-BE-NEXT: [[V_3:%.*]] = extractelement <4 x i8> [[V]], i64 3
+; CHECK-BE-NEXT: [[Z_3:%.*]] = zext i8 [[V_3]] to i32
+; CHECK-BE-NEXT: [[S_3:%.*]] = shl nuw i32 [[Z_3]], 24
+; CHECK-BE-NEXT: [[X_3:%.*]] = or disjoint i32 [[X_1]], [[S_3]]
+; CHECK-BE-NEXT: ret i32 [[X_3]]
+;
+; CHECK-LE-LABEL: define i32 @partial.le(
+; CHECK-LE-SAME: <4 x i8> [[V:%.*]]) {
+; CHECK-LE-NEXT: [[X_3_V1:%.*]] = insertelement <4 x i8> [[V]], i8 0, i64 2
+; CHECK-LE-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[X_3_V1]] to i32
+; CHECK-LE-NEXT: ret i32 [[X_3]]
+;
+ %v.0 = extractelement <4 x i8> %v, i64 0
+ %z.0 = zext i8 %v.0 to i32
+
+ %v.1 = extractelement <4 x i8> %v, i64 1
+ %z.1 = zext i8 %v.1 to i32
+ %s.1 = shl i32 %z.1, 8
+ %x.1 = or i32 %z.0, %s.1
+
+ %v.3 = extractelement <4 x i8> %v, i64 3
+ %z.3 = zext i8 %v.3 to i32
+ %s.3 = shl i32 %z.3, 24
+ %x.3 = or i32 %x.1, %s.3
+
+ ret i32 %x.3
+}
+
+; Packs only lanes 3, 1 and 0 of a <4 x i8> vector into an i32 (shifts 0, 16 and
+; 24 respectively); lane 2 is never extracted, so bits 8..15 of the result are
+; zero. With the big-endian layout the checks expect lane 2 to be overwritten
+; with zero by an insertelement followed by a bitcast; with little-endian the
+; input is expected to stay unchanged.
+define i32 @partial.be(<4 x i8> %v) {
+; CHECK-BE-LABEL: define i32 @partial.be(
+; CHECK-BE-SAME: <4 x i8> [[V:%.*]]) {
+; CHECK-BE-NEXT: [[X_3_V1:%.*]] = insertelement <4 x i8> [[V]], i8 0, i64 2
+; CHECK-BE-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[X_3_V1]] to i32
+; CHECK-BE-NEXT: ret i32 [[X_3]]
+;
+; CHECK-LE-LABEL: define i32 @partial.be(
+; CHECK-LE-SAME: <4 x i8> [[V:%.*]]) {
+; CHECK-LE-NEXT: [[V_0:%.*]] = extractelement <4 x i8> [[V]], i64 3
+; CHECK-LE-NEXT: [[Z_0:%.*]] = zext i8 [[V_0]] to i32
+; CHECK-LE-NEXT: [[V_1:%.*]] = extractelement <4 x i8> [[V]], i64 1
+; CHECK-LE-NEXT: [[Z_1:%.*]] = zext i8 [[V_1]] to i32
+; CHECK-LE-NEXT: [[S_1:%.*]] = shl nuw nsw i32 [[Z_1]], 16
+; CHECK-LE-NEXT: [[X_1:%.*]] = or disjoint i32 [[S_1]], [[Z_0]]
+; CHECK-LE-NEXT: [[V_3:%.*]] = extractelement <4 x i8> [[V]], i64 0
+; CHECK-LE-NEXT: [[Z_3:%.*]] = zext i8 [[V_3]] to i32
+; CHECK-LE-NEXT: [[S_3:%.*]] = shl nuw i32 [[Z_3]], 24
+; CHECK-LE-NEXT: [[X_3:%.*]] = or disjoint i32 [[X_1]], [[S_3]]
+; CHECK-LE-NEXT: ret i32 [[X_3]]
+;
+ %v.0 = extractelement <4 x i8> %v, i64 3
+ %z.0 = zext i8 %v.0 to i32
+
+ %v.1 = extractelement <4 x i8> %v, i64 1
+ %z.1 = zext i8 %v.1 to i32
+ %s.1 = shl i32 %z.1, 16
+ %x.1 = or i32 %z.0, %s.1
+
+ %v.3 = extractelement <4 x i8> %v, i64 0
+ %z.3 = zext i8 %v.3 to i32
+ %s.3 = shl i32 %z.3, 24
+ %x.3 = or i32 %x.1, %s.3
+
+ ret i32 %x.3
+}
+
+
+define i64 @partial.le.i16(<4 x i16> %v) {
+; CHECK-BE-LABEL: define i64 @partial.le.i16(
+; CHECK-BE-SAME: <4 x i16> [[V:%.*]]) {
+; CHECK-BE-NEXT: [[V_0:%.*]] = extractelement <4 x i16> [[V]], i64 0
+; CHECK-BE-NEXT: [[Z_0:%.*]] = zext i16 [[V_0]] to i64
+; CHECK-BE-NEXT: [[V_1:%.*]] = extractelement <4 x i16> [[V]], i64 1
+; CHECK-BE-NEXT: [[Z_1:%.*]] = zext i16 [[V_1]] to i64
+; CHECK-BE-NEXT: [[S_1:%.*]] = shl nuw nsw i64 [[Z_1]], 16
+; CHECK-BE-NEXT: [[X_1:%.*]] = or disjoint i64 [[S_1]], [[Z_0]]
+; CHECK-BE-NEXT: [[V_3:%.*]] = extractelement <4 x i16> [[V]], i64 3
+; CHECK-BE-NEXT: [[Z_3:%.*]] = zext i16 [[V_3]] to i64
+; CHECK-BE-NEXT: [[S_3:%.*]] = shl nuw i64 [[Z_3]], 48
+; CHECK-BE-NEXT: [[X_3:%.*]] = or disjoint i64 [[X_1]], [[S_3]]
+; CHECK-BE-NEXT: ret i64 [[X_3]]
+;
+; CHECK-LE-LABEL: define i64 @partial.le.i16(
+; CHECK-LE-SAME: <4 x i16> [[V:%.*]]) {
+; CHECK-LE-NEXT: [[X_3_V1:%.*]] = insertelement <4 x i16> [[V]], i16 0, i64 2
+; CHECK-LE-NEXT: [[X_3:%.*]] = bitcast <4 x i16> [[X_3_V1]] to i64
+; CHECK-LE-NEXT: ret i64 [[X_3]]
+;
+ %v.0 = extractelement <4 x i16> %v, i64 0
+ %z.0 = zext i16 %v.0 to i64
+
+ %v.1 = extractelement <4 x i16> %v, i64 1
+ %z.1 = zext i16 %v.1 to i64
+ %s.1 = shl i64 %z.1, 16
+ %x.1 = or i64 %z.0, %s.1
+
+ %v.3 = extractelement <4 x i16> %v, i64 3
+ %z.3 = zext i16 %v.3 to i64
+ %s.3 = shl i64 %z.3, 48
+ %x.3 = or i64 %x.1, %s.3
+
+ ret i64 %x.3
+}
+
+define i64 @partial.be.i16(<4 x i16> %v) {
+; CHECK-BE-LABEL: define i64 @partial.be.i16(
+; CHECK-BE-SAME: <4 x i16> [[V:%.*]]) {
+; CHECK-BE-NEXT: [[X_3_V1:%.*]] = insertelement <4 x i16> [[V]], i16 0, i64 2
+; CHECK-BE-NEXT: [[X_3:%.*]] = bitcast <4 x i16> [[X_3_V1]] to i64
+; CHECK-BE-NEXT: ret i64 [[X_3]]
+;
+; CHECK-LE-LABEL: define i64 @partial.be.i16(
+; CHECK-LE-SAME: <4 x i16> [[V:%.*]]) {
+; CHECK-LE-NEXT: [[V_0:%.*]] = extractelement <4 x i16> [[V]], i64 3
+; CHECK-LE-NEXT: [[Z_0:%.*]] = zext i16 [[V_0]] to i64
+; CHECK-LE-NEXT: [[V_1:%.*]] = extractelement <4 x i16> [[V]], i64 1
+; CHECK-LE-NEXT: [[Z_1:%.*]] = zext i16 [[V_1]] to i64
+; CHECK-LE-NEXT: [[S_1:%.*]] = shl nuw nsw i64 [[Z_1]], 32
+; CHECK-LE-NEXT: [[X_1:%.*]] = or disjoint i64 [[S_1]], [[Z_0]]
+; CHECK-LE-NEXT: [[V_3:%.*]] = extractelement <4 x i16> [[V]], i64 0
+; CHECK-LE-NEXT: [[Z_3:%.*]] = zext i16 [[V_3]] to i64
+; CHECK-LE-NEXT: [[S_3:%.*]] = shl nuw i64 [[Z_3]], 48
+; CHECK-LE-NEXT: [[X_3:%.*]] = or disjoint i64 [[X_1]], [[S_3]]
+; CHECK-LE-NEXT: ret i64 [[X_3]]
+;
+ %v.0 = extractelement <4 x i16> %v, i64 3
+ %z.0 = zext i16 %v.0 to i64
+
+ %v.1 = extractelement <4 x i16> %v, i64 1
+ %z.1 = zext i16 %v.1 to i64
+ %s.1 = shl i64 %z.1, 32
+ %x.1 = or i64 %z.0, %s.1
+
+ %v.3 = extractelement <4 x i16> %v, i64 0
+ %z.3 = zext i16 %v.3 to i64
+ %s.3 = shl i64 %z.3, 48
+ %x.3 = or i64 %x.1, %s.3
+
+ ret i64 %x.3
+}
+
+define i32 @partial.extract.le.i32(<8 x i8> %v) {
+; CHECK-BE-LABEL: define i32 @partial.extract.le.i32(
+; CHECK-BE-SAME: <8 x i8> [[V:%.*]]) {
+; CHECK-BE-NEXT: [[V_0:%.*]] = extractelement <8 x i8> [[V]], i64 3
+; CHECK-BE-NEXT: [[Z_0:%.*]] = zext i8 [[V_0]] to i32
+; CHECK-BE-NEXT: [[V_1:%.*]] = extractelement <8 x i8> [[V]], i64 4
+; CHECK-BE-NEXT: [[Z_1:%.*]] = zext i8 [[V_1]] to i32
+; CHECK-BE-NEXT: [[S_1:%.*]] = shl nuw nsw i32 [[Z_1]], 8
+; CHECK-BE-NEXT: [[X_1:%.*]] = or disjoint i32 [[S_1]], [[Z_0]]
+; CHECK-BE-NEXT: [[V_3:%.*]] = extractelement <8 x i8> [[V]], i64 6
+; CHECK-BE-NEXT: [[Z_3:%.*]] = zext i8 [[V_3]] to i32
+; CHECK-BE-NEXT: [[S_3:%.*]] = shl nuw i32 [[Z_3]], 24
+; CHECK-BE-NEXT: [[X_3:%.*]] = or disjoint i32 [[X_1]], [[S_3]]
+; CHECK-BE-NEXT: ret i32 [[X_3]]
+;
+; CHECK-LE-LABEL: define i32 @partial.extract.le.i32(
+; CHECK-LE-SAME: <8 x i8> [[V:%.*]]) {
+; CHECK-LE-NEXT: [[X_3_V:%.*]] = shufflevector <8 x i8> [[V]], <8 x i8> <i8 0, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, <4 x i32> <i32 3, i32 4, i32 8, i32 6>
+; CHECK-LE-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[X_3_V]] to i32
+; CHECK-LE-NEXT: ret i32 [[X_3]]
+;
+ %v.0 = extractelement <8 x i8> %v, i64 3
+ %z.0 = zext i8 %v.0 to i32
+
+ %v.1 = extractelement <8 x i8> %v, i64 4
+ %z.1 = zext i8 %v.1 to i32
+ %s.1 = shl i32 %z.1, 8
+ %x.1 = or i32 %z.0, %s.1
+
+ %v.3 = extractelement <8 x i8> %v, i64 6
+ %z.3 = zext i8 %v.3 to i32
+ %s.3 = shl i32 %z.3, 24
+ %x.3 = or i32 %x.1, %s.3
+
+ ret i32 %x.3
+}
+
+define i32 @partial.extract.be.i32(<8 x i8> %v) {
+; CHECK-BE-LABEL: define i32 @partial.extract.be.i32(
+; CHECK-BE-SAME: <8 x i8> [[V:%.*]]) {
+; CHECK-BE-NEXT: [[X_3_V:%.*]] = shufflevector <8 x i8> [[V]], <8 x i8> <i8 0, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, <4 x i32> <i32 3, i32 4, i32 8, i32 6>
+; CHECK-BE-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[X_3_V]] to i32
+; CHECK-BE-NEXT: ret i32 [[X_3]]
+;
+; CHECK-LE-LABEL: define i32 @partial.extract.be.i32(
+; CHECK-LE-SAME: <8 x i8> [[V:%.*]]) {
+; CHECK-LE-NEXT: [[V_0:%.*]] = extractelement <8 x i8> [[V]], i64 6
+; CHECK-LE-NEXT: [[Z_0:%.*]] = zext i8 [[V_0]] to i32
+; CHECK-LE-NEXT: [[V_2:%.*]] = extractelement <8 x i8> [[V]], i64 4
+; CHECK-LE-NEXT: [[Z_2:%.*]] = zext i8 [[V_2]] to i32
+; CHECK-LE-NEXT: [[S_2:%.*]] = shl nuw nsw i32 [[Z_2]], 16
+; CHECK-LE-NEXT: [[X_2:%.*]] = or disjoint i32 [[S_2]], [[Z_0]]
+; CHECK-LE-NEXT: [[V_3:%.*]] = extractelement <8 x i8> [[V]], i64 3
+; CHECK-LE-NEXT: [[Z_3:%.*]] = zext i8 [[V_3]] to i32
+; CHECK-LE-NEXT: [[S_3:%.*]] = shl nuw i32 [[Z_3]], 24
+; CHECK-LE-NEXT: [[X_3:%.*]] = or disjoint i32 [[X_2]], [[S_3]]
+; CHECK-LE-NEXT: ret i32 [[X_3]]
+;
+ %v.0 = extractelement <8 x i8> %v, i64 6
+ %z.0 = zext i8 %v.0 to i32
+
+ %v.2 = extractelement <8 x i8> %v, i64 4
+ %z.2 = zext i8 %v.2 to i32
+ %s.2 = shl i32 %z.2, 16
+ %x.2 = or i32 %z.0, %s.2
+
+ %v.3 = extractelement <8 x i8> %v, i64 3
+ %z.3 = zext i8 %v.3 to i32
+ %s.3 = shl i32 %z.3, 24
+ %x.3 = or i32 %x.2, %s.3
+
+ ret i32 %x.3
+}
+
+define i64 @partial.extract.le.i64(<8 x i16> %v) {
+; CHECK-BE-LABEL: define i64 @partial.extract.le.i64(
+; CHECK-BE-SAME: <8 x i16> [[V:%.*]]) {
+; CHECK-BE-NEXT: [[V_0:%.*]] = extractelement <8 x i16> [[V]], i64 3
+; CHECK-BE-NEXT: [[Z_0:%.*]] = zext i16 [[V_0]] to i64
+; CHECK-BE-NEXT: [[V_1:%.*]] = extractelement <8 x i16> [[V]], i64 4
+; CHECK-BE-NEXT: [[Z_1:%.*]] = zext i16 [[V_1]] to i64
+; CHECK-BE-NEXT: [[S_1:%.*]] = shl nuw nsw i64 [[Z_1]], 16
+; CHECK-BE-NEXT: [[X_1:%.*]] = or disjoint i64 [[S_1]], [[Z_0]]
+; CHECK-BE-NEXT: [[V_2:%.*]] = extractelement <8 x i16> [[V]], i64 5
+; CHECK-BE-NEXT: [[Z_2:%.*]] = zext i16 [[V_2]] to i64
+; CHECK-BE-NEXT: [[S_2:%.*]] = shl nuw nsw i64 [[Z_2]], 32
+; CHECK-BE-NEXT: [[X_2:%.*]] = or disjoint i64 [[X_1]], [[S_2]]
+; CHECK-BE-NEXT: [[V_3:%.*]] = extractelement <8 x i16> [[V]], i64 6
+; CHECK-BE-NEXT: [[Z_3:%.*]] = zext i16 [[V_3]] to i64
+; CHECK-BE-NEXT: [[S_3:%.*]] = shl nuw i64 [[Z_3]], 48
+; CHECK-BE-NEXT: [[X_3:%.*]] = or disjoint i64 [[X_2]], [[S_3]]
+; CHECK-BE-NEXT: ret i64 [[X_3]]
+;
+; CHECK-LE-LABEL: define i64 @partial.extract.le.i64(
+; CHECK-LE-SAME: <8 x i16> [[V:%.*]]) {
+; CHECK-LE-NEXT: [[X_3_V_EXTRACT:%.*]] = shufflevector <8 x i16> [[V]], <8 x i16> poison, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-LE-NEXT: [[X_3_V_BC:%.*]] = bitcast <8 x i16> [[X_3_V_EXTRACT]] to <2 x i64>
+; CHECK-LE-NEXT: [[X_3_V_EXTRACT1:%.*]] = extractelement <2 x i64> [[X_3_V_BC]], i64 0
+; CHECK-LE-NEXT: ret i64 [[X_3_V_EXTRACT1]]
+;
+ %v.0 = extractelement <8 x i16> %v, i64 3
+ %z.0 = zext i16 %v.0 to i64
+
+ %v.1 = extractelement <8 x i16> %v, i64 4
+ %z.1 = zext i16 %v.1 to i64
+ %s.1 = shl i64 %z.1, 16
+ %x.1 = or i64 %z.0, %s.1
+
+ %v.2 = extractelement <8 x i16> %v, i64 5
+ %z.2 = zext i16 %v.2 to i64
+ %s.2 = shl i64 %z.2, 32
+ %x.2 = or i64 %x.1, %s.2
+
+ %v.3 = extractelement <8 x i16> %v, i64 6
+ %z.3 = zext i16 %v.3 to i64
+ %s.3 = shl i64 %z.3, 48
+ %x.3 = or i64 %x.2, %s.3
+
+ ret i64 %x.3
+}
+
+define i64 @partial.extract.be.i64(<8 x i16> %v) {
+; CHECK-BE-LABEL: define i64 @partial.extract.be.i64(
+; CHECK-BE-SAME: <8 x i16> [[V:%.*]]) {
+; CHECK-BE-NEXT: [[X_3_V:%.*]] = shufflevector <8 x i16> [[V]], <8 x i16> <i16 0, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison>, <4 x i32> <i32 3, i32 4, i32 8, i32 6>
+; CHECK-BE-NEXT: [[X_3:%.*]] = bitcast <4 x i16> [[X_3_V]] to i64
+; CHECK-BE-NEXT: ret i64 [[X_3]]
+;
+; CHECK-LE-LABEL: define i64 @partial.extract.be.i64(
+; CHECK-LE-SAME: <8 x i16> [[V:%.*]]) {
+; CHECK-LE-NEXT: [[V_0:%.*]] = extractelement <8 x i16> [[V]], i64 6
+; CHECK-LE-NEXT: [[Z_0:%.*]] = zext i16 [[V_0]] to i64
+; CHECK-LE-NEXT: [[V_2:%.*]] = extractelement <8 x i16> [[V]], i64 4
+; CHECK-LE-NEXT: [[Z_2:%.*]] = zext i16 [[V_2]] to i64
+; CHECK-LE-NEXT: [[S_2:%.*]] = shl nuw nsw i64 [[Z_2]], 32
+; CHECK-LE-NEXT: [[X_2:%.*]] = or disjoint i64 [[S_2]], [[Z_0]]
+; CHECK-LE-NEXT: [[V_3:%.*]] = extractelement <8 x i16> [[V]], i64 3
+; CHECK-LE-NEXT: [[Z_3:%.*]] = zext i16 [[V_3]] to i64
+; CHECK-LE-NEXT: [[S_3:%.*]] = shl nuw i64 [[Z_3]], 48
+; CHECK-LE-NEXT: [[X_3:%.*]] = or disjoint i64 [[X_2]], [[S_3]]
+; CHECK-LE-NEXT: ret i64 [[X_3]]
+;
+ %v.0 = extractelement <8 x i16> %v, i64 6
+ %z.0 = zext i16 %v.0 to i64
+
+ %v.2 = extractelement <8 x i16> %v, i64 4
+ %z.2 = zext i16 %v.2 to i64
+ %s.2 = shl i64 %z.2, 32
+ %x.2 = or i64 %z.0, %s.2
+
+ %v.3 = extractelement <8 x i16> %v, i64 3
+ %z.3 = zext i16 %v.3 to i64
+ %s.3 = shl i64 %z.3, 48
+ %x.3 = or i64 %x.2, %s.3
+
+ ret i64 %x.3
+}
+
+define <2 x i16> @shufflecast.v2v(<4 x i8> %v) {
+; CHECK-LABEL: define <2 x i16> @shufflecast.v2v(
+; CHECK-SAME: <4 x i8> [[V:%.*]]) {
+; CHECK-NEXT: [[W_3:%.*]] = bitcast <4 x i8> [[V]] to <2 x i16>
+; CHECK-NEXT: ret <2 x i16> [[W_3]]
+;
+ %v.0 = shufflevector <4 x i8> %v, <4 x i8> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+ %c.0 = bitcast <4 x i8> %v.0 to <2 x i16>
+
+ %v.1 = shufflevector <4 x i8> %v, <4 x i8> zeroinitializer, <4 x i32> <i32 4, i32 1, i32 4, i32 4>
+ %c.1 = bitcast <4 x i8> %v.1 to <2 x i16>
+ %w.1 = or <2 x i16> %c.0, %c.1
+
+ %v.2 = shufflevector <4 x i8> %v, <4 x i8> zeroinitializer, <4 x i32> <i32 4, i32 4, i32 2, i32 4>
+ %c.2 = bitcast <4 x i8> %v.2 to <2 x i16>
+ %w.2 = or <2 x i16> %w.1, %c.2
+
+ %v.3 = shufflevector <4 x i8> %v, <4 x i8> zeroinitializer, <4 x i32> <i32 4, i32 4, i32 4, i32 3>
+ %c.3 = bitcast <4 x i8> %v.3 to <2 x i16>
+ %w.3 = or <2 x i16> %w.2, %c.3
+
+ ret <2 x i16> %w.3
+}
+
+define <2 x i32> @shufflecast.v2v.i16(<4 x i16> %v) {
+; CHECK-LABEL: define <2 x i32> @shufflecast.v2v.i16(
+; CHECK-SAME: <4 x i16> [[V:%.*]]) {
+; CHECK-NEXT: [[W_3:%.*]] = bitcast <4 x i16> [[V]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[W_3]]
+;
+ %v.0 = shufflevector <4 x i16> %v, <4 x i16> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+ %c.0 = bitcast <4 x i16> %v.0 to <2 x i32>
+
+ %v.1 = shufflevector <4 x i16> %v, <4 x i16> zeroinitializer, <4 x i32> <i32 4, i32 1, i32 4, i32 4>
+ %c.1 = bitcast <4 x i16> %v.1 to <2 x i32>
+ %w.1 = or <2 x i32> %c.0, %c.1
+
+ %v.2 = shufflevector <4 x i16> %v, <4 x i16> zeroinitializer, <4 x i32> <i32 4, i32 4, i32 2, i32 4>
+ %c.2 = bitcast <4 x i16> %v.2 to <2 x i32>
+ %w.2 = or <2 x i32> %w.1, %c.2
+
+ %v.3 = shufflevector <4 x i16> %v, <4 x i16> zeroinitializer, <4 x i32> <i32 4, i32 4, i32 4, i32 3>
+ %c.3 = bitcast <4 x i16> %v.3 to <2 x i32>
+ %w.3 = or <2 x i32> %w.2, %c.3
+
+ ret <2 x i32> %w.3
+}
+
+define i32 @bitcast.v2i.half(<2 x half> %v) {
+; CHECK-LABEL: define i32 @bitcast.v2i.half(
+; CHECK-SAME: <2 x half> [[V:%.*]]) {
+; CHECK-NEXT: [[X:%.*]] = bitcast <2 x half> [[V]] to i32
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %v.0 = insertelement <2 x half> %v, half 0.0, i64 1
+ %x.0 = bitcast <2 x half> %v.0 to i32
+
+ %v.1 = insertelement <2 x half> %v, half 0.0, i64 0
+ %x.1 = bitcast <2 x half> %v.1 to i32
+
+ %x = or i32 %x.0, %x.1
+ ret i32 %x
+}
More information about the llvm-commits
mailing list