[llvm] 766c90f - [VectorCombine] foldShuffleOfCastops - handle unary shuffles (#160009)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 29 06:22:03 PDT 2025
Author: Chaitanya Koparkar
Date: 2025-09-29T14:21:59+01:00
New Revision: 766c90f43966fe9eb438e5a6f6378f4e3e0bc37e
URL: https://github.com/llvm/llvm-project/commit/766c90f43966fe9eb438e5a6f6378f4e3e0bc37e
DIFF: https://github.com/llvm/llvm-project/commit/766c90f43966fe9eb438e5a6f6378f4e3e0bc37e.diff
LOG: [VectorCombine] foldShuffleOfCastops - handle unary shuffles (#160009)
Fixes #156853.
Added:
Modified:
llvm/lib/Transforms/Vectorize/VectorCombine.cpp
llvm/test/Transforms/VectorCombine/AArch64/combine-shuffle-ext.ll
llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index cbdc621f1878b..32704bdb54f4f 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -2487,21 +2487,31 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
if (!match(&I, m_Shuffle(m_Value(V0), m_Value(V1), m_Mask(OldMask))))
return false;
+ // Check whether this is a binary shuffle.
+ bool IsBinaryShuffle = !isa<UndefValue>(V1);
+
auto *C0 = dyn_cast<CastInst>(V0);
auto *C1 = dyn_cast<CastInst>(V1);
- if (!C0 || !C1)
+ if (!C0 || (IsBinaryShuffle && !C1))
return false;
Instruction::CastOps Opcode = C0->getOpcode();
- if (C0->getSrcTy() != C1->getSrcTy())
+
+ // If this is allowed, foldShuffleOfCastops can get stuck in a loop
+ // with foldBitcastOfShuffle. Reject in favor of foldBitcastOfShuffle.
+ if (!IsBinaryShuffle && Opcode == Instruction::BitCast)
return false;
- // Handle shuffle(zext_nneg(x), sext(y)) -> sext(shuffle(x,y)) folds.
- if (Opcode != C1->getOpcode()) {
- if (match(C0, m_SExtLike(m_Value())) && match(C1, m_SExtLike(m_Value())))
- Opcode = Instruction::SExt;
- else
+ if (IsBinaryShuffle) {
+ if (C0->getSrcTy() != C1->getSrcTy())
return false;
+ // Handle shuffle(zext_nneg(x), sext(y)) -> sext(shuffle(x,y)) folds.
+ if (Opcode != C1->getOpcode()) {
+ if (match(C0, m_SExtLike(m_Value())) && match(C1, m_SExtLike(m_Value())))
+ Opcode = Instruction::SExt;
+ else
+ return false;
+ }
}
auto *ShuffleDstTy = dyn_cast<FixedVectorType>(I.getType());
@@ -2544,23 +2554,31 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
InstructionCost CostC0 =
TTI.getCastInstrCost(C0->getOpcode(), CastDstTy, CastSrcTy,
TTI::CastContextHint::None, CostKind);
- InstructionCost CostC1 =
- TTI.getCastInstrCost(C1->getOpcode(), CastDstTy, CastSrcTy,
- TTI::CastContextHint::None, CostKind);
- InstructionCost OldCost = CostC0 + CostC1;
- OldCost +=
- TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, ShuffleDstTy,
- CastDstTy, OldMask, CostKind, 0, nullptr, {}, &I);
- InstructionCost NewCost =
- TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, NewShuffleDstTy,
- CastSrcTy, NewMask, CostKind);
+ TargetTransformInfo::ShuffleKind ShuffleKind;
+ if (IsBinaryShuffle)
+ ShuffleKind = TargetTransformInfo::SK_PermuteTwoSrc;
+ else
+ ShuffleKind = TargetTransformInfo::SK_PermuteSingleSrc;
+
+ InstructionCost OldCost = CostC0;
+ OldCost += TTI.getShuffleCost(ShuffleKind, ShuffleDstTy, CastDstTy, OldMask,
+ CostKind, 0, nullptr, {}, &I);
+
+ InstructionCost NewCost = TTI.getShuffleCost(ShuffleKind, NewShuffleDstTy,
+ CastSrcTy, NewMask, CostKind);
NewCost += TTI.getCastInstrCost(Opcode, ShuffleDstTy, NewShuffleDstTy,
TTI::CastContextHint::None, CostKind);
if (!C0->hasOneUse())
NewCost += CostC0;
- if (!C1->hasOneUse())
- NewCost += CostC1;
+ if (IsBinaryShuffle) {
+ InstructionCost CostC1 =
+ TTI.getCastInstrCost(C1->getOpcode(), CastDstTy, CastSrcTy,
+ TTI::CastContextHint::None, CostKind);
+ OldCost += CostC1;
+ if (!C1->hasOneUse())
+ NewCost += CostC1;
+ }
LLVM_DEBUG(dbgs() << "Found a shuffle feeding two casts: " << I
<< "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
@@ -2568,14 +2586,20 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
if (NewCost > OldCost)
return false;
- Value *Shuf = Builder.CreateShuffleVector(C0->getOperand(0),
- C1->getOperand(0), NewMask);
+ Value *Shuf;
+ if (IsBinaryShuffle)
+ Shuf = Builder.CreateShuffleVector(C0->getOperand(0), C1->getOperand(0),
+ NewMask);
+ else
+ Shuf = Builder.CreateShuffleVector(C0->getOperand(0), NewMask);
+
Value *Cast = Builder.CreateCast(Opcode, Shuf, ShuffleDstTy);
// Intersect flags from the old casts.
if (auto *NewInst = dyn_cast<Instruction>(Cast)) {
NewInst->copyIRFlags(C0);
- NewInst->andIRFlags(C1);
+ if (IsBinaryShuffle)
+ NewInst->andIRFlags(C1);
}
Worklist.pushValue(Shuf);
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/combine-shuffle-ext.ll b/llvm/test/Transforms/VectorCombine/AArch64/combine-shuffle-ext.ll
index 6341c8945247d..1503a1b51d256 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/combine-shuffle-ext.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/combine-shuffle-ext.ll
@@ -14,9 +14,9 @@ define <4 x i32> @load_i32_zext_to_v4i32(ptr %di) {
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
-; CHECK-NEXT: [[E_1:%.*]] = zext <8 x i8> [[VEC_BC]] to <8 x i16>
-; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32>
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[E_1:%.*]] = zext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16>
+; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[E_1]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
;
entry:
@@ -36,9 +36,9 @@ define <4 x i32> @load_i32_zext_to_v4i32_both_nneg(ptr %di) {
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
-; CHECK-NEXT: [[E_1:%.*]] = zext nneg <8 x i8> [[VEC_BC]] to <8 x i16>
-; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32>
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[E_1:%.*]] = zext nneg <4 x i8> [[VEC_SHUFFLE]] to <4 x i16>
+; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[E_1]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
;
entry:
@@ -58,9 +58,9 @@ define <4 x i32> @load_i32_zext_to_v4i32_inner_nneg(ptr %di) {
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
-; CHECK-NEXT: [[E_1:%.*]] = zext nneg <8 x i8> [[VEC_BC]] to <8 x i16>
-; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[EXT_2:%.*]] = zext <4 x i16> [[VEC_SHUFFLE]] to <4 x i32>
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[E_1:%.*]] = zext nneg <4 x i8> [[VEC_SHUFFLE]] to <4 x i16>
+; CHECK-NEXT: [[EXT_2:%.*]] = zext <4 x i16> [[E_1]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
;
entry:
@@ -80,9 +80,9 @@ define <4 x i32> @load_i32_zext_to_v4i32_outer_nneg(ptr %di) {
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
-; CHECK-NEXT: [[E_1:%.*]] = zext <8 x i8> [[VEC_BC]] to <8 x i16>
-; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32>
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[E_1:%.*]] = zext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16>
+; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[E_1]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
;
entry:
@@ -102,9 +102,9 @@ define <4 x i32> @load_i32_zext_to_v4i32_inner_nneg_outer_sext(ptr %di) {
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
-; CHECK-NEXT: [[E_1:%.*]] = zext nneg <8 x i8> [[VEC_BC]] to <8 x i16>
-; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[EXT_2:%.*]] = sext <4 x i16> [[VEC_SHUFFLE]] to <4 x i32>
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[E_1:%.*]] = zext nneg <4 x i8> [[VEC_SHUFFLE]] to <4 x i16>
+; CHECK-NEXT: [[EXT_2:%.*]] = sext <4 x i16> [[E_1]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
;
entry:
@@ -125,9 +125,9 @@ define <4 x i32> @load_i32_zext_to_v4i32_clobber_after_load(ptr %di) {
; CHECK-NEXT: call void @use.i32(i32 0)
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
-; CHECK-NEXT: [[E_1:%.*]] = zext <8 x i8> [[VEC_BC]] to <8 x i16>
-; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32>
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[E_1:%.*]] = zext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16>
+; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[E_1]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
;
entry:
@@ -148,9 +148,9 @@ define <4 x i32> @load_i32_sext_zext_to_v4i32(ptr %di) {
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
-; CHECK-NEXT: [[E_1:%.*]] = sext <8 x i8> [[VEC_BC]] to <8 x i16>
-; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[EXT_2:%.*]] = zext <4 x i16> [[VEC_SHUFFLE]] to <4 x i32>
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[E_1:%.*]] = sext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16>
+; CHECK-NEXT: [[EXT_2:%.*]] = zext <4 x i16> [[E_1]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
;
entry:
@@ -170,9 +170,9 @@ define <4 x i32> @load_i32_zext_to_v4i32_load_other_users(ptr %di) {
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
-; CHECK-NEXT: [[E_1:%.*]] = zext <8 x i8> [[VEC_BC]] to <8 x i16>
-; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32>
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[E_1:%.*]] = zext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16>
+; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[E_1]] to <4 x i32>
; CHECK-NEXT: call void @use.i32(i32 [[L]])
; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
;
@@ -194,9 +194,9 @@ define <4 x i32> @load_i32_zext_to_v4i32_ins_other_users(ptr %di) {
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
-; CHECK-NEXT: [[E_1:%.*]] = zext <8 x i8> [[VEC_BC]] to <8 x i16>
-; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32>
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[E_1:%.*]] = zext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16>
+; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[E_1]] to <4 x i32>
; CHECK-NEXT: call void @use.v2i32(<2 x i32> [[VEC_INS]])
; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
;
@@ -218,9 +218,9 @@ define <4 x i32> @load_i32_zext_to_v4i32_bc_other_users(ptr %di) {
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
-; CHECK-NEXT: [[E_1:%.*]] = zext <8 x i8> [[VEC_BC]] to <8 x i16>
-; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32>
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[E_1:%.*]] = zext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16>
+; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[E_1]] to <4 x i32>
; CHECK-NEXT: call void @use.v8i8(<8 x i8> [[VEC_BC]])
; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
;
@@ -266,10 +266,10 @@ define <4 x i32> @load_i32_zext_to_v4i32_shuffle_other_users(ptr %di) {
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
-; CHECK-NEXT: [[E_1:%.*]] = zext <8 x i8> [[VEC_BC]] to <8 x i16>
-; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32>
-; CHECK-NEXT: call void @use.v8i16(<4 x i16> [[VEC_SHUFFLE]])
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[E_1:%.*]] = zext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16>
+; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[E_1]] to <4 x i32>
+; CHECK-NEXT: call void @use.v8i16(<4 x i16> [[E_1]])
; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
;
entry:
@@ -290,9 +290,9 @@ define <8 x i32> @load_i64_zext_to_v8i32(ptr %di) {
; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[DI]], align 8
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[L]], i64 0
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i64> [[VEC_INS]] to <16 x i8>
-; CHECK-NEXT: [[EXT_1:%.*]] = zext <16 x i8> [[VEC_BC]] to <16 x i16>
-; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <16 x i16> [[EXT_1]], <16 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[OUTER_EXT:%.*]] = zext nneg <8 x i16> [[VEC_SHUFFLE]] to <8 x i32>
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <16 x i8> [[VEC_BC]], <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[EXT_1:%.*]] = zext <8 x i8> [[VEC_SHUFFLE]] to <8 x i16>
+; CHECK-NEXT: [[OUTER_EXT:%.*]] = zext nneg <8 x i16> [[EXT_1]] to <8 x i32>
; CHECK-NEXT: ret <8 x i32> [[OUTER_EXT]]
;
entry:
@@ -312,9 +312,9 @@ define <3 x i32> @load_i24_zext_to_v3i32(ptr %di) {
; CHECK-NEXT: [[L:%.*]] = load i24, ptr [[DI]], align 4
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i24> <i24 poison, i24 0>, i24 [[L]], i64 0
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i24> [[VEC_INS]] to <6 x i8>
-; CHECK-NEXT: [[EXT_1:%.*]] = zext <6 x i8> [[VEC_BC]] to <6 x i16>
-; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <6 x i16> [[EXT_1]], <6 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
-; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <3 x i16> [[VEC_SHUFFLE]] to <3 x i32>
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <6 x i8> [[VEC_BC]], <6 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT: [[EXT_1:%.*]] = zext <3 x i8> [[VEC_SHUFFLE]] to <3 x i16>
+; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <3 x i16> [[EXT_1]] to <3 x i32>
; CHECK-NEXT: ret <3 x i32> [[EXT_2]]
;
entry:
@@ -334,9 +334,9 @@ define <4 x i32> @load_i32_insert_idx_1_sext(ptr %di) {
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 0, i32 poison>, i32 [[L]], i64 1
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
-; CHECK-NEXT: [[EXT_1:%.*]] = zext <8 x i8> [[VEC_BC]] to <8 x i16>
-; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[EXT_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32>
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[EXT_1:%.*]] = zext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16>
+; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[EXT_1]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
;
entry:
@@ -356,9 +356,9 @@ define <4 x i32> @mask_extracts_not_all_elements_1_sext(ptr %di) {
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
-; CHECK-NEXT: [[EXT_1:%.*]] = zext <8 x i8> [[VEC_BC]] to <8 x i16>
-; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[EXT_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
-; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32>
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
+; CHECK-NEXT: [[EXT_1:%.*]] = zext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16>
+; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[EXT_1]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
;
entry:
@@ -378,9 +378,9 @@ define <4 x i32> @mask_extracts_not_all_elements_2_sext(ptr %di) {
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
-; CHECK-NEXT: [[EXT_1:%.*]] = zext <8 x i8> [[VEC_BC]] to <8 x i16>
-; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[EXT_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
-; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32>
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: [[EXT_1:%.*]] = zext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16>
+; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[EXT_1]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
;
entry:
@@ -422,9 +422,9 @@ define <4 x i32> @load_i32_sext_to_v4i32(ptr %di) {
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
-; CHECK-NEXT: [[E_1:%.*]] = sext <8 x i8> [[VEC_BC]] to <8 x i16>
-; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[EXT_2:%.*]] = sext <4 x i16> [[VEC_SHUFFLE]] to <4 x i32>
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[E_1:%.*]] = sext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16>
+; CHECK-NEXT: [[EXT_2:%.*]] = sext <4 x i16> [[E_1]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
;
entry:
@@ -444,9 +444,9 @@ define <8 x i32> @load_i64_sext_to_v8i32(ptr %di) {
; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[DI]], align 8
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[L]], i64 0
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i64> [[VEC_INS]] to <16 x i8>
-; CHECK-NEXT: [[EXT_1:%.*]] = sext <16 x i8> [[VEC_BC]] to <16 x i16>
-; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <16 x i16> [[EXT_1]], <16 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[OUTER_EXT:%.*]] = sext <8 x i16> [[VEC_SHUFFLE]] to <8 x i32>
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <16 x i8> [[VEC_BC]], <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[EXT_1:%.*]] = sext <8 x i8> [[VEC_SHUFFLE]] to <8 x i16>
+; CHECK-NEXT: [[OUTER_EXT:%.*]] = sext <8 x i16> [[EXT_1]] to <8 x i32>
; CHECK-NEXT: ret <8 x i32> [[OUTER_EXT]]
;
entry:
@@ -466,9 +466,9 @@ define <3 x i32> @load_i24_sext_to_v3i32(ptr %di) {
; CHECK-NEXT: [[L:%.*]] = load i24, ptr [[DI]], align 4
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i24> <i24 poison, i24 0>, i24 [[L]], i64 0
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i24> [[VEC_INS]] to <6 x i8>
-; CHECK-NEXT: [[EXT_1:%.*]] = sext <6 x i8> [[VEC_BC]] to <6 x i16>
-; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <6 x i16> [[EXT_1]], <6 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
-; CHECK-NEXT: [[EXT_2:%.*]] = sext <3 x i16> [[VEC_SHUFFLE]] to <3 x i32>
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <6 x i8> [[VEC_BC]], <6 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT: [[EXT_1:%.*]] = sext <3 x i8> [[VEC_SHUFFLE]] to <3 x i16>
+; CHECK-NEXT: [[EXT_2:%.*]] = sext <3 x i16> [[EXT_1]] to <3 x i32>
; CHECK-NEXT: ret <3 x i32> [[EXT_2]]
;
entry:
@@ -488,9 +488,9 @@ define <4 x i32> @load_i32_insert_idx_1(ptr %di) {
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 0, i32 poison>, i32 [[L]], i64 1
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
-; CHECK-NEXT: [[EXT_1:%.*]] = sext <8 x i8> [[VEC_BC]] to <8 x i16>
-; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[EXT_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[EXT_2:%.*]] = sext <4 x i16> [[VEC_SHUFFLE]] to <4 x i32>
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[EXT_1:%.*]] = sext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16>
+; CHECK-NEXT: [[EXT_2:%.*]] = sext <4 x i16> [[EXT_1]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
;
entry:
@@ -510,9 +510,9 @@ define <4 x i32> @mask_extracts_not_all_elements_1(ptr %di) {
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
-; CHECK-NEXT: [[EXT_1:%.*]] = sext <8 x i8> [[VEC_BC]] to <8 x i16>
-; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[EXT_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
-; CHECK-NEXT: [[EXT_2:%.*]] = sext <4 x i16> [[VEC_SHUFFLE]] to <4 x i32>
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
+; CHECK-NEXT: [[EXT_1:%.*]] = sext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16>
+; CHECK-NEXT: [[EXT_2:%.*]] = sext <4 x i16> [[EXT_1]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
;
entry:
@@ -532,9 +532,9 @@ define <4 x i32> @mask_extracts_not_all_elements_2(ptr %di) {
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
-; CHECK-NEXT: [[EXT_1:%.*]] = sext <8 x i8> [[VEC_BC]] to <8 x i16>
-; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[EXT_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
-; CHECK-NEXT: [[EXT_2:%.*]] = sext <4 x i16> [[VEC_SHUFFLE]] to <4 x i32>
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: [[EXT_1:%.*]] = sext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16>
+; CHECK-NEXT: [[EXT_2:%.*]] = sext <4 x i16> [[EXT_1]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
;
entry:
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
index acbc836ffcab0..ed29719d49493 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
@@ -205,8 +205,8 @@ define <8 x i8> @abs_
diff erent(<8 x i8> %a) {
define <4 x i32> @poison_intrinsic(<2 x i16> %l256) {
; CHECK-LABEL: @poison_intrinsic(
; CHECK-NEXT: [[L266:%.*]] = call <2 x i16> @llvm.abs.v2i16(<2 x i16> [[L256:%.*]], i1 false)
-; CHECK-NEXT: [[L267:%.*]] = zext <2 x i16> [[L266]] to <2 x i32>
-; CHECK-NEXT: [[L271:%.*]] = shufflevector <2 x i32> [[L267]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT: [[L267:%.*]] = shufflevector <2 x i16> [[L266]], <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT: [[L271:%.*]] = zext <4 x i16> [[L267]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[L271]]
;
%l266 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> %l256, i1 false)
@@ -534,9 +534,9 @@ define <4 x i64> @single_zext(<4 x i32> %x) {
define <4 x i64> @not_zext(<4 x i32> %x) {
; CHECK-LABEL: @not_zext(
-; CHECK-NEXT: [[ZEXT:%.*]] = zext <4 x i32> [[X:%.*]] to <4 x i64>
-; CHECK-NEXT: [[REVSHUF:%.*]] = shufflevector <4 x i64> [[ZEXT]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: ret <4 x i64> [[REVSHUF]]
+; CHECK-NEXT: [[REVSHUF:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[ZEXT:%.*]] = zext <4 x i32> [[REVSHUF:%.*]] to <4 x i64>
+; CHECK-NEXT: ret <4 x i64> [[ZEXT]]
;
%zext = zext <4 x i32> %x to <4 x i64>
%revshuf = shufflevector <4 x i64> %zext, <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -922,10 +922,9 @@ define <4 x i8> @singleop(<4 x i8> %a, <4 x i8> %b) {
define <4 x i64> @cast_mismatched_types(<4 x i32> %x) {
; CHECK-LABEL: @cast_mismatched_types(
-; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <2 x i32> <i32 0, i32 2>
-; CHECK-NEXT: [[ZEXT:%.*]] = zext <2 x i32> [[SHUF]] to <2 x i64>
-; CHECK-NEXT: [[EXTSHUF:%.*]] = shufflevector <2 x i64> [[ZEXT]], <2 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
-; CHECK-NEXT: ret <4 x i64> [[EXTSHUF]]
+; CHECK-SAME: <4 x i32> [[X:%.*]]) {
+; CHECK-NEXT: [[ZEXT:%.*]] = zext <4 x i32> [[X]] to <4 x i64>
+; CHECK-NEXT: ret <4 x i64> [[ZEXT]]
;
%shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <2 x i32> <i32 0, i32 2>
%zext = zext <2 x i32> %shuf to <2 x i64>
diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll
index fba4b60ef417b..82a739964c9d0 100644
--- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll
@@ -342,3 +342,59 @@ define <16 x i32> @concat_sext_zext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) {
%r = shufflevector <8 x i32> %x0, <8 x i32> %x1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <16 x i32> %r
}
+
+; Unary shuffles
+
+define <4 x i16> @unary_shuffle_zext_v8i8_v4i16(<8 x i8> %a0) {
+; CHECK-LABEL: define <4 x i16> @unary_shuffle_zext_v8i8_v4i16(
+; CHECK-SAME: <8 x i8> [[A0:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[A0]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[X1:%.*]] = zext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16>
+; CHECK-NEXT: ret <4 x i16> [[X1]]
+;
+ %x1 = zext <8 x i8> %a0 to <8 x i16>
+ %vec.shuffle = shufflevector <8 x i16> %x1, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i16> %vec.shuffle
+}
+
+define <4 x i16> @unary_shuffle_sext_v8i8_v4i16(<8 x i8> %a0) {
+; CHECK-LABEL: define <4 x i16> @unary_shuffle_sext_v8i8_v4i16(
+; CHECK-SAME: <8 x i8> [[A0:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[A0]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[X1:%.*]] = sext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16>
+; CHECK-NEXT: ret <4 x i16> [[X1]]
+;
+ %x1 = sext <8 x i8> %a0 to <8 x i16>
+ %vec.shuffle = shufflevector <8 x i16> %x1, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i16> %vec.shuffle
+}
+
+; negative - avoid loop with foldBitcastOfShuffle
+
+define <2 x i32> @unary_shuffle_bitcast_v8i8_v2i32(<8 x i8> %a0) {
+; CHECK-LABEL: define <2 x i32> @unary_shuffle_bitcast_v8i8_v2i32(
+; CHECK-SAME: <8 x i8> [[A0:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[X1:%.*]] = bitcast <8 x i8> [[A0]] to <2 x i32>
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <2 x i32> [[X1]], <2 x i32> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: ret <2 x i32> [[VEC_SHUFFLE]]
+;
+ %x1 = bitcast <8 x i8> %a0 to <2 x i32>
+ %vec.shuffle = shufflevector <2 x i32> %x1, <2 x i32> poison, <2 x i32> <i32 0, i32 1>
+ ret <2 x i32> %vec.shuffle
+}
+
+; negative - multiuse
+
+define <4 x i16> @unary_shuffle_sext_v8i8_v4i16_multiuse(<8 x i8> %a0, ptr %a1) {
+; CHECK-LABEL: define <4 x i16> @unary_shuffle_sext_v8i8_v4i16_multiuse(
+; CHECK-SAME: <8 x i8> [[A0:%.*]], ptr [[A1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[X1:%.*]] = sext <8 x i8> [[A0]] to <8 x i16>
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[X1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: store <8 x i16> [[X1]], ptr [[A1]], align 16
+; CHECK-NEXT: ret <4 x i16> [[VEC_SHUFFLE]]
+;
+ %x1 = sext <8 x i8> %a0 to <8 x i16>
+ %vec.shuffle = shufflevector <8 x i16> %x1, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ store <8 x i16> %x1, ptr %a1, align 16
+ ret <4 x i16> %vec.shuffle
+}
More information about the llvm-commits
mailing list