[llvm] [VectorCombine] foldShuffleOfCastops - handle unary shuffles (PR #160009)
Chaitanya Koparkar via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 25 04:24:17 PDT 2025
https://github.com/ckoparkar updated https://github.com/llvm/llvm-project/pull/160009
>From ed4c1dfb83a5d3d33f185559a4820b7d3fefb45e Mon Sep 17 00:00:00 2001
From: Chaitanya Koparkar <ckoparkar at gmail.com>
Date: Sun, 21 Sep 2025 16:01:33 -0400
Subject: [PATCH 1/5] [VectorCombine] foldShuffleOfCastops - handle unary
shuffles
---
.../Transforms/Vectorize/VectorCombine.cpp | 67 ++++++++++++-------
1 file changed, 44 insertions(+), 23 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 17cb18a22336a..c2a4353b3eb62 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -2477,21 +2477,28 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
if (!match(&I, m_Shuffle(m_Value(V0), m_Value(V1), m_Mask(OldMask))))
return false;
+ // Check whether this is a unary shuffle.
+ // TODO: should this be extended to match undef or unused values.
+ bool IsBinaryShuffle = !isa<PoisonValue>(V1);
+ LLVM_DEBUG(dbgs() << "Is binary shuffle: " << IsBinaryShuffle << "\n");
+
auto *C0 = dyn_cast<CastInst>(V0);
auto *C1 = dyn_cast<CastInst>(V1);
- if (!C0 || !C1)
+ if (!C0 || (IsBinaryShuffle && !C1))
return false;
Instruction::CastOps Opcode = C0->getOpcode();
- if (C0->getSrcTy() != C1->getSrcTy())
- return false;
- // Handle shuffle(zext_nneg(x), sext(y)) -> sext(shuffle(x,y)) folds.
- if (Opcode != C1->getOpcode()) {
- if (match(C0, m_SExtLike(m_Value())) && match(C1, m_SExtLike(m_Value())))
- Opcode = Instruction::SExt;
- else
+ if (IsBinaryShuffle) {
+ if (C0->getSrcTy() != C1->getSrcTy())
return false;
+ // Handle shuffle(zext_nneg(x), sext(y)) -> sext(shuffle(x,y)) folds.
+ if (Opcode != C1->getOpcode()) {
+ if (match(C0, m_SExtLike(m_Value())) && match(C1, m_SExtLike(m_Value())))
+ Opcode = Instruction::SExt;
+ else
+ return false;
+ }
}
auto *ShuffleDstTy = dyn_cast<FixedVectorType>(I.getType());
@@ -2534,23 +2541,31 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
InstructionCost CostC0 =
TTI.getCastInstrCost(C0->getOpcode(), CastDstTy, CastSrcTy,
TTI::CastContextHint::None, CostKind);
- InstructionCost CostC1 =
- TTI.getCastInstrCost(C1->getOpcode(), CastDstTy, CastSrcTy,
- TTI::CastContextHint::None, CostKind);
- InstructionCost OldCost = CostC0 + CostC1;
- OldCost +=
- TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, ShuffleDstTy,
- CastDstTy, OldMask, CostKind, 0, nullptr, {}, &I);
- InstructionCost NewCost =
- TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, NewShuffleDstTy,
- CastSrcTy, NewMask, CostKind);
+ TargetTransformInfo::ShuffleKind ShuffleKind;
+ if (IsBinaryShuffle)
+ ShuffleKind = TargetTransformInfo::SK_PermuteTwoSrc;
+ else
+ ShuffleKind = TargetTransformInfo::SK_PermuteSingleSrc;
+
+ InstructionCost OldCost = CostC0;
+ OldCost += TTI.getShuffleCost(ShuffleKind, ShuffleDstTy, CastDstTy, OldMask,
+ CostKind, 0, nullptr, {}, &I);
+
+ InstructionCost NewCost = TTI.getShuffleCost(ShuffleKind, NewShuffleDstTy,
+ CastSrcTy, NewMask, CostKind);
NewCost += TTI.getCastInstrCost(Opcode, ShuffleDstTy, NewShuffleDstTy,
TTI::CastContextHint::None, CostKind);
if (!C0->hasOneUse())
NewCost += CostC0;
- if (!C1->hasOneUse())
- NewCost += CostC1;
+ if (IsBinaryShuffle) {
+ InstructionCost CostC1 =
+ TTI.getCastInstrCost(C1->getOpcode(), CastDstTy, CastSrcTy,
+ TTI::CastContextHint::None, CostKind);
+ OldCost += CostC1;
+ if (!C1->hasOneUse())
+ NewCost += CostC1;
+ }
LLVM_DEBUG(dbgs() << "Found a shuffle feeding two casts: " << I
<< "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
@@ -2558,14 +2573,20 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
if (NewCost > OldCost)
return false;
- Value *Shuf = Builder.CreateShuffleVector(C0->getOperand(0),
- C1->getOperand(0), NewMask);
+ Value *Shuf;
+ if (IsBinaryShuffle)
+ Shuf = Builder.CreateShuffleVector(C0->getOperand(0), C1->getOperand(0),
+ NewMask);
+ else
+ Shuf = Builder.CreateShuffleVector(C0->getOperand(0), NewMask);
+
Value *Cast = Builder.CreateCast(Opcode, Shuf, ShuffleDstTy);
// Intersect flags from the old casts.
if (auto *NewInst = dyn_cast<Instruction>(Cast)) {
NewInst->copyIRFlags(C0);
- NewInst->andIRFlags(C1);
+ if (IsBinaryShuffle)
+ NewInst->andIRFlags(C1);
}
Worklist.pushValue(Shuf);
>From 8ae981639ffb9b4e48b7452819d003e0c3b3dab9 Mon Sep 17 00:00:00 2001
From: Chaitanya Koparkar <ckoparkar at gmail.com>
Date: Tue, 23 Sep 2025 08:50:25 -0400
Subject: [PATCH 2/5] Fix shuffletoidentity test
---
llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 4 ++--
.../VectorCombine/AArch64/shuffletoidentity.ll | 17 ++++++++---------
2 files changed, 10 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 7c3c219b94190..526a4add2a89a 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -2488,9 +2488,9 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
return false;
// Check whether this is a unary shuffle.
- // TODO: should this be extended to match undef or unused values.
+ // TODO: check if this can be extended to match undef or unused values,
+ // perhaps using ShuffleVectorInst::isSingleSource.
bool IsBinaryShuffle = !isa<PoisonValue>(V1);
- LLVM_DEBUG(dbgs() << "Is binary shuffle: " << IsBinaryShuffle << "\n");
auto *C0 = dyn_cast<CastInst>(V0);
auto *C1 = dyn_cast<CastInst>(V1);
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
index acbc836ffcab0..ed29719d49493 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
@@ -205,8 +205,8 @@ define <8 x i8> @abs_different(<8 x i8> %a) {
define <4 x i32> @poison_intrinsic(<2 x i16> %l256) {
; CHECK-LABEL: @poison_intrinsic(
; CHECK-NEXT: [[L266:%.*]] = call <2 x i16> @llvm.abs.v2i16(<2 x i16> [[L256:%.*]], i1 false)
-; CHECK-NEXT: [[L267:%.*]] = zext <2 x i16> [[L266]] to <2 x i32>
-; CHECK-NEXT: [[L271:%.*]] = shufflevector <2 x i32> [[L267]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT: [[L267:%.*]] = shufflevector <2 x i16> [[L266]], <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT: [[L271:%.*]] = zext <4 x i16> [[L267]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[L271]]
;
%l266 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> %l256, i1 false)
@@ -534,9 +534,9 @@ define <4 x i64> @single_zext(<4 x i32> %x) {
define <4 x i64> @not_zext(<4 x i32> %x) {
; CHECK-LABEL: @not_zext(
-; CHECK-NEXT: [[ZEXT:%.*]] = zext <4 x i32> [[X:%.*]] to <4 x i64>
-; CHECK-NEXT: [[REVSHUF:%.*]] = shufflevector <4 x i64> [[ZEXT]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: ret <4 x i64> [[REVSHUF]]
+; CHECK-NEXT: [[REVSHUF:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[ZEXT:%.*]] = zext <4 x i32> [[REVSHUF:%.*]] to <4 x i64>
+; CHECK-NEXT: ret <4 x i64> [[ZEXT]]
;
%zext = zext <4 x i32> %x to <4 x i64>
%revshuf = shufflevector <4 x i64> %zext, <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -922,10 +922,9 @@ define <4 x i8> @singleop(<4 x i8> %a, <4 x i8> %b) {
define <4 x i64> @cast_mismatched_types(<4 x i32> %x) {
; CHECK-LABEL: @cast_mismatched_types(
-; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <2 x i32> <i32 0, i32 2>
-; CHECK-NEXT: [[ZEXT:%.*]] = zext <2 x i32> [[SHUF]] to <2 x i64>
-; CHECK-NEXT: [[EXTSHUF:%.*]] = shufflevector <2 x i64> [[ZEXT]], <2 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
-; CHECK-NEXT: ret <4 x i64> [[EXTSHUF]]
+; CHECK-SAME: <4 x i32> [[X:%.*]]) {
+; CHECK-NEXT: [[ZEXT:%.*]] = zext <4 x i32> [[X]] to <4 x i64>
+; CHECK-NEXT: ret <4 x i64> [[ZEXT]]
;
%shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <2 x i32> <i32 0, i32 2>
%zext = zext <2 x i32> %shuf to <2 x i64>
>From 8c27278bd39af0ae2ffe768180da09239e4265fd Mon Sep 17 00:00:00 2001
From: Chaitanya Koparkar <ckoparkar at gmail.com>
Date: Wed, 24 Sep 2025 08:40:05 -0400
Subject: [PATCH 3/5] Break loop with foldBitcastOfShuffle
---
.../Transforms/Vectorize/VectorCombine.cpp | 6 +
.../AArch64/combine-shuffle-ext.ll | 134 +++++++++---------
2 files changed, 73 insertions(+), 67 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 526a4add2a89a..2a9f637791dfe 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -2499,6 +2499,12 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
Instruction::CastOps Opcode = C0->getOpcode();
+ // If this is allowed, foldShuffleOfCastops can get stuck in a loop
+ // with foldBitcastOfShuffle. Reject in favor of foldBitcastOfShuffle.
+ if (!IsBinaryShuffle && Opcode == Instruction::BitCast) {
+ return false;
+ }
+
if (IsBinaryShuffle) {
if (C0->getSrcTy() != C1->getSrcTy())
return false;
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/combine-shuffle-ext.ll b/llvm/test/Transforms/VectorCombine/AArch64/combine-shuffle-ext.ll
index 6341c8945247d..1503a1b51d256 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/combine-shuffle-ext.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/combine-shuffle-ext.ll
@@ -14,9 +14,9 @@ define <4 x i32> @load_i32_zext_to_v4i32(ptr %di) {
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
-; CHECK-NEXT: [[E_1:%.*]] = zext <8 x i8> [[VEC_BC]] to <8 x i16>
-; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32>
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[E_1:%.*]] = zext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16>
+; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[E_1]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
;
entry:
@@ -36,9 +36,9 @@ define <4 x i32> @load_i32_zext_to_v4i32_both_nneg(ptr %di) {
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
-; CHECK-NEXT: [[E_1:%.*]] = zext nneg <8 x i8> [[VEC_BC]] to <8 x i16>
-; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32>
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[E_1:%.*]] = zext nneg <4 x i8> [[VEC_SHUFFLE]] to <4 x i16>
+; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[E_1]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
;
entry:
@@ -58,9 +58,9 @@ define <4 x i32> @load_i32_zext_to_v4i32_inner_nneg(ptr %di) {
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
-; CHECK-NEXT: [[E_1:%.*]] = zext nneg <8 x i8> [[VEC_BC]] to <8 x i16>
-; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[EXT_2:%.*]] = zext <4 x i16> [[VEC_SHUFFLE]] to <4 x i32>
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[E_1:%.*]] = zext nneg <4 x i8> [[VEC_SHUFFLE]] to <4 x i16>
+; CHECK-NEXT: [[EXT_2:%.*]] = zext <4 x i16> [[E_1]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
;
entry:
@@ -80,9 +80,9 @@ define <4 x i32> @load_i32_zext_to_v4i32_outer_nneg(ptr %di) {
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
-; CHECK-NEXT: [[E_1:%.*]] = zext <8 x i8> [[VEC_BC]] to <8 x i16>
-; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32>
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[E_1:%.*]] = zext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16>
+; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[E_1]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
;
entry:
@@ -102,9 +102,9 @@ define <4 x i32> @load_i32_zext_to_v4i32_inner_nneg_outer_sext(ptr %di) {
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
-; CHECK-NEXT: [[E_1:%.*]] = zext nneg <8 x i8> [[VEC_BC]] to <8 x i16>
-; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[EXT_2:%.*]] = sext <4 x i16> [[VEC_SHUFFLE]] to <4 x i32>
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[E_1:%.*]] = zext nneg <4 x i8> [[VEC_SHUFFLE]] to <4 x i16>
+; CHECK-NEXT: [[EXT_2:%.*]] = sext <4 x i16> [[E_1]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
;
entry:
@@ -125,9 +125,9 @@ define <4 x i32> @load_i32_zext_to_v4i32_clobber_after_load(ptr %di) {
; CHECK-NEXT: call void @use.i32(i32 0)
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
-; CHECK-NEXT: [[E_1:%.*]] = zext <8 x i8> [[VEC_BC]] to <8 x i16>
-; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32>
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[E_1:%.*]] = zext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16>
+; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[E_1]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
;
entry:
@@ -148,9 +148,9 @@ define <4 x i32> @load_i32_sext_zext_to_v4i32(ptr %di) {
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
-; CHECK-NEXT: [[E_1:%.*]] = sext <8 x i8> [[VEC_BC]] to <8 x i16>
-; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[EXT_2:%.*]] = zext <4 x i16> [[VEC_SHUFFLE]] to <4 x i32>
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[E_1:%.*]] = sext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16>
+; CHECK-NEXT: [[EXT_2:%.*]] = zext <4 x i16> [[E_1]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
;
entry:
@@ -170,9 +170,9 @@ define <4 x i32> @load_i32_zext_to_v4i32_load_other_users(ptr %di) {
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
-; CHECK-NEXT: [[E_1:%.*]] = zext <8 x i8> [[VEC_BC]] to <8 x i16>
-; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32>
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[E_1:%.*]] = zext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16>
+; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[E_1]] to <4 x i32>
; CHECK-NEXT: call void @use.i32(i32 [[L]])
; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
;
@@ -194,9 +194,9 @@ define <4 x i32> @load_i32_zext_to_v4i32_ins_other_users(ptr %di) {
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
-; CHECK-NEXT: [[E_1:%.*]] = zext <8 x i8> [[VEC_BC]] to <8 x i16>
-; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32>
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[E_1:%.*]] = zext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16>
+; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[E_1]] to <4 x i32>
; CHECK-NEXT: call void @use.v2i32(<2 x i32> [[VEC_INS]])
; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
;
@@ -218,9 +218,9 @@ define <4 x i32> @load_i32_zext_to_v4i32_bc_other_users(ptr %di) {
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
-; CHECK-NEXT: [[E_1:%.*]] = zext <8 x i8> [[VEC_BC]] to <8 x i16>
-; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32>
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[E_1:%.*]] = zext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16>
+; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[E_1]] to <4 x i32>
; CHECK-NEXT: call void @use.v8i8(<8 x i8> [[VEC_BC]])
; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
;
@@ -266,10 +266,10 @@ define <4 x i32> @load_i32_zext_to_v4i32_shuffle_other_users(ptr %di) {
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
-; CHECK-NEXT: [[E_1:%.*]] = zext <8 x i8> [[VEC_BC]] to <8 x i16>
-; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32>
-; CHECK-NEXT: call void @use.v8i16(<4 x i16> [[VEC_SHUFFLE]])
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[E_1:%.*]] = zext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16>
+; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[E_1]] to <4 x i32>
+; CHECK-NEXT: call void @use.v8i16(<4 x i16> [[E_1]])
; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
;
entry:
@@ -290,9 +290,9 @@ define <8 x i32> @load_i64_zext_to_v8i32(ptr %di) {
; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[DI]], align 8
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[L]], i64 0
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i64> [[VEC_INS]] to <16 x i8>
-; CHECK-NEXT: [[EXT_1:%.*]] = zext <16 x i8> [[VEC_BC]] to <16 x i16>
-; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <16 x i16> [[EXT_1]], <16 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[OUTER_EXT:%.*]] = zext nneg <8 x i16> [[VEC_SHUFFLE]] to <8 x i32>
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <16 x i8> [[VEC_BC]], <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[EXT_1:%.*]] = zext <8 x i8> [[VEC_SHUFFLE]] to <8 x i16>
+; CHECK-NEXT: [[OUTER_EXT:%.*]] = zext nneg <8 x i16> [[EXT_1]] to <8 x i32>
; CHECK-NEXT: ret <8 x i32> [[OUTER_EXT]]
;
entry:
@@ -312,9 +312,9 @@ define <3 x i32> @load_i24_zext_to_v3i32(ptr %di) {
; CHECK-NEXT: [[L:%.*]] = load i24, ptr [[DI]], align 4
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i24> <i24 poison, i24 0>, i24 [[L]], i64 0
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i24> [[VEC_INS]] to <6 x i8>
-; CHECK-NEXT: [[EXT_1:%.*]] = zext <6 x i8> [[VEC_BC]] to <6 x i16>
-; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <6 x i16> [[EXT_1]], <6 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
-; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <3 x i16> [[VEC_SHUFFLE]] to <3 x i32>
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <6 x i8> [[VEC_BC]], <6 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT: [[EXT_1:%.*]] = zext <3 x i8> [[VEC_SHUFFLE]] to <3 x i16>
+; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <3 x i16> [[EXT_1]] to <3 x i32>
; CHECK-NEXT: ret <3 x i32> [[EXT_2]]
;
entry:
@@ -334,9 +334,9 @@ define <4 x i32> @load_i32_insert_idx_1_sext(ptr %di) {
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 0, i32 poison>, i32 [[L]], i64 1
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
-; CHECK-NEXT: [[EXT_1:%.*]] = zext <8 x i8> [[VEC_BC]] to <8 x i16>
-; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[EXT_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32>
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[EXT_1:%.*]] = zext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16>
+; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[EXT_1]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
;
entry:
@@ -356,9 +356,9 @@ define <4 x i32> @mask_extracts_not_all_elements_1_sext(ptr %di) {
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
-; CHECK-NEXT: [[EXT_1:%.*]] = zext <8 x i8> [[VEC_BC]] to <8 x i16>
-; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[EXT_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
-; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32>
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
+; CHECK-NEXT: [[EXT_1:%.*]] = zext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16>
+; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[EXT_1]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
;
entry:
@@ -378,9 +378,9 @@ define <4 x i32> @mask_extracts_not_all_elements_2_sext(ptr %di) {
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
-; CHECK-NEXT: [[EXT_1:%.*]] = zext <8 x i8> [[VEC_BC]] to <8 x i16>
-; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[EXT_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
-; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32>
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: [[EXT_1:%.*]] = zext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16>
+; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[EXT_1]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
;
entry:
@@ -422,9 +422,9 @@ define <4 x i32> @load_i32_sext_to_v4i32(ptr %di) {
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
-; CHECK-NEXT: [[E_1:%.*]] = sext <8 x i8> [[VEC_BC]] to <8 x i16>
-; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[EXT_2:%.*]] = sext <4 x i16> [[VEC_SHUFFLE]] to <4 x i32>
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[E_1:%.*]] = sext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16>
+; CHECK-NEXT: [[EXT_2:%.*]] = sext <4 x i16> [[E_1]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
;
entry:
@@ -444,9 +444,9 @@ define <8 x i32> @load_i64_sext_to_v8i32(ptr %di) {
; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[DI]], align 8
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[L]], i64 0
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i64> [[VEC_INS]] to <16 x i8>
-; CHECK-NEXT: [[EXT_1:%.*]] = sext <16 x i8> [[VEC_BC]] to <16 x i16>
-; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <16 x i16> [[EXT_1]], <16 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[OUTER_EXT:%.*]] = sext <8 x i16> [[VEC_SHUFFLE]] to <8 x i32>
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <16 x i8> [[VEC_BC]], <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[EXT_1:%.*]] = sext <8 x i8> [[VEC_SHUFFLE]] to <8 x i16>
+; CHECK-NEXT: [[OUTER_EXT:%.*]] = sext <8 x i16> [[EXT_1]] to <8 x i32>
; CHECK-NEXT: ret <8 x i32> [[OUTER_EXT]]
;
entry:
@@ -466,9 +466,9 @@ define <3 x i32> @load_i24_sext_to_v3i32(ptr %di) {
; CHECK-NEXT: [[L:%.*]] = load i24, ptr [[DI]], align 4
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i24> <i24 poison, i24 0>, i24 [[L]], i64 0
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i24> [[VEC_INS]] to <6 x i8>
-; CHECK-NEXT: [[EXT_1:%.*]] = sext <6 x i8> [[VEC_BC]] to <6 x i16>
-; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <6 x i16> [[EXT_1]], <6 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
-; CHECK-NEXT: [[EXT_2:%.*]] = sext <3 x i16> [[VEC_SHUFFLE]] to <3 x i32>
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <6 x i8> [[VEC_BC]], <6 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT: [[EXT_1:%.*]] = sext <3 x i8> [[VEC_SHUFFLE]] to <3 x i16>
+; CHECK-NEXT: [[EXT_2:%.*]] = sext <3 x i16> [[EXT_1]] to <3 x i32>
; CHECK-NEXT: ret <3 x i32> [[EXT_2]]
;
entry:
@@ -488,9 +488,9 @@ define <4 x i32> @load_i32_insert_idx_1(ptr %di) {
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 0, i32 poison>, i32 [[L]], i64 1
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
-; CHECK-NEXT: [[EXT_1:%.*]] = sext <8 x i8> [[VEC_BC]] to <8 x i16>
-; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[EXT_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[EXT_2:%.*]] = sext <4 x i16> [[VEC_SHUFFLE]] to <4 x i32>
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[EXT_1:%.*]] = sext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16>
+; CHECK-NEXT: [[EXT_2:%.*]] = sext <4 x i16> [[EXT_1]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
;
entry:
@@ -510,9 +510,9 @@ define <4 x i32> @mask_extracts_not_all_elements_1(ptr %di) {
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
-; CHECK-NEXT: [[EXT_1:%.*]] = sext <8 x i8> [[VEC_BC]] to <8 x i16>
-; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[EXT_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
-; CHECK-NEXT: [[EXT_2:%.*]] = sext <4 x i16> [[VEC_SHUFFLE]] to <4 x i32>
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
+; CHECK-NEXT: [[EXT_1:%.*]] = sext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16>
+; CHECK-NEXT: [[EXT_2:%.*]] = sext <4 x i16> [[EXT_1]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
;
entry:
@@ -532,9 +532,9 @@ define <4 x i32> @mask_extracts_not_all_elements_2(ptr %di) {
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4
; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
-; CHECK-NEXT: [[EXT_1:%.*]] = sext <8 x i8> [[VEC_BC]] to <8 x i16>
-; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[EXT_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
-; CHECK-NEXT: [[EXT_2:%.*]] = sext <4 x i16> [[VEC_SHUFFLE]] to <4 x i32>
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: [[EXT_1:%.*]] = sext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16>
+; CHECK-NEXT: [[EXT_2:%.*]] = sext <4 x i16> [[EXT_1]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
;
entry:
>From e4b329f2fba9bf042f790a7b9cfca4acf1c35aec Mon Sep 17 00:00:00 2001
From: Chaitanya Koparkar <ckoparkar at gmail.com>
Date: Thu, 25 Sep 2025 06:56:16 -0400
Subject: [PATCH 4/5] Add tests, remove comment
---
.../Transforms/Vectorize/VectorCombine.cpp | 9 +--
.../VectorCombine/X86/shuffle-of-casts.ll | 68 +++++++++++++++++++
2 files changed, 71 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 2a9f637791dfe..54af105383be0 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -2487,10 +2487,8 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
if (!match(&I, m_Shuffle(m_Value(V0), m_Value(V1), m_Mask(OldMask))))
return false;
- // Check whether this is a unary shuffle.
- // TODO: check if this can be extended to match undef or unused values,
- // perhaps using ShuffleVectorInst::isSingleSource.
- bool IsBinaryShuffle = !isa<PoisonValue>(V1);
+ // Check whether this is a binary shuffle.
+ bool IsBinaryShuffle = !isa<UndefValue>(V1);
auto *C0 = dyn_cast<CastInst>(V0);
auto *C1 = dyn_cast<CastInst>(V1);
@@ -2501,9 +2499,8 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
// If this is allowed, foldShuffleOfCastops can get stuck in a loop
// with foldBitcastOfShuffle. Reject in favor of foldBitcastOfShuffle.
- if (!IsBinaryShuffle && Opcode == Instruction::BitCast) {
+ if (!IsBinaryShuffle && Opcode == Instruction::BitCast)
return false;
- }
if (IsBinaryShuffle) {
if (C0->getSrcTy() != C1->getSrcTy())
diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll
index fba4b60ef417b..ef2b88da50891 100644
--- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll
@@ -342,3 +342,71 @@ define <16 x i32> @concat_sext_zext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) {
%r = shufflevector <8 x i32> %x0, <8 x i32> %x1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <16 x i32> %r
}
+
+; Unary shuffles
+
+define <4 x i16> @unary_shuffle_zext_v8i8_v4i16(<8 x i8> %a0) {
+; CHECK-LABEL: define <4 x i16> @unary_shuffle_zext_v8i8_v4i16(
+; CHECK-SAME: <8 x i8> [[A0:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[A0]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[X1:%.*]] = zext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16>
+; CHECK-NEXT: ret <4 x i16> [[X1]]
+;
+ %x1 = zext <8 x i8> %a0 to <8 x i16>
+ %vec.shuffle = shufflevector <8 x i16> %x1, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i16> %vec.shuffle
+}
+
+define <4 x i16> @unary_shuffle_sext_v8i8_v4i16(<8 x i8> %a0) {
+; CHECK-LABEL: define <4 x i16> @unary_shuffle_sext_v8i8_v4i16(
+; CHECK-SAME: <8 x i8> [[A0:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[A0]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[X1:%.*]] = sext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16>
+; CHECK-NEXT: ret <4 x i16> [[X1]]
+;
+ %x1 = sext <8 x i8> %a0 to <8 x i16>
+ %vec.shuffle = shufflevector <8 x i16> %x1, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i16> %vec.shuffle
+}
+
+define <4 x i16> @unary_shuffle_sext_v8i8_v4i16_undef(<8 x i8> %a0) {
+; CHECK-LABEL: define <4 x i16> @unary_shuffle_sext_v8i8_v4i16_undef(
+; CHECK-SAME: <8 x i8> [[A0:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[A0]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[X1:%.*]] = sext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16>
+; CHECK-NEXT: ret <4 x i16> [[X1]]
+;
+ %x1 = sext <8 x i8> %a0 to <8 x i16>
+ %vec.shuffle = shufflevector <8 x i16> %x1, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i16> %vec.shuffle
+}
+
+; negative - avoid loop with foldBitcastOfShuffle
+
+define <2 x i32> @unary_shuffle_bitcast_v8i8_v2i32(<8 x i8> %a0) {
+; CHECK-LABEL: define <2 x i32> @unary_shuffle_bitcast_v8i8_v2i32(
+; CHECK-SAME: <8 x i8> [[A0:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[X1:%.*]] = bitcast <8 x i8> [[A0]] to <2 x i32>
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <2 x i32> [[X1]], <2 x i32> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: ret <2 x i32> [[VEC_SHUFFLE]]
+;
+ %x1 = bitcast <8 x i8> %a0 to <2 x i32>
+ %vec.shuffle = shufflevector <2 x i32> %x1, <2 x i32> poison, <2 x i32> <i32 0, i32 1>
+ ret <2 x i32> %vec.shuffle
+}
+
+; negative - multiuse
+
+define <4 x i16> @unary_shuffle_sext_v8i8_v4i16_multiuse(<8 x i8> %a0, ptr %a1) {
+; CHECK-LABEL: define <4 x i16> @unary_shuffle_sext_v8i8_v4i16_multiuse(
+; CHECK-SAME: <8 x i8> [[A0:%.*]], ptr [[A1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[X1:%.*]] = sext <8 x i8> [[A0]] to <8 x i16>
+; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[X1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: store <8 x i16> [[X1]], ptr [[A1]], align 16
+; CHECK-NEXT: ret <4 x i16> [[VEC_SHUFFLE]]
+;
+ %x1 = sext <8 x i8> %a0 to <8 x i16>
+ %vec.shuffle = shufflevector <8 x i16> %x1, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ store <8 x i16> %x1, ptr %a1, align 16
+ ret <4 x i16> %vec.shuffle
+}
>From 4f5f5da95a76f1c2aabeb1a4753536426d62e529 Mon Sep 17 00:00:00 2001
From: Chaitanya Koparkar <ckoparkar at gmail.com>
Date: Thu, 25 Sep 2025 07:23:13 -0400
Subject: [PATCH 5/5] Remove test that uses undef
---
.../Transforms/VectorCombine/X86/shuffle-of-casts.ll | 12 ------------
1 file changed, 12 deletions(-)
diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll
index ef2b88da50891..82a739964c9d0 100644
--- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll
@@ -369,18 +369,6 @@ define <4 x i16> @unary_shuffle_sext_v8i8_v4i16(<8 x i8> %a0) {
ret <4 x i16> %vec.shuffle
}
-define <4 x i16> @unary_shuffle_sext_v8i8_v4i16_undef(<8 x i8> %a0) {
-; CHECK-LABEL: define <4 x i16> @unary_shuffle_sext_v8i8_v4i16_undef(
-; CHECK-SAME: <8 x i8> [[A0:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[A0]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[X1:%.*]] = sext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16>
-; CHECK-NEXT: ret <4 x i16> [[X1]]
-;
- %x1 = sext <8 x i8> %a0 to <8 x i16>
- %vec.shuffle = shufflevector <8 x i16> %x1, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- ret <4 x i16> %vec.shuffle
-}
-
; negative - avoid loop with foldBitcastOfShuffle
define <2 x i32> @unary_shuffle_bitcast_v8i8_v2i32(<8 x i8> %a0) {
More information about the llvm-commits
mailing list