[llvm] [InstCombine] Move extends across identity shuffles. (PR #146901)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Sat Jul 5 13:01:27 PDT 2025
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/146901
From 0f3f2c1951b2db6ea2768dbbf01941cf186e037d Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 3 Jul 2025 12:07:42 +0100
Subject: [PATCH 1/2] [InstCombine] Move extends across identity shuffles.
Add a new fold to InstCombine that moves SExt/ZExt across identity
shuffles, applying the cast after the shuffle. This sinks extends and
can enable additional folding of both shuffles (and related
instructions) and extends. If backends prefer doing the casts first,
the extends can be hoisted again later, for example in VectorCombine.
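As a minimal before/after sketch (value names are illustrative; this
mirrors the ext_identity_mask_first_vector_first_half_4xi16 test
updated below):

  %e.1 = zext <8 x i8> %x to <8 x i16>
  %shuffle = shufflevector <8 x i16> %e.1, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>

becomes

  %narrow = shufflevector <8 x i8> %x, <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %shuffle = zext <4 x i8> %narrow to <4 x i16>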
A larger example is included in the load_i32_zext_to_v4i32 test. It is
easier to compute an accurate cost for the single wider extend, and
targets (like AArch64) can lower it more efficiently than multiple
separate extends.
This is a generalization of a VectorCombine version
(https://github.com/llvm/llvm-project/pull/141109/) as suggested by
@preames.
---
.../InstCombine/InstCombineVectorOps.cpp | 24 +++++++++++++++----
.../Transforms/InstCombine/X86/blend_x86.ll | 2 +-
.../InstCombine/fold-shuffle-ext.ll | 16 +++++--------
.../PhaseOrdering/X86/blendv-select.ll | 5 ++--
4 files changed, 28 insertions(+), 19 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 1a8d4215b5b71..ae5bc2cdbfdd7 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -2571,17 +2571,16 @@ static Instruction *foldShuffleOfUnaryOps(ShuffleVectorInst &Shuf,
/// Canonicalize casts after shuffle.
static Instruction *foldCastShuffle(ShuffleVectorInst &Shuf,
InstCombiner::BuilderTy &Builder) {
- // Do we have 2 matching cast operands?
auto *Cast0 = dyn_cast<CastInst>(Shuf.getOperand(0));
- auto *Cast1 = dyn_cast<CastInst>(Shuf.getOperand(1));
- if (!Cast0 || !Cast1 || Cast0->getOpcode() != Cast1->getOpcode() ||
- Cast0->getSrcTy() != Cast1->getSrcTy())
+ if (!Cast0)
return nullptr;
// TODO: Allow other opcodes? That would require easing the type restrictions
// below here.
CastInst::CastOps CastOpcode = Cast0->getOpcode();
switch (CastOpcode) {
+ case Instruction::SExt:
+ case Instruction::ZExt:
case Instruction::FPToSI:
case Instruction::FPToUI:
case Instruction::SIToFP:
@@ -2591,15 +2590,30 @@ static Instruction *foldCastShuffle(ShuffleVectorInst &Shuf,
return nullptr;
}
+ VectorType *CastSrcTy = cast<VectorType>(Cast0->getSrcTy());
VectorType *ShufTy = Shuf.getType();
VectorType *ShufOpTy = cast<VectorType>(Shuf.getOperand(0)->getType());
- VectorType *CastSrcTy = cast<VectorType>(Cast0->getSrcTy());
// TODO: Allow length-increasing shuffles?
if (ShufTy->getElementCount().getKnownMinValue() >
ShufOpTy->getElementCount().getKnownMinValue())
return nullptr;
+ // shuffle (cast X), Y, identity-with-extract-mask -->
+ // cast (shuffle X, Y, identity-with-extract-mask).
+ if (Cast0->hasOneUse() && Shuf.isIdentityWithExtract()) {
+ auto *NewIns = Builder.CreateShuffleVector(Cast0->getOperand(0),
+ PoisonValue::get(CastSrcTy),
+ Shuf.getShuffleMask());
+ return CastInst::Create(Cast0->getOpcode(), NewIns, Shuf.getType());
+ }
+
+ auto *Cast1 = dyn_cast<CastInst>(Shuf.getOperand(1));
+ // Do we have 2 matching cast operands?
+ if (!Cast1 || Cast0->getOpcode() != Cast1->getOpcode() ||
+ Cast0->getSrcTy() != Cast1->getSrcTy())
+ return nullptr;
+
// TODO: Allow element-size-decreasing casts (ex: fptosi float to i8)?
assert(isa<FixedVectorType>(CastSrcTy) && isa<FixedVectorType>(ShufOpTy) &&
"Expected fixed vector operands for casts and binary shuffle");
diff --git a/llvm/test/Transforms/InstCombine/X86/blend_x86.ll b/llvm/test/Transforms/InstCombine/X86/blend_x86.ll
index aa49f493c9fa1..5c3b0beefbb66 100644
--- a/llvm/test/Transforms/InstCombine/X86/blend_x86.ll
+++ b/llvm/test/Transforms/InstCombine/X86/blend_x86.ll
@@ -287,9 +287,9 @@ define <4 x float> @sel_v16i8_bitcast_shuffle_bitcast_cmp(<8 x float> %a, <8 x f
; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <8 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[A_BC:%.*]] = bitcast <8 x float> [[A]] to <8 x i32>
; CHECK-NEXT: [[B_BC:%.*]] = bitcast <8 x float> [[B]] to <8 x i32>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i1> [[CMP]], <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[A_LO:%.*]] = shufflevector <8 x i32> [[A_BC]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[B_LO:%.*]] = shufflevector <8 x i32> [[B_BC]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i1> [[CMP]], <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[B_LO]], <4 x i32> [[A_LO]]
; CHECK-NEXT: [[RES:%.*]] = bitcast <4 x i32> [[TMP2]] to <4 x float>
; CHECK-NEXT: ret <4 x float> [[RES]]
diff --git a/llvm/test/Transforms/InstCombine/fold-shuffle-ext.ll b/llvm/test/Transforms/InstCombine/fold-shuffle-ext.ll
index dddd63d101e61..fc19b11fa8694 100644
--- a/llvm/test/Transforms/InstCombine/fold-shuffle-ext.ll
+++ b/llvm/test/Transforms/InstCombine/fold-shuffle-ext.ll
@@ -5,8 +5,8 @@ define <4 x i16> @ext_identity_mask_first_vector_first_half_4xi16(<8 x i8> %x) {
; CHECK-LABEL: define <4 x i16> @ext_identity_mask_first_vector_first_half_4xi16(
; CHECK-SAME: <8 x i8> [[X:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[E_1:%.*]] = zext <8 x i8> [[X]] to <8 x i16>
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x i8> [[X]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[SHUFFLE:%.*]] = zext <4 x i8> [[TMP0]] to <4 x i16>
; CHECK-NEXT: ret <4 x i16> [[SHUFFLE]]
;
entry:
@@ -19,8 +19,8 @@ define <3 x i32> @ext_identity_mask_first_vector_first_half_3xi32(<4 x i16> %x)
; CHECK-LABEL: define <3 x i32> @ext_identity_mask_first_vector_first_half_3xi32(
; CHECK-SAME: <4 x i16> [[X:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[E_1:%.*]] = zext <4 x i16> [[X]] to <4 x i32>
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[E_1]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[X]], <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT: [[SHUFFLE:%.*]] = zext <3 x i16> [[TMP0]] to <3 x i32>
; CHECK-NEXT: ret <3 x i32> [[SHUFFLE]]
;
entry:
@@ -102,12 +102,8 @@ define <4 x i32> @load_i32_zext_to_v4i32(ptr %di) {
; CHECK-LABEL: define <4 x i32> @load_i32_zext_to_v4i32(
; CHECK-SAME: ptr [[DI:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4
-; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
-; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
-; CHECK-NEXT: [[E_1:%.*]] = zext <8 x i8> [[VEC_BC]] to <8 x i16>
-; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32>
+; CHECK-NEXT: [[L1:%.*]] = load <4 x i8>, ptr [[DI]], align 4
+; CHECK-NEXT: [[EXT_2:%.*]] = zext <4 x i8> [[L1]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
;
entry:
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll b/llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll
index bbf893f6127b0..dbce77698eb07 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll
@@ -496,9 +496,8 @@ define <2 x i64> @x86_pblendvb_v32i8_v16i8_undefs(<4 x i64> %a, <4 x i64> %b, <4
; CHECK-NEXT: [[A_LO:%.*]] = shufflevector <32 x i8> [[A_BC]], <32 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[B_LO:%.*]] = shufflevector <32 x i8> [[B_BC]], <32 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[CMP:%.*]] = icmp slt <32 x i8> [[C_BC]], [[D_BC]]
-; CHECK-NEXT: [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i8>
-; CHECK-NEXT: [[SEXT_LO:%.*]] = shufflevector <32 x i8> [[SEXT]], <32 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[SEL_LO:%.*]] = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> [[A_LO]], <16 x i8> [[B_LO]], <16 x i8> [[SEXT_LO]])
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i1> [[CMP]], <32 x i1> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[SEL_LO:%.*]] = select <16 x i1> [[TMP1]], <16 x i8> [[B_LO]], <16 x i8> [[A_LO]]
; CHECK-NEXT: [[RES:%.*]] = bitcast <16 x i8> [[SEL_LO]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[RES]]
;
From 9312aa7c65f524fcb7f19228dfa6a16309bd78cc Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sat, 5 Jul 2025 20:46:01 +0100
Subject: [PATCH 2/2] !fixup require second op poison
---
llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index ae5bc2cdbfdd7..2915764699541 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -2599,9 +2599,10 @@ static Instruction *foldCastShuffle(ShuffleVectorInst &Shuf,
ShufOpTy->getElementCount().getKnownMinValue())
return nullptr;
- // shuffle (cast X), Y, identity-with-extract-mask -->
- // cast (shuffle X, Y, identity-with-extract-mask).
- if (Cast0->hasOneUse() && Shuf.isIdentityWithExtract()) {
+ // shuffle (cast X), Poison, identity-with-extract-mask -->
+ // cast (shuffle X, Poison, identity-with-extract-mask).
+ if (Cast0->hasOneUse() && Shuf.isIdentityWithExtract() &&
+ isa<PoisonValue>(Shuf.getOperand(1))) {
auto *NewIns = Builder.CreateShuffleVector(Cast0->getOperand(0),
PoisonValue::get(CastSrcTy),
Shuf.getShuffleMask());