[llvm] [InstCombine] Move extends across identity shuffles. (PR #146901)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 3 07:22:53 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Florian Hahn (fhahn)
<details>
<summary>Changes</summary>
Add a new fold to instcombine to move SExt/ZExt across identity
shuffles, applying the cast after the shuffle. This sinks extends and
can enable more general additional folding of both shuffles (and
related instructions) and extends. If backends prefer splitting up doing
casts first, the extends can be hoisted again in VectorCombine for
example.
A larger example is included in the load_i32_zext_to_v4i32. The wider
extend is easier to compute an accurate cost for and targets (like
AArch64) can lower a single wider extend more efficiently than multiple
separate extends.
This is a generalization of a VectorCombine version
(https://github.com/llvm/llvm-project/pull/141109) as suggested by
@<!-- -->preames.
---
Full diff: https://github.com/llvm/llvm-project/pull/146901.diff
4 Files Affected:
- (modified) llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp (+19-5)
- (modified) llvm/test/Transforms/InstCombine/X86/blend_x86.ll (+1-1)
- (added) llvm/test/Transforms/InstCombine/fold-shuffle-ext.ll (+104)
- (modified) llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll (+32-103)
``````````diff
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index a746a5229fb9a..a31fd68dc7165 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -2559,17 +2559,16 @@ static Instruction *foldShuffleOfUnaryOps(ShuffleVectorInst &Shuf,
/// Canonicalize casts after shuffle.
static Instruction *foldCastShuffle(ShuffleVectorInst &Shuf,
InstCombiner::BuilderTy &Builder) {
- // Do we have 2 matching cast operands?
auto *Cast0 = dyn_cast<CastInst>(Shuf.getOperand(0));
- auto *Cast1 = dyn_cast<CastInst>(Shuf.getOperand(1));
- if (!Cast0 || !Cast1 || Cast0->getOpcode() != Cast1->getOpcode() ||
- Cast0->getSrcTy() != Cast1->getSrcTy())
+ if (!Cast0)
return nullptr;
// TODO: Allow other opcodes? That would require easing the type restrictions
// below here.
CastInst::CastOps CastOpcode = Cast0->getOpcode();
switch (CastOpcode) {
+ case Instruction::SExt:
+ case Instruction::ZExt:
case Instruction::FPToSI:
case Instruction::FPToUI:
case Instruction::SIToFP:
@@ -2579,15 +2578,30 @@ static Instruction *foldCastShuffle(ShuffleVectorInst &Shuf,
return nullptr;
}
+ VectorType *CastSrcTy = cast<VectorType>(Cast0->getSrcTy());
VectorType *ShufTy = Shuf.getType();
VectorType *ShufOpTy = cast<VectorType>(Shuf.getOperand(0)->getType());
- VectorType *CastSrcTy = cast<VectorType>(Cast0->getSrcTy());
// TODO: Allow length-increasing shuffles?
if (ShufTy->getElementCount().getKnownMinValue() >
ShufOpTy->getElementCount().getKnownMinValue())
return nullptr;
+ // shuffle (cast X), Y, identity-with-extract-mask -->
+ // cast (shuffle X, Y, identity-with-extract-mask).
+ if (Cast0->hasOneUse() && Shuf.isIdentityWithExtract()) {
+ auto *NewIns = Builder.CreateShuffleVector(Cast0->getOperand(0),
+ PoisonValue::get(CastSrcTy),
+ Shuf.getShuffleMask());
+ return CastInst::Create(Cast0->getOpcode(), NewIns, Shuf.getType());
+ }
+
+ auto *Cast1 = dyn_cast<CastInst>(Shuf.getOperand(1));
+ // Do we have 2 matching cast operands?
+ if (!Cast1 || Cast0->getOpcode() != Cast1->getOpcode() ||
+ Cast0->getSrcTy() != Cast1->getSrcTy())
+ return nullptr;
+
// TODO: Allow element-size-decreasing casts (ex: fptosi float to i8)?
assert(isa<FixedVectorType>(CastSrcTy) && isa<FixedVectorType>(ShufOpTy) &&
"Expected fixed vector operands for casts and binary shuffle");
diff --git a/llvm/test/Transforms/InstCombine/X86/blend_x86.ll b/llvm/test/Transforms/InstCombine/X86/blend_x86.ll
index aa49f493c9fa1..5c3b0beefbb66 100644
--- a/llvm/test/Transforms/InstCombine/X86/blend_x86.ll
+++ b/llvm/test/Transforms/InstCombine/X86/blend_x86.ll
@@ -287,9 +287,9 @@ define <4 x float> @sel_v16i8_bitcast_shuffle_bitcast_cmp(<8 x float> %a, <8 x f
; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <8 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[A_BC:%.*]] = bitcast <8 x float> [[A]] to <8 x i32>
; CHECK-NEXT: [[B_BC:%.*]] = bitcast <8 x float> [[B]] to <8 x i32>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i1> [[CMP]], <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[A_LO:%.*]] = shufflevector <8 x i32> [[A_BC]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[B_LO:%.*]] = shufflevector <8 x i32> [[B_BC]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i1> [[CMP]], <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[B_LO]], <4 x i32> [[A_LO]]
; CHECK-NEXT: [[RES:%.*]] = bitcast <4 x i32> [[TMP2]] to <4 x float>
; CHECK-NEXT: ret <4 x float> [[RES]]
diff --git a/llvm/test/Transforms/InstCombine/fold-shuffle-ext.ll b/llvm/test/Transforms/InstCombine/fold-shuffle-ext.ll
new file mode 100644
index 0000000000000..84ce83d40bee9
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fold-shuffle-ext.ll
@@ -0,0 +1,104 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -p instcombine -S %s | FileCheck %s
+
+define <4 x i16> @ext_identity_mask_first_vector_first_half_4xi16(<8 x i8> %x) {
+; CHECK-LABEL: define <4 x i16> @ext_identity_mask_first_vector_first_half_4xi16(
+; CHECK-SAME: <8 x i8> [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x i8> [[X]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[SHUFFLE:%.*]] = zext <4 x i8> [[TMP0]] to <4 x i16>
+; CHECK-NEXT: ret <4 x i16> [[SHUFFLE]]
+;
+entry:
+ %e.1 = zext <8 x i8> %x to <8 x i16>
+ %shuffle = shufflevector <8 x i16> %e.1, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i16> %shuffle
+}
+
+define <3 x i32> @ext_identity_mask_first_vector_first_half_3xi32(<4 x i16> %x) {
+; CHECK-LABEL: define <3 x i32> @ext_identity_mask_first_vector_first_half_3xi32(
+; CHECK-SAME: <4 x i16> [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[X]], <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT: [[SHUFFLE:%.*]] = zext <3 x i16> [[TMP0]] to <3 x i32>
+; CHECK-NEXT: ret <3 x i32> [[SHUFFLE]]
+;
+entry:
+ %e.1 = zext <4 x i16> %x to <4 x i32>
+ %shuffle = shufflevector <4 x i32> %e.1, <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
+ ret <3 x i32> %shuffle
+}
+
+define <4 x i16> @ext_no_identity_mask1(<8 x i8> %in) {
+; CHECK-LABEL: define <4 x i16> @ext_no_identity_mask1(
+; CHECK-SAME: <8 x i8> [[IN:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[E_1:%.*]] = zext <8 x i8> [[IN]] to <8 x i16>
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+; CHECK-NEXT: ret <4 x i16> [[SHUFFLE]]
+;
+entry:
+ %e.1 = zext <8 x i8> %in to <8 x i16>
+ %shuffle = shufflevector <8 x i16> %e.1, <8 x i16> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+ ret <4 x i16> %shuffle
+}
+
+define <4 x i16> @ext_no_identity_mask2(<8 x i8> %x, <8 x i16> %y) {
+; CHECK-LABEL: define <4 x i16> @ext_no_identity_mask2(
+; CHECK-SAME: <8 x i8> [[X:%.*]], <8 x i16> [[Y:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[E_1:%.*]] = zext <8 x i8> [[X]] to <8 x i16>
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: ret <4 x i16> [[SHUFFLE]]
+;
+entry:
+ %e.1 = zext <8 x i8> %x to <8 x i16>
+ %shuffle = shufflevector <8 x i16> %e.1, <8 x i16> %y, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ ret <4 x i16> %shuffle
+}
+
+define <5 x i32> @ext_identity_mask_first_vector_first_half_5xi32(<4 x i16> %x) {
+; CHECK-LABEL: define <5 x i32> @ext_identity_mask_first_vector_first_half_5xi32(
+; CHECK-SAME: <4 x i16> [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[E_1:%.*]] = zext <4 x i16> [[X]] to <4 x i32>
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[E_1]], <4 x i32> poison, <5 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0>
+; CHECK-NEXT: ret <5 x i32> [[SHUFFLE]]
+;
+entry:
+ %e.1 = zext <4 x i16> %x to <4 x i32>
+ %shuffle = shufflevector <4 x i32> %e.1, <4 x i32> %e.1, <5 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4>
+ ret <5 x i32> %shuffle
+}
+
+define <4 x i16> @ext_no_identity_mask_first_vector_second_half(<8 x i8> %x, <8 x i16> %y) {
+; CHECK-LABEL: define <4 x i16> @ext_no_identity_mask_first_vector_second_half(
+; CHECK-SAME: <8 x i8> [[X:%.*]], <8 x i16> [[Y:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[E_1:%.*]] = zext <8 x i8> [[X]] to <8 x i16>
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> [[Y]], <4 x i32> <i32 0, i32 9, i32 1, i32 10>
+; CHECK-NEXT: ret <4 x i16> [[SHUFFLE]]
+;
+entry:
+ %e.1 = zext <8 x i8> %x to <8 x i16>
+ %shuffle = shufflevector <8 x i16> %e.1, <8 x i16> %y, <4 x i32> <i32 0, i32 9, i32 1, i32 10>
+ ret <4 x i16> %shuffle
+}
+
+define <4 x i32> @load_i32_zext_to_v4i32(ptr %di) {
+; CHECK-LABEL: define <4 x i32> @load_i32_zext_to_v4i32(
+; CHECK-SAME: ptr [[DI:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[L1:%.*]] = load <4 x i8>, ptr [[DI]], align 4
+; CHECK-NEXT: [[EXT_2:%.*]] = zext <4 x i8> [[L1]] to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> [[EXT_2]]
+;
+entry:
+ %l = load i32, ptr %di
+ %vec.ins = insertelement <2 x i32> <i32 poison, i32 0>, i32 %l, i64 0
+ %vec.bc = bitcast <2 x i32> %vec.ins to <8 x i8>
+ %e.1 = zext <8 x i8> %vec.bc to <8 x i16>
+ %vec.shuffle = shufflevector <8 x i16> %e.1, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %ext.2 = zext nneg <4 x i16> %vec.shuffle to <4 x i32>
+ ret <4 x i32> %ext.2
+}
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll b/llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll
index daf4a7b799dd4..dbce77698eb07 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll
@@ -12,20 +12,10 @@
;
define <4 x double> @x86_pblendvb_v4f64_v2f64(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x double> %d) {
-; SSE-LABEL: @x86_pblendvb_v4f64_v2f64(
-; SSE-NEXT: [[TMP1:%.*]] = fcmp olt <4 x double> [[C:%.*]], [[D:%.*]]
-; SSE-NEXT: [[DOTV:%.*]] = select <4 x i1> [[TMP1]], <4 x double> [[B:%.*]], <4 x double> [[A:%.*]]
-; SSE-NEXT: ret <4 x double> [[DOTV]]
-;
-; AVX2-LABEL: @x86_pblendvb_v4f64_v2f64(
-; AVX2-NEXT: [[TMP1:%.*]] = fcmp olt <4 x double> [[C:%.*]], [[D:%.*]]
-; AVX2-NEXT: [[DOTV:%.*]] = select <4 x i1> [[TMP1]], <4 x double> [[B:%.*]], <4 x double> [[A:%.*]]
-; AVX2-NEXT: ret <4 x double> [[DOTV]]
-;
-; AVX512-LABEL: @x86_pblendvb_v4f64_v2f64(
-; AVX512-NEXT: [[CMP:%.*]] = fcmp olt <4 x double> [[C:%.*]], [[D:%.*]]
-; AVX512-NEXT: [[DOTV:%.*]] = select <4 x i1> [[CMP]], <4 x double> [[B:%.*]], <4 x double> [[A:%.*]]
-; AVX512-NEXT: ret <4 x double> [[DOTV]]
+; CHECK-LABEL: @x86_pblendvb_v4f64_v2f64(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <4 x double> [[C:%.*]], [[D:%.*]]
+; CHECK-NEXT: [[DOTV:%.*]] = select <4 x i1> [[CMP]], <4 x double> [[B:%.*]], <4 x double> [[A:%.*]]
+; CHECK-NEXT: ret <4 x double> [[DOTV]]
;
%a.bc = bitcast <4 x double> %a to <32 x i8>
%b.bc = bitcast <4 x double> %b to <32 x i8>
@@ -46,20 +36,10 @@ define <4 x double> @x86_pblendvb_v4f64_v2f64(<4 x double> %a, <4 x double> %b,
}
define <8 x float> @x86_pblendvb_v8f32_v4f32(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> %d) {
-; SSE-LABEL: @x86_pblendvb_v8f32_v4f32(
-; SSE-NEXT: [[TMP1:%.*]] = fcmp olt <8 x float> [[C:%.*]], [[D:%.*]]
-; SSE-NEXT: [[DOTV:%.*]] = select <8 x i1> [[TMP1]], <8 x float> [[B:%.*]], <8 x float> [[A:%.*]]
-; SSE-NEXT: ret <8 x float> [[DOTV]]
-;
-; AVX2-LABEL: @x86_pblendvb_v8f32_v4f32(
-; AVX2-NEXT: [[TMP1:%.*]] = fcmp olt <8 x float> [[C:%.*]], [[D:%.*]]
-; AVX2-NEXT: [[DOTV:%.*]] = select <8 x i1> [[TMP1]], <8 x float> [[B:%.*]], <8 x float> [[A:%.*]]
-; AVX2-NEXT: ret <8 x float> [[DOTV]]
-;
-; AVX512-LABEL: @x86_pblendvb_v8f32_v4f32(
-; AVX512-NEXT: [[CMP:%.*]] = fcmp olt <8 x float> [[C:%.*]], [[D:%.*]]
-; AVX512-NEXT: [[DOTV:%.*]] = select <8 x i1> [[CMP]], <8 x float> [[B:%.*]], <8 x float> [[A:%.*]]
-; AVX512-NEXT: ret <8 x float> [[DOTV]]
+; CHECK-LABEL: @x86_pblendvb_v8f32_v4f32(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <8 x float> [[C:%.*]], [[D:%.*]]
+; CHECK-NEXT: [[DOTV:%.*]] = select <8 x i1> [[CMP]], <8 x float> [[B:%.*]], <8 x float> [[A:%.*]]
+; CHECK-NEXT: ret <8 x float> [[DOTV]]
;
%a.bc = bitcast <8 x float> %a to <32 x i8>
%b.bc = bitcast <8 x float> %b to <32 x i8>
@@ -80,20 +60,10 @@ define <8 x float> @x86_pblendvb_v8f32_v4f32(<8 x float> %a, <8 x float> %b, <8
}
define <4 x i64> @x86_pblendvb_v4i64_v2i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i64> %d) {
-; SSE-LABEL: @x86_pblendvb_v4i64_v2i64(
-; SSE-NEXT: [[TMP1:%.*]] = icmp slt <4 x i64> [[C:%.*]], [[D:%.*]]
-; SSE-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> [[B:%.*]], <4 x i64> [[A:%.*]]
-; SSE-NEXT: ret <4 x i64> [[TMP2]]
-;
-; AVX2-LABEL: @x86_pblendvb_v4i64_v2i64(
-; AVX2-NEXT: [[TMP1:%.*]] = icmp slt <4 x i64> [[C:%.*]], [[D:%.*]]
-; AVX2-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> [[B:%.*]], <4 x i64> [[A:%.*]]
-; AVX2-NEXT: ret <4 x i64> [[TMP2]]
-;
-; AVX512-LABEL: @x86_pblendvb_v4i64_v2i64(
-; AVX512-NEXT: [[CMP:%.*]] = icmp slt <4 x i64> [[C:%.*]], [[D:%.*]]
-; AVX512-NEXT: [[TMP1:%.*]] = select <4 x i1> [[CMP]], <4 x i64> [[B:%.*]], <4 x i64> [[A:%.*]]
-; AVX512-NEXT: ret <4 x i64> [[TMP1]]
+; CHECK-LABEL: @x86_pblendvb_v4i64_v2i64(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <4 x i64> [[C:%.*]], [[D:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[CMP]], <4 x i64> [[B:%.*]], <4 x i64> [[A:%.*]]
+; CHECK-NEXT: ret <4 x i64> [[TMP1]]
;
%a.bc = bitcast <4 x i64> %a to <32 x i8>
%b.bc = bitcast <4 x i64> %b to <32 x i8>
@@ -216,35 +186,15 @@ define <4 x i64> @x86_pblendvb_v16i16_v8i16(<4 x i64> %a, <4 x i64> %b, <4 x i64
}
define <4 x i64> @x86_pblendvb_v32i8_v16i8(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i64> %d) {
-; SSE-LABEL: @x86_pblendvb_v32i8_v16i8(
-; SSE-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[A:%.*]] to <32 x i8>
-; SSE-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[B:%.*]] to <32 x i8>
-; SSE-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <32 x i8>
-; SSE-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <32 x i8>
-; SSE-NEXT: [[TMP3:%.*]] = icmp slt <32 x i8> [[C_BC]], [[D_BC]]
-; SSE-NEXT: [[CONCAT:%.*]] = select <32 x i1> [[TMP3]], <32 x i8> [[TMP2]], <32 x i8> [[TMP1]]
-; SSE-NEXT: [[RES:%.*]] = bitcast <32 x i8> [[CONCAT]] to <4 x i64>
-; SSE-NEXT: ret <4 x i64> [[RES]]
-;
-; AVX2-LABEL: @x86_pblendvb_v32i8_v16i8(
-; AVX2-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[A:%.*]] to <32 x i8>
-; AVX2-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[B:%.*]] to <32 x i8>
-; AVX2-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <32 x i8>
-; AVX2-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <32 x i8>
-; AVX2-NEXT: [[TMP3:%.*]] = icmp slt <32 x i8> [[C_BC]], [[D_BC]]
-; AVX2-NEXT: [[CONCAT:%.*]] = select <32 x i1> [[TMP3]], <32 x i8> [[TMP2]], <32 x i8> [[TMP1]]
-; AVX2-NEXT: [[RES:%.*]] = bitcast <32 x i8> [[CONCAT]] to <4 x i64>
-; AVX2-NEXT: ret <4 x i64> [[RES]]
-;
-; AVX512-LABEL: @x86_pblendvb_v32i8_v16i8(
-; AVX512-NEXT: [[A_BC:%.*]] = bitcast <4 x i64> [[A:%.*]] to <32 x i8>
-; AVX512-NEXT: [[B_BC:%.*]] = bitcast <4 x i64> [[B:%.*]] to <32 x i8>
-; AVX512-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <32 x i8>
-; AVX512-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <32 x i8>
-; AVX512-NEXT: [[CMP:%.*]] = icmp slt <32 x i8> [[C_BC]], [[D_BC]]
-; AVX512-NEXT: [[CONCAT:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[B_BC]], <32 x i8> [[A_BC]]
-; AVX512-NEXT: [[RES:%.*]] = bitcast <32 x i8> [[CONCAT]] to <4 x i64>
-; AVX512-NEXT: ret <4 x i64> [[RES]]
+; CHECK-LABEL: @x86_pblendvb_v32i8_v16i8(
+; CHECK-NEXT: [[A_BC:%.*]] = bitcast <4 x i64> [[A:%.*]] to <32 x i8>
+; CHECK-NEXT: [[B_BC:%.*]] = bitcast <4 x i64> [[B:%.*]] to <32 x i8>
+; CHECK-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <32 x i8>
+; CHECK-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <32 x i8>
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <32 x i8> [[C_BC]], [[D_BC]]
+; CHECK-NEXT: [[CONCAT:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[B_BC]], <32 x i8> [[A_BC]]
+; CHECK-NEXT: [[RES:%.*]] = bitcast <32 x i8> [[CONCAT]] to <4 x i64>
+; CHECK-NEXT: ret <4 x i64> [[RES]]
;
%a.bc = bitcast <4 x i64> %a to <32 x i8>
%b.bc = bitcast <4 x i64> %b to <32 x i8>
@@ -424,35 +374,15 @@ define <8 x i64> @x86_pblendvb_v32i16_v16i16(<8 x i64> %a, <8 x i64> %b, <8 x i6
}
define <8 x i64> @x86_pblendvb_v64i8_v32i8(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i64> %d) {
-; SSE-LABEL: @x86_pblendvb_v64i8_v32i8(
-; SSE-NEXT: [[TMP1:%.*]] = bitcast <8 x i64> [[A:%.*]] to <64 x i8>
-; SSE-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[B:%.*]] to <64 x i8>
-; SSE-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <64 x i8>
-; SSE-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <64 x i8>
-; SSE-NEXT: [[TMP3:%.*]] = icmp slt <64 x i8> [[C_BC]], [[D_BC]]
-; SSE-NEXT: [[CONCAT:%.*]] = select <64 x i1> [[TMP3]], <64 x i8> [[TMP2]], <64 x i8> [[TMP1]]
-; SSE-NEXT: [[RES:%.*]] = bitcast <64 x i8> [[CONCAT]] to <8 x i64>
-; SSE-NEXT: ret <8 x i64> [[RES]]
-;
-; AVX2-LABEL: @x86_pblendvb_v64i8_v32i8(
-; AVX2-NEXT: [[TMP1:%.*]] = bitcast <8 x i64> [[A:%.*]] to <64 x i8>
-; AVX2-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[B:%.*]] to <64 x i8>
-; AVX2-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <64 x i8>
-; AVX2-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <64 x i8>
-; AVX2-NEXT: [[TMP3:%.*]] = icmp slt <64 x i8> [[C_BC]], [[D_BC]]
-; AVX2-NEXT: [[CONCAT:%.*]] = select <64 x i1> [[TMP3]], <64 x i8> [[TMP2]], <64 x i8> [[TMP1]]
-; AVX2-NEXT: [[RES:%.*]] = bitcast <64 x i8> [[CONCAT]] to <8 x i64>
-; AVX2-NEXT: ret <8 x i64> [[RES]]
-;
-; AVX512-LABEL: @x86_pblendvb_v64i8_v32i8(
-; AVX512-NEXT: [[A_BC:%.*]] = bitcast <8 x i64> [[A:%.*]] to <64 x i8>
-; AVX512-NEXT: [[B_BC:%.*]] = bitcast <8 x i64> [[B:%.*]] to <64 x i8>
-; AVX512-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <64 x i8>
-; AVX512-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <64 x i8>
-; AVX512-NEXT: [[CMP:%.*]] = icmp slt <64 x i8> [[C_BC]], [[D_BC]]
-; AVX512-NEXT: [[CONCAT:%.*]] = select <64 x i1> [[CMP]], <64 x i8> [[B_BC]], <64 x i8> [[A_BC]]
-; AVX512-NEXT: [[RES:%.*]] = bitcast <64 x i8> [[CONCAT]] to <8 x i64>
-; AVX512-NEXT: ret <8 x i64> [[RES]]
+; CHECK-LABEL: @x86_pblendvb_v64i8_v32i8(
+; CHECK-NEXT: [[A_BC:%.*]] = bitcast <8 x i64> [[A:%.*]] to <64 x i8>
+; CHECK-NEXT: [[B_BC:%.*]] = bitcast <8 x i64> [[B:%.*]] to <64 x i8>
+; CHECK-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <64 x i8>
+; CHECK-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <64 x i8>
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <64 x i8> [[C_BC]], [[D_BC]]
+; CHECK-NEXT: [[CONCAT:%.*]] = select <64 x i1> [[CMP]], <64 x i8> [[B_BC]], <64 x i8> [[A_BC]]
+; CHECK-NEXT: [[RES:%.*]] = bitcast <64 x i8> [[CONCAT]] to <8 x i64>
+; CHECK-NEXT: ret <8 x i64> [[RES]]
;
%a.bc = bitcast <8 x i64> %a to <64 x i8>
%b.bc = bitcast <8 x i64> %b to <64 x i8>
@@ -566,9 +496,8 @@ define <2 x i64> @x86_pblendvb_v32i8_v16i8_undefs(<4 x i64> %a, <4 x i64> %b, <4
; CHECK-NEXT: [[A_LO:%.*]] = shufflevector <32 x i8> [[A_BC]], <32 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[B_LO:%.*]] = shufflevector <32 x i8> [[B_BC]], <32 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[CMP:%.*]] = icmp slt <32 x i8> [[C_BC]], [[D_BC]]
-; CHECK-NEXT: [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i8>
-; CHECK-NEXT: [[SEXT_LO:%.*]] = shufflevector <32 x i8> [[SEXT]], <32 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[SEL_LO:%.*]] = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> [[A_LO]], <16 x i8> [[B_LO]], <16 x i8> [[SEXT_LO]])
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i1> [[CMP]], <32 x i1> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[SEL_LO:%.*]] = select <16 x i1> [[TMP1]], <16 x i8> [[B_LO]], <16 x i8> [[A_LO]]
; CHECK-NEXT: [[RES:%.*]] = bitcast <16 x i8> [[SEL_LO]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[RES]]
;
``````````
</details>
https://github.com/llvm/llvm-project/pull/146901
More information about the llvm-commits
mailing list