[llvm] [X86] canCreateUndefOrPoisonForTargetNode - add handling for VPERMV3 intrinsic opcodes (PR #131768)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 18 02:41:40 PDT 2025
https://github.com/RKSimon created https://github.com/llvm/llvm-project/pull/131768
We already handle the X86ISD::VPERMV3 node type, but if we can handle equivalent cases before intrinsic lowering we can simplify the code further - e.g. #109272, before constant BUILD_VECTOR nodes get lowered to constant pool loads.
>From d4b629d3c17c232d52fc36781b4ff4a304794436 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Tue, 18 Mar 2025 09:40:55 +0000
Subject: [PATCH] [X86] canCreateUndefOrPoisonForTargetNode - add handling for
VPERMV3 intrinsic opcodes
We already handle the X86ISD::VPERMV3 node type, but if we can handle equivalent cases before intrinsic lowering we can simplify the code further - e.g. #109272, before constant BUILD_VECTOR nodes get lowered to constant pool loads.
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 19 +++++++++++++++++++
.../vector-shuffle-combining-avx512vbmi.ll | 6 ++----
2 files changed, 21 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index ee2c72bc9cced..49a8f62421f68 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -44726,6 +44726,25 @@ bool X86TargetLowering::canCreateUndefOrPoisonForTargetNode(
case Intrinsic::x86_avx2_pmadd_ub_sw:
case Intrinsic::x86_avx512_pmaddubs_w_512:
return false;
+ case Intrinsic::x86_avx512_vpermi2var_d_128:
+ case Intrinsic::x86_avx512_vpermi2var_d_256:
+ case Intrinsic::x86_avx512_vpermi2var_d_512:
+ case Intrinsic::x86_avx512_vpermi2var_hi_128:
+ case Intrinsic::x86_avx512_vpermi2var_hi_256:
+ case Intrinsic::x86_avx512_vpermi2var_hi_512:
+ case Intrinsic::x86_avx512_vpermi2var_pd_128:
+ case Intrinsic::x86_avx512_vpermi2var_pd_256:
+ case Intrinsic::x86_avx512_vpermi2var_pd_512:
+ case Intrinsic::x86_avx512_vpermi2var_ps_128:
+ case Intrinsic::x86_avx512_vpermi2var_ps_256:
+ case Intrinsic::x86_avx512_vpermi2var_ps_512:
+ case Intrinsic::x86_avx512_vpermi2var_q_128:
+ case Intrinsic::x86_avx512_vpermi2var_q_256:
+ case Intrinsic::x86_avx512_vpermi2var_q_512:
+ case Intrinsic::x86_avx512_vpermi2var_qi_128:
+ case Intrinsic::x86_avx512_vpermi2var_qi_256:
+ case Intrinsic::x86_avx512_vpermi2var_qi_512:
+ return false;
}
}
return TargetLowering::canCreateUndefOrPoisonForTargetNode(
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll
index e921915c20306..220653e99addb 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll
@@ -200,19 +200,17 @@ define <64 x i8> @combine_vpermi2var_constant_v64i8_with_mask_commute(<64 x i8>
; X86-LABEL: combine_vpermi2var_constant_v64i8_with_mask_commute:
; X86: # %bb.0:
; X86-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63]
-; X86-NEXT: vpermt2b {{\.?LCPI[0-9]+_[0-9]+}}, %zmm0, %zmm1
; X86-NEXT: vpmovb2m %zmm0, %k0
; X86-NEXT: knotq %k0, %k1
-; X86-NEXT: vmovdqu8 %zmm1, %zmm0 {%k1} {z}
+; X86-NEXT: vpermi2b {{\.?LCPI[0-9]+_[0-9]+}}, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: combine_vpermi2var_constant_v64i8_with_mask_commute:
; X64: # %bb.0:
; X64-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63]
-; X64-NEXT: vpermt2b {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1
; X64-NEXT: vpmovb2m %zmm0, %k0
; X64-NEXT: knotq %k0, %k1
-; X64-NEXT: vmovdqu8 %zmm1, %zmm0 {%k1} {z}
+; X64-NEXT: vpermi2b {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0 {%k1} {z}
; X64-NEXT: retq
%perm = tail call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32, i8 33, i8 34, i8 35, i8 36, i8 37, i8 38, i8 39, i8 40, i8 41, i8 42, i8 43, i8 44, i8 45, i8 46, i8 47, i8 48, i8 49, i8 50, i8 51, i8 52, i8 53, i8 54, i8 55, i8 56, i8 57, i8 58, i8 59, i8 60, i8 61, i8 62, i8 63>, <64 x i8> %a0, <64 x i8> <i8 64, i8 65, i8 66, i8 67, i8 68, i8 69, i8 70, i8 71, i8 72, i8 73, i8 74, i8 75, i8 76, i8 77, i8 78, i8 79, i8 80, i8 81, i8 82, i8 83, i8 84, i8 85, i8 86, i8 87, i8 88, i8 89, i8 90, i8 91, i8 92, i8 93, i8 94, i8 95, i8 96, i8 97, i8 98, i8 99, i8 100, i8 101, i8 102, i8 103, i8 104, i8 105, i8 106, i8 107, i8 108, i8 109, i8 110, i8 111, i8 112, i8 113, i8 114, i8 115, i8 116, i8 117, i8 118, i8 119, i8 120, i8 121, i8 122, i8 123, i8 124, i8 125, i8 126, i8 127>)
%cmp = icmp slt <64 x i8> %a0, zeroinitializer
More information about the llvm-commits
mailing list