[llvm] [X86] Try to scale i8/i16 VPERMV3 to i32 (PR #97212)
Phoebe Wang via llvm-commits
llvm-commits at lists.llvm.org
Sun Jun 30 05:28:59 PDT 2024
https://github.com/phoebewang created https://github.com/llvm/llvm-project/pull/97212
This is a follow-up to #96414.
From 296394854180d89ce97afe5aff20342d2856a0f7 Mon Sep 17 00:00:00 2001
From: Phoebe Wang <phoebe.wang at intel.com>
Date: Sun, 30 Jun 2024 20:26:01 +0800
Subject: [PATCH] [X86] Try to scale i8/i16 VPERMV3 to i32
This is a follow up of #96414
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 12 ++++++++++++
.../X86/vector-shuffle-combining-avx512vbmi.ll | 12 ++++++------
2 files changed, 18 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1d4af62c3227d..89595e341d1ce 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -41358,6 +41358,18 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
DAG.getBitcast(NVT, V1.getOperand(0))),
DAG.getIntPtrConstant(0, DL));
}
+ SmallVector<int, 16> Mask, WidenedMask;
+ SmallVector<SDValue, 2> Ops;
+ if ((EVT == MVT::i8 || EVT == MVT::i16) &&
+ getTargetShuffleMask(N, false, Ops, Mask) &&
+ scaleShuffleElements(Mask, VT.getFixedSizeInBits() / 32, WidenedMask)) {
+ NVT = MVT::getVectorVT(MVT::i32, VT.getFixedSizeInBits() / 32);
+ SDValue V1 = DAG.getBitcast(NVT, N.getOperand(0));
+ SDValue V2 = DAG.getBitcast(NVT, N.getOperand(2));
+ SDValue Mask = getConstVector(WidenedMask, NVT, DAG, DL, true);
+ return DAG.getBitcast(
+ VT, DAG.getNode(X86ISD::VPERMV3, DL, NVT, V1, Mask, V2));
+ }
return SDValue();
}
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll
index 9b32005927ace..3596d8c0fd031 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll
@@ -129,17 +129,17 @@ define <64 x i8> @combine_permi2q_pshufb_as_permi2d_mask(<8 x i64> %a0, <8 x i64
; X86-LABEL: combine_permi2q_pshufb_as_permi2d_mask:
; X86: # %bb.0:
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1
-; X86-NEXT: vmovdqa64 {{.*#+}} zmm2 = [56,57,58,59,56,57,58,59,56,57,58,59,56,57,58,59,44,45,46,47,44,45,46,47,44,45,46,47,44,45,46,47,96,97,98,99,96,97,98,99,96,97,98,99,96,97,98,99,116,117,118,119,116,117,118,119,116,117,118,119,116,117,118,119]
-; X86-NEXT: vpermi2b %zmm0, %zmm1, %zmm2 {%k1} {z}
-; X86-NEXT: vmovdqa64 %zmm2, %zmm0
+; X86-NEXT: vpmovsxbd {{.*#+}} zmm2 = [14,14,14,14,11,11,11,11,24,24,24,24,29,29,29,29]
+; X86-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; X86-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: combine_permi2q_pshufb_as_permi2d_mask:
; X64: # %bb.0:
-; X64-NEXT: vmovdqa64 {{.*#+}} zmm2 = [56,57,58,59,56,57,58,59,56,57,58,59,56,57,58,59,44,45,46,47,44,45,46,47,44,45,46,47,44,45,46,47,96,97,98,99,96,97,98,99,96,97,98,99,96,97,98,99,116,117,118,119,116,117,118,119,116,117,118,119,116,117,118,119]
+; X64-NEXT: vpmovsxbd {{.*#+}} zmm2 = [14,14,14,14,11,11,11,11,24,24,24,24,29,29,29,29]
+; X64-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; X64-NEXT: kmovq %rdi, %k1
-; X64-NEXT: vpermi2b %zmm0, %zmm1, %zmm2 {%k1} {z}
-; X64-NEXT: vmovdqa64 %zmm2, %zmm0
+; X64-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1} {z}
; X64-NEXT: retq
%res0 = shufflevector <8 x i64> %a0, <8 x i64> %a1, <8 x i32> <i32 15, i32 0, i32 13, i32 2, i32 11, i32 4, i32 9, i32 6>
%res1 = bitcast <8 x i64> %res0 to <64 x i8>
More information about the llvm-commits mailing list