[llvm] [X86] Try to scale i8/i16 VPERMV3 to i32 (PR #97212)

Phoebe Wang via llvm-commits llvm-commits at lists.llvm.org
Sun Jun 30 05:28:59 PDT 2024


https://github.com/phoebewang created https://github.com/llvm/llvm-project/pull/97212

This is a follow-up to #96414.

From 296394854180d89ce97afe5aff20342d2856a0f7 Mon Sep 17 00:00:00 2001
From: Phoebe Wang <phoebe.wang at intel.com>
Date: Sun, 30 Jun 2024 20:26:01 +0800
Subject: [PATCH] [X86] Try to scale i8/i16 VPERMV3 to i32

This is a follow-up to #96414.
---
 llvm/lib/Target/X86/X86ISelLowering.cpp              | 12 ++++++++++++
 .../X86/vector-shuffle-combining-avx512vbmi.ll       | 12 ++++++------
 2 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1d4af62c3227d..89595e341d1ce 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -41358,6 +41358,18 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
                       DAG.getBitcast(NVT, V1.getOperand(0))),
           DAG.getIntPtrConstant(0, DL));
     }
+    SmallVector<int, 16> Mask, WidenedMask;
+    SmallVector<SDValue, 2> Ops;
+    if ((EVT == MVT::i8 || EVT == MVT::i16) &&
+        getTargetShuffleMask(N, false, Ops, Mask) &&
+        scaleShuffleElements(Mask, VT.getFixedSizeInBits() / 32, WidenedMask)) {
+      NVT = MVT::getVectorVT(MVT::i32, VT.getFixedSizeInBits() / 32);
+      SDValue V1 = DAG.getBitcast(NVT, N.getOperand(0));
+      SDValue V2 = DAG.getBitcast(NVT, N.getOperand(2));
+      SDValue Mask = getConstVector(WidenedMask, NVT, DAG, DL, true);
+      return DAG.getBitcast(
+          VT, DAG.getNode(X86ISD::VPERMV3, DL, NVT, V1, Mask, V2));
+    }
 
     return SDValue();
   }
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll
index 9b32005927ace..3596d8c0fd031 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll
@@ -129,17 +129,17 @@ define <64 x i8> @combine_permi2q_pshufb_as_permi2d_mask(<8 x i64> %a0, <8 x i64
 ; X86-LABEL: combine_permi2q_pshufb_as_permi2d_mask:
 ; X86:       # %bb.0:
 ; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1
-; X86-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [56,57,58,59,56,57,58,59,56,57,58,59,56,57,58,59,44,45,46,47,44,45,46,47,44,45,46,47,44,45,46,47,96,97,98,99,96,97,98,99,96,97,98,99,96,97,98,99,116,117,118,119,116,117,118,119,116,117,118,119,116,117,118,119]
-; X86-NEXT:    vpermi2b %zmm0, %zmm1, %zmm2 {%k1} {z}
-; X86-NEXT:    vmovdqa64 %zmm2, %zmm0
+; X86-NEXT:    vpmovsxbd {{.*#+}} zmm2 = [14,14,14,14,11,11,11,11,24,24,24,24,29,29,29,29]
+; X86-NEXT:    vpermi2d %zmm0, %zmm1, %zmm2
+; X86-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: combine_permi2q_pshufb_as_permi2d_mask:
 ; X64:       # %bb.0:
-; X64-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [56,57,58,59,56,57,58,59,56,57,58,59,56,57,58,59,44,45,46,47,44,45,46,47,44,45,46,47,44,45,46,47,96,97,98,99,96,97,98,99,96,97,98,99,96,97,98,99,116,117,118,119,116,117,118,119,116,117,118,119,116,117,118,119]
+; X64-NEXT:    vpmovsxbd {{.*#+}} zmm2 = [14,14,14,14,11,11,11,11,24,24,24,24,29,29,29,29]
+; X64-NEXT:    vpermi2d %zmm0, %zmm1, %zmm2
 ; X64-NEXT:    kmovq %rdi, %k1
-; X64-NEXT:    vpermi2b %zmm0, %zmm1, %zmm2 {%k1} {z}
-; X64-NEXT:    vmovdqa64 %zmm2, %zmm0
+; X64-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1} {z}
 ; X64-NEXT:    retq
   %res0 = shufflevector <8 x i64> %a0, <8 x i64> %a1, <8 x i32> <i32 15, i32 0, i32 13, i32 2, i32 11, i32 4, i32 9, i32 6>
   %res1 = bitcast <8 x i64> %res0 to <64 x i8>



More information about the llvm-commits mailing list