[llvm] [X86] Try to scale i8/i16 VPERMV3 to i32 (PR #97212)

via llvm-commits llvm-commits at lists.llvm.org
Sun Jun 30 05:29:30 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-x86

Author: Phoebe Wang (phoebewang)

<details>
<summary>Changes</summary>

This is a follow-up to #<!-- -->96414.

---
Full diff: https://github.com/llvm/llvm-project/pull/97212.diff


2 Files Affected:

- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+12) 
- (modified) llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll (+6-6) 


``````````diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1d4af62c3227d..89595e341d1ce 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -41358,6 +41358,18 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
                       DAG.getBitcast(NVT, V1.getOperand(0))),
           DAG.getIntPtrConstant(0, DL));
     }
+    SmallVector<int, 16> Mask, WidenedMask;
+    SmallVector<SDValue, 2> Ops;
+    if ((EVT == MVT::i8 || EVT == MVT::i16) &&
+        getTargetShuffleMask(N, false, Ops, Mask) &&
+        scaleShuffleElements(Mask, VT.getFixedSizeInBits() / 32, WidenedMask)) {
+      NVT = MVT::getVectorVT(MVT::i32, VT.getFixedSizeInBits() / 32);
+      SDValue V1 = DAG.getBitcast(NVT, N.getOperand(0));
+      SDValue V2 = DAG.getBitcast(NVT, N.getOperand(2));
+      SDValue Mask = getConstVector(WidenedMask, NVT, DAG, DL, true);
+      return DAG.getBitcast(
+          VT, DAG.getNode(X86ISD::VPERMV3, DL, NVT, V1, Mask, V2));
+    }
 
     return SDValue();
   }
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll
index 9b32005927ace..3596d8c0fd031 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll
@@ -129,17 +129,17 @@ define <64 x i8> @combine_permi2q_pshufb_as_permi2d_mask(<8 x i64> %a0, <8 x i64
 ; X86-LABEL: combine_permi2q_pshufb_as_permi2d_mask:
 ; X86:       # %bb.0:
 ; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1
-; X86-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [56,57,58,59,56,57,58,59,56,57,58,59,56,57,58,59,44,45,46,47,44,45,46,47,44,45,46,47,44,45,46,47,96,97,98,99,96,97,98,99,96,97,98,99,96,97,98,99,116,117,118,119,116,117,118,119,116,117,118,119,116,117,118,119]
-; X86-NEXT:    vpermi2b %zmm0, %zmm1, %zmm2 {%k1} {z}
-; X86-NEXT:    vmovdqa64 %zmm2, %zmm0
+; X86-NEXT:    vpmovsxbd {{.*#+}} zmm2 = [14,14,14,14,11,11,11,11,24,24,24,24,29,29,29,29]
+; X86-NEXT:    vpermi2d %zmm0, %zmm1, %zmm2
+; X86-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: combine_permi2q_pshufb_as_permi2d_mask:
 ; X64:       # %bb.0:
-; X64-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [56,57,58,59,56,57,58,59,56,57,58,59,56,57,58,59,44,45,46,47,44,45,46,47,44,45,46,47,44,45,46,47,96,97,98,99,96,97,98,99,96,97,98,99,96,97,98,99,116,117,118,119,116,117,118,119,116,117,118,119,116,117,118,119]
+; X64-NEXT:    vpmovsxbd {{.*#+}} zmm2 = [14,14,14,14,11,11,11,11,24,24,24,24,29,29,29,29]
+; X64-NEXT:    vpermi2d %zmm0, %zmm1, %zmm2
 ; X64-NEXT:    kmovq %rdi, %k1
-; X64-NEXT:    vpermi2b %zmm0, %zmm1, %zmm2 {%k1} {z}
-; X64-NEXT:    vmovdqa64 %zmm2, %zmm0
+; X64-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1} {z}
 ; X64-NEXT:    retq
   %res0 = shufflevector <8 x i64> %a0, <8 x i64> %a1, <8 x i32> <i32 15, i32 0, i32 13, i32 2, i32 11, i32 4, i32 9, i32 6>
   %res1 = bitcast <8 x i64> %res0 to <64 x i8>

``````````

</details>


https://github.com/llvm/llvm-project/pull/97212


More information about the llvm-commits mailing list