[llvm] d57c046 - [InstCombine][X86] Only demand used bits for VPERMILPD/VPERMILPS mask values

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 29 04:53:51 PDT 2024


Author: Simon Pilgrim
Date: 2024-08-29T12:53:36+01:00
New Revision: d57c04647e6f0a6f0cd79e280c257f570e8f30f4

URL: https://github.com/llvm/llvm-project/commit/d57c04647e6f0a6f0cd79e280c257f570e8f30f4
DIFF: https://github.com/llvm/llvm-project/commit/d57c04647e6f0a6f0cd79e280c257f570e8f30f4.diff

LOG: [InstCombine][X86] Only demand used bits for VPERMILPD/VPERMILPS mask values

VPERMILPS uses bits 0-1 of each mask element (to index per-lane i32/f32 elements 0-3)
VPERMILPD uses bit 1 of each mask element (to index per-lane i64/f64 elements 0-1)

Use SimplifyDemandedBits to fold away anything that only touches the remaining (undemanded) bits.
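As a minimal illustration (adapted from the updated bits_test_vpermilvar_ps test below), an 'or' that only sets the ignored high mask bits now folds away:

  ; before: the or'd constants (0, 12, -4) never touch bits 0-1 of any mask element
  %m = or <4 x i32> %InMask, <i32 0, i32 12, i32 -4, i32 -4>
  %s = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %InVec, <4 x i32> %m)

  ; after: only bits 0-1 are demanded, so the mask simplifies back to %InMask
  %s = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %InVec, <4 x i32> %InMask)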

Part of #106413

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
    llvm/test/Transforms/InstCombine/X86/x86-vpermil-inseltpoison.ll
    llvm/test/Transforms/InstCombine/X86/x86-vpermil.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
index 7bd7f4bc241780..9cc5ed5d89ad70 100644
--- a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
@@ -2963,14 +2963,29 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
 
   case Intrinsic::x86_avx_vpermilvar_ps:
   case Intrinsic::x86_avx_vpermilvar_ps_256:
-  case Intrinsic::x86_avx512_vpermilvar_ps_512:
+  case Intrinsic::x86_avx512_vpermilvar_ps_512: {
+    if (Value *V = simplifyX86vpermilvar(II, IC.Builder)) {
+      return IC.replaceInstUsesWith(II, V);
+    }
+
+    KnownBits KnownMask(32);
+    if (IC.SimplifyDemandedBits(&II, 1, APInt(32, 0b00011), KnownMask))
+      return &II;
+    break;
+  }
+
   case Intrinsic::x86_avx_vpermilvar_pd:
   case Intrinsic::x86_avx_vpermilvar_pd_256:
-  case Intrinsic::x86_avx512_vpermilvar_pd_512:
+  case Intrinsic::x86_avx512_vpermilvar_pd_512: {
     if (Value *V = simplifyX86vpermilvar(II, IC.Builder)) {
       return IC.replaceInstUsesWith(II, V);
     }
+
+    KnownBits KnownMask(64);
+    if (IC.SimplifyDemandedBits(&II, 1, APInt(64, 0b00010), KnownMask))
+      return &II;
     break;
+  }
 
   case Intrinsic::x86_avx2_permd:
   case Intrinsic::x86_avx2_permps:

diff --git a/llvm/test/Transforms/InstCombine/X86/x86-vpermil-inseltpoison.ll b/llvm/test/Transforms/InstCombine/X86/x86-vpermil-inseltpoison.ll
index 6164e3ac5f27f8..e8fe0b1c379e1a 100644
--- a/llvm/test/Transforms/InstCombine/X86/x86-vpermil-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-vpermil-inseltpoison.ll
@@ -225,8 +225,7 @@ define <8 x double> @poison_test_vpermilvar_pd_512(<8 x double> %v) {
 
 define <4 x float> @bits_test_vpermilvar_ps(<4 x float> %InVec, <4 x i32> %InMask) {
 ; CHECK-LABEL: @bits_test_vpermilvar_ps(
-; CHECK-NEXT:    [[M:%.*]] = or <4 x i32> [[INMASK:%.*]], <i32 0, i32 12, i32 -4, i32 -4>
-; CHECK-NEXT:    [[S:%.*]] = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[INVEC:%.*]], <4 x i32> [[M]])
+; CHECK-NEXT:    [[S:%.*]] = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[INVEC:%.*]], <4 x i32> [[INMASK:%.*]])
 ; CHECK-NEXT:    ret <4 x float> [[S]]
 ;
   %m = or <4 x i32> %InMask, <i32 0, i32 12, i32 4294967292, i32 -4>
@@ -236,8 +235,7 @@ define <4 x float> @bits_test_vpermilvar_ps(<4 x float> %InVec, <4 x i32> %InMas
 
 define <8 x float> @bits_test_vpermilvar_ps_256(<8 x float> %InVec, <8 x i32> %InMask) {
 ; CHECK-LABEL: @bits_test_vpermilvar_ps_256(
-; CHECK-NEXT:    [[M:%.*]] = or <8 x i32> [[INMASK:%.*]], <i32 0, i32 12, i32 -4, i32 -4, i32 0, i32 12, i32 -4, i32 -4>
-; CHECK-NEXT:    [[S:%.*]] = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> [[INVEC:%.*]], <8 x i32> [[M]])
+; CHECK-NEXT:    [[S:%.*]] = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> [[INVEC:%.*]], <8 x i32> [[INMASK:%.*]])
 ; CHECK-NEXT:    ret <8 x float> [[S]]
 ;
   %m = or <8 x i32> %InMask, <i32 0, i32 12, i32 4294967292, i32 -4, i32 0, i32 12, i32 4294967292, i32 -4>
@@ -247,8 +245,7 @@ define <8 x float> @bits_test_vpermilvar_ps_256(<8 x float> %InVec, <8 x i32> %I
 
 define <16 x float> @bits_test_vpermilvar_ps_512(<16 x float> %InVec, <16 x i32> %InMask) {
 ; CHECK-LABEL: @bits_test_vpermilvar_ps_512(
-; CHECK-NEXT:    [[M:%.*]] = or <16 x i32> [[INMASK:%.*]], <i32 0, i32 12, i32 -4, i32 -4, i32 0, i32 12, i32 -4, i32 -4, i32 0, i32 12, i32 -4, i32 -4, i32 0, i32 12, i32 -4, i32 -4>
-; CHECK-NEXT:    [[S:%.*]] = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[INVEC:%.*]], <16 x i32> [[M]])
+; CHECK-NEXT:    [[S:%.*]] = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[INVEC:%.*]], <16 x i32> [[INMASK:%.*]])
 ; CHECK-NEXT:    ret <16 x float> [[S]]
 ;
   %m = or <16 x i32> %InMask, <i32 0, i32 12, i32 4294967292, i32 -4, i32 0, i32 12, i32 4294967292, i32 -4, i32 0, i32 12, i32 4294967292, i32 -4, i32 0, i32 12, i32 4294967292, i32 -4>
@@ -258,8 +255,7 @@ define <16 x float> @bits_test_vpermilvar_ps_512(<16 x float> %InVec, <16 x i32>
 
 define <2 x double> @bits_test_vpermilvar_pd(<2 x double> %InVec, <2 x i64> %InMask) {
 ; CHECK-LABEL: @bits_test_vpermilvar_pd(
-; CHECK-NEXT:    [[M:%.*]] = or <2 x i64> [[INMASK:%.*]], <i64 0, i64 4294967293>
-; CHECK-NEXT:    [[S:%.*]] = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> [[INVEC:%.*]], <2 x i64> [[M]])
+; CHECK-NEXT:    [[S:%.*]] = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> [[INVEC:%.*]], <2 x i64> [[INMASK:%.*]])
 ; CHECK-NEXT:    ret <2 x double> [[S]]
 ;
   %m = or <2 x i64> %InMask, <i64 0, i64 4294967293>
@@ -269,8 +265,7 @@ define <2 x double> @bits_test_vpermilvar_pd(<2 x double> %InVec, <2 x i64> %InM
 
 define <4 x double> @bits_test_vpermilvar_pd_256(<4 x double> %InVec, <4 x i64> %InMask) {
 ; CHECK-LABEL: @bits_test_vpermilvar_pd_256(
-; CHECK-NEXT:    [[M:%.*]] = or <4 x i64> [[INMASK:%.*]], <i64 0, i64 1, i64 4294967293, i64 -3>
-; CHECK-NEXT:    [[S:%.*]] = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[INVEC:%.*]], <4 x i64> [[M]])
+; CHECK-NEXT:    [[S:%.*]] = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[INVEC:%.*]], <4 x i64> [[INMASK:%.*]])
 ; CHECK-NEXT:    ret <4 x double> [[S]]
 ;
   %m = or <4 x i64> %InMask, <i64 0, i64 1, i64 4294967293, i64 -3>
@@ -280,8 +275,7 @@ define <4 x double> @bits_test_vpermilvar_pd_256(<4 x double> %InVec, <4 x i64>
 
 define <8 x double> @bits_test_vpermilvar_pd_512(<8 x double> %InVec, <8 x i64> %InMask) {
 ; CHECK-LABEL: @bits_test_vpermilvar_pd_512(
-; CHECK-NEXT:    [[M:%.*]] = or <8 x i64> [[INMASK:%.*]], <i64 0, i64 1, i64 4294967293, i64 -3, i64 0, i64 1, i64 4294967293, i64 -3>
-; CHECK-NEXT:    [[S:%.*]] = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[INVEC:%.*]], <8 x i64> [[M]])
+; CHECK-NEXT:    [[S:%.*]] = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[INVEC:%.*]], <8 x i64> [[INMASK:%.*]])
 ; CHECK-NEXT:    ret <8 x double> [[S]]
 ;
   %m = or <8 x i64> %InMask, <i64 0, i64 1, i64 4294967293, i64 -3, i64 0, i64 1, i64 4294967293, i64 -3>

diff --git a/llvm/test/Transforms/InstCombine/X86/x86-vpermil.ll b/llvm/test/Transforms/InstCombine/X86/x86-vpermil.ll
index 5fbfd2e3164ef2..7907fd84f3ffe9 100644
--- a/llvm/test/Transforms/InstCombine/X86/x86-vpermil.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-vpermil.ll
@@ -225,8 +225,7 @@ define <8 x double> @undef_test_vpermilvar_pd_512(<8 x double> %v) {
 
 define <4 x float> @bits_test_vpermilvar_ps(<4 x float> %InVec, <4 x i32> %InMask) {
 ; CHECK-LABEL: @bits_test_vpermilvar_ps(
-; CHECK-NEXT:    [[M:%.*]] = or <4 x i32> [[INMASK:%.*]], <i32 0, i32 12, i32 -4, i32 -4>
-; CHECK-NEXT:    [[S:%.*]] = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[INVEC:%.*]], <4 x i32> [[M]])
+; CHECK-NEXT:    [[S:%.*]] = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[INVEC:%.*]], <4 x i32> [[INMASK:%.*]])
 ; CHECK-NEXT:    ret <4 x float> [[S]]
 ;
   %m = or <4 x i32> %InMask, <i32 0, i32 12, i32 4294967292, i32 -4>
@@ -236,8 +235,7 @@ define <4 x float> @bits_test_vpermilvar_ps(<4 x float> %InVec, <4 x i32> %InMas
 
 define <8 x float> @bits_test_vpermilvar_ps_256(<8 x float> %InVec, <8 x i32> %InMask) {
 ; CHECK-LABEL: @bits_test_vpermilvar_ps_256(
-; CHECK-NEXT:    [[M:%.*]] = or <8 x i32> [[INMASK:%.*]], <i32 0, i32 12, i32 -4, i32 -4, i32 0, i32 12, i32 -4, i32 -4>
-; CHECK-NEXT:    [[S:%.*]] = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> [[INVEC:%.*]], <8 x i32> [[M]])
+; CHECK-NEXT:    [[S:%.*]] = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> [[INVEC:%.*]], <8 x i32> [[INMASK:%.*]])
 ; CHECK-NEXT:    ret <8 x float> [[S]]
 ;
   %m = or <8 x i32> %InMask, <i32 0, i32 12, i32 4294967292, i32 -4, i32 0, i32 12, i32 4294967292, i32 -4>
@@ -247,8 +245,7 @@ define <8 x float> @bits_test_vpermilvar_ps_256(<8 x float> %InVec, <8 x i32> %I
 
 define <16 x float> @bits_test_vpermilvar_ps_512(<16 x float> %InVec, <16 x i32> %InMask) {
 ; CHECK-LABEL: @bits_test_vpermilvar_ps_512(
-; CHECK-NEXT:    [[M:%.*]] = or <16 x i32> [[INMASK:%.*]], <i32 0, i32 12, i32 -4, i32 -4, i32 0, i32 12, i32 -4, i32 -4, i32 0, i32 12, i32 -4, i32 -4, i32 0, i32 12, i32 -4, i32 -4>
-; CHECK-NEXT:    [[S:%.*]] = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[INVEC:%.*]], <16 x i32> [[M]])
+; CHECK-NEXT:    [[S:%.*]] = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[INVEC:%.*]], <16 x i32> [[INMASK:%.*]])
 ; CHECK-NEXT:    ret <16 x float> [[S]]
 ;
   %m = or <16 x i32> %InMask, <i32 0, i32 12, i32 4294967292, i32 -4, i32 0, i32 12, i32 4294967292, i32 -4, i32 0, i32 12, i32 4294967292, i32 -4, i32 0, i32 12, i32 4294967292, i32 -4>
@@ -258,8 +255,7 @@ define <16 x float> @bits_test_vpermilvar_ps_512(<16 x float> %InVec, <16 x i32>
 
 define <2 x double> @bits_test_vpermilvar_pd(<2 x double> %InVec, <2 x i64> %InMask) {
 ; CHECK-LABEL: @bits_test_vpermilvar_pd(
-; CHECK-NEXT:    [[M:%.*]] = or <2 x i64> [[INMASK:%.*]], <i64 0, i64 4294967293>
-; CHECK-NEXT:    [[S:%.*]] = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> [[INVEC:%.*]], <2 x i64> [[M]])
+; CHECK-NEXT:    [[S:%.*]] = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> [[INVEC:%.*]], <2 x i64> [[INMASK:%.*]])
 ; CHECK-NEXT:    ret <2 x double> [[S]]
 ;
   %m = or <2 x i64> %InMask, <i64 0, i64 4294967293>
@@ -269,8 +265,7 @@ define <2 x double> @bits_test_vpermilvar_pd(<2 x double> %InVec, <2 x i64> %InM
 
 define <4 x double> @bits_test_vpermilvar_pd_256(<4 x double> %InVec, <4 x i64> %InMask) {
 ; CHECK-LABEL: @bits_test_vpermilvar_pd_256(
-; CHECK-NEXT:    [[M:%.*]] = or <4 x i64> [[INMASK:%.*]], <i64 0, i64 1, i64 4294967293, i64 -3>
-; CHECK-NEXT:    [[S:%.*]] = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[INVEC:%.*]], <4 x i64> [[M]])
+; CHECK-NEXT:    [[S:%.*]] = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[INVEC:%.*]], <4 x i64> [[INMASK:%.*]])
 ; CHECK-NEXT:    ret <4 x double> [[S]]
 ;
   %m = or <4 x i64> %InMask, <i64 0, i64 1, i64 4294967293, i64 -3>
@@ -280,8 +275,7 @@ define <4 x double> @bits_test_vpermilvar_pd_256(<4 x double> %InVec, <4 x i64>
 
 define <8 x double> @bits_test_vpermilvar_pd_512(<8 x double> %InVec, <8 x i64> %InMask) {
 ; CHECK-LABEL: @bits_test_vpermilvar_pd_512(
-; CHECK-NEXT:    [[M:%.*]] = or <8 x i64> [[INMASK:%.*]], <i64 0, i64 1, i64 4294967293, i64 -3, i64 0, i64 1, i64 4294967293, i64 -3>
-; CHECK-NEXT:    [[S:%.*]] = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[INVEC:%.*]], <8 x i64> [[M]])
+; CHECK-NEXT:    [[S:%.*]] = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[INVEC:%.*]], <8 x i64> [[INMASK:%.*]])
 ; CHECK-NEXT:    ret <8 x double> [[S]]
 ;
   %m = or <8 x i64> %InMask, <i64 0, i64 1, i64 4294967293, i64 -3, i64 0, i64 1, i64 4294967293, i64 -3>


        

