[llvm] d57c046 - [InstCombine][X86] Only demand used bits for VPERMILPD/VPERMILPS mask values
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 29 04:53:51 PDT 2024
Author: Simon Pilgrim
Date: 2024-08-29T12:53:36+01:00
New Revision: d57c04647e6f0a6f0cd79e280c257f570e8f30f4
URL: https://github.com/llvm/llvm-project/commit/d57c04647e6f0a6f0cd79e280c257f570e8f30f4
DIFF: https://github.com/llvm/llvm-project/commit/d57c04647e6f0a6f0cd79e280c257f570e8f30f4.diff
LOG: [InstCombine][X86] Only demand used bits for VPERMILPD/VPERMILPS mask values
VPERMILPS uses bits0-1 (to index per-lane i32/f32 0-3)
VPERMILPD uses bit1 (to index per-lane i64/f64 0-1)
Use SimplifyDemandedBits to ignore anything touching the remaining bits.
Part of #106413
Added:
Modified:
llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
llvm/test/Transforms/InstCombine/X86/x86-vpermil-inseltpoison.ll
llvm/test/Transforms/InstCombine/X86/x86-vpermil.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
index 7bd7f4bc241780..9cc5ed5d89ad70 100644
--- a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
@@ -2963,14 +2963,29 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
case Intrinsic::x86_avx_vpermilvar_ps:
case Intrinsic::x86_avx_vpermilvar_ps_256:
- case Intrinsic::x86_avx512_vpermilvar_ps_512:
+ case Intrinsic::x86_avx512_vpermilvar_ps_512: {
+ if (Value *V = simplifyX86vpermilvar(II, IC.Builder)) {
+ return IC.replaceInstUsesWith(II, V);
+ }
+
+ KnownBits KnownMask(32);
+ if (IC.SimplifyDemandedBits(&II, 1, APInt(32, 0b00011), KnownMask))
+ return &II;
+ break;
+ }
+
case Intrinsic::x86_avx_vpermilvar_pd:
case Intrinsic::x86_avx_vpermilvar_pd_256:
- case Intrinsic::x86_avx512_vpermilvar_pd_512:
+ case Intrinsic::x86_avx512_vpermilvar_pd_512: {
if (Value *V = simplifyX86vpermilvar(II, IC.Builder)) {
return IC.replaceInstUsesWith(II, V);
}
+
+ KnownBits KnownMask(64);
+ if (IC.SimplifyDemandedBits(&II, 1, APInt(64, 0b00010), KnownMask))
+ return &II;
break;
+ }
case Intrinsic::x86_avx2_permd:
case Intrinsic::x86_avx2_permps:
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-vpermil-inseltpoison.ll b/llvm/test/Transforms/InstCombine/X86/x86-vpermil-inseltpoison.ll
index 6164e3ac5f27f8..e8fe0b1c379e1a 100644
--- a/llvm/test/Transforms/InstCombine/X86/x86-vpermil-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-vpermil-inseltpoison.ll
@@ -225,8 +225,7 @@ define <8 x double> @poison_test_vpermilvar_pd_512(<8 x double> %v) {
define <4 x float> @bits_test_vpermilvar_ps(<4 x float> %InVec, <4 x i32> %InMask) {
; CHECK-LABEL: @bits_test_vpermilvar_ps(
-; CHECK-NEXT: [[M:%.*]] = or <4 x i32> [[INMASK:%.*]], <i32 0, i32 12, i32 -4, i32 -4>
-; CHECK-NEXT: [[S:%.*]] = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[INVEC:%.*]], <4 x i32> [[M]])
+; CHECK-NEXT: [[S:%.*]] = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[INVEC:%.*]], <4 x i32> [[INMASK:%.*]])
; CHECK-NEXT: ret <4 x float> [[S]]
;
%m = or <4 x i32> %InMask, <i32 0, i32 12, i32 4294967292, i32 -4>
@@ -236,8 +235,7 @@ define <4 x float> @bits_test_vpermilvar_ps(<4 x float> %InVec, <4 x i32> %InMas
define <8 x float> @bits_test_vpermilvar_ps_256(<8 x float> %InVec, <8 x i32> %InMask) {
; CHECK-LABEL: @bits_test_vpermilvar_ps_256(
-; CHECK-NEXT: [[M:%.*]] = or <8 x i32> [[INMASK:%.*]], <i32 0, i32 12, i32 -4, i32 -4, i32 0, i32 12, i32 -4, i32 -4>
-; CHECK-NEXT: [[S:%.*]] = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> [[INVEC:%.*]], <8 x i32> [[M]])
+; CHECK-NEXT: [[S:%.*]] = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> [[INVEC:%.*]], <8 x i32> [[INMASK:%.*]])
; CHECK-NEXT: ret <8 x float> [[S]]
;
%m = or <8 x i32> %InMask, <i32 0, i32 12, i32 4294967292, i32 -4, i32 0, i32 12, i32 4294967292, i32 -4>
@@ -247,8 +245,7 @@ define <8 x float> @bits_test_vpermilvar_ps_256(<8 x float> %InVec, <8 x i32> %I
define <16 x float> @bits_test_vpermilvar_ps_512(<16 x float> %InVec, <16 x i32> %InMask) {
; CHECK-LABEL: @bits_test_vpermilvar_ps_512(
-; CHECK-NEXT: [[M:%.*]] = or <16 x i32> [[INMASK:%.*]], <i32 0, i32 12, i32 -4, i32 -4, i32 0, i32 12, i32 -4, i32 -4, i32 0, i32 12, i32 -4, i32 -4, i32 0, i32 12, i32 -4, i32 -4>
-; CHECK-NEXT: [[S:%.*]] = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[INVEC:%.*]], <16 x i32> [[M]])
+; CHECK-NEXT: [[S:%.*]] = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[INVEC:%.*]], <16 x i32> [[INMASK:%.*]])
; CHECK-NEXT: ret <16 x float> [[S]]
;
%m = or <16 x i32> %InMask, <i32 0, i32 12, i32 4294967292, i32 -4, i32 0, i32 12, i32 4294967292, i32 -4, i32 0, i32 12, i32 4294967292, i32 -4, i32 0, i32 12, i32 4294967292, i32 -4>
@@ -258,8 +255,7 @@ define <16 x float> @bits_test_vpermilvar_ps_512(<16 x float> %InVec, <16 x i32>
define <2 x double> @bits_test_vpermilvar_pd(<2 x double> %InVec, <2 x i64> %InMask) {
; CHECK-LABEL: @bits_test_vpermilvar_pd(
-; CHECK-NEXT: [[M:%.*]] = or <2 x i64> [[INMASK:%.*]], <i64 0, i64 4294967293>
-; CHECK-NEXT: [[S:%.*]] = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> [[INVEC:%.*]], <2 x i64> [[M]])
+; CHECK-NEXT: [[S:%.*]] = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> [[INVEC:%.*]], <2 x i64> [[INMASK:%.*]])
; CHECK-NEXT: ret <2 x double> [[S]]
;
%m = or <2 x i64> %InMask, <i64 0, i64 4294967293>
@@ -269,8 +265,7 @@ define <2 x double> @bits_test_vpermilvar_pd(<2 x double> %InVec, <2 x i64> %InM
define <4 x double> @bits_test_vpermilvar_pd_256(<4 x double> %InVec, <4 x i64> %InMask) {
; CHECK-LABEL: @bits_test_vpermilvar_pd_256(
-; CHECK-NEXT: [[M:%.*]] = or <4 x i64> [[INMASK:%.*]], <i64 0, i64 1, i64 4294967293, i64 -3>
-; CHECK-NEXT: [[S:%.*]] = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[INVEC:%.*]], <4 x i64> [[M]])
+; CHECK-NEXT: [[S:%.*]] = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[INVEC:%.*]], <4 x i64> [[INMASK:%.*]])
; CHECK-NEXT: ret <4 x double> [[S]]
;
%m = or <4 x i64> %InMask, <i64 0, i64 1, i64 4294967293, i64 -3>
@@ -280,8 +275,7 @@ define <4 x double> @bits_test_vpermilvar_pd_256(<4 x double> %InVec, <4 x i64>
define <8 x double> @bits_test_vpermilvar_pd_512(<8 x double> %InVec, <8 x i64> %InMask) {
; CHECK-LABEL: @bits_test_vpermilvar_pd_512(
-; CHECK-NEXT: [[M:%.*]] = or <8 x i64> [[INMASK:%.*]], <i64 0, i64 1, i64 4294967293, i64 -3, i64 0, i64 1, i64 4294967293, i64 -3>
-; CHECK-NEXT: [[S:%.*]] = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[INVEC:%.*]], <8 x i64> [[M]])
+; CHECK-NEXT: [[S:%.*]] = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[INVEC:%.*]], <8 x i64> [[INMASK:%.*]])
; CHECK-NEXT: ret <8 x double> [[S]]
;
%m = or <8 x i64> %InMask, <i64 0, i64 1, i64 4294967293, i64 -3, i64 0, i64 1, i64 4294967293, i64 -3>
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-vpermil.ll b/llvm/test/Transforms/InstCombine/X86/x86-vpermil.ll
index 5fbfd2e3164ef2..7907fd84f3ffe9 100644
--- a/llvm/test/Transforms/InstCombine/X86/x86-vpermil.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-vpermil.ll
@@ -225,8 +225,7 @@ define <8 x double> @undef_test_vpermilvar_pd_512(<8 x double> %v) {
define <4 x float> @bits_test_vpermilvar_ps(<4 x float> %InVec, <4 x i32> %InMask) {
; CHECK-LABEL: @bits_test_vpermilvar_ps(
-; CHECK-NEXT: [[M:%.*]] = or <4 x i32> [[INMASK:%.*]], <i32 0, i32 12, i32 -4, i32 -4>
-; CHECK-NEXT: [[S:%.*]] = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[INVEC:%.*]], <4 x i32> [[M]])
+; CHECK-NEXT: [[S:%.*]] = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[INVEC:%.*]], <4 x i32> [[INMASK:%.*]])
; CHECK-NEXT: ret <4 x float> [[S]]
;
%m = or <4 x i32> %InMask, <i32 0, i32 12, i32 4294967292, i32 -4>
@@ -236,8 +235,7 @@ define <4 x float> @bits_test_vpermilvar_ps(<4 x float> %InVec, <4 x i32> %InMas
define <8 x float> @bits_test_vpermilvar_ps_256(<8 x float> %InVec, <8 x i32> %InMask) {
; CHECK-LABEL: @bits_test_vpermilvar_ps_256(
-; CHECK-NEXT: [[M:%.*]] = or <8 x i32> [[INMASK:%.*]], <i32 0, i32 12, i32 -4, i32 -4, i32 0, i32 12, i32 -4, i32 -4>
-; CHECK-NEXT: [[S:%.*]] = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> [[INVEC:%.*]], <8 x i32> [[M]])
+; CHECK-NEXT: [[S:%.*]] = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> [[INVEC:%.*]], <8 x i32> [[INMASK:%.*]])
; CHECK-NEXT: ret <8 x float> [[S]]
;
%m = or <8 x i32> %InMask, <i32 0, i32 12, i32 4294967292, i32 -4, i32 0, i32 12, i32 4294967292, i32 -4>
@@ -247,8 +245,7 @@ define <8 x float> @bits_test_vpermilvar_ps_256(<8 x float> %InVec, <8 x i32> %I
define <16 x float> @bits_test_vpermilvar_ps_512(<16 x float> %InVec, <16 x i32> %InMask) {
; CHECK-LABEL: @bits_test_vpermilvar_ps_512(
-; CHECK-NEXT: [[M:%.*]] = or <16 x i32> [[INMASK:%.*]], <i32 0, i32 12, i32 -4, i32 -4, i32 0, i32 12, i32 -4, i32 -4, i32 0, i32 12, i32 -4, i32 -4, i32 0, i32 12, i32 -4, i32 -4>
-; CHECK-NEXT: [[S:%.*]] = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[INVEC:%.*]], <16 x i32> [[M]])
+; CHECK-NEXT: [[S:%.*]] = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[INVEC:%.*]], <16 x i32> [[INMASK:%.*]])
; CHECK-NEXT: ret <16 x float> [[S]]
;
%m = or <16 x i32> %InMask, <i32 0, i32 12, i32 4294967292, i32 -4, i32 0, i32 12, i32 4294967292, i32 -4, i32 0, i32 12, i32 4294967292, i32 -4, i32 0, i32 12, i32 4294967292, i32 -4>
@@ -258,8 +255,7 @@ define <16 x float> @bits_test_vpermilvar_ps_512(<16 x float> %InVec, <16 x i32>
define <2 x double> @bits_test_vpermilvar_pd(<2 x double> %InVec, <2 x i64> %InMask) {
; CHECK-LABEL: @bits_test_vpermilvar_pd(
-; CHECK-NEXT: [[M:%.*]] = or <2 x i64> [[INMASK:%.*]], <i64 0, i64 4294967293>
-; CHECK-NEXT: [[S:%.*]] = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> [[INVEC:%.*]], <2 x i64> [[M]])
+; CHECK-NEXT: [[S:%.*]] = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> [[INVEC:%.*]], <2 x i64> [[INMASK:%.*]])
; CHECK-NEXT: ret <2 x double> [[S]]
;
%m = or <2 x i64> %InMask, <i64 0, i64 4294967293>
@@ -269,8 +265,7 @@ define <2 x double> @bits_test_vpermilvar_pd(<2 x double> %InVec, <2 x i64> %InM
define <4 x double> @bits_test_vpermilvar_pd_256(<4 x double> %InVec, <4 x i64> %InMask) {
; CHECK-LABEL: @bits_test_vpermilvar_pd_256(
-; CHECK-NEXT: [[M:%.*]] = or <4 x i64> [[INMASK:%.*]], <i64 0, i64 1, i64 4294967293, i64 -3>
-; CHECK-NEXT: [[S:%.*]] = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[INVEC:%.*]], <4 x i64> [[M]])
+; CHECK-NEXT: [[S:%.*]] = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[INVEC:%.*]], <4 x i64> [[INMASK:%.*]])
; CHECK-NEXT: ret <4 x double> [[S]]
;
%m = or <4 x i64> %InMask, <i64 0, i64 1, i64 4294967293, i64 -3>
@@ -280,8 +275,7 @@ define <4 x double> @bits_test_vpermilvar_pd_256(<4 x double> %InVec, <4 x i64>
define <8 x double> @bits_test_vpermilvar_pd_512(<8 x double> %InVec, <8 x i64> %InMask) {
; CHECK-LABEL: @bits_test_vpermilvar_pd_512(
-; CHECK-NEXT: [[M:%.*]] = or <8 x i64> [[INMASK:%.*]], <i64 0, i64 1, i64 4294967293, i64 -3, i64 0, i64 1, i64 4294967293, i64 -3>
-; CHECK-NEXT: [[S:%.*]] = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[INVEC:%.*]], <8 x i64> [[M]])
+; CHECK-NEXT: [[S:%.*]] = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[INVEC:%.*]], <8 x i64> [[INMASK:%.*]])
; CHECK-NEXT: ret <8 x double> [[S]]
;
%m = or <8 x i64> %InMask, <i64 0, i64 1, i64 4294967293, i64 -3, i64 0, i64 1, i64 4294967293, i64 -3>
More information about the llvm-commits
mailing list