[llvm] [InstCombine][X86] Only demand the active index bits for VPERMV/VPERMV3 mask values (PR #106750)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 30 10:37:44 PDT 2024
https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/106750
From 224797d53d62d0ccd2a38b870481c0dbbdf58a37 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Fri, 30 Aug 2024 16:33:59 +0100
Subject: [PATCH 1/2] [InstCombine][X86] Only demand the active index bits for
VPERMV/VPERMV3 mask values
VPERMV/VPERMV3 only use the lower bits of the vector element indices - so use SimplifyDemandedBits to ignore anything touching the remaining bits.
Fixes #106413
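As a worked illustration of the demanded-bits computation (a standalone C++ sketch with illustrative names, not the patch's actual API): only the low log2(#source elements) bits of each mask element feed the shuffle index, and VPERMV3 doubles the index space because it selects across two source vectors, so any bits above that are free to ignore.

#include <cassert>
#include <cstdint>

// Sketch: the low-bit mask of index bits a VPERMV/VPERMV3 lane actually
// reads. Mirrors the Log2_32 + APInt::getLowBitsSet logic in the patch.
static uint64_t demandedIndexBits(unsigned NumElts, bool IsBinary) {
  unsigned SrcElts = IsBinary ? 2 * NumElts : NumElts; // VPERMV3: two sources
  assert(SrcElts != 0 && (SrcElts & (SrcElts - 1)) == 0 &&
         "expected a power-of-two element count");
  unsigned IdxBits = 0;
  while ((1u << IdxBits) < SrcElts) // portable Log2_32 equivalent
    ++IdxBits;
  return (uint64_t(1) << IdxBits) - 1; // low IdxBits bits set
}

// demandedIndexBits(8, /*IsBinary=*/false) == 0x7 (VPERMD on <8 x i32>)
// demandedIndexBits(8, /*IsBinary=*/true)  == 0xF (VPERMI2D on <8 x i32>)

So for the <8 x i32> permd tests below, an OR of the mask with constants such as 8, -8 or 16 only touches bits above bit 2 and can be dropped from the mask operand.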
---
.../Target/X86/X86InstCombineIntrinsic.cpp | 20 +++++++++
.../Transforms/InstCombine/X86/x86-vperm.ll | 42 +++++++------------
.../Transforms/InstCombine/X86/x86-vpermi2.ll | 36 ++++++----------
3 files changed, 46 insertions(+), 52 deletions(-)
diff --git a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
index 9cc5ed5d89ad70..e96ce28616c898 100644
--- a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
@@ -2144,6 +2144,22 @@ static Value *simplifyX86vpermv3(const IntrinsicInst &II,
return Builder.CreateShuffleVector(V1, V2, ArrayRef(Indexes, Size));
}
+// Simplify VPERMV/VPERMV3 mask - only demand the active index bits.
+static bool simplifyX86VPERMMask(Instruction *II, bool IsBinary,
+ InstCombiner &IC) {
+ auto *VecTy = cast<FixedVectorType>(II->getType());
+ unsigned EltSizeInBits = VecTy->getScalarSizeInBits();
+ unsigned NumElts = VecTy->getNumElements();
+ assert(isPowerOf2_32(NumElts) && isPowerOf2_32(EltSizeInBits) &&
+ "Unexpected shuffle mask size");
+
+ unsigned IdxSizeInBits = Log2_32(IsBinary ? (2 * NumElts) : NumElts);
+ APInt DemandedMask = APInt::getLowBitsSet(EltSizeInBits, IdxSizeInBits);
+
+ KnownBits KnownMask(EltSizeInBits);
+ return IC.SimplifyDemandedBits(II, 1, DemandedMask, KnownMask);
+}
+
std::optional<Instruction *>
X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
auto SimplifyDemandedVectorEltsLow = [&IC](Value *Op, unsigned Width,
@@ -3004,6 +3020,8 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
if (Value *V = simplifyX86vpermv(II, IC.Builder)) {
return IC.replaceInstUsesWith(II, V);
}
+ if (simplifyX86VPERMMask(&II, false, IC))
+ return &II;
break;
case Intrinsic::x86_avx512_vpermi2var_d_128:
@@ -3027,6 +3045,8 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
if (Value *V = simplifyX86vpermv3(II, IC.Builder)) {
return IC.replaceInstUsesWith(II, V);
}
+ if (simplifyX86VPERMMask(&II, true, IC))
+ return &II;
break;
case Intrinsic::x86_avx_maskload_ps:
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-vperm.ll b/llvm/test/Transforms/InstCombine/X86/x86-vperm.ll
index 6519e4f5348484..9a52c62d01404e 100644
--- a/llvm/test/Transforms/InstCombine/X86/x86-vperm.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-vperm.ll
@@ -91,8 +91,7 @@ define <8 x i32> @undef_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %passt
define <8 x i32> @demandedbit_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: @demandedbit_test_permvar_si_256_mask(
-; CHECK-NEXT: [[M:%.*]] = or <8 x i32> [[A1:%.*]], <i32 0, i32 8, i32 -8, i32 16, i32 -16, i32 32, i32 -32, i32 64>
-; CHECK-NEXT: [[S:%.*]] = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> [[A0:%.*]], <8 x i32> [[M]])
+; CHECK-NEXT: [[S:%.*]] = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> [[A0:%.*]], <8 x i32> [[A1:%.*]])
; CHECK-NEXT: ret <8 x i32> [[S]]
;
%m = or <8 x i32> %a1, <i32 0, i32 8, i32 -8, i32 16, i32 -16, i32 32, i32 -32, i32 64>
@@ -190,8 +189,7 @@ define <8 x float> @undef_test_permvar_sf_256_mask(<8 x float> %a0, <8 x float>
define <8 x float> @demandedbit_test_permvar_sf_256_mask(<8 x float> %a0, <8 x i32> %a1) {
; CHECK-LABEL: @demandedbit_test_permvar_sf_256_mask(
-; CHECK-NEXT: [[M:%.*]] = or <8 x i32> [[A1:%.*]], <i32 0, i32 8, i32 -8, i32 16, i32 -16, i32 32, i32 -32, i32 64>
-; CHECK-NEXT: [[S:%.*]] = call <8 x float> @llvm.x86.avx2.permps(<8 x float> [[A0:%.*]], <8 x i32> [[M]])
+; CHECK-NEXT: [[S:%.*]] = call <8 x float> @llvm.x86.avx2.permps(<8 x float> [[A0:%.*]], <8 x i32> [[A1:%.*]])
; CHECK-NEXT: ret <8 x float> [[S]]
;
%m = or <8 x i32> %a1, <i32 0, i32 8, i32 -8, i32 16, i32 -16, i32 32, i32 -32, i32 64>
@@ -297,8 +295,7 @@ define <4 x i64> @undef_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %passt
define <4 x i64> @demandedbits_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: @demandedbits_test_permvar_di_256_mask(
-; CHECK-NEXT: [[M:%.*]] = or <4 x i64> [[A1:%.*]], <i64 0, i64 4, i64 -4, i64 8>
-; CHECK-NEXT: [[S:%.*]] = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> [[A0:%.*]], <4 x i64> [[M]])
+; CHECK-NEXT: [[S:%.*]] = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> [[A0:%.*]], <4 x i64> [[A1:%.*]])
; CHECK-NEXT: ret <4 x i64> [[S]]
;
%m = or <4 x i64> %a1, <i64 0, i64 4, i64 -4, i64 8>
@@ -404,8 +401,7 @@ define <4 x double> @undef_test_permvar_df_256_mask(<4 x double> %a0, <4 x doubl
define <4 x double> @demandedbits_test_permvar_df_256_mask(<4 x double> %a0, <4 x i64> %a1) {
; CHECK-LABEL: @demandedbits_test_permvar_df_256_mask(
-; CHECK-NEXT: [[M:%.*]] = or <4 x i64> [[A1:%.*]], <i64 0, i64 4, i64 -4, i64 8>
-; CHECK-NEXT: [[S:%.*]] = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> [[A0:%.*]], <4 x i64> [[M]])
+; CHECK-NEXT: [[S:%.*]] = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> [[A0:%.*]], <4 x i64> [[A1:%.*]])
; CHECK-NEXT: ret <4 x double> [[S]]
;
%m = or <4 x i64> %a1, <i64 0, i64 4, i64 -4, i64 8>
@@ -503,8 +499,7 @@ define <16 x i32> @undef_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32> %pa
define <16 x i32> @demandedbit_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: @demandedbit_test_permvar_si_512_mask(
-; CHECK-NEXT: [[M:%.*]] = or <16 x i32> [[A1:%.*]], <i32 0, i32 16, i32 -16, i32 32, i32 -32, i32 64, i32 -64, i32 128, i32 -128, i32 256, i32 -256, i32 512, i32 -512, i32 1024, i32 -1024, i32 2048>
-; CHECK-NEXT: [[S:%.*]] = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> [[A0:%.*]], <16 x i32> [[M]])
+; CHECK-NEXT: [[S:%.*]] = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> [[A0:%.*]], <16 x i32> [[A1:%.*]])
; CHECK-NEXT: ret <16 x i32> [[S]]
;
%m = or <16 x i32> %a1, <i32 0, i32 16, i32 -16, i32 32, i32 -32, i32 64, i32 -64, i32 128, i32 -128, i32 256, i32 -256, i32 512, i32 -512, i32 1024, i32 -1024, i32 2048>
@@ -602,8 +597,7 @@ define <16 x float> @undef_test_permvar_sf_512_mask(<16 x float> %a0, <16 x floa
define <16 x float> @demandedbit_test_permvar_sf_512_mask(<16 x float> %a0, <16 x i32> %a1) {
; CHECK-LABEL: @demandedbit_test_permvar_sf_512_mask(
-; CHECK-NEXT: [[M:%.*]] = or <16 x i32> [[A1:%.*]], <i32 0, i32 16, i32 -16, i32 32, i32 -32, i32 64, i32 -64, i32 128, i32 -128, i32 256, i32 -256, i32 512, i32 -512, i32 1024, i32 -1024, i32 2048>
-; CHECK-NEXT: [[S:%.*]] = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> [[A0:%.*]], <16 x i32> [[M]])
+; CHECK-NEXT: [[S:%.*]] = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> [[A0:%.*]], <16 x i32> [[A1:%.*]])
; CHECK-NEXT: ret <16 x float> [[S]]
;
%m = or <16 x i32> %a1, <i32 0, i32 16, i32 -16, i32 32, i32 -32, i32 64, i32 -64, i32 128, i32 -128, i32 256, i32 -256, i32 512, i32 -512, i32 1024, i32 -1024, i32 2048>
@@ -701,8 +695,7 @@ define <8 x i64> @undef_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %passt
define <8 x i64> @demandedbit_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK-LABEL: @demandedbit_test_permvar_di_512_mask(
-; CHECK-NEXT: [[M:%.*]] = or <8 x i64> [[A1:%.*]], <i64 0, i64 8, i64 -8, i64 16, i64 -16, i64 32, i64 -32, i64 64>
-; CHECK-NEXT: [[S:%.*]] = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> [[A0:%.*]], <8 x i64> [[M]])
+; CHECK-NEXT: [[S:%.*]] = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> [[A0:%.*]], <8 x i64> [[A1:%.*]])
; CHECK-NEXT: ret <8 x i64> [[S]]
;
%m = or <8 x i64> %a1, <i64 0, i64 8, i64 -8, i64 16, i64 -16, i64 32, i64 -32, i64 64>
@@ -800,8 +793,7 @@ define <8 x double> @undef_test_permvar_df_512_mask(<8 x double> %a0, <8 x doubl
define <8 x double> @demandedbit_test_permvar_df_512_mask(<8 x double> %a0, <8 x i64> %a1) {
; CHECK-LABEL: @demandedbit_test_permvar_df_512_mask(
-; CHECK-NEXT: [[M:%.*]] = or <8 x i64> [[A1:%.*]], <i64 0, i64 8, i64 -8, i64 16, i64 -16, i64 32, i64 -32, i64 64>
-; CHECK-NEXT: [[S:%.*]] = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> [[A0:%.*]], <8 x i64> [[M]])
+; CHECK-NEXT: [[S:%.*]] = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> [[A0:%.*]], <8 x i64> [[A1:%.*]])
; CHECK-NEXT: ret <8 x double> [[S]]
;
%m = or <8 x i64> %a1, <i64 0, i64 8, i64 -8, i64 16, i64 -16, i64 32, i64 -32, i64 64>
@@ -899,8 +891,7 @@ define <8 x i16> @undef_test_permvar_hi_128_mask(<8 x i16> %a0, <8 x i16> %passt
define <8 x i16> @demandedbit_test_permvar_hi_128_mask(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: @demandedbit_test_permvar_hi_128_mask(
-; CHECK-NEXT: [[M:%.*]] = or <8 x i16> [[A1:%.*]], <i16 0, i16 8, i16 -8, i16 16, i16 -16, i16 32, i16 -32, i16 64>
-; CHECK-NEXT: [[S:%.*]] = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> [[A0:%.*]], <8 x i16> [[M]])
+; CHECK-NEXT: [[S:%.*]] = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]])
; CHECK-NEXT: ret <8 x i16> [[S]]
;
%m = or <8 x i16> %a1, <i16 0, i16 8, i16 -8, i16 16, i16 -16, i16 32, i16 -32, i16 64>
@@ -998,8 +989,7 @@ define <16 x i16> @undef_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16> %pa
define <16 x i16> @demandedbit_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: @demandedbit_test_permvar_hi_256_mask(
-; CHECK-NEXT: [[M:%.*]] = or <16 x i16> [[A1:%.*]], <i16 0, i16 16, i16 -16, i16 32, i16 -32, i16 64, i16 -64, i16 128, i16 -128, i16 256, i16 -256, i16 512, i16 -512, i16 1024, i16 -1024, i16 2048>
-; CHECK-NEXT: [[S:%.*]] = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> [[A0:%.*]], <16 x i16> [[M]])
+; CHECK-NEXT: [[S:%.*]] = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]])
; CHECK-NEXT: ret <16 x i16> [[S]]
;
%m = or <16 x i16> %a1, <i16 0, i16 16, i16 -16, i16 32, i16 -32, i16 64, i16 -64, i16 128, i16 -128, i16 256, i16 -256, i16 512, i16 -512, i16 1024, i16 -1024, i16 2048>
@@ -1097,8 +1087,7 @@ define <32 x i16> @undef_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16> %pa
define <32 x i16> @demandedbit_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16> %a1) {
; CHECK-LABEL: @demandedbit_test_permvar_hi_512_mask(
-; CHECK-NEXT: [[M:%.*]] = or <32 x i16> [[A1:%.*]], <i16 0, i16 32, i16 -32, i16 64, i16 -64, i16 128, i16 -128, i16 256, i16 -256, i16 512, i16 -512, i16 1024, i16 -1024, i16 2048, i16 -2048, i16 4096, i16 0, i16 32, i16 -32, i16 64, i16 -64, i16 128, i16 -128, i16 256, i16 -256, i16 512, i16 -512, i16 1024, i16 -1024, i16 2048, i16 -2048, i16 4096>
-; CHECK-NEXT: [[S:%.*]] = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> [[A0:%.*]], <32 x i16> [[M]])
+; CHECK-NEXT: [[S:%.*]] = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> [[A0:%.*]], <32 x i16> [[A1:%.*]])
; CHECK-NEXT: ret <32 x i16> [[S]]
;
%m = or <32 x i16> %a1, <i16 0, i16 32, i16 -32, i16 64, i16 -64, i16 128, i16 -128, i16 256, i16 -256, i16 512, i16 -512, i16 1024, i16 -1024, i16 2048, i16 -2048, i16 4096, i16 0, i16 32, i16 -32, i16 64, i16 -64, i16 128, i16 -128, i16 256, i16 -256, i16 512, i16 -512, i16 1024, i16 -1024, i16 2048, i16 -2048, i16 4096>
@@ -1196,8 +1185,7 @@ define <16 x i8> @undef_test_permvar_qi_128_mask(<16 x i8> %a0, <16 x i8> %passt
define <16 x i8> @demandedbit_test_permvar_qi_129_mask(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: @demandedbit_test_permvar_qi_129_mask(
-; CHECK-NEXT: [[M:%.*]] = or <16 x i8> [[A1:%.*]], <i8 0, i8 16, i8 -16, i8 32, i8 -32, i8 64, i8 -64, i8 -128, i8 -128, i8 0, i8 16, i8 -16, i8 32, i8 -32, i8 64, i8 -64>
-; CHECK-NEXT: [[S:%.*]] = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> [[A0:%.*]], <16 x i8> [[M]])
+; CHECK-NEXT: [[S:%.*]] = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> [[A0:%.*]], <16 x i8> [[A1:%.*]])
; CHECK-NEXT: ret <16 x i8> [[S]]
;
%m = or <16 x i8> %a1, <i8 0, i8 16, i8 -16, i8 32, i8 -32, i8 64, i8 -64, i8 128, i8 -128, i8 0, i8 16, i8 -16, i8 32, i8 -32, i8 64, i8 -64>
@@ -1295,8 +1283,7 @@ define <32 x i8> @undef_test_permvar_qi_256_mask(<32 x i8> %a0, <32 x i8> %passt
define <32 x i8> @demandedbit_test_permvar_qi_256_mask(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: @demandedbit_test_permvar_qi_256_mask(
-; CHECK-NEXT: [[M:%.*]] = or <32 x i8> [[A1:%.*]], <i8 0, i8 32, i8 -32, i8 64, i8 -64, i8 -128, i8 -128, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 32, i8 -32, i8 64, i8 -64, i8 -128, i8 -128, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
-; CHECK-NEXT: [[S:%.*]] = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> [[A0:%.*]], <32 x i8> [[M]])
+; CHECK-NEXT: [[S:%.*]] = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]])
; CHECK-NEXT: ret <32 x i8> [[S]]
;
%m = or <32 x i8> %a1, <i8 0, i8 32, i8 -32, i8 64, i8 -64, i8 128, i8 -128, i8 256, i8 -256, i8 512, i8 -512, i8 1024, i8 -1024, i8 2048, i8 -2048, i8 4096, i8 0, i8 32, i8 -32, i8 64, i8 -64, i8 128, i8 -128, i8 256, i8 -256, i8 512, i8 -512, i8 1024, i8 -1024, i8 2048, i8 -2048, i8 4096>
@@ -1394,8 +1381,7 @@ define <64 x i8> @undef_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %passt
define <64 x i8> @demandedbit_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %a1) {
; CHECK-LABEL: @demandedbit_test_permvar_qi_512_mask(
-; CHECK-NEXT: [[M:%.*]] = or <64 x i8> [[A1:%.*]], <i8 0, i8 64, i8 -64, i8 -128, i8 -128, i8 0, i8 64, i8 -64, i8 -128, i8 -128, i8 0, i8 64, i8 -64, i8 -128, i8 -128, i8 0, i8 64, i8 -64, i8 -128, i8 -128, i8 0, i8 64, i8 -64, i8 -128, i8 -128, i8 0, i8 64, i8 -64, i8 -128, i8 -128, i8 0, i8 64, i8 -64, i8 -128, i8 -128, i8 0, i8 64, i8 -64, i8 -128, i8 -128, i8 0, i8 64, i8 -64, i8 -128, i8 -128, i8 0, i8 64, i8 -64, i8 -128, i8 -128, i8 0, i8 64, i8 -64, i8 -128, i8 -128, i8 0, i8 64, i8 -64, i8 -128, i8 -128, i8 0, i8 64, i8 -64, i8 -128>
-; CHECK-NEXT: [[S:%.*]] = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> [[A0:%.*]], <64 x i8> [[M]])
+; CHECK-NEXT: [[S:%.*]] = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> [[A0:%.*]], <64 x i8> [[A1:%.*]])
; CHECK-NEXT: ret <64 x i8> [[S]]
;
%m = or <64 x i8> %a1, <i8 0, i8 64, i8 -64, i8 128, i8 -128, i8 0, i8 64, i8 -64, i8 128, i8 -128, i8 0, i8 64, i8 -64, i8 128, i8 -128, i8 0, i8 64, i8 -64, i8 128, i8 -128, i8 0, i8 64, i8 -64, i8 128, i8 -128, i8 0, i8 64, i8 -64, i8 128, i8 -128, i8 0, i8 64, i8 -64, i8 128, i8 -128, i8 0, i8 64, i8 -64, i8 128, i8 -128, i8 0, i8 64, i8 -64, i8 128, i8 -128, i8 0, i8 64, i8 -64, i8 128, i8 -128, i8 0, i8 64, i8 -64, i8 128, i8 -128, i8 0, i8 64, i8 -64, i8 128, i8 -128, i8 0, i8 64, i8 -64, i8 128>
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-vpermi2.ll b/llvm/test/Transforms/InstCombine/X86/x86-vpermi2.ll
index eb6ad4458d932e..d38e48ac817aee 100644
--- a/llvm/test/Transforms/InstCombine/X86/x86-vpermi2.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-vpermi2.ll
@@ -28,8 +28,7 @@ define <2 x i64> @shuffle_vpermv3_v2i64_unary(<2 x i64> %x0) {
define <2 x i64> @shuffle_vpermv3_v2i64_demandedbits(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %m) {
; CHECK-LABEL: define <2 x i64> @shuffle_vpermv3_v2i64_demandedbits(
; CHECK-SAME: <2 x i64> [[X0:%.*]], <2 x i64> [[X1:%.*]], <2 x i64> [[M:%.*]]) {
-; CHECK-NEXT: [[T:%.*]] = or <2 x i64> [[M]], <i64 0, i64 4>
-; CHECK-NEXT: [[R:%.*]] = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> [[X0]], <2 x i64> [[T]], <2 x i64> [[X1]])
+; CHECK-NEXT: [[R:%.*]] = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> [[X0]], <2 x i64> [[M]], <2 x i64> [[X1]])
; CHECK-NEXT: ret <2 x i64> [[R]]
;
%t = or <2 x i64> %m, <i64 0, i64 4>
@@ -72,8 +71,7 @@ define <4 x i64> @shuffle_vpermv3_v4i64_unary(<4 x i64> %x0) {
define <4 x i64> @shuffle_vpermv3_v4i64_demandedbits(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %m) {
; CHECK-LABEL: define <4 x i64> @shuffle_vpermv3_v4i64_demandedbits(
; CHECK-SAME: <4 x i64> [[X0:%.*]], <4 x i64> [[X1:%.*]], <4 x i64> [[M:%.*]]) {
-; CHECK-NEXT: [[T:%.*]] = or <4 x i64> [[M]], <i64 0, i64 8, i64 16, i64 32>
-; CHECK-NEXT: [[R:%.*]] = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> [[X0]], <4 x i64> [[T]], <4 x i64> [[X1]])
+; CHECK-NEXT: [[R:%.*]] = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> [[X0]], <4 x i64> [[M]], <4 x i64> [[X1]])
; CHECK-NEXT: ret <4 x i64> [[R]]
;
%t = or <4 x i64> %m, <i64 0, i64 8, i64 16, i64 32>
@@ -104,8 +102,7 @@ define <8 x i64> @shuffle_vpermv3_v8i64_unary(<8 x i64> %x0) {
define <8 x i64> @shuffle_vpermv3_v8i64_demandedbits(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %m) {
; CHECK-LABEL: define <8 x i64> @shuffle_vpermv3_v8i64_demandedbits(
; CHECK-SAME: <8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x i64> [[M:%.*]]) {
-; CHECK-NEXT: [[T:%.*]] = or <8 x i64> [[M]], <i64 0, i64 16, i64 32, i64 64, i64 256, i64 512, i64 1024, i64 -16>
-; CHECK-NEXT: [[R:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[X0]], <8 x i64> [[T]], <8 x i64> [[X1]])
+; CHECK-NEXT: [[R:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[X0]], <8 x i64> [[M]], <8 x i64> [[X1]])
; CHECK-NEXT: ret <8 x i64> [[R]]
;
%t = or <8 x i64> %m, <i64 0, i64 16, i64 32, i64 64, i64 256, i64 512, i64 1024, i64 -16>
@@ -140,8 +137,7 @@ define <4 x i32> @shuffle_vpermv3_v4i32_unary(<4 x i32> %x0) {
define <4 x i32> @shuffle_vpermv3_v4i32_demandedbits(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %m) {
; CHECK-LABEL: define <4 x i32> @shuffle_vpermv3_v4i32_demandedbits(
; CHECK-SAME: <4 x i32> [[X0:%.*]], <4 x i32> [[X1:%.*]], <4 x i32> [[M:%.*]]) {
-; CHECK-NEXT: [[T:%.*]] = or <4 x i32> [[M]], <i32 0, i32 8, i32 16, i32 32>
-; CHECK-NEXT: [[R:%.*]] = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> [[X0]], <4 x i32> [[T]], <4 x i32> [[X1]])
+; CHECK-NEXT: [[R:%.*]] = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> [[X0]], <4 x i32> [[M]], <4 x i32> [[X1]])
; CHECK-NEXT: ret <4 x i32> [[R]]
;
%t = or <4 x i32> %m, <i32 0, i32 8, i32 16, i32 32>
@@ -172,8 +168,7 @@ define <8 x i32> @shuffle_vpermv3_v8i32_unary(<8 x i32> %x0) {
define <8 x i32> @shuffle_vpermv3_v8i32_demandedbits(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %m) {
; CHECK-LABEL: define <8 x i32> @shuffle_vpermv3_v8i32_demandedbits(
; CHECK-SAME: <8 x i32> [[X0:%.*]], <8 x i32> [[X1:%.*]], <8 x i32> [[M:%.*]]) {
-; CHECK-NEXT: [[T:%.*]] = or <8 x i32> [[M]], <i32 0, i32 16, i32 32, i32 64, i32 256, i32 512, i32 -16, i32 -32>
-; CHECK-NEXT: [[R:%.*]] = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> [[X0]], <8 x i32> [[T]], <8 x i32> [[X1]])
+; CHECK-NEXT: [[R:%.*]] = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> [[X0]], <8 x i32> [[M]], <8 x i32> [[X1]])
; CHECK-NEXT: ret <8 x i32> [[R]]
;
%t = or <8 x i32> %m, <i32 0, i32 16, i32 32, i32 64, i32 256, i32 512, i32 -16, i32 -32>
@@ -204,8 +199,7 @@ define <16 x i32> @shuffle_vpermv3_v16i32_unary(<16 x i32> %x0) {
define <16 x i32> @shuffle_vpermv3_v16i32_demandedbits(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %m) {
; CHECK-LABEL: define <16 x i32> @shuffle_vpermv3_v16i32_demandedbits(
; CHECK-SAME: <16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x i32> [[M:%.*]]) {
-; CHECK-NEXT: [[T:%.*]] = or <16 x i32> [[M]], <i32 0, i32 32, i32 64, i32 256, i32 512, i32 1024, i32 2048, i32 4096, i32 8192, i32 -32, i32 -64, i32 -128, i32 -256, i32 -512, i32 -1024, i32 -2048>
-; CHECK-NEXT: [[R:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X0]], <16 x i32> [[T]], <16 x i32> [[X1]])
+; CHECK-NEXT: [[R:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X0]], <16 x i32> [[M]], <16 x i32> [[X1]])
; CHECK-NEXT: ret <16 x i32> [[R]]
;
%t = or <16 x i32> %m, <i32 0, i32 32, i32 64, i32 256, i32 512, i32 1024, i32 2048, i32 4096, i32 8192, i32 -32, i32 -64, i32 -128, i32 -256, i32 -512, i32 -1024, i32 -2048>
@@ -240,8 +234,7 @@ define <8 x i16> @shuffle_vpermv3_v8i16_unary(<8 x i16> %x0) {
define <8 x i16> @shuffle_vpermv3_v8i16_demandedbits(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %m) {
; CHECK-LABEL: define <8 x i16> @shuffle_vpermv3_v8i16_demandedbits(
; CHECK-SAME: <8 x i16> [[X0:%.*]], <8 x i16> [[X1:%.*]], <8 x i16> [[M:%.*]]) {
-; CHECK-NEXT: [[T:%.*]] = or <8 x i16> [[M]], <i16 0, i16 16, i16 32, i16 64, i16 256, i16 512, i16 -16, i16 -32>
-; CHECK-NEXT: [[R:%.*]] = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> [[X0]], <8 x i16> [[T]], <8 x i16> [[X1]])
+; CHECK-NEXT: [[R:%.*]] = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> [[X0]], <8 x i16> [[M]], <8 x i16> [[X1]])
; CHECK-NEXT: ret <8 x i16> [[R]]
;
%t = or <8 x i16> %m, <i16 0, i16 16, i16 32, i16 64, i16 256, i16 512, i16 -16, i16 -32>
@@ -272,8 +265,7 @@ define <16 x i16> @shuffle_vpermv3_v16i16_unary(<16 x i16> %x0) {
define <16 x i16> @shuffle_vpermv3_v16i16_demandedbits(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %m) {
; CHECK-LABEL: define <16 x i16> @shuffle_vpermv3_v16i16_demandedbits(
; CHECK-SAME: <16 x i16> [[X0:%.*]], <16 x i16> [[X1:%.*]], <16 x i16> [[M:%.*]]) {
-; CHECK-NEXT: [[T:%.*]] = or <16 x i16> [[M]], <i16 0, i16 32, i16 64, i16 256, i16 512, i16 1024, i16 2048, i16 4096, i16 -32, i16 -64, i16 -128, i16 -256, i16 -512, i16 -1024, i16 -2048, i16 -4096>
-; CHECK-NEXT: [[R:%.*]] = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> [[X0]], <16 x i16> [[T]], <16 x i16> [[X1]])
+; CHECK-NEXT: [[R:%.*]] = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> [[X0]], <16 x i16> [[M]], <16 x i16> [[X1]])
; CHECK-NEXT: ret <16 x i16> [[R]]
;
%t = or <16 x i16> %m, <i16 0, i16 32, i16 64, i16 256, i16 512, i16 1024, i16 2048, i16 4096, i16 -32, i16 -64, i16 -128, i16 -256, i16 -512, i16 -1024, i16 -2048, i16 -4096>
@@ -304,8 +296,7 @@ define <32 x i16> @shuffle_vpermv3_v32i16_unary(<32 x i16> %x0) {
define <32 x i16> @shuffle_vpermv3_v32i16_demandedbits(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %m) {
; CHECK-LABEL: define <32 x i16> @shuffle_vpermv3_v32i16_demandedbits(
; CHECK-SAME: <32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]], <32 x i16> [[M:%.*]]) {
-; CHECK-NEXT: [[T:%.*]] = or <32 x i16> [[M]], <i16 0, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048, i16 4096, i16 0, i16 -64, i16 -128, i16 -256, i16 -512, i16 -1024, i16 -2048, i16 -4096, i16 0, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048, i16 4096, i16 0, i16 -64, i16 -128, i16 -256, i16 -512, i16 -1024, i16 -2048, i16 -4096>
-; CHECK-NEXT: [[R:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[X0]], <32 x i16> [[T]], <32 x i16> [[X1]])
+; CHECK-NEXT: [[R:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[X0]], <32 x i16> [[M]], <32 x i16> [[X1]])
; CHECK-NEXT: ret <32 x i16> [[R]]
;
%t = or <32 x i16> %m, <i16 0, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048, i16 4096, i16 0, i16 -64, i16 -128, i16 -256, i16 -512, i16 -1024, i16 -2048, i16 -4096, i16 0, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048, i16 4096, i16 0, i16 -64, i16 -128, i16 -256, i16 -512, i16 -1024, i16 -2048, i16 -4096>
@@ -340,8 +331,7 @@ define <16 x i8> @shuffle_vpermv3_v16i8_unary(<16 x i8> %x0) {
define <16 x i8> @shuffle_vpermv3_v16i8_demandedbits(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %m) {
; CHECK-LABEL: define <16 x i8> @shuffle_vpermv3_v16i8_demandedbits(
; CHECK-SAME: <16 x i8> [[X0:%.*]], <16 x i8> [[X1:%.*]], <16 x i8> [[M:%.*]]) {
-; CHECK-NEXT: [[T:%.*]] = or <16 x i8> [[M]], <i8 0, i8 32, i8 64, i8 -128, i8 0, i8 -32, i8 -64, i8 -128, i8 0, i8 32, i8 64, i8 -128, i8 0, i8 -32, i8 -64, i8 -128>
-; CHECK-NEXT: [[R:%.*]] = call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> [[X0]], <16 x i8> [[T]], <16 x i8> [[X1]])
+; CHECK-NEXT: [[R:%.*]] = call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> [[X0]], <16 x i8> [[M]], <16 x i8> [[X1]])
; CHECK-NEXT: ret <16 x i8> [[R]]
;
%t = or <16 x i8> %m, <i8 0, i8 32, i8 64, i8 128, i8 0, i8 -32, i8 -64, i8 -128, i8 0, i8 32, i8 64, i8 128, i8 0, i8 -32, i8 -64, i8 -128>
@@ -372,8 +362,7 @@ define <32 x i8> @shuffle_vpermv3_v32i8_unary(<32 x i8> %x0) {
define <32 x i8> @shuffle_vpermv3_v32i8_demandedbits(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %m) {
; CHECK-LABEL: define <32 x i8> @shuffle_vpermv3_v32i8_demandedbits(
; CHECK-SAME: <32 x i8> [[X0:%.*]], <32 x i8> [[X1:%.*]], <32 x i8> [[M:%.*]]) {
-; CHECK-NEXT: [[T:%.*]] = or <32 x i8> [[M]], <i8 0, i8 0, i8 64, i8 -128, i8 0, i8 0, i8 -64, i8 -128, i8 0, i8 0, i8 64, i8 -128, i8 0, i8 0, i8 -64, i8 -128, i8 0, i8 0, i8 64, i8 -128, i8 0, i8 0, i8 -64, i8 -128, i8 0, i8 0, i8 64, i8 -128, i8 0, i8 0, i8 -64, i8 -128>
-; CHECK-NEXT: [[R:%.*]] = call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> [[X0]], <32 x i8> [[T]], <32 x i8> [[X1]])
+; CHECK-NEXT: [[R:%.*]] = call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> [[X0]], <32 x i8> [[M]], <32 x i8> [[X1]])
; CHECK-NEXT: ret <32 x i8> [[R]]
;
%t = or <32 x i8> %m, <i8 0, i8 0, i8 64, i8 128, i8 0, i8 0, i8 -64, i8 -128, i8 0, i8 0, i8 64, i8 128, i8 0, i8 0, i8 -64, i8 -128, i8 0, i8 0, i8 64, i8 128, i8 0, i8 0, i8 -64, i8 -128, i8 0, i8 0, i8 64, i8 128, i8 0, i8 0, i8 -64, i8 -128>
@@ -404,8 +393,7 @@ define <64 x i8> @shuffle_vpermv3_v64i8_unary(<64 x i8> %x0) {
define <64 x i8> @shuffle_vpermv3_v64i8_demandedbits(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %m) {
; CHECK-LABEL: define <64 x i8> @shuffle_vpermv3_v64i8_demandedbits(
; CHECK-SAME: <64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]], <64 x i8> [[M:%.*]]) {
-; CHECK-NEXT: [[T:%.*]] = or <64 x i8> [[M]], <i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128>
-; CHECK-NEXT: [[R:%.*]] = call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> [[X0]], <64 x i8> [[T]], <64 x i8> [[X1]])
+; CHECK-NEXT: [[R:%.*]] = call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> [[X0]], <64 x i8> [[M]], <64 x i8> [[X1]])
; CHECK-NEXT: ret <64 x i8> [[R]]
;
%t = or <64 x i8> %m, <i8 0, i8 128, i8 0, i8 -128, i8 0, i8 128, i8 0, i8 -128, i8 0, i8 128, i8 0, i8 -128, i8 0, i8 128, i8 0, i8 -128, i8 0, i8 128, i8 0, i8 -128, i8 0, i8 128, i8 0, i8 -128, i8 0, i8 128, i8 0, i8 -128, i8 0, i8 128, i8 0, i8 -128, i8 0, i8 128, i8 0, i8 -128, i8 0, i8 128, i8 0, i8 -128, i8 0, i8 128, i8 0, i8 -128, i8 0, i8 128, i8 0, i8 -128, i8 0, i8 128, i8 0, i8 -128, i8 0, i8 128, i8 0, i8 -128, i8 0, i8 128, i8 0, i8 -128, i8 0, i8 128, i8 0, i8 -128>
From 3cc3cb393b14f570827169d81d7aff6c542faac2 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Fri, 30 Aug 2024 18:37:29 +0100
Subject: [PATCH 2/2] Add missing arg name comments
---
.../Target/X86/X86InstCombineIntrinsic.cpp | 22 +++++++++----------
1 file changed, 11 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
index e96ce28616c898..e8bf0fda4e843b 100644
--- a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
@@ -3020,22 +3020,22 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
if (Value *V = simplifyX86vpermv(II, IC.Builder)) {
return IC.replaceInstUsesWith(II, V);
}
- if (simplifyX86VPERMMask(&II, false, IC))
+ if (simplifyX86VPERMMask(&II, /*IsBinary=*/false, IC))
return &II;
break;
case Intrinsic::x86_avx512_vpermi2var_d_128:
case Intrinsic::x86_avx512_vpermi2var_d_256:
case Intrinsic::x86_avx512_vpermi2var_d_512:
- case Intrinsic::x86_avx512_vpermi2var_hi_128:
- case Intrinsic::x86_avx512_vpermi2var_hi_256:
- case Intrinsic::x86_avx512_vpermi2var_hi_512:
- case Intrinsic::x86_avx512_vpermi2var_pd_128:
- case Intrinsic::x86_avx512_vpermi2var_pd_256:
- case Intrinsic::x86_avx512_vpermi2var_pd_512:
- case Intrinsic::x86_avx512_vpermi2var_ps_128:
- case Intrinsic::x86_avx512_vpermi2var_ps_256:
- case Intrinsic::x86_avx512_vpermi2var_ps_512:
+ case Intrinsic::x86_avx512_vpermi2var_hi_128:
+ case Intrinsic::x86_avx512_vpermi2var_hi_256:
+ case Intrinsic::x86_avx512_vpermi2var_hi_512:
+ case Intrinsic::x86_avx512_vpermi2var_pd_128:
+ case Intrinsic::x86_avx512_vpermi2var_pd_256:
+ case Intrinsic::x86_avx512_vpermi2var_pd_512:
+ case Intrinsic::x86_avx512_vpermi2var_ps_128:
+ case Intrinsic::x86_avx512_vpermi2var_ps_256:
+ case Intrinsic::x86_avx512_vpermi2var_ps_512:
case Intrinsic::x86_avx512_vpermi2var_q_128:
case Intrinsic::x86_avx512_vpermi2var_q_256:
case Intrinsic::x86_avx512_vpermi2var_q_512:
@@ -3045,7 +3045,7 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
if (Value *V = simplifyX86vpermv3(II, IC.Builder)) {
return IC.replaceInstUsesWith(II, V);
}
- if (simplifyX86VPERMMask(&II, true, IC))
+ if (simplifyX86VPERMMask(&II, /*IsBinary=*/true, IC))
return &II;
break;