[llvm] [InstCombine][X86] Only demand used bits for PSHUFB mask values (PR #106377)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 28 05:10:08 PDT 2024
llvmbot wrote:
@llvm/pr-subscribers-backend-x86
@llvm/pr-subscribers-llvm-transforms
Author: Simon Pilgrim (RKSimon)
Changes:
(V)PSHUFB only uses the sign bit of each mask byte (to zero the result byte) and the lower 4 bits (to index bytes 0-15 within each 128-bit lane), so use SimplifyDemandedBits to ignore anything that only touches the remaining bits.
Fixes #106256
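
For reference, a minimal standalone sketch (illustrative only, not LLVM code; the helper name is made up) of the per-128-bit-lane PSHUFB semantics, showing why only the sign bit and the low 4 bits of each mask byte are observable:

```cpp
#include <array>
#include <cstdint>

// Model of one 128-bit lane of (V)PSHUFB: for each mask byte, bit 7 zeroes
// the destination byte and bits 3:0 index a source byte within the lane.
// Bits 6:4 never affect the result, so only 0b10001111 of each mask
// element is demanded.
std::array<uint8_t, 16> pshufbLane(const std::array<uint8_t, 16> &Src,
                                   const std::array<uint8_t, 16> &Mask) {
  std::array<uint8_t, 16> Dst{};
  for (int I = 0; I != 16; ++I)
    Dst[I] = (Mask[I] & 0x80) ? 0 : Src[Mask[I] & 0x0F];
  return Dst;
}
```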
---
Full diff: https://github.com/llvm/llvm-project/pull/106377.diff
3 Files Affected:
- (modified) llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp (+6-1)
- (modified) llvm/test/Transforms/InstCombine/X86/x86-pshufb-inseltpoison.ll (+32)
- (modified) llvm/test/Transforms/InstCombine/X86/x86-pshufb.ll (+32)
``````````diff
diff --git a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
index 793d62ba2a8e79..42ad8905a703c4 100644
--- a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
@@ -2950,11 +2950,16 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
case Intrinsic::x86_ssse3_pshuf_b_128:
case Intrinsic::x86_avx2_pshuf_b:
- case Intrinsic::x86_avx512_pshuf_b_512:
+ case Intrinsic::x86_avx512_pshuf_b_512: {
if (Value *V = simplifyX86pshufb(II, IC.Builder)) {
return IC.replaceInstUsesWith(II, V);
}
+ KnownBits KnownMask(8);
+ if (IC.SimplifyDemandedBits(&II, 1, APInt(8, 0b10001111), KnownMask)) {
+ return &II;
+ }
break;
+ }
case Intrinsic::x86_avx_vpermilvar_ps:
case Intrinsic::x86_avx_vpermilvar_ps_256:
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-pshufb-inseltpoison.ll b/llvm/test/Transforms/InstCombine/X86/x86-pshufb-inseltpoison.ll
index 2f301e9e9c1072..f8cec9c152a08b 100644
--- a/llvm/test/Transforms/InstCombine/X86/x86-pshufb-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-pshufb-inseltpoison.ll
@@ -468,6 +468,38 @@ define <64 x i8> @fold_with_allpoison_elts_avx512(<64 x i8> %InVec) {
ret <64 x i8> %1
}
+; Demanded bits tests (PR106256)
+
+define <16 x i8> @demanded_bits_mask(<16 x i8> %InVec, <16 x i8> %InMask) {
+; CHECK-LABEL: @demanded_bits_mask(
+; CHECK-NEXT: [[S:%.*]] = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> [[INVEC:%.*]], <16 x i8> [[INMASK:%.*]])
+; CHECK-NEXT: ret <16 x i8> [[S]]
+;
+ %m = or <16 x i8> %InMask, <i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112>
+ %s = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> %m)
+ ret <16 x i8> %s
+}
+
+define <32 x i8> @demanded_bits_mask_avx2(<32 x i8> %InVec, <32 x i8> %InMask) {
+; CHECK-LABEL: @demanded_bits_mask_avx2(
+; CHECK-NEXT: [[S:%.*]] = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> [[INVEC:%.*]], <32 x i8> [[INMASK:%.*]])
+; CHECK-NEXT: ret <32 x i8> [[S]]
+;
+ %m = or <32 x i8> %InMask, <i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112>
+ %s = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> %m)
+ ret <32 x i8> %s
+}
+
+define <64 x i8> @demanded_bits_mask_avx512(<64 x i8> %InVec, <64 x i8> %InMask) {
+; CHECK-LABEL: @demanded_bits_mask_avx512(
+; CHECK-NEXT: [[S:%.*]] = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[INVEC:%.*]], <64 x i8> [[INMASK:%.*]])
+; CHECK-NEXT: ret <64 x i8> [[S]]
+;
+ %m = or <64 x i8> %InMask, <i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112>
+ %s = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> %m)
+ ret <64 x i8> %s
+}
+
; Demanded elts tests.
define <16 x i8> @demanded_elts_insertion(<16 x i8> %InVec, <16 x i8> %BaseMask, i8 %M0, i8 %M15) {
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-pshufb.ll b/llvm/test/Transforms/InstCombine/X86/x86-pshufb.ll
index cd90696eafac6f..fd99fd880a8098 100644
--- a/llvm/test/Transforms/InstCombine/X86/x86-pshufb.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-pshufb.ll
@@ -468,6 +468,38 @@ define <64 x i8> @fold_with_allundef_elts_avx512(<64 x i8> %InVec) {
ret <64 x i8> %1
}
+; Demanded bits tests (PR106256)
+
+define <16 x i8> @demanded_bits_mask(<16 x i8> %InVec, <16 x i8> %InMask) {
+; CHECK-LABEL: @demanded_bits_mask(
+; CHECK-NEXT: [[S:%.*]] = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> [[INVEC:%.*]], <16 x i8> [[INMASK:%.*]])
+; CHECK-NEXT: ret <16 x i8> [[S]]
+;
+ %m = or <16 x i8> %InMask, <i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112>
+ %s = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> %m)
+ ret <16 x i8> %s
+}
+
+define <32 x i8> @demanded_bits_mask_avx2(<32 x i8> %InVec, <32 x i8> %InMask) {
+; CHECK-LABEL: @demanded_bits_mask_avx2(
+; CHECK-NEXT: [[S:%.*]] = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> [[INVEC:%.*]], <32 x i8> [[INMASK:%.*]])
+; CHECK-NEXT: ret <32 x i8> [[S]]
+;
+ %m = or <32 x i8> %InMask, <i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112>
+ %s = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> %m)
+ ret <32 x i8> %s
+}
+
+define <64 x i8> @demanded_bits_mask_avx512(<64 x i8> %InVec, <64 x i8> %InMask) {
+; CHECK-LABEL: @demanded_bits_mask_avx512(
+; CHECK-NEXT: [[S:%.*]] = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[INVEC:%.*]], <64 x i8> [[INMASK:%.*]])
+; CHECK-NEXT: ret <64 x i8> [[S]]
+;
+ %m = or <64 x i8> %InMask, <i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112>
+ %s = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> %m)
+ ret <64 x i8> %s
+}
+
; Demanded elts tests.
define <16 x i8> @demanded_elts_insertion(<16 x i8> %InVec, <16 x i8> %BaseMask, i8 %M0, i8 %M15) {
``````````
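
As a sanity check on the test constants above: 16 (0b00010000), 48 (0b00110000) and 112 (0b01110000) only set bits 6:4, which fall outside the demanded mask of 0b10001111, so the `or` contributes nothing PSHUFB can observe and the CHECK lines expect it to be folded away. A one-liner (illustrative, not part of the patch) confirming this:

```cpp
static_assert((16 & 0b10001111) == 0 && (48 & 0b10001111) == 0 &&
                  (112 & 0b10001111) == 0,
              "the or'd constants only touch mask bits PSHUFB ignores");
```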
https://github.com/llvm/llvm-project/pull/106377