[llvm] [InstCombine][X86] Only demand used bits for PSHUFB mask values (PR #106377)

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 28 05:09:30 PDT 2024


https://github.com/RKSimon created https://github.com/llvm/llvm-project/pull/106377

(V)PSHUFB only uses the sign bit of each mask byte (for zeroing) and its lower 4 bits (to index bytes 0-15 within the lane), so use SimplifyDemandedBits to ignore anything touching the remaining bits.

Fixes #106256
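
For reference, a minimal sketch (plain C++, not part of the patch) of the per-byte (V)PSHUFB behaviour within one 16-byte lane, showing that only the sign bit and the low 4 bits of each mask byte can affect the result:

#include <cstdint>

// Illustrative only: how one result byte is produced from its 16-byte lane.
// Bit 7 of the mask byte zeroes the result, bits 0-3 select a byte in the
// lane, and bits 4-6 never influence the result, which is what allows
// SimplifyDemandedBits to strip anything feeding only those bits.
uint8_t PshufbByte(const uint8_t Lane[16], uint8_t MaskByte) {
  if (MaskByte & 0x80)
    return 0;                   // sign bit set -> zero this result byte
  return Lane[MaskByte & 0x0F]; // low 4 bits index bytes 0-15 in the lane
}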

From 93d20cdfa500aa3e2bb47cc56427e63f9ed34448 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Wed, 28 Aug 2024 13:03:49 +0100
Subject: [PATCH 1/2] [InstCombine][X86] Add test coverage for Issue #106256

---
 .../X86/x86-pshufb-inseltpoison.ll            | 35 +++++++++++++++++++
 .../Transforms/InstCombine/X86/x86-pshufb.ll  | 35 +++++++++++++++++++
 2 files changed, 70 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/X86/x86-pshufb-inseltpoison.ll b/llvm/test/Transforms/InstCombine/X86/x86-pshufb-inseltpoison.ll
index 2f301e9e9c1072..408f8cf226db04 100644
--- a/llvm/test/Transforms/InstCombine/X86/x86-pshufb-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-pshufb-inseltpoison.ll
@@ -468,6 +468,41 @@ define <64 x i8> @fold_with_allpoison_elts_avx512(<64 x i8> %InVec) {
   ret <64 x i8> %1
 }
 
+; Demanded bits tests (PR106256)
+
+define <16 x i8> @demanded_bits_mask(<16 x i8> %InVec, <16 x i8> %InMask) {
+; CHECK-LABEL: @demanded_bits_mask(
+; CHECK-NEXT:    [[M:%.*]] = or <16 x i8> [[INMASK:%.*]], <i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112>
+; CHECK-NEXT:    [[S:%.*]] = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> [[INVEC:%.*]], <16 x i8> [[M]])
+; CHECK-NEXT:    ret <16 x i8> [[S]]
+;
+  %m = or <16 x i8> %InMask, <i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112>
+  %s = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> %m)
+  ret <16 x i8> %s
+}
+
+define <32 x i8> @demanded_bits_mask_avx2(<32 x i8> %InVec, <32 x i8> %InMask) {
+; CHECK-LABEL: @demanded_bits_mask_avx2(
+; CHECK-NEXT:    [[M:%.*]] = or <32 x i8> [[INMASK:%.*]], <i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112>
+; CHECK-NEXT:    [[S:%.*]] = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> [[INVEC:%.*]], <32 x i8> [[M]])
+; CHECK-NEXT:    ret <32 x i8> [[S]]
+;
+  %m = or <32 x i8> %InMask, <i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112>
+  %s = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> %m)
+  ret <32 x i8> %s
+}
+
+define <64 x i8> @demanded_bits_mask_avx512(<64 x i8> %InVec, <64 x i8> %InMask) {
+; CHECK-LABEL: @demanded_bits_mask_avx512(
+; CHECK-NEXT:    [[M:%.*]] = or <64 x i8> [[INMASK:%.*]], <i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112>
+; CHECK-NEXT:    [[S:%.*]] = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[INVEC:%.*]], <64 x i8> [[M]])
+; CHECK-NEXT:    ret <64 x i8> [[S]]
+;
+  %m = or <64 x i8> %InMask, <i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112>
+  %s = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> %m)
+  ret <64 x i8> %s
+}
+
 ; Demanded elts tests.
 
 define <16 x i8> @demanded_elts_insertion(<16 x i8> %InVec, <16 x i8> %BaseMask, i8 %M0, i8 %M15) {
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-pshufb.ll b/llvm/test/Transforms/InstCombine/X86/x86-pshufb.ll
index cd90696eafac6f..ff21dd22be8be7 100644
--- a/llvm/test/Transforms/InstCombine/X86/x86-pshufb.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-pshufb.ll
@@ -468,6 +468,41 @@ define <64 x i8> @fold_with_allundef_elts_avx512(<64 x i8> %InVec) {
   ret <64 x i8> %1
 }
 
+; Demanded bits tests (PR106256)
+
+define <16 x i8> @demanded_bits_mask(<16 x i8> %InVec, <16 x i8> %InMask) {
+; CHECK-LABEL: @demanded_bits_mask(
+; CHECK-NEXT:    [[M:%.*]] = or <16 x i8> [[INMASK:%.*]], <i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112>
+; CHECK-NEXT:    [[S:%.*]] = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> [[INVEC:%.*]], <16 x i8> [[M]])
+; CHECK-NEXT:    ret <16 x i8> [[S]]
+;
+  %m = or <16 x i8> %InMask, <i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112>
+  %s = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> %m)
+  ret <16 x i8> %s
+}
+
+define <32 x i8> @demanded_bits_mask_avx2(<32 x i8> %InVec, <32 x i8> %InMask) {
+; CHECK-LABEL: @demanded_bits_mask_avx2(
+; CHECK-NEXT:    [[M:%.*]] = or <32 x i8> [[INMASK:%.*]], <i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112>
+; CHECK-NEXT:    [[S:%.*]] = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> [[INVEC:%.*]], <32 x i8> [[M]])
+; CHECK-NEXT:    ret <32 x i8> [[S]]
+;
+  %m = or <32 x i8> %InMask, <i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112>
+  %s = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> %m)
+  ret <32 x i8> %s
+}
+
+define <64 x i8> @demanded_bits_mask_avx512(<64 x i8> %InVec, <64 x i8> %InMask) {
+; CHECK-LABEL: @demanded_bits_mask_avx512(
+; CHECK-NEXT:    [[M:%.*]] = or <64 x i8> [[INMASK:%.*]], <i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112>
+; CHECK-NEXT:    [[S:%.*]] = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[INVEC:%.*]], <64 x i8> [[M]])
+; CHECK-NEXT:    ret <64 x i8> [[S]]
+;
+  %m = or <64 x i8> %InMask, <i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112>
+  %s = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> %m)
+  ret <64 x i8> %s
+}
+
 ; Demanded elts tests.
 
 define <16 x i8> @demanded_elts_insertion(<16 x i8> %InVec, <16 x i8> %BaseMask, i8 %M0, i8 %M15) {

From 96b6b7e2bddb18e26345c566da3832141c16f2e4 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Wed, 28 Aug 2024 13:07:27 +0100
Subject: [PATCH 2/2] [InstCombine][X86] Only demand used bits for PSHUFB mask
 values

(V)PSHUFB only uses the sign bit of each mask byte (for zeroing) and its lower 4 bits (to index bytes 0-15 within the lane), so use SimplifyDemandedBits to ignore anything touching the remaining bits.

Fixes #106256
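
As a quick sanity check (illustrative C++ only, names are mine, not part of the patch): the constants or'ed into the masks in the new tests (16, 48, 112 == 0x10, 0x30, 0x70) only set bits outside the demanded 0b10001111 pattern, which is why SimplifyDemandedBits can drop the whole 'or' and feed the incoming mask straight to the intrinsic:

#include <cstdint>

// Every or'ed constant lies entirely within the ignored bits 4-6 of a
// PSHUFB mask byte, so it cannot change the shuffle result.
constexpr uint8_t DemandedMaskBits = 0b10001111;
static_assert((0x10 & DemandedMaskBits) == 0 &&
              (0x30 & DemandedMaskBits) == 0 &&
              (0x70 & DemandedMaskBits) == 0,
              "or'ed constants never touch demanded PSHUFB mask bits");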
---
 llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp          | 7 ++++++-
 .../InstCombine/X86/x86-pshufb-inseltpoison.ll           | 9 +++------
 llvm/test/Transforms/InstCombine/X86/x86-pshufb.ll       | 9 +++------
 3 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
index 793d62ba2a8e79..42ad8905a703c4 100644
--- a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
@@ -2950,11 +2950,16 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
 
   case Intrinsic::x86_ssse3_pshuf_b_128:
   case Intrinsic::x86_avx2_pshuf_b:
-  case Intrinsic::x86_avx512_pshuf_b_512:
+  case Intrinsic::x86_avx512_pshuf_b_512: {
     if (Value *V = simplifyX86pshufb(II, IC.Builder)) {
       return IC.replaceInstUsesWith(II, V);
     }
+    KnownBits KnownMask(8);
+    if (IC.SimplifyDemandedBits(&II, 1, APInt(8, 0b10001111), KnownMask)) {
+      return &II;
+    }
     break;
+  }
 
   case Intrinsic::x86_avx_vpermilvar_ps:
   case Intrinsic::x86_avx_vpermilvar_ps_256:
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-pshufb-inseltpoison.ll b/llvm/test/Transforms/InstCombine/X86/x86-pshufb-inseltpoison.ll
index 408f8cf226db04..f8cec9c152a08b 100644
--- a/llvm/test/Transforms/InstCombine/X86/x86-pshufb-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-pshufb-inseltpoison.ll
@@ -472,8 +472,7 @@ define <64 x i8> @fold_with_allpoison_elts_avx512(<64 x i8> %InVec) {
 
 define <16 x i8> @demanded_bits_mask(<16 x i8> %InVec, <16 x i8> %InMask) {
 ; CHECK-LABEL: @demanded_bits_mask(
-; CHECK-NEXT:    [[M:%.*]] = or <16 x i8> [[INMASK:%.*]], <i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112>
-; CHECK-NEXT:    [[S:%.*]] = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> [[INVEC:%.*]], <16 x i8> [[M]])
+; CHECK-NEXT:    [[S:%.*]] = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> [[INVEC:%.*]], <16 x i8> [[INMASK:%.*]])
 ; CHECK-NEXT:    ret <16 x i8> [[S]]
 ;
   %m = or <16 x i8> %InMask, <i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112>
@@ -483,8 +482,7 @@ define <16 x i8> @demanded_bits_mask(<16 x i8> %InVec, <16 x i8> %InMask) {
 
 define <32 x i8> @demanded_bits_mask_avx2(<32 x i8> %InVec, <32 x i8> %InMask) {
 ; CHECK-LABEL: @demanded_bits_mask_avx2(
-; CHECK-NEXT:    [[M:%.*]] = or <32 x i8> [[INMASK:%.*]], <i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112>
-; CHECK-NEXT:    [[S:%.*]] = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> [[INVEC:%.*]], <32 x i8> [[M]])
+; CHECK-NEXT:    [[S:%.*]] = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> [[INVEC:%.*]], <32 x i8> [[INMASK:%.*]])
 ; CHECK-NEXT:    ret <32 x i8> [[S]]
 ;
   %m = or <32 x i8> %InMask, <i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112>
@@ -494,8 +492,7 @@ define <32 x i8> @demanded_bits_mask_avx2(<32 x i8> %InVec, <32 x i8> %InMask) {
 
 define <64 x i8> @demanded_bits_mask_avx512(<64 x i8> %InVec, <64 x i8> %InMask) {
 ; CHECK-LABEL: @demanded_bits_mask_avx512(
-; CHECK-NEXT:    [[M:%.*]] = or <64 x i8> [[INMASK:%.*]], <i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112>
-; CHECK-NEXT:    [[S:%.*]] = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[INVEC:%.*]], <64 x i8> [[M]])
+; CHECK-NEXT:    [[S:%.*]] = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[INVEC:%.*]], <64 x i8> [[INMASK:%.*]])
 ; CHECK-NEXT:    ret <64 x i8> [[S]]
 ;
   %m = or <64 x i8> %InMask, <i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112>
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-pshufb.ll b/llvm/test/Transforms/InstCombine/X86/x86-pshufb.ll
index ff21dd22be8be7..fd99fd880a8098 100644
--- a/llvm/test/Transforms/InstCombine/X86/x86-pshufb.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-pshufb.ll
@@ -472,8 +472,7 @@ define <64 x i8> @fold_with_allundef_elts_avx512(<64 x i8> %InVec) {
 
 define <16 x i8> @demanded_bits_mask(<16 x i8> %InVec, <16 x i8> %InMask) {
 ; CHECK-LABEL: @demanded_bits_mask(
-; CHECK-NEXT:    [[M:%.*]] = or <16 x i8> [[INMASK:%.*]], <i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112>
-; CHECK-NEXT:    [[S:%.*]] = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> [[INVEC:%.*]], <16 x i8> [[M]])
+; CHECK-NEXT:    [[S:%.*]] = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> [[INVEC:%.*]], <16 x i8> [[INMASK:%.*]])
 ; CHECK-NEXT:    ret <16 x i8> [[S]]
 ;
   %m = or <16 x i8> %InMask, <i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112>
@@ -483,8 +482,7 @@ define <16 x i8> @demanded_bits_mask(<16 x i8> %InVec, <16 x i8> %InMask) {
 
 define <32 x i8> @demanded_bits_mask_avx2(<32 x i8> %InVec, <32 x i8> %InMask) {
 ; CHECK-LABEL: @demanded_bits_mask_avx2(
-; CHECK-NEXT:    [[M:%.*]] = or <32 x i8> [[INMASK:%.*]], <i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112>
-; CHECK-NEXT:    [[S:%.*]] = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> [[INVEC:%.*]], <32 x i8> [[M]])
+; CHECK-NEXT:    [[S:%.*]] = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> [[INVEC:%.*]], <32 x i8> [[INMASK:%.*]])
 ; CHECK-NEXT:    ret <32 x i8> [[S]]
 ;
   %m = or <32 x i8> %InMask, <i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112>
@@ -494,8 +492,7 @@ define <32 x i8> @demanded_bits_mask_avx2(<32 x i8> %InVec, <32 x i8> %InMask) {
 
 define <64 x i8> @demanded_bits_mask_avx512(<64 x i8> %InVec, <64 x i8> %InMask) {
 ; CHECK-LABEL: @demanded_bits_mask_avx512(
-; CHECK-NEXT:    [[M:%.*]] = or <64 x i8> [[INMASK:%.*]], <i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112>
-; CHECK-NEXT:    [[S:%.*]] = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[INVEC:%.*]], <64 x i8> [[M]])
+; CHECK-NEXT:    [[S:%.*]] = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[INVEC:%.*]], <64 x i8> [[INMASK:%.*]])
 ; CHECK-NEXT:    ret <64 x i8> [[S]]
 ;
   %m = or <64 x i8> %InMask, <i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112, i8 16, i8 48, i8 112, i8 112>


