[PATCH] D36992: [AVX512] Don't create SHRUNKBLEND SDNodes for 512-bit vectors.

Mon Aug 21 17:53:10 PDT 2017

craig.topper created this revision.

There are no 512-bit blend instructions that select on the sign bits of the mask, so we shouldn't create SHRUNKBLEND nodes for 512-bit vectors.

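On a side note, it looks like there may be a missed opportunity for constant-folding TESTM when its LHS and RHS are equal (see the `vptestmq %zmm0, %zmm0` in the new test's expected output, where %zmm0 has just been set to all-ones).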

This fixes PR34139.


https://reviews.llvm.org/D36992

Files:
  lib/Target/X86/X86ISelLowering.cpp
  test/CodeGen/X86/pr34139.ll


Index: test/CodeGen/X86/pr34139.ll
===================================================================

--- /dev/null
+++ test/CodeGen/X86/pr34139.ll
@@ -0,0 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=knl | FileCheck %s
+
+define void @f_f() {
+; CHECK-LABEL: f_f:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vmovdqa %xmm0, (%rax)
+; CHECK-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
+; CHECK-NEXT:    vmovapd (%rax), %zmm1
+; CHECK-NEXT:    vptestmq %zmm0, %zmm0, %k1
+; CHECK-NEXT:    vmovapd %zmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vmovapd %zmm1, (%rax)
+  store <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8>* undef
+  %load_mask8.i.i.i = load <16 x i8>, <16 x i8>* undef
+  %v.i.i.i.i = load <16 x double>, <16 x double>* undef
+  %mask_vec_i1.i.i.i51.i.i = icmp ne <16 x i8> %load_mask8.i.i.i, zeroinitializer
+  %v1.i.i.i.i = select <16 x i1> %mask_vec_i1.i.i.i51.i.i, <16 x double> undef, <16 x double> %v.i.i.i.i
+  store <16 x double> %v1.i.i.i.i, <16 x double>* undef
+  unreachable
+}
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -30629,6 +30629,9 @@
     // Byte blends are only available in AVX2
     if (VT == MVT::v32i8 && !Subtarget.hasAVX2())
       return SDValue();
+    // There are no 512-bit blend instructions that use sign bits.
+    if (VT.is512BitVector())
+      return SDValue();
 
     assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
     APInt DemandedMask(APInt::getSignMask(BitWidth));


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D36992.112097.patch
Type: text/x-patch
Size: 1813 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170822/f45ce085/attachment.bin>