[llvm] r331847 - [X86] Combine (vXi1 (bitcast (-1))) and (vXi1 (bitcast (0))) to an all-ones or all-zeros vXi1 vector.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Tue May 8 23:07:20 PDT 2018


Author: ctopper
Date: Tue May  8 23:07:20 2018
New Revision: 331847

URL: http://llvm.org/viewvc/llvm-project?rev=331847&view=rev
Log:
[X86] Combine (vXi1 (bitcast (-1))) and (vXi1 (bitcast (0))) to an all-ones or all-zeros vXi1 vector.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=331847&r1=331846&r2=331847&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue May  8 23:07:20 2018
@@ -31095,6 +31095,16 @@ static SDValue combineBitcast(SDNode *N,
     return combinevXi1ConstantToInteger(N0, DAG);
   }
 
+  if (Subtarget.hasAVX512() && SrcVT.isScalarInteger() &&
+      VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
+      isa<ConstantSDNode>(N0)) {
+    auto *C = cast<ConstantSDNode>(N0);
+    if (C->isAllOnesValue())
+      return DAG.getConstant(1, SDLoc(N0), VT);
+    if (C->isNullValue())
+      return DAG.getConstant(0, SDLoc(N0), VT);
+  }
+
   // Try to remove bitcasts from input and output of mask arithmetic to
   // remove GPR<->K-register crossings.
   if (SDValue V = combineCastedMaskArithmetic(N, DAG, DCI, Subtarget))

Modified: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll?rev=331847&r1=331846&r2=331847&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll Tue May  8 23:07:20 2018
@@ -3446,3 +3446,41 @@ entry:
   store <4 x i1> <i1 1, i1 0, i1 1, i1 0>, <4 x i1>* %R
   ret void
 }
+
+; Make sure we bring the -1 constant into the mask domain.
+define void @mask_not_cast(i8*, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>) {
+; CHECK-LABEL: mask_not_cast:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    vpcmpleud %zmm3, %zmm2, %k0
+; CHECK-NEXT:    knotw %k0, %k1
+; CHECK-NEXT:    vptestmd %zmm0, %zmm1, %k1 {%k1}
+; CHECK-NEXT:    vmovdqu32 %zmm0, (%rdi) {%k1}
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+;
+; X86-LABEL: mask_not_cast:
+; X86:       ## %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vpcmpleud %zmm3, %zmm2, %k0
+; X86-NEXT:    knotw %k0, %k1
+; X86-NEXT:    vptestmd %zmm0, %zmm1, %k1 {%k1}
+; X86-NEXT:    vmovdqu32 %zmm0, (%eax) {%k1}
+; X86-NEXT:    vzeroupper
+; X86-NEXT:    retl
+  %6 = and <8 x i64> %2, %1
+  %7 = bitcast <8 x i64> %6 to <16 x i32>
+  %8 = icmp ne <16 x i32> %7, zeroinitializer
+  %9 = bitcast <16 x i1> %8 to i16
+  %10 = bitcast <8 x i64> %3 to <16 x i32>
+  %11 = bitcast <8 x i64> %4 to <16 x i32>
+  %12 = icmp ule <16 x i32> %10, %11
+  %13 = bitcast <16 x i1> %12 to i16
+  %14 = xor i16 %13, -1
+  %15 = and i16 %14, %9
+  %16 = bitcast <8 x i64> %1 to <16 x i32>
+  %17 = bitcast i8* %0 to <16 x i32>*
+  %18 = bitcast i16 %15 to <16 x i1>
+  tail call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> %16, <16 x i32>* %17, i32 1, <16 x i1> %18) #2
+  ret void
+}
+declare void @llvm.masked.store.v16i32.p0v16i32(<16 x i32>, <16 x i32>*, i32, <16 x i1>)




More information about the llvm-commits mailing list