[llvm] r331896 - [DAGCombiner] In visitBITCAST when trying to constant fold the bitcast, only call getBitcast if its an fp->int or int->fp conversion even when before legalize ops.

Wed May 9 10:14:27 PDT 2018

Author: ctopper
Date: Wed May  9 10:14:27 2018
New Revision: 331896

URL: http://llvm.org/viewvc/llvm-project?rev=331896&view=rev
Log:
[DAGCombiner] In visitBITCAST when trying to constant fold the bitcast, only call getBitcast if its an fp->int or int->fp conversion even when before legalize ops.

Previously if !LegalOperations we would blindly call getBitcast and hope that getNode would constant fold it. But if the conversion is between a vector and a scalar, getNode has no simplification.

This means we would just get back the original N. We would then return that N which would make the caller of visitBITCAST think that we used CombineTo and did our own worklist management. This prevents target specific optimizations from being called for vector/scalar bitcasts until after legal operations.

Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=331896&r1=331895&r2=331896&view=diff
==============================================================================

--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Wed May  9 10:14:27 2018
@@ -9210,17 +9210,17 @@ SDValue DAGCombiner::visitBITCAST(SDNode
   }
 
   // If the input is a constant, let getNode fold it.
-  if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
-    // If we can't allow illegal operations, we need to check that this is just
-    // a fp -> int or int -> conversion and that the resulting operation will
-    // be legal.
-    if (!LegalOperations ||
-        (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
-         TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
-        (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
-         TLI.isOperationLegal(ISD::Constant, VT)))
-      return DAG.getBitcast(VT, N0);
-  }
+  // We always need to check that this is just a fp -> int or int -> conversion
+  // otherwise we will get back N which will confuse the caller into thinking
+  // we used CombineTo. This can block target combines from running. If we can't
+  // allowed legal operations, we need to ensure the resulting operation will be
+  // legal.
+  // TODO: Maybe we should check that the return value isn't N explicitly?
+  if ((isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
+       (!LegalOperations || TLI.isOperationLegal(ISD::ConstantFP, VT))) ||
+      (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
+       (!LegalOperations || TLI.isOperationLegal(ISD::Constant, VT))))
+    return DAG.getBitcast(VT, N0);
 
   // (conv (conv x, t1), t2) -> (conv x, t2)
   if (N0.getOpcode() == ISD::BITCAST)

Modified: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll?rev=331896&r1=331895&r2=331896&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll Wed May  9 10:14:27 2018
@@ -3451,8 +3451,7 @@ entry:
 define void @mask_not_cast(i8*, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>) {
 ; CHECK-LABEL: mask_not_cast:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vpcmpleud %zmm3, %zmm2, %k0
-; CHECK-NEXT:    knotw %k0, %k1
+; CHECK-NEXT:    vpcmpnleud %zmm3, %zmm2, %k1
 ; CHECK-NEXT:    vptestmd %zmm0, %zmm1, %k1 {%k1}
 ; CHECK-NEXT:    vmovdqu32 %zmm0, (%rdi) {%k1}
 ; CHECK-NEXT:    vzeroupper
@@ -3461,8 +3460,7 @@ define void @mask_not_cast(i8*, <8 x i64
 ; X86-LABEL: mask_not_cast:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    vpcmpleud %zmm3, %zmm2, %k0
-; X86-NEXT:    knotw %k0, %k1
+; X86-NEXT:    vpcmpnleud %zmm3, %zmm2, %k1
 ; X86-NEXT:    vptestmd %zmm0, %zmm1, %k1 {%k1}
 ; X86-NEXT:    vmovdqu32 %zmm0, (%eax) {%k1}
 ; X86-NEXT:    vzeroupper