[llvm] r297762 - [DAG] vector div/rem with any zero element in divisor is undef

Tue Mar 14 11:06:28 PDT 2017

Author: spatel
Date: Tue Mar 14 13:06:28 2017
New Revision: 297762

URL: http://llvm.org/viewvc/llvm-project?rev=297762&view=rev
Log:
[DAG] vector div/rem with any zero element in divisor is undef

This is the backend counterpart to:
https://reviews.llvm.org/rL297390
https://reviews.llvm.org/rL297409
and follow-up to:
https://reviews.llvm.org/rL297384

It surprised me that we need to duplicate the check in FoldConstantArithmetic and FoldConstantVectorArithmetic,
but neither one alone catches all of the test cases. There is an existing code comment about merging those
someday.

Differential Revision: https://reviews.llvm.org/D30826

Modified:
    llvm/trunk/include/llvm/CodeGen/SelectionDAG.h
    llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
    llvm/trunk/test/CodeGen/X86/div-rem-simplify.ll
    llvm/trunk/test/CodeGen/X86/vec_sdiv_to_shift.ll

Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAG.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/SelectionDAG.h?rev=297762&r1=297761&r2=297762&view=diff
==============================================================================

--- llvm/trunk/include/llvm/CodeGen/SelectionDAG.h (original)
+++ llvm/trunk/include/llvm/CodeGen/SelectionDAG.h Tue Mar 14 13:06:28 2017
@@ -740,6 +740,9 @@ public:
     return getNode(ISD::CALLSEQ_END, DL, NodeTys, Ops);
   }
 
+  /// Return true if the result of this operation is always undefined.
+  bool isUndef(unsigned Opcode, ArrayRef<SDValue> Ops);
+
   /// Return an UNDEF node. UNDEF does not have a useful SDLoc.
   SDValue getUNDEF(EVT VT) {
     return getNode(ISD::UNDEF, SDLoc(), VT);

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=297762&r1=297761&r2=297762&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Tue Mar 14 13:06:28 2017
@@ -2524,15 +2524,7 @@ static SDValue simplifyDivRem(SDNode *N,
   EVT VT = N->getValueType(0);
   SDLoc DL(N);
 
-  // X / undef -> undef
-  // X % undef -> undef
-  if (N1.isUndef())
-    return N1;
-
-  // X / 0 --> undef
-  // X % 0 --> undef
-  // We don't need to preserve faults!
-  if (isNullConstantOrNullSplatConstant(N1))
+  if (DAG.isUndef(N->getOpcode(), {N0, N1}))
     return DAG.getUNDEF(VT);
 
   // undef / X -> 0

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=297762&r1=297761&r2=297762&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Tue Mar 14 13:06:28 2017
@@ -3695,12 +3695,6 @@ SDValue SelectionDAG::FoldConstantArithm
   if (Cst1->isOpaque() || Cst2->isOpaque())
     return SDValue();
 
-  // Division/remainder with a zero divisor is undefined behavior.
-  if ((Opcode == ISD::SDIV || Opcode == ISD::UDIV ||
-       Opcode == ISD::SREM || Opcode == ISD::UREM) &&
-      Cst2->isNullValue())
-    return getUNDEF(VT);
-
   std::pair<APInt, bool> Folded = FoldValue(Opcode, Cst1->getAPIntValue(),
                                             Cst2->getAPIntValue());
   if (!Folded.second)
@@ -3728,6 +3722,30 @@ SDValue SelectionDAG::FoldSymbolOffset(u
                           GA->getOffset() + uint64_t(Offset));
 }
 
+bool SelectionDAG::isUndef(unsigned Opcode, ArrayRef<SDValue> Ops) {
+  switch (Opcode) {
+  case ISD::SDIV:
+  case ISD::UDIV:
+  case ISD::SREM:
+  case ISD::UREM: {
+    // If a divisor is zero/undef or any element of a divisor vector is
+    // zero/undef, the whole op is undef.
+    assert(Ops.size() == 2 && "Div/rem should have 2 operands");
+    SDValue Divisor = Ops[1];
+    if (Divisor.isUndef() || isNullConstant(Divisor))
+      return true;
+
+    return ISD::isBuildVectorOfConstantSDNodes(Divisor.getNode()) &&
+           any_of(Divisor->op_values(),
+                  [](SDValue V) { return V.isUndef() || isNullConstant(V); });
+    // TODO: Handle signed overflow.
+  }
+  // TODO: Handle oversized shifts.
+  default:
+    return false;
+  }
+}
+
 SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
                                              EVT VT, SDNode *Cst1,
                                              SDNode *Cst2) {
@@ -3737,6 +3755,9 @@ SDValue SelectionDAG::FoldConstantArithm
   if (Opcode >= ISD::BUILTIN_OP_END)
     return SDValue();
 
+  if (isUndef(Opcode, {SDValue(Cst1, 0), SDValue(Cst2, 0)}))
+    return getUNDEF(VT);
+
   // Handle the case of two scalars.
   if (const ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1)) {
     if (const ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2)) {
@@ -3804,6 +3825,9 @@ SDValue SelectionDAG::FoldConstantVector
   if (Opcode >= ISD::BUILTIN_OP_END)
     return SDValue();
 
+  if (isUndef(Opcode, Ops))
+    return getUNDEF(VT);
+
   // We can only fold vectors - maybe merge with FoldConstantArithmetic someday?
   if (!VT.isVector())
     return SDValue();

Modified: llvm/trunk/test/CodeGen/X86/div-rem-simplify.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/div-rem-simplify.ll?rev=297762&r1=297761&r2=297762&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/div-rem-simplify.ll (original)
+++ llvm/trunk/test/CodeGen/X86/div-rem-simplify.ll Tue Mar 14 13:06:28 2017
@@ -152,7 +152,6 @@ define <4 x i32> @sel_sdiv0_vec(i1 %cond
 define <4 x i32> @sdiv0elt_vec(<4 x i32> %x) {
 ; CHECK-LABEL: sdiv0elt_vec:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    movaps {{.*#+}} xmm0 = <u,12,u,4294967292>
 ; CHECK-NEXT:    retq
   %zero = and <4 x i32> %x, <i32 0, i32 0, i32 0, i32 0>
   %some_ones = or <4 x i32> %zero, <i32 0, i32 -1, i32 0, i32 3>
@@ -163,7 +162,6 @@ define <4 x i32> @sdiv0elt_vec(<4 x i32>
 define <4 x i32> @udiv0elt_vec(<4 x i32> %x) {
 ; CHECK-LABEL: udiv0elt_vec:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    movaps {{.*#+}} xmm0 = <u,4,3,u>
 ; CHECK-NEXT:    retq
   %div = udiv <4 x i32> <i32 11, i32 12, i32 13, i32 14>, <i32 0, i32 3, i32 4, i32 0>
   ret <4 x i32> %div
@@ -172,7 +170,6 @@ define <4 x i32> @udiv0elt_vec(<4 x i32>
 define <4 x i32> @urem0elt_vec(<4 x i32> %x) {
 ; CHECK-LABEL: urem0elt_vec:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    movaps {{.*#+}} xmm0 = <u,u,u,2>
 ; CHECK-NEXT:    retq
   %zero = and <4 x i32> %x, <i32 0, i32 0, i32 0, i32 0>
   %some_ones = or <4 x i32> %zero, <i32 0, i32 0, i32 0, i32 3>
@@ -183,8 +180,6 @@ define <4 x i32> @urem0elt_vec(<4 x i32>
 define <4 x i32> @srem0elt_vec(<4 x i32> %x) {
 ; CHECK-LABEL: srem0elt_vec:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    movl $-2, %eax
-; CHECK-NEXT:    movd %eax, %xmm0
 ; CHECK-NEXT:    retq
   %rem = srem <4 x i32> <i32 -11, i32 -12, i32 -13, i32 -14>, <i32 -3, i32 -3, i32 0, i32 2>
   ret <4 x i32> %rem

Modified: llvm/trunk/test/CodeGen/X86/vec_sdiv_to_shift.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_sdiv_to_shift.ll?rev=297762&r1=297761&r2=297762&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_sdiv_to_shift.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_sdiv_to_shift.ll Tue Mar 14 13:06:28 2017
@@ -184,27 +184,15 @@ entry:
   ret <16 x i16> %a0
 }
 
-; TODO: The div-by-0 lanes are folded away, so we use scalar ops. Would it be better to keep this in the vector unit?
+; Div-by-0 in any lane is UB.
 
 define <4 x i32> @sdiv_non_splat(<4 x i32> %x) {
 ; SSE-LABEL: sdiv_non_splat:
 ; SSE:       # BB#0:
-; SSE-NEXT:    movd %xmm0, %eax
-; SSE-NEXT:    movl %eax, %ecx
-; SSE-NEXT:    shrl $31, %ecx
-; SSE-NEXT:    addl %eax, %ecx
-; SSE-NEXT:    sarl %ecx
-; SSE-NEXT:    movd %ecx, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: sdiv_non_splat:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vmovd %xmm0, %eax
-; AVX-NEXT:    movl %eax, %ecx
-; AVX-NEXT:    shrl $31, %ecx
-; AVX-NEXT:    addl %eax, %ecx
-; AVX-NEXT:    sarl %ecx
-; AVX-NEXT:    vmovd %ecx, %xmm0
 ; AVX-NEXT:    retq
   %y = sdiv <4 x i32> %x, <i32 2, i32 0, i32 0, i32 0>
   ret <4 x i32> %y