[llvm] r239070 - [DAGCombiner] Fix wrong folding of a build_vector into a blend with zero.

Thu Jun 4 12:15:02 PDT 2015

Author: adibiagio
Date: Thu Jun  4 14:15:01 2015
New Revision: 239070

URL: http://llvm.org/viewvc/llvm-project?rev=239070&view=rev
Log:
[DAGCombiner] Fix wrong folding of a build_vector into a blend with zero.

Method 'visitBUILD_VECTOR' in the DAGCombiner knows how to combine a
build_vector of a bunch of extract_vector_elt nodes and constant zero nodes
into a shuffle blend with a zero vector.

However, method 'visitBUILD_VECTOR' forgot that a floating point
build_vector may contain negative zero as well as positive zero.

Example:

define <2 x double> @example(<2 x double> %A) {
entry:
  %0 = extractelement <2 x double> %A, i32 0
  %1 = insertelement <2 x double> undef, double %0, i32 0
  %2 = insertelement <2 x double> %1, double -0.0, i32 1
  ret <2 x double> %2
}

Before this patch, llc (with -mattr=+sse4.1) wrongly generated
  movq   %xmm0, %xmm0  # xmm0 = xmm0[0],zero

So, the sign bit of the negative zero was effectively lost.

This patch fixes the problem by adding explicit checks for positive zero.

With this patch, llc produces the following code for the example above:
  movhpd .LCPI0_0(%rip), %xmm0

where .LCPI0_0 refers to a 'double -0'.

Added:
    llvm/trunk/test/CodeGen/X86/fold-buildvector-bug.ll
Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=239070&r1=239069&r2=239070&view=diff
==============================================================================

--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Thu Jun  4 14:15:01 2015
@@ -1587,6 +1587,11 @@ static bool isNullConstant(SDValue V) {
   return Const != nullptr && Const->isNullValue();
 }
 
+static bool isNullFPConstant(SDValue V) {
+  ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(V);
+  return Const != nullptr && Const->isZero() && !Const->isNegative();
+}
+
 static bool isAllOnesConstant(SDValue V) {
   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
   return Const != nullptr && Const->isAllOnesValue();
@@ -11912,9 +11917,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(S
     if (Op.getOpcode() == ISD::UNDEF) continue;
 
     // See if we can combine this build_vector into a blend with a zero vector.
-    if (!VecIn2.getNode() && (isNullConstant(Op) ||
-        (Op.getOpcode() == ISD::ConstantFP &&
-        cast<ConstantFPSDNode>(Op.getNode())->getValueAPF().isZero()))) {
+    if (!VecIn2.getNode() && (isNullConstant(Op) || isNullFPConstant(Op))) {
       UsesZeroVector = true;
       continue;
     }

Added: llvm/trunk/test/CodeGen/X86/fold-buildvector-bug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fold-buildvector-bug.ll?rev=239070&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fold-buildvector-bug.ll (added)
+++ llvm/trunk/test/CodeGen/X86/fold-buildvector-bug.ll Thu Jun  4 14:15:01 2015
@@ -0,0 +1,40 @@
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 < %s | FileCheck %s
+
+; Verify that the DAGCombiner doesn't wrongly fold a build_vector into a
+; blend with a zero vector if the build_vector contains negative zero.
+;
+; TODO: the codegen for function 'test_negative_zero_1' is sub-optimal.
+; Ideally, we should generate a single shuffle blend operation.
+
+define <4 x float> @test_negative_zero_1(<4 x float> %A) {
+; CHECK-LABEL: test_negative_zero_1:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    movapd %xmm0, %xmm1
+; CHECK-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1,0]
+; CHECK-NEXT:    xorps %xmm2, %xmm2
+; CHECK-NEXT:    blendps {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
+; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; CHECK-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; CHECK-NEXT:    retq
+entry:
+  %0 = extractelement <4 x float> %A, i32 0
+  %1 = insertelement <4 x float> undef, float %0, i32 0
+  %2 = insertelement <4 x float> %1, float -0.0, i32 1
+  %3 = extractelement <4 x float> %A, i32 2
+  %4 = insertelement <4 x float> %2, float %3, i32 2
+  %5 = insertelement <4 x float> %4, float 0.0, i32 3
+  ret <4 x float> %5
+}
+
+define <2 x double> @test_negative_zero_2(<2 x double> %A) {
+; CHECK-LABEL: test_negative_zero_2:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    movhpd {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    retq
+entry:
+  %0 = extractelement <2 x double> %A, i32 0
+  %1 = insertelement <2 x double> undef, double %0, i32 0
+  %2 = insertelement <2 x double> %1, double -0.0, i32 1
+  ret <2 x double> %2
+}