[llvm] r214670 - fix for PR20354 - Miscompile of fabs due to vectorization

Sanjay Patel spatel at rotateright.com
Sun Aug 3 15:48:24 PDT 2014


Author: spatel
Date: Sun Aug  3 17:48:23 2014
New Revision: 214670

URL: http://llvm.org/viewvc/llvm-project?rev=214670&view=rev
Log:
fix for PR20354 - Miscompile of fabs due to vectorization

This is intended to be the minimal change needed to fix PR20354 ( http://llvm.org/bugs/show_bug.cgi?id=20354 ). The check for a vector operation was wrong: it tested whether the integer operand of the bitcast was a vector, but we need to check that the fabs itself is not a vector operation.

This patch will not generate the optimal code. A constant pool load and 'and' op will be generated instead of just returning a value that we can calculate in advance (as we do for the scalar case). I've put a 'TODO' comment for that in visitFABS and expect to have that patch ready soon.

There is a very similar optimization that we can do in visitFNEG, so I've put another 'TODO' there and expect to have another patch for that too.

Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/trunk/test/CodeGen/X86/vec_fabs.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=214670&r1=214669&r2=214670&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Sun Aug  3 17:48:23 2014
@@ -7311,6 +7311,8 @@ SDValue DAGCombiner::visitFNEG(SDNode *N
 
   // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading
   // constant pool values.
+  // TODO: We can also optimize for vectors here, but we need to make sure
+  // that the sign mask is created properly for each vector element.
   if (!TLI.isFNegFree(VT) && N0.getOpcode() == ISD::BITCAST &&
       !VT.isVector() &&
       N0.getNode()->hasOneUse() &&
@@ -7403,10 +7405,12 @@ SDValue DAGCombiner::visitFABS(SDNode *N
 
   // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading
   // constant pool values.
+  // TODO: We can also optimize for vectors here, but we need to make sure
+  // that the sign mask is created properly for each vector element.
   if (!TLI.isFAbsFree(VT) &&
       N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() &&
       N0.getOperand(0).getValueType().isInteger() &&
-      !N0.getOperand(0).getValueType().isVector()) {
+      !VT.isVector()) {
     SDValue Int = N0.getOperand(0);
     EVT IntVT = Int.getValueType();
     if (IntVT.isInteger() && !IntVT.isVector()) {

Modified: llvm/trunk/test/CodeGen/X86/vec_fabs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_fabs.ll?rev=214670&r1=214669&r2=214670&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_fabs.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_fabs.ll Sun Aug  3 17:48:23 2014
@@ -1,9 +1,9 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=corei7-avx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck %s
 
 
 define <2 x double> @fabs_v2f64(<2 x double> %p)
 {
-  ; CHECK: fabs_v2f64
+  ; CHECK-LABEL: fabs_v2f64
   ; CHECK: vandps
   %t = call <2 x double> @llvm.fabs.v2f64(<2 x double> %p)
   ret <2 x double> %t
@@ -12,7 +12,7 @@ declare <2 x double> @llvm.fabs.v2f64(<2
 
 define <4 x float> @fabs_v4f32(<4 x float> %p)
 {
-  ; CHECK: fabs_v4f32
+  ; CHECK-LABEL: fabs_v4f32
   ; CHECK: vandps
   %t = call <4 x float> @llvm.fabs.v4f32(<4 x float> %p)
   ret <4 x float> %t
@@ -21,7 +21,7 @@ declare <4 x float> @llvm.fabs.v4f32(<4
 
 define <4 x double> @fabs_v4f64(<4 x double> %p)
 {
-  ; CHECK: fabs_v4f64
+  ; CHECK-LABEL: fabs_v4f64
   ; CHECK: vandps
   %t = call <4 x double> @llvm.fabs.v4f64(<4 x double> %p)
   ret <4 x double> %t
@@ -30,9 +30,31 @@ declare <4 x double> @llvm.fabs.v4f64(<4
 
 define <8 x float> @fabs_v8f32(<8 x float> %p)
 {
-  ; CHECK: fabs_v8f32
+  ; CHECK-LABEL: fabs_v8f32
   ; CHECK: vandps
   %t = call <8 x float> @llvm.fabs.v8f32(<8 x float> %p)
   ret <8 x float> %t
 }
 declare <8 x float> @llvm.fabs.v8f32(<8 x float> %p)
+
+; PR20354: when generating code for a vector fabs op,
+; make sure the correct mask is used for all vector elements.
+; CHECK-LABEL: LCPI4_0
+; CHECK: .long	2147483648
+; CHECK: .long	2147483648
+; CHECK-LABEL: LCPI4_1 
+; CHECK: .long	2147483647
+; CHECK: .long	2147483647
+; CHECK-LABEL: fabs_v2f32_1
+; CHECK: vmovdqa LCPI4_0, %xmm0
+; CHECK: vpand   LCPI4_1, %xmm0, %xmm0
+; CHECK: vmovd   %xmm0, %eax
+; CHECK: vpextrd $1, %xmm0, %edx
+define i64 @fabs_v2f32_1() {
+  %highbits = bitcast i64 9223372039002259456 to <2 x float> ; 0x8000_0000_8000_0000
+  %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %highbits)
+  %ret = bitcast <2 x float> %fabs to i64
+  ret i64 %ret
+}
+
+declare <2 x float> @llvm.fabs.v2f32(<2 x float> %p)





More information about the llvm-commits mailing list