[PATCH] D17221: Don't combine fp_round (fp_round x) if f80 to f16 is generated
Pirama Arumuga Nainar via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 12 16:12:24 PST 2016
This revision was automatically updated to reflect the committed changes.
Closed by commit rL260769: Don't combine fp_round (fp_round x) if f80 to f16 is generated (authored by pirama).
Changed prior to commit:
http://reviews.llvm.org/D17221?vs=47869&id=47877#toc
Repository:
rL LLVM
http://reviews.llvm.org/D17221
Files:
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/test/CodeGen/X86/half.ll
Index: llvm/trunk/test/CodeGen/X86/half.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/half.ll
+++ llvm/trunk/test/CodeGen/X86/half.ll
@@ -2,6 +2,8 @@
; RUN: | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-LIBCALL
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+f16c -asm-verbose=false \
; RUN: | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-F16C
+; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr +sse2 -asm-verbose=false \
+; RUN: | FileCheck %s -check-prefix=CHECK-I686
define void @test_load_store(half* %in, half* %out) {
; CHECK-LABEL: test_load_store:
@@ -260,4 +262,17 @@
ret void
}
+declare float @test_floatret();
+
+; On i686, if SSE2 is available, the return value from test_floatret is loaded
+; to f80 and then rounded to f32. The DAG combiner should not combine this
+; fp_round and the subsequent fptrunc from float to half.
+define half @test_f80trunc_nodagcombine() #0 {
+; CHECK-LABEL: test_f80trunc_nodagcombine:
+; CHECK-I686-NOT: calll __truncxfhf2
+ %1 = call float @test_floatret()
+ %2 = fptrunc float %1 to half
+ ret half %2
+}
+
attributes #0 = { nounwind }
Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9019,6 +9019,17 @@
if (N0.getOpcode() == ISD::FP_ROUND) {
const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
const bool N0IsTrunc = N0.getNode()->getConstantOperandVal(1) == 1;
+
+ // Skip this folding if it results in an fp_round from f80 to f16.
+ //
+ // f80 to f16 always generates an expensive (and as yet, unimplemented)
+ // libcall to __truncxfhf2 instead of selecting native f16 conversion
+ // instructions from f32 or f64. Moreover, the first (value-preserving)
+ // fp_round from f80 to either f32 or f64 may become a NOP on platforms like
+ // x86.
+ if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
+ return SDValue();
+
// If the first fp_round isn't a value preserving truncation, it might
// introduce a tie in the second fp_round, that wouldn't occur in the
// single-step fp_round we want to fold to.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D17221.47877.patch
Type: text/x-patch
Size: 2369 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160213/3a375a50/attachment.bin>
More information about the llvm-commits mailing list