[PATCH] R600: support extension from fp16 to f64
Tim Northover
t.p.northover at gmail.com
Thu Jul 17 05:39:28 PDT 2014
Hi,
I'm in the process of slightly reworking how we handle the __fp16 type. I have larger goals, but the most important immediate one is to perform extensions and truncations in a single step so that this C code has IEEE-sensible semantics:
void my_round(double in, __fp16 *out) { *out = in; }
Now, I *think* this is fairly academic as far as OpenCL is concerned (you have to use the vload_half/vstore_half functions to access __fp16 at all times), but I'd like to minimise breakage as far as possible anyway.
As part of this I've made the @llvm.convert.from.fp16 and @llvm.convert.to.fp16 intrinsics polymorphic, and would like to add support for f64 variants in as many places as possible.
For R600, it looks like there is neither a single-step truncation nor support for intrinsics. This means the truncation from f64 will always fail to compile, but the attached patch implements the extension correctly by splitting it into two operations: f16 -> f32 -> f64.
Are you happy for me to commit the change?
Cheers.
Tim.
http://reviews.llvm.org/D4557
Files:
lib/Target/R600/SIISelLowering.cpp
test/CodeGen/R600/fp16_to_fp.ll
test/CodeGen/R600/fp32_to_fp16.ll
Index: lib/Target/R600/SIISelLowering.cpp
===================================================================
--- lib/Target/R600/SIISelLowering.cpp
+++ lib/Target/R600/SIISelLowering.cpp
@@ -168,6 +168,8 @@
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
+ setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
+
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
Index: test/CodeGen/R600/fp16_to_fp.ll
===================================================================
--- /dev/null
+++ test/CodeGen/R600/fp16_to_fp.ll
@@ -0,0 +1,28 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone
+declare double @llvm.convert.from.fp16.f64(i16) nounwind readnone
+
+; SI-LABEL: @test_convert_fp16_to_fp32:
+; SI: BUFFER_LOAD_USHORT [[VAL:v[0-9]+]]
+; SI: V_CVT_F32_F16_e32 [[RESULT:v[0-9]+]], [[VAL]]
+; SI: BUFFER_STORE_DWORD [[RESULT]]
+define void @test_convert_fp16_to_fp32(float addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
+ %val = load i16 addrspace(1)* %in, align 2
+ %cvt = call float @llvm.convert.from.fp16.f32(i16 %val) nounwind readnone
+ store float %cvt, float addrspace(1)* %out, align 4
+ ret void
+}
+
+
+; SI-LABEL: @test_convert_fp16_to_fp64:
+; SI: BUFFER_LOAD_USHORT [[VAL:v[0-9]+]]
+; SI: V_CVT_F32_F16_e32 [[RESULT32:v[0-9]+]], [[VAL]]
+; SI: V_CVT_F64_F32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[RESULT32]]
+; SI: BUFFER_STORE_DWORDX2 [[RESULT]]
+define void @test_convert_fp16_to_fp64(double addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
+ %val = load i16 addrspace(1)* %in, align 2
+ %cvt = call double @llvm.convert.from.fp16.f64(i16 %val) nounwind readnone
+ store double %cvt, double addrspace(1)* %out, align 4
+ ret void
+}
Index: test/CodeGen/R600/fp32_to_fp16.ll
===================================================================
--- test/CodeGen/R600/fp32_to_fp16.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-
-declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone
-
-; SI-LABEL: @test_convert_fp16_to_fp32:
-; SI: BUFFER_LOAD_USHORT [[VAL:v[0-9]+]]
-; SI: V_CVT_F32_F16_e32 [[RESULT:v[0-9]+]], [[VAL]]
-; SI: BUFFER_STORE_DWORD [[RESULT]]
-define void @test_convert_fp16_to_fp32(float addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
- %val = load i16 addrspace(1)* %in, align 2
- %cvt = call float @llvm.convert.from.fp16.f32(i16 %val) nounwind readnone
- store float %cvt, float addrspace(1)* %out, align 4
- ret void
-}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D4557.11572.patch
Type: text/x-patch
Size: 2836 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140717/d612f603/attachment.bin>
More information about the llvm-commits
mailing list