[PATCH] D49845: AMDGPU: Conversions always produce canonical results
Matt Arsenault via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 26 04:29:19 PDT 2018
arsenm created this revision.
arsenm added reviewers: rampitec, scanon, b-sumner.
Herald added subscribers: t-tye, tpr, dstuttard, yaxunl, nhaehnle, wdng, kzhuravl.
It is not clear why this code was checking for denormal support for f16.
My interpretation of the IEEE standard is that conversions
should always produce a canonical result, and the ISA manual
says denormals are created when appropriate.
https://reviews.llvm.org/D49845
Files:
lib/Target/AMDGPU/SIISelLowering.cpp
test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
Index: test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
===================================================================
--- test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
+++ test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
@@ -215,6 +215,22 @@
ret void
}
+; GCN-LABEL: test_fold_canonicalize_fpextend_value_f32_f16_flushf16:
+; GCN: v_cvt_f32_f16_e32 [[V:v[0-9]+]], v{{[0-9]+}}
+; GCN-NOT: v_mul
+; GCN-NOT: v_max
+; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
+define amdgpu_kernel void @test_fold_canonicalize_fpextend_value_f32_f16_flushf16(half addrspace(1)* %arg, float addrspace(1)* %out) #2 {
+ %id = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %gep = getelementptr inbounds half, half addrspace(1)* %arg, i32 %id
+ %load = load half, half addrspace(1)* %gep, align 2
+ %v = fpext half %load to float
+ %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
+ %gep2 = getelementptr inbounds float, float addrspace(1)* %out, i32 %id
+ store float %canonicalized, float addrspace(1)* %gep2, align 4
+ ret void
+}
+
; GCN-LABEL: test_fold_canonicalize_fpround_value_f32_f64:
; GCN: v_cvt_f32_f64_e32 [[V:v[0-9]+]], v[{{[0-9:]+}}]
; GCN-NOT: v_mul
@@ -233,8 +249,9 @@
; GCN-LABEL: test_fold_canonicalize_fpround_value_f16_f32:
; GCN: v_cvt_f16_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
+; GCN-NOT: v_max
+; GCN-NOT: v_mul
; GCN: {{flat|global}}_store_short v[{{[0-9:]+}}], [[V]]
-; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f16_f32(float addrspace(1)* %arg, half addrspace(1)* %out) {
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
@@ -246,6 +263,22 @@
ret void
}
+; GCN-LABEL: test_fold_canonicalize_fpround_value_f16_f32_flushf16:
+; GCN: v_cvt_f16_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
+; GCN-NOT: v_max
+; GCN-NOT: v_mul
+; GCN: {{flat|global}}_store_short v[{{[0-9:]+}}], [[V]]
+define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f16_f32_flushf16(float addrspace(1)* %arg, half addrspace(1)* %out) #2 {
+ %id = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
+ %load = load float, float addrspace(1)* %gep, align 4
+ %v = fptrunc float %load to half
+ %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
+ %gep2 = getelementptr inbounds half, half addrspace(1)* %out, i32 %id
+ store half %canonicalized, half addrspace(1)* %gep2, align 2
+ ret void
+}
+
; GCN-LABEL: test_fold_canonicalize_fpround_value_v2f16_v2f32:
; GCN-DAG: v_cvt_f16_f32_e32 [[V0:v[0-9]+]], v{{[0-9]+}}
; VI-DAG: v_cvt_f16_f32_sdwa [[V1:v[0-9]+]], v{{[0-9]+}}
@@ -738,3 +771,4 @@
attributes #0 = { nounwind readnone }
attributes #1 = { "no-nans-fp-math"="true" }
+attributes #2 = { "target-features"="-fp64-fp16-denormals" }
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -6747,20 +6747,13 @@
case ISD::FSQRT:
case ISD::FDIV:
case ISD::FREM:
- case AMDGPUISD::FMUL_LEGACY:
- case AMDGPUISD::FMAD_FTZ:
- return true;
case ISD::FP_ROUND:
- return Op.getValueType().getScalarType() != MVT::f16 ||
- Subtarget->hasFP16Denormals();
-
case ISD::FP_EXTEND:
- return Op.getOperand(0).getValueType().getScalarType() != MVT::f16 ||
- Subtarget->hasFP16Denormals();
-
case ISD::FP16_TO_FP:
case ISD::FP_TO_FP16:
- return Subtarget->hasFP16Denormals();
+ case AMDGPUISD::FMUL_LEGACY:
+ case AMDGPUISD::FMAD_FTZ:
+ return true;
// It can/will be lowered or combined as a bit operation.
// Need to check their input recursively to handle.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D49845.157462.patch
Type: text/x-patch
Size: 3794 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180726/757d3111/attachment.bin>
More information about the llvm-commits
mailing list