[llvm] r339064 - AMDGPU: Conversions always produce canonical results

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 6 14:51:52 PDT 2018


Author: arsenm
Date: Mon Aug  6 14:51:52 2018
New Revision: 339064

URL: http://llvm.org/viewvc/llvm-project?rev=339064&view=rev
Log:
AMDGPU: Conversions always produce canonical results

It is not clear why this was checking for f16 denormals.
My interpretation of the IEEE standard is that conversions
should produce a canonical result, and the ISA manual
says denormals are created when appropriate.

Modified:
    llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/trunk/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll

Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=339064&r1=339063&r2=339064&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Mon Aug  6 14:51:52 2018
@@ -6775,16 +6775,11 @@ bool SITargetLowering::isCanonicalized(S
   case ISD::FSQRT:
   case ISD::FDIV:
   case ISD::FREM:
+  case ISD::FP_ROUND:
+  case ISD::FP_EXTEND:
   case AMDGPUISD::FMUL_LEGACY:
   case AMDGPUISD::FMAD_FTZ:
     return true;
-  case ISD::FP_ROUND:
-    return Op.getValueType().getScalarType() != MVT::f16 ||
-           Subtarget->hasFP16Denormals();
-
-  case ISD::FP_EXTEND:
-    return Op.getOperand(0).getValueType().getScalarType() != MVT::f16 ||
-           Subtarget->hasFP16Denormals();
 
   // It can/will be lowered or combined as a bit operation.
   // Need to check their input recursively to handle.

Modified: llvm/trunk/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll?rev=339064&r1=339063&r2=339064&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll Mon Aug  6 14:51:52 2018
@@ -215,6 +215,22 @@ define amdgpu_kernel void @test_fold_can
   ret void
 }
 
+; GCN-LABEL: test_fold_canonicalize_fpextend_value_f32_f16_flushf16:
+; GCN: v_cvt_f32_f16_e32 [[V:v[0-9]+]], v{{[0-9]+}}
+; GCN-NOT: v_mul
+; GCN-NOT: v_max
+; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
+define amdgpu_kernel void @test_fold_canonicalize_fpextend_value_f32_f16_flushf16(half addrspace(1)* %arg, float addrspace(1)* %out) #2 {
+  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %gep = getelementptr inbounds half, half addrspace(1)* %arg, i32 %id
+  %load = load half, half addrspace(1)* %gep, align 2
+  %v = fpext half %load to float
+  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
+  %gep2 = getelementptr inbounds float, float addrspace(1)* %out, i32 %id
+  store float %canonicalized, float addrspace(1)* %gep2, align 4
+  ret void
+}
+
 ; GCN-LABEL: test_fold_canonicalize_fpround_value_f32_f64:
 ; GCN: v_cvt_f32_f64_e32 [[V:v[0-9]+]], v[{{[0-9:]+}}]
 ; GCN-NOT: v_mul
@@ -233,8 +249,9 @@ define amdgpu_kernel void @test_fold_can
 
 ; GCN-LABEL: test_fold_canonicalize_fpround_value_f16_f32:
 ; GCN: v_cvt_f16_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
+; GCN-NOT: v_max
+; GCN-NOT: v_mul
 ; GCN: {{flat|global}}_store_short v[{{[0-9:]+}}], [[V]]
-; GCN-NOT: 1.0
 define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f16_f32(float addrspace(1)* %arg, half addrspace(1)* %out) {
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
@@ -246,6 +263,22 @@ define amdgpu_kernel void @test_fold_can
   ret void
 }
 
+; GCN-LABEL: test_fold_canonicalize_fpround_value_f16_f32_flushf16:
+; GCN: v_cvt_f16_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
+; GCN-NOT: v_max
+; GCN-NOT: v_mul
+; GCN: {{flat|global}}_store_short v[{{[0-9:]+}}], [[V]]
+define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f16_f32_flushf16(float addrspace(1)* %arg, half addrspace(1)* %out) #2 {
+  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
+  %load = load float, float addrspace(1)* %gep, align 4
+  %v = fptrunc float %load to half
+  %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
+  %gep2 = getelementptr inbounds half, half addrspace(1)* %out, i32 %id
+  store half %canonicalized, half addrspace(1)* %gep2, align 2
+  ret void
+}
+
 ; GCN-LABEL: test_fold_canonicalize_fpround_value_v2f16_v2f32:
 ; GCN-DAG: v_cvt_f16_f32_e32 [[V0:v[0-9]+]], v{{[0-9]+}}
 ; VI-DAG: v_cvt_f16_f32_sdwa [[V1:v[0-9]+]], v{{[0-9]+}}
@@ -738,3 +771,4 @@ declare double @llvm.maxnum.f64(double,
 
 attributes #0 = { nounwind readnone }
 attributes #1 = { "no-nans-fp-math"="true" }
+attributes #2 = { "target-features"="-fp64-fp16-denormals" }
