[llvm] r290300 - AMDGPU: Implement f16 fcanonicalize

Wed Dec 21 19:05:37 PST 2016

Author: arsenm
Date: Wed Dec 21 21:05:37 2016
New Revision: 290300

URL: http://llvm.org/viewvc/llvm-project?rev=290300&view=rev
Log:
AMDGPU: Implement f16 fcanonicalize

Added:
    llvm/trunk/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
Modified:
    llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td
    llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/trunk/lib/Target/AMDGPU/SIInstructions.td

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td?rev=290300&r1=290299&r2=290300&view=diff
==============================================================================

--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td Wed Dec 21 21:05:37 2016
@@ -391,6 +391,7 @@ int TWO_PI = 0x40c90fdb;
 int PI = 0x40490fdb;
 int TWO_PI_INV = 0x3e22f983;
 int FP_UINT_MAX_PLUS_1 = 0x4f800000;    // 1 << 32 in floating point encoding
+int FP16_ONE = 0x3C00;
 int FP32_ONE = 0x3f800000;
 int FP32_NEG_ONE = 0xbf800000;
 int FP64_ONE = 0x3ff0000000000000;

Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=290300&r1=290299&r2=290300&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Wed Dec 21 21:05:37 2016
@@ -3648,6 +3648,9 @@ SDValue SITargetLowering::performFCanoni
 
     if (VT == MVT::f64 && !Subtarget->hasFP64Denormals())
       return DAG.getConstantFP(0.0, SDLoc(N), VT);
+
+    if (VT == MVT::f16 && !Subtarget->hasFP16Denormals())
+      return DAG.getConstantFP(0.0, SDLoc(N), VT);
   }
 
   if (C.isNaN()) {

Modified: llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstructions.td?rev=290300&r1=290299&r2=290300&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstructions.td Wed Dec 21 21:05:37 2016
@@ -1022,6 +1022,11 @@ defm : BFMPatterns <i32, S_BFM_B32, S_MO
 def : BFEPattern <V_BFE_U32, S_MOV_B32>;
 
 def : Pat<
+  (fcanonicalize f16:$src),
+  (V_MUL_F16_e64 0, (i32 CONST.FP16_ONE), 0, $src, 0, 0) // multiply $src by FP16_ONE (0x3C00, i.e. half 1.0); NOTE(review): the literal 0 operands are presumably modifier/clamp/omod fields - confirm against the VOP3 operand list
+>;
+
+def : Pat<
   (fcanonicalize f32:$src),
   (V_MUL_F32_e64 0, (i32 CONST.FP32_ONE), 0, $src, 0, 0)
 >;

Added: llvm/trunk/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fcanonicalize.f16.ll?rev=290300&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fcanonicalize.f16.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/fcanonicalize.f16.ll Wed Dec 21 21:05:37 2016
@@ -0,0 +1,172 @@
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+declare half @llvm.canonicalize.f16(half) #0
+
+; GCN-LABEL: {{^}}v_test_canonicalize_var_f16:
+; GCN: v_mul_f16_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}}
+; GCN: buffer_store_short [[REG]]
+define void @v_test_canonicalize_var_f16(half addrspace(1)* %out) #1 {
+  %val = load half, half addrspace(1)* %out ; operand is loaded from memory; the check above expects it in a v register
+  %canonicalized = call half @llvm.canonicalize.f16(half %val) ; non-constant input: expected to become a multiply by 1.0
+  store half %canonicalized, half addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}s_test_canonicalize_var_f16:
+; GCN: v_mul_f16_e64 [[REG:v[0-9]+]], 1.0, {{s[0-9]+}}
+; GCN: buffer_store_short [[REG]]
+define void @s_test_canonicalize_var_f16(half addrspace(1)* %out, i16 zeroext %val.arg) #1 {
+  %val = bitcast i16 %val.arg to half ; operand comes from a function argument; the check above expects an s register source
+  %canonicalized = call half @llvm.canonicalize.f16(half %val) ; non-constant input: expected to become a multiply by 1.0 (e64 encoding)
+  store half %canonicalized, half addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_fold_canonicalize_p0_f16:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
+; GCN: buffer_store_short [[REG]]
+define void @test_fold_canonicalize_p0_f16(half addrspace(1)* %out) #1 {
+  %canonicalized = call half @llvm.canonicalize.f16(half 0.0) ; constant +0.0: expected to fold to a move of immediate 0
+  store half %canonicalized, half addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_fold_canonicalize_n0_f16:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffff8000{{$}}
+; GCN: buffer_store_short [[REG]]
+define void @test_fold_canonicalize_n0_f16(half addrspace(1)* %out) #1 {
+  %canonicalized = call half @llvm.canonicalize.f16(half -0.0) ; half -0.0 is bit pattern 0x8000; the expected immediate is that pattern with the upper 16 bits set
+  store half %canonicalized, half addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_fold_canonicalize_p1_f16:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3c00{{$}}
+; GCN: buffer_store_short [[REG]]
+define void @test_fold_canonicalize_p1_f16(half addrspace(1)* %out) #1 {
+  %canonicalized = call half @llvm.canonicalize.f16(half 1.0) ; half 1.0 is bit pattern 0x3c00; expected to fold to a move of that immediate
+  store half %canonicalized, half addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_fold_canonicalize_n1_f16:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffffbc00{{$}}
+; GCN: buffer_store_short [[REG]]
+define void @test_fold_canonicalize_n1_f16(half addrspace(1)* %out) #1 {
+  %canonicalized = call half @llvm.canonicalize.f16(half -1.0) ; half -1.0 is bit pattern 0xbc00; the expected immediate is that pattern with the upper 16 bits set
+  store half %canonicalized, half addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_fold_canonicalize_literal_f16:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x4c00{{$}}
+; GCN: buffer_store_short [[REG]]
+define void @test_fold_canonicalize_literal_f16(half addrspace(1)* %out) #1 {
+  %canonicalized = call half @llvm.canonicalize.f16(half 16.0) ; half 16.0 is bit pattern 0x4c00; expected to fold to a move of that immediate
+  store half %canonicalized, half addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal0_f16:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
+; GCN: buffer_store_short [[REG]]
+define void @test_no_denormals_fold_canonicalize_denormal0_f16(half addrspace(1)* %out) #2 { ; attribute #2 disables fp16 denormals explicitly, so the test does not depend on the subtarget default
+  %canonicalized = call half @llvm.canonicalize.f16(half 0xH03FF) ; 0xH03FF is the largest positive half denormal; with denormals off it is expected to fold to 0
+  store half %canonicalized, half addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal0_f16:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3ff{{$}}
+; GCN: buffer_store_short [[REG]]
+define void @test_denormals_fold_canonicalize_denormal0_f16(half addrspace(1)* %out) #3 {
+  %canonicalized = call half @llvm.canonicalize.f16(half 0xH03FF) ; largest positive half denormal; attribute #3 enables fp16 denormals, so the bit pattern is expected unchanged
+  store half %canonicalized, half addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal1_f16:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
+; GCN: buffer_store_short [[REG]]
+define void @test_no_denormals_fold_canonicalize_denormal1_f16(half addrspace(1)* %out) #2 { ; attribute #2 disables fp16 denormals explicitly, so the test does not depend on the subtarget default
+  %canonicalized = call half @llvm.canonicalize.f16(half 0xH83FF) ; 0xH83FF is the largest-magnitude negative half denormal; with denormals off it is expected to fold to 0
+  store half %canonicalized, half addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal1_f16:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffff83ff{{$}}
+; GCN: buffer_store_short [[REG]]
+define void @test_denormals_fold_canonicalize_denormal1_f16(half addrspace(1)* %out) #3 {
+  %canonicalized = call half @llvm.canonicalize.f16(half 0xH83FF) ; negative half denormal; attribute #3 enables fp16 denormals, so 0x83ff is expected unchanged (shown with the upper 16 bits set)
+  store half %canonicalized, half addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_f16:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
+; GCN: buffer_store_short [[REG]]
+define void @test_fold_canonicalize_qnan_f16(half addrspace(1)* %out) #1 {
+  %canonicalized = call half @llvm.canonicalize.f16(half 0xH7E00) ; 0xH7E00 is the positive quiet NaN with empty payload (0xH7C00 would be +infinity); expected to stay 0x7e00
+  store half %canonicalized, half addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg1_f16:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
+; GCN: buffer_store_short [[REG]]
+define void @test_fold_canonicalize_qnan_value_neg1_f16(half addrspace(1)* %out) #1 {
+  %canonicalized = call half @llvm.canonicalize.f16(half bitcast (i16 -1 to half)) ; i16 -1 is 0xFFFF: sign set, quiet NaN with every payload bit set; expected to be replaced by 0x7e00
+  store half %canonicalized, half addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg2_f16:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
+; GCN: buffer_store_short [[REG]]
+define void @test_fold_canonicalize_qnan_value_neg2_f16(half addrspace(1)* %out) #1 {
+  %canonicalized = call half @llvm.canonicalize.f16(half bitcast (i16 -2 to half)) ; i16 -2 is 0xFFFE: sign set, quiet NaN with a non-empty payload; expected to be replaced by 0x7e00
+  store half %canonicalized, half addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_fold_canonicalize_snan0_value_f16:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
+; GCN: buffer_store_short [[REG]]
+define void @test_fold_canonicalize_snan0_value_f16(half addrspace(1)* %out) #1 {
+  %canonicalized = call half @llvm.canonicalize.f16(half 0xH7C01) ; exponent all ones, quiet bit (0x200) clear, payload 1: positive signaling NaN; expected to become 0x7e00
+  store half %canonicalized, half addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_fold_canonicalize_snan1_value_f16:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
+; GCN: buffer_store_short [[REG]]
+define void @test_fold_canonicalize_snan1_value_f16(half addrspace(1)* %out) #1 {
+  %canonicalized = call half @llvm.canonicalize.f16(half 0xH7DFF) ; positive signaling NaN with all payload bits below the quiet bit set; expected to become 0x7e00
+  store half %canonicalized, half addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_fold_canonicalize_snan2_value_f16:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
+; GCN: buffer_store_short [[REG]]
+define void @test_fold_canonicalize_snan2_value_f16(half addrspace(1)* %out) #1 {
+  %canonicalized = call half @llvm.canonicalize.f16(half 0xHFDFF) ; same payload as 0xH7DFF with the sign bit set: negative signaling NaN; expected to become 0x7e00 (sign cleared)
+  store half %canonicalized, half addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_fold_canonicalize_snan3_value_f16:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
+; GCN: buffer_store_short [[REG]]
+define void @test_fold_canonicalize_snan3_value_f16(half addrspace(1)* %out) #1 {
+  %canonicalized = call half @llvm.canonicalize.f16(half 0xHFC01) ; same payload as 0xH7C01 with the sign bit set: negative signaling NaN; expected to become 0x7e00 (sign cleared)
+  store half %canonicalized, half addrspace(1)* %out
+  ret void
+}
+
+attributes #0 = { nounwind readnone } ; used by the intrinsic declaration
+attributes #1 = { nounwind } ; no target-features override: subtarget default denormal handling
+attributes #2 = { nounwind "target-features"="-fp16-denormals,-fp64-denormals" } ; denormals explicitly disabled (mirror image of #3)
+attributes #3 = { nounwind "target-features"="+fp16-denormals,+fp64-denormals" } ; denormals explicitly enabled