[PATCH] D28496: [AMDGPU] Implement f16 fcopysign

Mon Jan 9 15:29:34 PST 2017

kzhuravl created this revision.
kzhuravl added reviewers: arsenm, tstellarAMD.
kzhuravl added subscribers: b-sumner, llvm-commits.
Herald added subscribers: tony-tye, yaxunl, nhaehnle, wdng.

https://reviews.llvm.org/D28496

Files:
  lib/Target/AMDGPU/SIISelLowering.cpp
  lib/Target/AMDGPU/SIInstructions.td
  test/CodeGen/AMDGPU/fcopysign.f16.ll


Index: test/CodeGen/AMDGPU/fcopysign.f16.ll
===================================================================
--- test/CodeGen/AMDGPU/fcopysign.f16.ll
+++ test/CodeGen/AMDGPU/fcopysign.f16.ll
@@ -0,0 +1,50 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
+
+declare half @llvm.copysign.f16(half, half) nounwind readnone
+declare <2 x half> @llvm.copysign.v2f16(<2 x half>, <2 x half>) nounwind readnone
+declare <4 x half> @llvm.copysign.v4f16(<4 x half>, <4 x half>) nounwind readnone
+
+; FUNC-LABEL: {{^}}test_copysign_f16:
+; GCN: buffer_load_ushort v[[VMAG:[0-9]+]]
+; GCN: buffer_load_ushort v[[VSIGN:[0-9]+]]
+; SI:  s_brev_b32 s[[SCONST:[0-9]+]], -2
+; SI:  v_cvt_f32_f16_e32 v[[VMAG_F32:[0-9]+]], v[[VMAG]]
+; SI:  v_cvt_f32_f16_e32 v[[VSIGN_F32:[0-9]+]], v[[VSIGN]]
+; SI:  v_bfi_b32 v[[VDST_F32:[0-9]+]], s[[SCONST]], v[[VMAG_F32]], v[[VSIGN_F32]]
+; SI:  v_cvt_f16_f32_e32 v[[VDST:[0-9]+]], v[[VDST_F32]]
+; VI:  s_movk_i32 s[[SCONST:[0-9]+]], 0x7fff
+; VI:  v_bfi_b32 v[[VDST:[0-9]+]], s[[SCONST]], v[[VMAG]], v[[VSIGN]]
+; GCN: buffer_store_short v[[VDST]]
+; GCN: s_endpgm
+define void @test_copysign_f16(half addrspace(1)* %mag_ptr,
+                               half addrspace(1)* %sign_ptr,
+                               half addrspace(1)* %out) nounwind {
+  %mag = load half, half addrspace(1)* %mag_ptr
+  %sign = load half, half addrspace(1)* %sign_ptr
+  %result = call half @llvm.copysign.f16(half %mag, half %sign)
+  store half %result, half addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}test_copysign_v2f16:
+; GCN: v_bfi_b32
+; GCN: v_bfi_b32
+; GCN: s_endpgm
+define void @test_copysign_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %mag, <2 x half> %sign) nounwind {
+  %result = call <2 x half> @llvm.copysign.v2f16(<2 x half> %mag, <2 x half> %sign)
+  store <2 x half> %result, <2 x half> addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: {{^}}test_copysign_v4f16:
+; GCN: v_bfi_b32
+; GCN: v_bfi_b32
+; GCN: v_bfi_b32
+; GCN: v_bfi_b32
+; GCN: s_endpgm
+define void @test_copysign_v4f16(<4 x half> addrspace(1)* %out, <4 x half> %mag, <4 x half> %sign) nounwind {
+  %result = call <4 x half> @llvm.copysign.v4f16(<4 x half> %mag, <4 x half> %sign)
+  store <4 x half> %result, <4 x half> addrspace(1)* %out, align 16
+  ret void
+}
Index: lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- lib/Target/AMDGPU/SIInstructions.td
+++ lib/Target/AMDGPU/SIInstructions.td
@@ -678,6 +678,11 @@
 >;
 
 def : Pat <
+  (fcopysign f16:$src0, f16:$src1),
+  (V_BFI_B32 (S_MOV_B32 (i32 0x00007fff)), $src0, $src1)
+>;
+
+def : Pat <
   (fneg f16:$src),
   (V_XOR_B32_e32 $src, (V_MOV_B32_e32 (i32 0x00008000)))
 >;
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -310,6 +310,8 @@
     setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
     setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
     setOperationAction(ISD::FDIV, MVT::f16, Custom);
+    if (!Subtarget->hasBFI())
+      setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
 
     // F16 - VOP3 Actions.
     setOperationAction(ISD::FMA, MVT::f16, Legal);


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D28496.83722.patch
Type: text/x-patch
Size: 3504 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170109/989f2892/attachment.bin>