[PATCH] D47184: AMDGPU: Fix v2f16 fneg/fabs pattern

Tue May 22 01:33:03 PDT 2018

arsenm created this revision.
arsenm added reviewers: rampitec, cfang, kzhuravl.
Herald added subscribers: t-tye, tpr, dstuttard, yaxunl, nhaehnle, wdng.

The conversion to an integer operation for some reason only happens
if the source is a bitcast from an integer, which happens to
always be the situation when the source is loaded. Add
an additional pattern for when the source operation is really
an FP operation.


https://reviews.llvm.org/D47184

Files:
  lib/Target/AMDGPU/SIInstructions.td
  test/CodeGen/AMDGPU/fneg-fabs.f16.ll


Index: test/CodeGen/AMDGPU/fneg-fabs.f16.ll
===================================================================

--- test/CodeGen/AMDGPU/fneg-fabs.f16.ll
+++ test/CodeGen/AMDGPU/fneg-fabs.f16.ll
@@ -68,16 +68,34 @@
   ret void
 }
 
+; GCN-LABEL: {{^}}s_fneg_fabs_v2f16_non_bc_src:
+; GFX9-DAG: s_load_dword [[VAL:s[0-9]+]]
+; GFX9-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x40003c00
+; GFX9: v_pk_add_f16 [[ADD:v[0-9]+]], [[VAL]], [[K]]
+; GFX9: v_or_b32_e32 [[RESULT:v[0-9]+]], 0x80008000, [[ADD]]
+
+; VI: v_or_b32_e32 v{{[0-9]+}}, 0x80008000, v{{[0-9]+}}
+define amdgpu_kernel void @s_fneg_fabs_v2f16_non_bc_src(<2 x half> addrspace(1)* %out, <2 x half> %in) {
+  %add = fadd <2 x half> %in, <half 1.0, half 2.0>
+  %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %add)
+  %fneg.fabs = fsub <2 x half> <half -0.0, half -0.0>, %fabs
+  store <2 x half> %fneg.fabs, <2 x half> addrspace(1)* %out
+  ret void
+}
+
 ; FIXME: single bit op
-; GCN-LABEL: {{^}}s_fneg_fabs_v2f16:
+
+; Combine turns this into integer op when bitcast source (from load)
+
+; GCN-LABEL: {{^}}s_fneg_fabs_v2f16_bc_src:
 ; CI: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 16, v{{[0-9]+}}
 ; CI: v_or_b32_e32 [[OR:v[0-9]+]], v{{[0-9]+}}, [[SHL]]
 ; CI: v_or_b32_e32 v{{[0-9]+}}, 0x80008000, [[OR]]
 
 ; FIXME: Random commute
 ; VI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80008000
 ; GFX9: s_or_b32 s{{[0-9]+}}, 0x80008000, s{{[0-9]+}}
-define amdgpu_kernel void @s_fneg_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %in) {
+define amdgpu_kernel void @s_fneg_fabs_v2f16_bc_src(<2 x half> addrspace(1)* %out, <2 x half> %in) {
   %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in)
   %fneg.fabs = fsub <2 x half> <half -0.0, half -0.0>, %fabs
   store <2 x half> %fneg.fabs, <2 x half> addrspace(1)* %out
Index: lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- lib/Target/AMDGPU/SIInstructions.td
+++ lib/Target/AMDGPU/SIInstructions.td
@@ -996,6 +996,11 @@
   (S_OR_B32 (S_MOV_B32 (i32 0x80008000)), $src) // Set sign bit
 >;
 
+def : GCNPat <
+  (fneg (v2f16 (fabs v2f16:$src))),
+  (S_OR_B32 (S_MOV_B32 (i32 0x80008000)), $src) // Set sign bit
+>;
+
 /********** ================== **********/
 /********** Immediate Patterns **********/
 /********** ================== **********/


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D47184.147958.patch
Type: text/x-patch
Size: 2313 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180522/ac391285/attachment.bin>