[llvm] r333019 - AMDGPU: Fix v2f16 fneg/fabs pattern
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue May 22 13:13:34 PDT 2018
Author: arsenm
Date: Tue May 22 13:13:34 2018
New Revision: 333019
URL: http://llvm.org/viewvc/llvm-project?rev=333019&view=rev
Log:
AMDGPU: Fix v2f16 fneg/fabs pattern
The integer operation convertion for some reason only happens
if the source is a bitcast from an integer, which happens to
always be the situation when the result is loaded. Add
an additional pattern for when the source operation is really
an FP operation.
Modified:
llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
llvm/trunk/test/CodeGen/AMDGPU/fneg-fabs.f16.ll
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstructions.td?rev=333019&r1=333018&r2=333019&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstructions.td Tue May 22 13:13:34 2018
@@ -996,6 +996,11 @@ def : GCNPat <
(S_OR_B32 (S_MOV_B32 (i32 0x80008000)), $src) // Set sign bit
>;
+def : GCNPat <
+ (fneg (v2f16 (fabs v2f16:$src))),
+ (S_OR_B32 (S_MOV_B32 (i32 0x80008000)), $src) // Set sign bit
+>;
+
/********** ================== **********/
/********** Immediate Patterns **********/
/********** ================== **********/
Modified: llvm/trunk/test/CodeGen/AMDGPU/fneg-fabs.f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fneg-fabs.f16.ll?rev=333019&r1=333018&r2=333019&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fneg-fabs.f16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fneg-fabs.f16.ll Tue May 22 13:13:34 2018
@@ -68,8 +68,26 @@ define amdgpu_kernel void @v_fneg_fabs_f
ret void
}
+; GCN-LABEL: {{^}}s_fneg_fabs_v2f16_non_bc_src:
+; GFX9-DAG: s_load_dword [[VAL:s[0-9]+]]
+; GFX9-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x40003c00
+; GFX9: v_pk_add_f16 [[ADD:v[0-9]+]], [[VAL]], [[K]]
+; GFX9: v_or_b32_e32 [[RESULT:v[0-9]+]], 0x80008000, [[ADD]]
+
+; VI: v_or_b32_e32 v{{[0-9]+}}, 0x80008000, v{{[0-9]+}}
+define amdgpu_kernel void @s_fneg_fabs_v2f16_non_bc_src(<2 x half> addrspace(1)* %out, <2 x half> %in) {
+ %add = fadd <2 x half> %in, <half 1.0, half 2.0>
+ %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %add)
+ %fneg.fabs = fsub <2 x half> <half -0.0, half -0.0>, %fabs
+ store <2 x half> %fneg.fabs, <2 x half> addrspace(1)* %out
+ ret void
+}
+
; FIXME: single bit op
-; GCN-LABEL: {{^}}s_fneg_fabs_v2f16:
+
+; Combine turns this into integer op when bitcast source (from load)
+
+; GCN-LABEL: {{^}}s_fneg_fabs_v2f16_bc_src:
; CI: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 16, v{{[0-9]+}}
; CI: v_or_b32_e32 [[OR:v[0-9]+]], v{{[0-9]+}}, [[SHL]]
; CI: v_or_b32_e32 v{{[0-9]+}}, 0x80008000, [[OR]]
@@ -77,7 +95,7 @@ define amdgpu_kernel void @v_fneg_fabs_f
; FIXME: Random commute
; VI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80008000
; GFX9: s_or_b32 s{{[0-9]+}}, 0x80008000, s{{[0-9]+}}
-define amdgpu_kernel void @s_fneg_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %in) {
+define amdgpu_kernel void @s_fneg_fabs_v2f16_bc_src(<2 x half> addrspace(1)* %out, <2 x half> %in) {
%fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in)
%fneg.fabs = fsub <2 x half> <half -0.0, half -0.0>, %fabs
store <2 x half> %fneg.fabs, <2 x half> addrspace(1)* %out
More information about the llvm-commits
mailing list