[llvm] r293127 - AMDGPU: Fold fneg into round instructions
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 25 17:25:36 PST 2017
Author: arsenm
Date: Wed Jan 25 19:25:36 2017
New Revision: 293127
URL: http://llvm.org/viewvc/llvm-project?rev=293127&view=rev
Log:
AMDGPU: Fold fneg into round instructions
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
llvm/trunk/test/CodeGen/AMDGPU/fneg-combines.ll
llvm/trunk/test/CodeGen/AMDGPU/frem.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.class.f16.ll
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp?rev=293127&r1=293126&r2=293127&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp Wed Jan 25 19:25:36 2017
@@ -492,6 +492,9 @@ static bool fnegFoldsIntoOp(unsigned Opc
case ISD::FMA:
case ISD::FMAD:
case ISD::FSIN:
+ case ISD::FTRUNC:
+ case ISD::FRINT:
+ case ISD::FNEARBYINT:
case AMDGPUISD::RCP:
case AMDGPUISD::RCP_LEGACY:
case AMDGPUISD::SIN_HW:
@@ -2924,9 +2927,12 @@ SDValue AMDGPUTargetLowering::performFNe
return Res;
}
case ISD::FP_EXTEND:
+ case ISD::FTRUNC:
+ case ISD::FRINT:
+ case ISD::FNEARBYINT: // XXX - Should fround be handled?
+ case ISD::FSIN:
case AMDGPUISD::RCP:
case AMDGPUISD::RCP_LEGACY:
- case ISD::FSIN:
case AMDGPUISD::SIN_HW: {
SDValue CvtSrc = N0.getOperand(0);
if (CvtSrc.getOpcode() == ISD::FNEG) {
Modified: llvm/trunk/test/CodeGen/AMDGPU/fneg-combines.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fneg-combines.ll?rev=293127&r1=293126&r2=293127&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fneg-combines.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fneg-combines.ll Wed Jan 25 19:25:36 2017
@@ -1327,7 +1327,91 @@ define void @v_fneg_amdgcn_sin_f32(float
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%sin = call float @llvm.amdgcn.sin.f32(float %a)
- %fneg = fsub float -0.000000e+00, %sin
+ %fneg = fsub float -0.0, %sin
+ store float %fneg, float addrspace(1)* %out.gep
+ ret void
+}
+
+; --------------------------------------------------------------------------------
+; ftrunc tests
+; --------------------------------------------------------------------------------
+
+; GCN-LABEL: {{^}}v_fneg_trunc_f32:
+; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
+; GCN: v_trunc_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
+; GCN: buffer_store_dword [[RESULT]]
+define void @v_fneg_trunc_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %tid.ext = sext i32 %tid to i64
+ %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
+ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
+ %a = load volatile float, float addrspace(1)* %a.gep
+ %trunc = call float @llvm.trunc.f32(float %a)
+ %fneg = fsub float -0.0, %trunc
+ store float %fneg, float addrspace(1)* %out.gep
+ ret void
+}
+
+; --------------------------------------------------------------------------------
+; fround tests
+; --------------------------------------------------------------------------------
+
+; GCN-LABEL: {{^}}v_fneg_round_f32:
+; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
+; GCN: v_trunc_f32_e32
+; GCN: v_subrev_f32_e32
+; GCN: v_cndmask_b32
+; GCN-NSZ: v_sub_f32_e64 [[RESULT:v[0-9]+]], -v{{[0-9]+}}, v{{[0-9]+}}
+; GCN-SAFE: v_xor_b32_e32 [[RESULT:v[0-9]+]], 0x80000000, v{{[0-9]+}}
+; GCN: buffer_store_dword [[RESULT]]
+define void @v_fneg_round_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %tid.ext = sext i32 %tid to i64
+ %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
+ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
+ %a = load volatile float, float addrspace(1)* %a.gep
+ %round = call float @llvm.round.f32(float %a)
+ %fneg = fsub float -0.0, %round
+ store float %fneg, float addrspace(1)* %out.gep
+ ret void
+}
+
+; --------------------------------------------------------------------------------
+; rint tests
+; --------------------------------------------------------------------------------
+
+; GCN-LABEL: {{^}}v_fneg_rint_f32:
+; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
+; GCN: v_rndne_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
+; GCN: buffer_store_dword [[RESULT]]
+define void @v_fneg_rint_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %tid.ext = sext i32 %tid to i64
+ %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
+ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
+ %a = load volatile float, float addrspace(1)* %a.gep
+ %rint = call float @llvm.rint.f32(float %a)
+ %fneg = fsub float -0.0, %rint
+ store float %fneg, float addrspace(1)* %out.gep
+ ret void
+}
+
+; --------------------------------------------------------------------------------
+; nearbyint tests
+; --------------------------------------------------------------------------------
+
+; GCN-LABEL: {{^}}v_fneg_nearbyint_f32:
+; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
+; GCN: v_rndne_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
+; GCN: buffer_store_dword [[RESULT]]
+define void @v_fneg_nearbyint_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %tid.ext = sext i32 %tid to i64
+ %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
+ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
+ %a = load volatile float, float addrspace(1)* %a.gep
+ %nearbyint = call float @llvm.nearbyint.f32(float %a)
+ %fneg = fsub float -0.0, %nearbyint
store float %fneg, float addrspace(1)* %out.gep
ret void
}
@@ -1336,6 +1420,10 @@ declare i32 @llvm.amdgcn.workitem.id.x()
declare float @llvm.fma.f32(float, float, float) #1
declare float @llvm.fmuladd.f32(float, float, float) #1
declare float @llvm.sin.f32(float) #1
+declare float @llvm.trunc.f32(float) #1
+declare float @llvm.round.f32(float) #1
+declare float @llvm.rint.f32(float) #1
+declare float @llvm.nearbyint.f32(float) #1
declare float @llvm.amdgcn.sin.f32(float) #1
declare float @llvm.amdgcn.rcp.f32(float) #1
Modified: llvm/trunk/test/CodeGen/AMDGPU/frem.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/frem.ll?rev=293127&r1=293126&r2=293127&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/frem.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/frem.ll Wed Jan 25 19:25:36 2017
@@ -1,6 +1,6 @@
-; RUN: llc -march=amdgcn -enable-misched < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=bonaire -enable-misched < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -enable-misched < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}frem_f32:
; GCN-DAG: buffer_load_dword [[X:v[0-9]+]], {{.*$}}
@@ -12,8 +12,8 @@
; GCN: v_mul_f32_e32
; GCN: v_div_fmas_f32
; GCN: v_div_fixup_f32
-; GCN: v_trunc_f32_e32
-; GCN: v_mad_f32
+; GCN: v_trunc_f32_e64 v{{[0-9]+}}, -v{{[0-9]+}}
+; GCN: v_mac_f32_e32
; GCN: s_endpgm
define void @frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
float addrspace(1)* %in2) #0 {
@@ -28,11 +28,11 @@ define void @frem_f32(float addrspace(1)
; FUNC-LABEL: {{^}}unsafe_frem_f32:
; GCN: buffer_load_dword [[Y:v[0-9]+]], {{.*}} offset:16
; GCN: buffer_load_dword [[X:v[0-9]+]], {{.*}}
-; GCN: v_rcp_f32_e32 [[INVY:v[0-9]+]], [[Y]]
+; GCN: v_rcp_f32_e64 [[INVY:v[0-9]+]], -[[Y]]
; GCN: v_mul_f32_e32 [[DIV:v[0-9]+]], [[INVY]], [[X]]
; GCN: v_trunc_f32_e32 [[TRUNC:v[0-9]+]], [[DIV]]
-; GCN: v_mad_f32 [[RESULT:v[0-9]+]], -[[TRUNC]], [[Y]], [[X]]
-; GCN: buffer_store_dword [[RESULT]]
+; GCN: v_mac_f32_e32 [[X]], [[Y]], [[TRUNC]]
+; GCN: buffer_store_dword [[X]]
; GCN: s_endpgm
define void @unsafe_frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
float addrspace(1)* %in2) #1 {
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.class.f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.class.f16.ll?rev=293127&r1=293126&r2=293127&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.class.f16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.class.f16.ll Wed Jan 25 19:25:36 2017
@@ -46,8 +46,8 @@ entry:
; GCN-LABEL: {{^}}class_f16_fneg
; GCN: s_load_dword s[[SA_F16:[0-9]+]]
; GCN: s_load_dword s[[SB_I32:[0-9]+]]
-; VI: v_trunc_f16_e32 v[[VA_F16:[0-9]+]], s[[SA_F16]]
-; VI: v_cmp_class_f16_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], -v[[VA_F16]], s[[SB_I32]]
+; VI: v_trunc_f16_e64 v[[VA_F16:[0-9]+]], -s[[SA_F16]]
+; VI: v_cmp_class_f16_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], v[[VA_F16]], s[[SB_I32]]
; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, [[CMP]]
; GCN: buffer_store_dword v[[VR_I32]]
; GCN: s_endpgm
More information about the llvm-commits mailing list