[PATCH] D28784: AMDGPU: Check users in fneg/fabs select combine
Matt Arsenault via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 16 15:21:57 PST 2017
arsenm created this revision.
Herald added a reviewer: tstellarAMD.
Herald added subscribers: tony-tye, yaxunl, nhaehnle, wdng, kzhuravl.
https://reviews.llvm.org/D28784
Files:
lib/Target/AMDGPU/AMDGPUISelLowering.cpp
test/CodeGen/AMDGPU/select-fabs-fneg-extract-legacy.ll
test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll
Index: test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll
===================================================================
--- test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll
+++ test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll
@@ -810,14 +810,12 @@
ret void
}
-; FIXME: This one should fold to rcp
; GCN-LABEL: {{^}}select_fneg_posk_src_rcp_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
-; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[X]]
-; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -2.0, [[RCP]], vcc
-; GCN: v_xor_b32_e32 [[NEG_SELECT:v[0-9]+]], 0x80000000, [[SELECT]]
-; GCN-NEXT: buffer_store_dword [[NEG_SELECT]]
+; GCN: v_rcp_f32_e64 [[RCP:v[0-9]+]], -[[X]]
+; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[RCP]], vcc
+; GCN-NEXT: buffer_store_dword [[SELECT]]
define void @select_fneg_posk_src_rcp_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
Index: test/CodeGen/AMDGPU/select-fabs-fneg-extract-legacy.ll
===================================================================
--- test/CodeGen/AMDGPU/select-fabs-fneg-extract-legacy.ll
+++ test/CodeGen/AMDGPU/select-fabs-fneg-extract-legacy.ll
@@ -7,10 +7,9 @@
; GCN-LABEL: {{^}}select_fneg_posk_src_rcp_legacy_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
-; GCN: v_rcp_legacy_f32_e32 [[RCP:v[0-9]+]], [[X]]
-; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -2.0, [[RCP]], vcc
-; GCN: v_xor_b32_e32 [[NEG_SELECT:v[0-9]+]], 0x80000000, [[SELECT]]
-; GCN-NEXT: buffer_store_dword [[NEG_SELECT]]
+; GCN: v_rcp_legacy_f32_e64 [[RCP:v[0-9]+]], -[[X]]
+; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[RCP]], vcc
+; GCN-NEXT: buffer_store_dword [[SELECT]]
define void @select_fneg_posk_src_rcp_legacy_f32(i32 %c) #2 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
@@ -25,10 +24,9 @@
; GCN-LABEL: {{^}}select_fneg_posk_src_mul_legacy_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
-; GCN: v_mul_legacy_f32_e32 [[MUL:v[0-9]+]], 4.0, [[X]]
-; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -2.0, [[MUL]], vcc
-; GCN: v_xor_b32_e32 [[NEG_SELECT:v[0-9]+]], 0x80000000, [[SELECT]]
-; GCN-NEXT: buffer_store_dword [[NEG_SELECT]]
+; GCN: v_mul_legacy_f32_e32 [[MUL:v[0-9]+]], -4.0, [[X]]
+; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[MUL]], vcc
+; GCN-NEXT: buffer_store_dword [[SELECT]]
define void @select_fneg_posk_src_mul_legacy_f32(i32 %c) #2 {
%x = load volatile float, float addrspace(1)* undef
%cmp = icmp eq i32 %c, 0
Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -2793,8 +2793,10 @@
SDValue RHS = N.getOperand(2);
EVT VT = N.getValueType();
- if ((LHS.getOpcode() == ISD::FABS && RHS.getOpcode() == ISD::FABS) ||
- (LHS.getOpcode() == ISD::FNEG && RHS.getOpcode() == ISD::FNEG)) {
+ if (((LHS.getOpcode() == ISD::FABS && RHS.getOpcode() == ISD::FABS) ||
+ (LHS.getOpcode() == ISD::FNEG && RHS.getOpcode() == ISD::FNEG)) &&
+ (!LHS.hasOneUse() || !RHS.hasOneUse() ||
+ allUsesHaveSourceMods(N.getNode()))) {
return distributeOpThroughSelect(DCI, LHS.getOpcode(),
SDLoc(N), Cond, LHS, RHS);
}
@@ -2807,7 +2809,8 @@
// TODO: Support vector constants.
ConstantFPSDNode *CRHS = dyn_cast<ConstantFPSDNode>(RHS);
- if ((LHS.getOpcode() == ISD::FNEG || LHS.getOpcode() == ISD::FABS) && CRHS) {
+ if (CRHS && (LHS.getOpcode() == ISD::FNEG || LHS.getOpcode() == ISD::FABS) &&
+ allUsesHaveSourceMods(N.getNode())) {
SDLoc SL(N);
// If one side is an fneg/fabs and the other is a constant, we can push the
// fneg/fabs down. If it's an fabs, the constant needs to be non-negative.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D28784.84604.patch
Type: text/x-patch
Size: 3859 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170116/bf904569/attachment.bin>
More information about the llvm-commits
mailing list