[llvm] fce1a6f - Revert "AMDGPU: Try to commute sub of boolean ext"
Tim Renouf via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 13 04:49:41 PST 2019
Author: Tim Renouf
Date: 2019-12-13T12:49:06Z
New Revision: fce1a6f5848d644d56ab72d5bac71aa8053f8f2e
URL: https://github.com/llvm/llvm-project/commit/fce1a6f5848d644d56ab72d5bac71aa8053f8f2e
DIFF: https://github.com/llvm/llvm-project/commit/fce1a6f5848d644d56ab72d5bac71aa8053f8f2e.diff
LOG: Revert "AMDGPU: Try to commute sub of boolean ext"
This reverts commit 69fcfb7d3597e0cdb5554b4e672e9032b411b167.
As shown in the test I attached to this commit, the change I reverted
causes a problem with "zext(cc1) - zext(cc2)". It commuted
the operands to the sub and used different logic to select the addc/subc
instruction:
sub zext (setcc), x => addcarry 0, x, setcc
sub sext (setcc), x => subcarry 0, x, setcc
... but that is bogus. I believe it is not possible to fold those commuted
patterns into any form of addcarry or subcarry. It may have worked as
intended before "AMDGPU: Change boolean content type to 0 or 1" because
the setcc was considered to be -1 rather than 1.
Differential Revision: https://reviews.llvm.org/D70978
Change-Id: If2139421aa6c935cbd1d925af58fe4a4aa9e8f43
Added:
llvm/test/CodeGen/AMDGPU/sub-zext-cc-zext-cc.ll
Modified:
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 77d3da169cab..8c2012204450 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -9622,16 +9622,7 @@ SDValue SITargetLowering::performSubCombine(SDNode *N,
// sub x, zext (setcc) => subcarry x, 0, setcc
// sub x, sext (setcc) => addcarry x, 0, setcc
-
- bool Commuted = false;
- unsigned Opc = LHS.getOpcode();
- if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND ||
- Opc == ISD::ANY_EXTEND) {
- std::swap(RHS, LHS);
- Commuted = true;
- }
-
- Opc = RHS.getOpcode();
+ unsigned Opc = RHS.getOpcode();
switch (Opc) {
default: break;
case ISD::ZERO_EXTEND:
@@ -9643,22 +9634,8 @@ SDValue SITargetLowering::performSubCombine(SDNode *N,
if (!isBoolSGPR(Cond))
break;
SDVTList VTList = DAG.getVTList(MVT::i32, MVT::i1);
- SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
- SDValue Args[3];
- Args[2] = Cond;
-
- if (Commuted) {
- // sub zext (setcc), x => addcarry 0, x, setcc
- // sub sext (setcc), x => subcarry 0, x, setcc
- Args[0] = Zero;
- Args[1] = LHS;
- Opc = (Opc == ISD::SIGN_EXTEND) ? ISD::SUBCARRY : ISD::ADDCARRY;
- } else {
- Args[0] = LHS;
- Args[1] = Zero;
- Opc = (Opc == ISD::SIGN_EXTEND) ? ISD::ADDCARRY : ISD::SUBCARRY;
- }
-
+ SDValue Args[] = { LHS, DAG.getConstant(0, SL, MVT::i32), Cond };
+ Opc = (Opc == ISD::SIGN_EXTEND) ? ISD::ADDCARRY : ISD::SUBCARRY;
return DAG.getNode(Opc, SL, VTList, Args);
}
}
diff --git a/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll b/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll
index 37ae4d2d5c28..617f280e046e 100644
--- a/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll
+++ b/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll
@@ -130,14 +130,18 @@ bb:
; GCN-LABEL: {{^}}sub_sube_commuted:
; GCN-DAG: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN-DAG: buffer_load_dword [[V:v[0-9]+]],
-; GCN: v_addc_u32_e32 [[ADDC:v[0-9]+]], vcc, v{{[0-9]+}}, v{{[0-9]+}}, [[CC]]
-; GCN: v_add_i32_e32 {{.*}}, 0x64, [[ADDC]]
+; GCN: v_cndmask_b32_e64 [[CCZEXT:v[0-9]+]], 0, 1, [[CC]]
+; GCN: v_sub_i32_e32 [[SUB:v[0-9]+]], vcc, [[CCZEXT]], v4
+; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, {{.*}}, [[SUB]]
+; GCN: v_add_i32_e32 {{.*}}, 0x64, [[ADD]]
; GFX9-LABEL: {{^}}sub_sube_commuted:
; GFX9-DAG: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GFX9-DAG: global_load_dword [[V:v[0-9]+]],
-; GFX9: v_addc_co_u32_e32 [[ADDC:v[0-9]+]], vcc, v{{[0-9]+}}, v{{[0-9]+}}, [[CC]]
-; GFX9: v_add_u32_e32 {{.*}}, 0x64, [[ADDC]]
+; GFX9-DAG: v_cndmask_b32_e64 [[CCZEXT:v[0-9]+]], 0, 1, [[CC]]
+; GFX9: v_sub_u32_e32 {{.*}}, [[CCZEXT]]
+; GFX9: v_add_u32_e32
+; GFX9: v_add_u32_e32 {{.*}}, 0x64,
define amdgpu_kernel void @sub_sube_commuted(i32 addrspace(1)* nocapture %arg, i32 %a) {
bb:
%x = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -312,9 +316,10 @@ bb:
; Check case where sub is commuted with zext
; GCN-LABEL: {{^}}sub_zext_setcc_commute:
; GCN: v_cmp_gt_u32_e32 vcc, v
-; GCN-NOT: vcc
-; GCN: v_addc_u32_e32 [[ADDC:v[0-9]+]], vcc, v{{[0-9]+}}, v{{[0-9]+}}, vcc
-; GCN: v_subrev_i32_e32 [[RESULT:v[0-9]+]], vcc, s{{[0-9]+}}, [[ADDC]]
+; GCN: v_cndmask
+; GCN: v_sub_i32_e32
+; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc,
+; GCN: v_subrev_i32_e32 [[RESULT:v[0-9]+]], vcc, s{{[0-9]+}}, [[ADD]]
define amdgpu_kernel void @sub_zext_setcc_commute(i32 addrspace(1)* nocapture %arg, i32 %a, i32%b) {
bb:
%x = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -333,9 +338,9 @@ bb:
; Check case where sub is commuted with sext
; GCN-LABEL: {{^}}sub_sext_setcc_commute:
; GCN: v_cmp_gt_u32_e32 vcc, v
-; GCN-NOT: vcc
-; GCN: v_subb_u32_e32 [[SUBB:v[0-9]+]], vcc, 0, v{{[0-9]+}}, vcc
-; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, s{{[0-9]+}}, [[SUBB]]
+; GCN: v_cndmask
+; GCN: v_sub_i32_e32
+; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc,
; GCN: v_subrev_i32_e32 [[RESULT:v[0-9]+]], vcc, s{{[0-9]+}}, [[ADD]]
define amdgpu_kernel void @sub_sext_setcc_commute(i32 addrspace(1)* nocapture %arg, i32 %a, i32%b) {
bb:
diff --git a/llvm/test/CodeGen/AMDGPU/sub-zext-cc-zext-cc.ll b/llvm/test/CodeGen/AMDGPU/sub-zext-cc-zext-cc.ll
new file mode 100644
index 000000000000..911677478d51
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/sub-zext-cc-zext-cc.ll
@@ -0,0 +1,34 @@
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs <%s | FileCheck -check-prefixes=GCN %s
+;
+; This test checks that we have the correct fold for zext(cc1) - zext(cc2).
+;
+; GCN-LABEL: sub_zext_zext:
+; GCN: ds_read_b32 [[VAL:v[0-9]+]],
+; GCN-DAG: v_cmp_lt_f32{{.*}} [[CC1:s\[[0-9]+:[0-9]+\]]], 0, [[VAL]]
+; GCN-DAG: v_cmp_gt_f32{{.*}} vcc, 0, [[VAL]]
+; GCN: v_cndmask_{{.*}} [[ZEXTCC1:v[0-9]+]], 0, 1, [[CC1]]
+; GCN: v_subbrev{{.*}} {{v[0-9]+}}, vcc, 0, [[ZEXTCC1]], vcc
+;
+; Before the reversion that this test is attached to, the compiler commuted
+; the operands to the sub and used
diff erent logic to select the addc/subc
+; instruction:
+; sub zext (setcc), x => addcarry 0, x, setcc
+; sub sext (setcc), x => subcarry 0, x, setcc
+;
+; ... but that is bogus. I believe it is not possible to fold those commuted
+; patterns into any form of addcarry or subcarry.
+
+define amdgpu_cs float @sub_zext_zext() {
+.entry:
+
+ %t519 = load float, float addrspace(3)* null
+
+ %t524 = fcmp ogt float %t519, 0.000000e+00
+ %t525 = fcmp olt float %t519, 0.000000e+00
+ %t526 = zext i1 %t524 to i32
+ %t527 = zext i1 %t525 to i32
+ %t528 = sub nsw i32 %t526, %t527
+ %t529 = sitofp i32 %t528 to float
+ ret float %t529
+}
+
More information about the llvm-commits
mailing list