[llvm] fce1a6f - Revert "AMDGPU: Try to commute sub of boolean ext"

Fri Dec 13 04:49:41 PST 2019

Author: Tim Renouf
Date: 2019-12-13T12:49:06Z
New Revision: fce1a6f5848d644d56ab72d5bac71aa8053f8f2e

URL: https://github.com/llvm/llvm-project/commit/fce1a6f5848d644d56ab72d5bac71aa8053f8f2e
DIFF: https://github.com/llvm/llvm-project/commit/fce1a6f5848d644d56ab72d5bac71aa8053f8f2e.diff

LOG: Revert "AMDGPU: Try to commute sub of boolean ext"

This reverts commit 69fcfb7d3597e0cdb5554b4e672e9032b411b167.

As shown in the test I attached to this commit, the change I reverted
causes a problem with "zext(cc1) - zext(cc2)". It commuted
the operands to the sub and used different logic to select the addc/subc
instruction:
   sub zext (setcc), x => addcarry 0, x, setcc
   sub sext (setcc), x => subcarry 0, x, setcc

... but that is bogus. I believe it is not possible to fold those commuted
patterns into any form of addcarry or subcarry. It may have worked as
intended before "AMDGPU: Change boolean content type to 0 or 1" because
the setcc was considered to be -1 rather than 1.

Differential Revision: https://reviews.llvm.org/D70978

Change-Id: If2139421aa6c935cbd1d925af58fe4a4aa9e8f43

Added: 
    llvm/test/CodeGen/AMDGPU/sub-zext-cc-zext-cc.ll

Modified: 
    llvm/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 77d3da169cab..8c2012204450 100644

--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -9622,16 +9622,7 @@ SDValue SITargetLowering::performSubCombine(SDNode *N,
 
   // sub x, zext (setcc) => subcarry x, 0, setcc
   // sub x, sext (setcc) => addcarry x, 0, setcc
-
-  bool Commuted = false;
-  unsigned Opc = LHS.getOpcode();
-  if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND ||
-      Opc == ISD::ANY_EXTEND) {
-    std::swap(RHS, LHS);
-    Commuted = true;
-  }
-
-  Opc = RHS.getOpcode();
+  unsigned Opc = RHS.getOpcode();
   switch (Opc) {
   default: break;
   case ISD::ZERO_EXTEND:
@@ -9643,22 +9634,8 @@ SDValue SITargetLowering::performSubCombine(SDNode *N,
     if (!isBoolSGPR(Cond))
       break;
     SDVTList VTList = DAG.getVTList(MVT::i32, MVT::i1);
-    SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
-    SDValue Args[3];
-    Args[2] = Cond;
-
-    if (Commuted) {
-      // sub zext (setcc), x => addcarry 0, x, setcc
-      // sub sext (setcc), x => subcarry 0, x, setcc
-      Args[0] = Zero;
-      Args[1] = LHS;
-      Opc = (Opc == ISD::SIGN_EXTEND) ? ISD::SUBCARRY : ISD::ADDCARRY;
-    } else {
-      Args[0] = LHS;
-      Args[1] = Zero;
-      Opc = (Opc == ISD::SIGN_EXTEND) ? ISD::ADDCARRY : ISD::SUBCARRY;
-    }
-
+    SDValue Args[] = { LHS, DAG.getConstant(0, SL, MVT::i32), Cond };
+    Opc = (Opc == ISD::SIGN_EXTEND) ? ISD::ADDCARRY : ISD::SUBCARRY;
     return DAG.getNode(Opc, SL, VTList, Args);
   }
   }

diff  --git a/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll b/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll
index 37ae4d2d5c28..617f280e046e 100644
--- a/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll
+++ b/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll
@@ -130,14 +130,18 @@ bb:
 ; GCN-LABEL: {{^}}sub_sube_commuted:
 ; GCN-DAG: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
 ; GCN-DAG: buffer_load_dword [[V:v[0-9]+]],
-; GCN: v_addc_u32_e32 [[ADDC:v[0-9]+]], vcc, v{{[0-9]+}}, v{{[0-9]+}}, [[CC]]
-; GCN: v_add_i32_e32 {{.*}}, 0x64, [[ADDC]]
+; GCN: v_cndmask_b32_e64 [[CCZEXT:v[0-9]+]], 0, 1, [[CC]]
+; GCN: v_sub_i32_e32 [[SUB:v[0-9]+]], vcc, [[CCZEXT]], v4
+; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, {{.*}}, [[SUB]]
+; GCN: v_add_i32_e32 {{.*}}, 0x64, [[ADD]]
 
 ; GFX9-LABEL: {{^}}sub_sube_commuted:
 ; GFX9-DAG: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
 ; GFX9-DAG: global_load_dword [[V:v[0-9]+]],
-; GFX9: v_addc_co_u32_e32 [[ADDC:v[0-9]+]], vcc, v{{[0-9]+}}, v{{[0-9]+}}, [[CC]]
-; GFX9: v_add_u32_e32 {{.*}}, 0x64, [[ADDC]]
+; GFX9-DAG: v_cndmask_b32_e64 [[CCZEXT:v[0-9]+]], 0, 1, [[CC]]
+; GFX9: v_sub_u32_e32 {{.*}}, [[CCZEXT]]
+; GFX9: v_add_u32_e32
+; GFX9: v_add_u32_e32 {{.*}}, 0x64,
 define amdgpu_kernel void @sub_sube_commuted(i32 addrspace(1)* nocapture %arg, i32 %a) {
 bb:
   %x = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -312,9 +316,10 @@ bb:
 ; Check case where sub is commuted with zext
 ; GCN-LABEL: {{^}}sub_zext_setcc_commute:
 ; GCN: v_cmp_gt_u32_e32 vcc, v
-; GCN-NOT: vcc
-; GCN: v_addc_u32_e32 [[ADDC:v[0-9]+]], vcc, v{{[0-9]+}}, v{{[0-9]+}}, vcc
-; GCN: v_subrev_i32_e32 [[RESULT:v[0-9]+]], vcc, s{{[0-9]+}}, [[ADDC]]
+; GCN: v_cndmask
+; GCN: v_sub_i32_e32
+; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc,
+; GCN: v_subrev_i32_e32 [[RESULT:v[0-9]+]], vcc, s{{[0-9]+}}, [[ADD]]
 define amdgpu_kernel void @sub_zext_setcc_commute(i32 addrspace(1)* nocapture %arg, i32 %a, i32%b) {
 bb:
   %x = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -333,9 +338,9 @@ bb:
 ; Check case where sub is commuted with sext
 ; GCN-LABEL: {{^}}sub_sext_setcc_commute:
 ; GCN: v_cmp_gt_u32_e32 vcc, v
-; GCN-NOT: vcc
-; GCN: v_subb_u32_e32 [[SUBB:v[0-9]+]], vcc, 0, v{{[0-9]+}}, vcc
-; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, s{{[0-9]+}}, [[SUBB]]
+; GCN: v_cndmask
+; GCN: v_sub_i32_e32
+; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc,
 ; GCN: v_subrev_i32_e32 [[RESULT:v[0-9]+]], vcc, s{{[0-9]+}}, [[ADD]]
 define amdgpu_kernel void @sub_sext_setcc_commute(i32 addrspace(1)* nocapture %arg, i32 %a, i32%b) {
 bb:

diff  --git a/llvm/test/CodeGen/AMDGPU/sub-zext-cc-zext-cc.ll b/llvm/test/CodeGen/AMDGPU/sub-zext-cc-zext-cc.ll
new file mode 100644
index 000000000000..911677478d51
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/sub-zext-cc-zext-cc.ll
@@ -0,0 +1,34 @@
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs <%s | FileCheck -check-prefixes=GCN %s
+;
+; This test checks that we have the correct fold for zext(cc1) - zext(cc2).
+;
+; GCN-LABEL: sub_zext_zext:
+; GCN: ds_read_b32 [[VAL:v[0-9]+]],
+; GCN-DAG: v_cmp_lt_f32{{.*}} [[CC1:s\[[0-9]+:[0-9]+\]]], 0, [[VAL]]
+; GCN-DAG: v_cmp_gt_f32{{.*}} vcc, 0, [[VAL]]
+; GCN: v_cndmask_{{.*}} [[ZEXTCC1:v[0-9]+]], 0, 1, [[CC1]]
+; GCN: v_subbrev{{.*}} {{v[0-9]+}}, vcc, 0, [[ZEXTCC1]], vcc
+;
+; Before the reversion that this test is attached to, the compiler commuted
+; the operands to the sub and used 
diff erent logic to select the addc/subc
+; instruction:
+;    sub zext (setcc), x => addcarry 0, x, setcc
+;    sub sext (setcc), x => subcarry 0, x, setcc
+;
+; ... but that is bogus. I believe it is not possible to fold those commuted
+; patterns into any form of addcarry or subcarry.
+
+define amdgpu_cs float @sub_zext_zext() {
+.entry:
+
+  %t519 = load float, float addrspace(3)* null
+
+  %t524 = fcmp ogt float %t519, 0.000000e+00
+  %t525 = fcmp olt float %t519, 0.000000e+00
+  %t526 = zext i1 %t524 to i32
+  %t527 = zext i1 %t525 to i32
+  %t528 = sub nsw i32 %t526, %t527
+  %t529 = sitofp i32 %t528 to float
+  ret float %t529
+}
+