[llvm] c872faf - [AMDGPU] Do not generate S_CMP_LG_U64 on gfx7
Piotr Sobczak via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 19 05:45:09 PDT 2020
Author: Piotr Sobczak
Date: 2020-10-19T14:44:31+02:00
New Revision: c872faf6e0913d05ead41975d72eeb9fc085ce2b
URL: https://github.com/llvm/llvm-project/commit/c872faf6e0913d05ead41975d72eeb9fc085ce2b
DIFF: https://github.com/llvm/llvm-project/commit/c872faf6e0913d05ead41975d72eeb9fc085ce2b.diff
LOG: [AMDGPU] Do not generate S_CMP_LG_U64 on gfx7
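S_CMP_LG_U64 was added in gfx8, and its availability is guarded by hasScalarCompareEq64().
For targets that do not support the 64-bit scalar equality compares (e.g. gfx7),
rewrite S_CMP_LG_U64 against zero to S_OR_B32 of the two 32-bit halves followed by
S_CMP_LG_U32 against zero; the 64-bit value is non-zero exactly when the OR of its
halves is non-zero.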
Differential Revision: https://reviews.llvm.org/D89536
Added:
Modified:
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 6e28d45aaa2d..01ca19079706 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4006,10 +4006,29 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
Src2.setReg(RegOp2);
}
- if (TRI->getRegSizeInBits(*MRI.getRegClass(Src2.getReg())) == 64) {
- BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_CMP_LG_U64))
- .addReg(Src2.getReg())
- .addImm(0);
+ const TargetRegisterClass *Src2RC = MRI.getRegClass(Src2.getReg());
+ if (TRI->getRegSizeInBits(*Src2RC) == 64) {
+ if (ST.hasScalarCompareEq64()) {
+ BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_CMP_LG_U64))
+ .addReg(Src2.getReg())
+ .addImm(0);
+ } else {
+ const TargetRegisterClass *SubRC =
+ TRI->getSubRegClass(Src2RC, AMDGPU::sub0);
+ MachineOperand Src2Sub0 = TII->buildExtractSubRegOrImm(
+ MII, MRI, Src2, Src2RC, AMDGPU::sub0, SubRC);
+ MachineOperand Src2Sub1 = TII->buildExtractSubRegOrImm(
+ MII, MRI, Src2, Src2RC, AMDGPU::sub1, SubRC);
+ Register Src2_32 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+
+ BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_OR_B32), Src2_32)
+ .add(Src2Sub0)
+ .add(Src2Sub1);
+
+ BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_CMP_LG_U32))
+ .addReg(Src2_32, RegState::Kill)
+ .addImm(0);
+ }
} else {
BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_CMPK_LG_U32))
.addReg(Src2.getReg())
diff --git a/llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll b/llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll
index aad3ea52ab81..4a5039b77dc4 100644
--- a/llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll
+++ b/llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll
@@ -1,8 +1,26 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx704 < %s | FileCheck -check-prefix=GFX7 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
define i32 @s_add_co_select_user() {
+; GFX7-LABEL: s_add_co_select_user:
+; GFX7: ; %bb.0: ; %bb
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-NEXT: s_load_dword s6, s[4:5], 0x0
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: v_add_i32_e64 v0, s[4:5], s6, s6
+; GFX7-NEXT: s_or_b32 s4, s4, s5
+; GFX7-NEXT: s_cmp_lg_u32 s4, 0
+; GFX7-NEXT: s_addc_u32 s4, s6, 0
+; GFX7-NEXT: v_mov_b32_e32 v1, s4
+; GFX7-NEXT: s_cselect_b64 vcc, 1, 0
+; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX7-NEXT: v_cmp_gt_u32_e64 vcc, s6, 31
+; GFX7-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: s_add_co_select_user:
; GFX9: ; %bb.0: ; %bb
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -52,6 +70,32 @@ bb:
}
define amdgpu_kernel void @s_add_co_br_user(i32 %i) {
+; GFX7-LABEL: s_add_co_br_user:
+; GFX7: ; %bb.0: ; %bb
+; GFX7-NEXT: s_load_dword s0, s[4:5], 0x0
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: s_add_i32 s1, s0, s0
+; GFX7-NEXT: v_mov_b32_e32 v0, s0
+; GFX7-NEXT: v_cmp_lt_u32_e32 vcc, s1, v0
+; GFX7-NEXT: s_or_b32 s1, vcc_lo, vcc_hi
+; GFX7-NEXT: s_cmp_lg_u32 s1, 0
+; GFX7-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX7-NEXT: s_addc_u32 s0, s0, 0
+; GFX7-NEXT: v_cmp_ge_u32_e32 vcc, s0, v0
+; GFX7-NEXT: s_and_b64 vcc, exec, vcc
+; GFX7-NEXT: s_cbranch_vccnz BB1_2
+; GFX7-NEXT: ; %bb.1: ; %bb0
+; GFX7-NEXT: v_mov_b32_e32 v0, 0
+; GFX7-NEXT: v_mov_b32_e32 v2, 9
+; GFX7-NEXT: v_mov_b32_e32 v1, 0
+; GFX7-NEXT: flat_store_dword v[0:1], v2
+; GFX7-NEXT: BB1_2: ; %bb1
+; GFX7-NEXT: v_mov_b32_e32 v0, 0
+; GFX7-NEXT: v_mov_b32_e32 v2, 10
+; GFX7-NEXT: v_mov_b32_e32 v1, 0
+; GFX7-NEXT: flat_store_dword v[0:1], v2
+; GFX7-NEXT: s_endpgm
+;
; GFX9-LABEL: s_add_co_br_user:
; GFX9: ; %bb.0: ; %bb
; GFX9-NEXT: s_load_dword s0, s[4:5], 0x0
More information about the llvm-commits mailing list