[llvm] 3a312c3 - [AMDGPU][GlobalISel] Refactor selectDS1Addr1Offset/selectDS64Bit4ByteAligned
Austin Kerbow via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 11 16:59:15 PST 2020
Author: Austin Kerbow
Date: 2020-02-11T16:57:13-08:00
New Revision: 3a312c3ee5f946ca18fb87d25f6f32fd5dc755ac
URL: https://github.com/llvm/llvm-project/commit/3a312c3ee5f946ca18fb87d25f6f32fd5dc755ac
DIFF: https://github.com/llvm/llvm-project/commit/3a312c3ee5f946ca18fb87d25f6f32fd5dc755ac.diff
LOG: [AMDGPU][GlobalISel] Refactor selectDS1Addr1Offset/selectDS64Bit4ByteAligned
Differential Revision: https://reviews.llvm.org/D74261
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.dec.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 600988a8080f..935db23da635 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -2374,18 +2374,16 @@ AMDGPUInstructionSelector::selectDS1Addr1OffsetImpl(MachineOperand &Root) const
return std::make_pair(Root.getReg(), 0);
int64_t ConstAddr = 0;
- if (isBaseWithConstantOffset(Root, *MRI)) {
- const MachineOperand &LHS = RootDef->getOperand(1);
- const MachineOperand &RHS = RootDef->getOperand(2);
- const MachineInstr *LHSDef = MRI->getVRegDef(LHS.getReg());
- const MachineInstr *RHSDef = MRI->getVRegDef(RHS.getReg());
- if (LHSDef && RHSDef) {
- int64_t PossibleOffset =
- RHSDef->getOperand(1).getCImm()->getSExtValue();
- if (isDSOffsetLegal(LHS.getReg(), PossibleOffset, 16)) {
- // (add n0, c0)
- return std::make_pair(LHS.getReg(), PossibleOffset);
- }
+
+ Register PtrBase;
+ int64_t Offset;
+ std::tie(PtrBase, Offset) =
+ getPtrBaseWithConstantOffset(Root.getReg(), *MRI);
+
+ if (Offset) {
+ if (isDSOffsetLegal(PtrBase, Offset, 16)) {
+ // (add n0, c0)
+ return std::make_pair(PtrBase, Offset);
}
} else if (RootDef->getOpcode() == AMDGPU::G_SUB) {
// TODO
@@ -2401,7 +2399,6 @@ AMDGPUInstructionSelector::selectDS1Addr1OffsetImpl(MachineOperand &Root) const
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const {
-
Register Reg;
unsigned Offset;
std::tie(Reg, Offset) = selectDS1Addr1OffsetImpl(Root);
@@ -2413,19 +2410,26 @@ AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const {
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDS64Bit4ByteAligned(MachineOperand &Root) const {
+ Register Reg;
+ unsigned Offset;
+ std::tie(Reg, Offset) = selectDS64Bit4ByteAlignedImpl(Root);
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset+1); }
+ }};
+}
+
+std::pair<Register, unsigned>
+AMDGPUInstructionSelector::selectDS64Bit4ByteAlignedImpl(MachineOperand &Root) const {
const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg());
- if (!RootDef) {
- return {{
- [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
- [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
- [=](MachineInstrBuilder &MIB) { MIB.addImm(1); }
- }};
- }
+ if (!RootDef)
+ return std::make_pair(Root.getReg(), 0);
int64_t ConstAddr = 0;
+
Register PtrBase;
int64_t Offset;
-
std::tie(PtrBase, Offset) =
getPtrBaseWithConstantOffset(Root.getReg(), *MRI);
@@ -2434,11 +2438,7 @@ AMDGPUInstructionSelector::selectDS64Bit4ByteAligned(MachineOperand &Root) const
int64_t DWordOffset1 = DWordOffset0 + 1;
if (isDSOffsetLegal(PtrBase, DWordOffset1, 8)) {
// (add n0, c0)
- return {{
- [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrBase); },
- [=](MachineInstrBuilder &MIB) { MIB.addImm(DWordOffset0); },
- [=](MachineInstrBuilder &MIB) { MIB.addImm(DWordOffset1); }
- }};
+ return std::make_pair(PtrBase, DWordOffset0);
}
} else if (RootDef->getOpcode() == AMDGPU::G_SUB) {
// TODO
@@ -2448,11 +2448,7 @@ AMDGPUInstructionSelector::selectDS64Bit4ByteAligned(MachineOperand &Root) const
}
- return {{
- [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
- [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
- [=](MachineInstrBuilder &MIB) { MIB.addImm(1); }
- }};
+ return std::make_pair(Root.getReg(), 0);
}
/// If \p Root is a G_PTR_ADD with a G_CONSTANT on the right hand side, return
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 38b76a76299f..d440932c72dd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -175,10 +175,12 @@ class AMDGPUInstructionSelector : public InstructionSelector {
unsigned OffsetBits) const;
std::pair<Register, unsigned>
- selectDS1Addr1OffsetImpl(MachineOperand &Src) const;
-
+ selectDS1Addr1OffsetImpl(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
selectDS1Addr1Offset(MachineOperand &Root) const;
+
+ std::pair<Register, unsigned>
+ selectDS64Bit4ByteAlignedImpl(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
selectDS64Bit4ByteAligned(MachineOperand &Root) const;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.dec.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.dec.ll
index 32bef20c9ffd..3cad9f11bdba 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.dec.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.dec.ll
@@ -1141,10 +1141,9 @@ define amdgpu_kernel void @atomic_dec_shl_base_lds_0(i32 addrspace(1)* %out, i32
; CI-NEXT: v_add_i32_e32 v2, vcc, 2, v0
; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; CI-NEXT: v_add_i32_e32 v0, vcc, 0, v0
-; CI-NEXT: v_add_i32_e32 v0, vcc, 8, v0
; CI-NEXT: v_mov_b32_e32 v1, 9
; CI-NEXT: s_mov_b32 m0, -1
-; CI-NEXT: ds_dec_rtn_u32 v3, v0, v1
+; CI-NEXT: ds_dec_rtn_u32 v3, v0, v1 offset:8
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: v_mov_b32_e32 v0, s2
; CI-NEXT: v_mov_b32_e32 v1, s3
@@ -1160,10 +1159,9 @@ define amdgpu_kernel void @atomic_dec_shl_base_lds_0(i32 addrspace(1)* %out, i32
; VI-NEXT: v_add_u32_e32 v2, vcc, 2, v0
; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; VI-NEXT: v_add_u32_e32 v0, vcc, 0, v0
-; VI-NEXT: v_add_u32_e32 v0, vcc, 8, v0
; VI-NEXT: v_mov_b32_e32 v1, 9
; VI-NEXT: s_mov_b32 m0, -1
-; VI-NEXT: ds_dec_rtn_u32 v3, v0, v1
+; VI-NEXT: ds_dec_rtn_u32 v3, v0, v1 offset:8
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v0, s2
; VI-NEXT: v_mov_b32_e32 v1, s3
@@ -1708,15 +1706,14 @@ define amdgpu_kernel void @global_atomic_dec_noret_i64_offset_addr64(i64 addrspa
define amdgpu_kernel void @atomic_dec_shl_base_lds_0_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
; CI-LABEL: atomic_dec_shl_base_lds_0_i64:
; CI: ; %bb.0:
+; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; CI-NEXT: v_add_i32_e32 v4, vcc, 2, v0
; CI-NEXT: v_lshlrev_b32_e32 v0, 3, v0
-; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
-; CI-NEXT: v_add_i32_e32 v0, vcc, 0, v0
-; CI-NEXT: v_add_i32_e32 v2, vcc, 16, v0
+; CI-NEXT: v_add_i32_e32 v2, vcc, 0, v0
; CI-NEXT: v_mov_b32_e32 v0, 9
; CI-NEXT: v_mov_b32_e32 v1, 0
; CI-NEXT: s_mov_b32 m0, -1
-; CI-NEXT: ds_dec_rtn_u64 v[0:1], v2, v[0:1]
+; CI-NEXT: ds_dec_rtn_u64 v[0:1], v2, v[0:1] offset:16
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: v_mov_b32_e32 v2, s2
; CI-NEXT: v_mov_b32_e32 v3, s3
@@ -1728,15 +1725,14 @@ define amdgpu_kernel void @atomic_dec_shl_base_lds_0_i64(i64 addrspace(1)* %out,
;
; VI-LABEL: atomic_dec_shl_base_lds_0_i64:
; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; VI-NEXT: v_add_u32_e32 v4, vcc, 2, v0
; VI-NEXT: v_lshlrev_b32_e32 v0, 3, v0
-; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
-; VI-NEXT: v_add_u32_e32 v0, vcc, 0, v0
-; VI-NEXT: v_add_u32_e32 v2, vcc, 16, v0
+; VI-NEXT: v_add_u32_e32 v2, vcc, 0, v0
; VI-NEXT: v_mov_b32_e32 v0, 9
; VI-NEXT: v_mov_b32_e32 v1, 0
; VI-NEXT: s_mov_b32 m0, -1
-; VI-NEXT: ds_dec_rtn_u64 v[0:1], v2, v[0:1]
+; VI-NEXT: ds_dec_rtn_u64 v[0:1], v2, v[0:1] offset:16
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v2, s2
; VI-NEXT: v_mov_b32_e32 v3, s3
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll
index 1f42aa8a67bc..f4b01506ad45 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll
@@ -506,10 +506,9 @@ define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i32(i32 addrspace(1)* %out,
; CI-NEXT: v_add_i32_e32 v2, vcc, 2, v0
; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; CI-NEXT: v_add_i32_e32 v0, vcc, 0, v0
-; CI-NEXT: v_add_i32_e32 v0, vcc, 8, v0
; CI-NEXT: v_mov_b32_e32 v1, 9
; CI-NEXT: s_mov_b32 m0, -1
-; CI-NEXT: ds_inc_rtn_u32 v3, v0, v1
+; CI-NEXT: ds_inc_rtn_u32 v3, v0, v1 offset:8
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: v_mov_b32_e32 v0, s2
; CI-NEXT: v_mov_b32_e32 v1, s3
@@ -525,10 +524,9 @@ define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i32(i32 addrspace(1)* %out,
; VI-NEXT: v_add_u32_e32 v2, vcc, 2, v0
; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; VI-NEXT: v_add_u32_e32 v0, vcc, 0, v0
-; VI-NEXT: v_add_u32_e32 v0, vcc, 8, v0
; VI-NEXT: v_mov_b32_e32 v1, 9
; VI-NEXT: s_mov_b32 m0, -1
-; VI-NEXT: ds_inc_rtn_u32 v3, v0, v1
+; VI-NEXT: ds_inc_rtn_u32 v3, v0, v1 offset:8
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v0, s2
; VI-NEXT: v_mov_b32_e32 v1, s3
@@ -544,9 +542,8 @@ define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i32(i32 addrspace(1)* %out,
; GFX9-NEXT: v_add_u32_e32 v2, 2, v0
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT: v_add_u32_e32 v0, 0, v0
-; GFX9-NEXT: v_add_u32_e32 v0, 8, v0
; GFX9-NEXT: v_mov_b32_e32 v1, 9
-; GFX9-NEXT: ds_inc_rtn_u32 v3, v0, v1
+; GFX9-NEXT: ds_inc_rtn_u32 v3, v0, v1 offset:8
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s2
; GFX9-NEXT: v_mov_b32_e32 v1, s3
@@ -1279,15 +1276,14 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_addr64(i32* %ptr) #0
define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
; CI-LABEL: atomic_inc_shl_base_lds_0_i64:
; CI: ; %bb.0:
+; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; CI-NEXT: v_add_i32_e32 v4, vcc, 2, v0
; CI-NEXT: v_lshlrev_b32_e32 v0, 3, v0
-; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
-; CI-NEXT: v_add_i32_e32 v0, vcc, 0, v0
-; CI-NEXT: v_add_i32_e32 v2, vcc, 16, v0
+; CI-NEXT: v_add_i32_e32 v2, vcc, 0, v0
; CI-NEXT: v_mov_b32_e32 v0, 9
; CI-NEXT: v_mov_b32_e32 v1, 0
; CI-NEXT: s_mov_b32 m0, -1
-; CI-NEXT: ds_inc_rtn_u64 v[0:1], v2, v[0:1]
+; CI-NEXT: ds_inc_rtn_u64 v[0:1], v2, v[0:1] offset:16
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: v_mov_b32_e32 v2, s2
; CI-NEXT: v_mov_b32_e32 v3, s3
@@ -1299,15 +1295,14 @@ define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i64(i64 addrspace(1)* %out,
;
; VI-LABEL: atomic_inc_shl_base_lds_0_i64:
; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; VI-NEXT: v_add_u32_e32 v4, vcc, 2, v0
; VI-NEXT: v_lshlrev_b32_e32 v0, 3, v0
-; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
-; VI-NEXT: v_add_u32_e32 v0, vcc, 0, v0
-; VI-NEXT: v_add_u32_e32 v2, vcc, 16, v0
+; VI-NEXT: v_add_u32_e32 v2, vcc, 0, v0
; VI-NEXT: v_mov_b32_e32 v0, 9
; VI-NEXT: v_mov_b32_e32 v1, 0
; VI-NEXT: s_mov_b32 m0, -1
-; VI-NEXT: ds_inc_rtn_u64 v[0:1], v2, v[0:1]
+; VI-NEXT: ds_inc_rtn_u64 v[0:1], v2, v[0:1] offset:16
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v2, s2
; VI-NEXT: v_mov_b32_e32 v3, s3
@@ -1319,14 +1314,13 @@ define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i64(i64 addrspace(1)* %out,
;
; GFX9-LABEL: atomic_inc_shl_base_lds_0_i64:
; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX9-NEXT: v_add_u32_e32 v4, 2, v0
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 3, v0
-; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
-; GFX9-NEXT: v_add_u32_e32 v0, 0, v0
-; GFX9-NEXT: v_add_u32_e32 v2, 16, v0
+; GFX9-NEXT: v_add_u32_e32 v2, 0, v0
; GFX9-NEXT: v_mov_b32_e32 v0, 9
; GFX9-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-NEXT: ds_inc_rtn_u64 v[0:1], v2, v[0:1]
+; GFX9-NEXT: ds_inc_rtn_u64 v[0:1], v2, v[0:1] offset:16
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v2, s2
; GFX9-NEXT: v_mov_b32_e32 v3, s3
More information about the llvm-commits mailing list