[llvm] 7f81c5a - [AMDGPU] getMemOperandsWithOffset: add vaddr operand for stack access BUF instructions
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Fri May 14 02:12:05 PDT 2021
Author: Jay Foad
Date: 2021-05-14T10:10:43+01:00
New Revision: 7f81c5a5bae8329379b226d4efff6a44dfdeece9
URL: https://github.com/llvm/llvm-project/commit/7f81c5a5bae8329379b226d4efff6a44dfdeece9
DIFF: https://github.com/llvm/llvm-project/commit/7f81c5a5bae8329379b226d4efff6a44dfdeece9.diff
LOG: [AMDGPU] getMemOperandsWithOffset: add vaddr operand for stack access BUF instructions
A consequence is that checkInstOffsetsDoNotOverlap can now distinguish
sp+offset from fp+offset, so it knows that it shouldn't try to work out
whether the accesses overlap just by comparing the offsets. For example
in these two instructions:
MIR:
BUFFER_STORE_DWORD_OFFSET %0:vgpr_32(s32), $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into stack + 4, addrspace 5)
%4:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0.alloca, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from `i8 addrspace(5)* undef`, addrspace 5)
ISA:
buffer_store_dword v0, off, s[0:3], s32 offset:4
buffer_load_dword v0, off, s[0:3], s34
Differential Revision: https://reviews.llvm.org/D73957
Added:
Modified:
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/test/CodeGen/AMDGPU/amdpal-callable.ll
llvm/test/CodeGen/AMDGPU/call-argument-types.ll
llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.load.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.load.ll
llvm/test/CodeGen/AMDGPU/salu-to-valu.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index d9e08804ab7f..8858d90bb709 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -316,39 +316,22 @@ bool SIInstrInfo::getMemOperandsWithOffsetWidth(
}
if (isMUBUF(LdSt) || isMTBUF(LdSt)) {
- const MachineOperand *SOffset = getNamedOperand(LdSt, AMDGPU::OpName::soffset);
- if (SOffset && SOffset->isReg()) {
- // We can only handle this if it's a stack access, as any other resource
- // would require reporting multiple base registers.
- const MachineOperand *AddrReg = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
- if (AddrReg && !AddrReg->isFI())
- return false;
-
- const MachineOperand *RSrc = getNamedOperand(LdSt, AMDGPU::OpName::srsrc);
- const SIMachineFunctionInfo *MFI
- = LdSt.getParent()->getParent()->getInfo<SIMachineFunctionInfo>();
- if (RSrc->getReg() != MFI->getScratchRSrcReg())
- return false;
-
- const MachineOperand *OffsetImm =
- getNamedOperand(LdSt, AMDGPU::OpName::offset);
- BaseOps.push_back(RSrc);
- BaseOps.push_back(SOffset);
- Offset = OffsetImm->getImm();
- } else {
- BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::srsrc);
- if (!BaseOp) // e.g. BUFFER_WBINVL1_VOL
- return false;
+ const MachineOperand *RSrc = getNamedOperand(LdSt, AMDGPU::OpName::srsrc);
+ if (!RSrc) // e.g. BUFFER_WBINVL1_VOL
+ return false;
+ BaseOps.push_back(RSrc);
+ BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
+ if (BaseOp && !BaseOp->isFI())
BaseOps.push_back(BaseOp);
-
- BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
- if (BaseOp)
- BaseOps.push_back(BaseOp);
-
- const MachineOperand *OffsetImm =
- getNamedOperand(LdSt, AMDGPU::OpName::offset);
- Offset = OffsetImm->getImm();
- if (SOffset) // soffset can be an inline immediate.
+ const MachineOperand *OffsetImm =
+ getNamedOperand(LdSt, AMDGPU::OpName::offset);
+ Offset = OffsetImm->getImm();
+ const MachineOperand *SOffset =
+ getNamedOperand(LdSt, AMDGPU::OpName::soffset);
+ if (SOffset) {
+ if (SOffset->isReg())
+ BaseOps.push_back(SOffset);
+ else
Offset += SOffset->getImm();
}
// Get appropriate operand, and compute width accordingly.
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll b/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll
index 4e03df464581..a1f4d67acb15 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll
@@ -149,7 +149,7 @@ attributes #0 = { nounwind }
; GCN-NEXT: - .registers:
; SDAG-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf01ca{{$}}
; SDAG-NEXT: 0x2e13 (COMPUTE_PGM_RSRC2): 0x8001{{$}}
-; GISEL-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf01ca{{$}}
+; GISEL-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf01ce{{$}}
; GISEL-NEXT: 0x2e13 (COMPUTE_PGM_RSRC2): 0x8001{{$}}
; GCN-NEXT: .shader_functions:
; GCN-NEXT: dynamic_stack:
diff --git a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
index 26c914c36bfb..503a18dd36f7 100644
--- a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
@@ -739,8 +739,8 @@ entry:
; GCN-LABEL: {{^}}tail_call_byval_align16:
; GCN-NOT: s32
-; GCN: buffer_load_dword [[VREG1:v[0-9]+]], off, s[0:3], s32 offset:8
; GCN: buffer_load_dword [[VREG2:v[0-9]+]], off, s[0:3], s32 offset:12
+; GCN: buffer_load_dword [[VREG1:v[0-9]+]], off, s[0:3], s32 offset:8
; GCN: s_getpc_b64
diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
index 98b42cb1e36a..bbfc9813c7ea 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
@@ -3294,14 +3294,15 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 {
; GFX9-LABEL: tail_call_byval_align16:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8
-; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:12
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:12
+; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, byval_align16_f64_arg at rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, byval_align16_f64_arg at rel32@hi+12
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4
-; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32
+; GFX9-NEXT: s_waitcnt vmcnt(1)
+; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:4
+; GFX9-NEXT: s_waitcnt vmcnt(1)
+; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32
; GFX9-NEXT: s_setpc_b64 s[4:5]
;
; GFX10-LABEL: tail_call_byval_align16:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.load.ll
index 462c9dcf1a23..29eb6c1d0cbc 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.load.ll
@@ -39,8 +39,8 @@ main_body:
}
; GCN-LABEL: {{^}}tbuffer_load_immoffs_large
-; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, 61 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] offset:4095
; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_SSCALED] offset:73
+; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, 61 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] offset:4095
; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] offset:1
; GFX10-DAG: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, 61 format:[BUF_FMT_10_10_10_2_SSCALED] offset:4095
; GFX10-DAG: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} format:[BUF_FMT_32_32_UINT] offset:73
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.ll
index ac18761534ea..28d9d75d1c07 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.ll
@@ -42,8 +42,8 @@ main_body:
; GCN-LABEL: {{^}}tbuffer_load_immoffs_large
; GCN: v_mov_b32_e32 [[ZEROREG:v[0-9]+]], 0
-; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, 61 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] idxen offset:4095
; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_SSCALED] idxen offset:73
+; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, 61 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] idxen offset:4095
; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] idxen offset:1
; GFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, 61 format:[BUF_FMT_10_10_10_2_SSCALED] idxen offset:4095
; GFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} format:[BUF_FMT_32_32_UINT] idxen offset:73
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.load.ll
index f874ce02a2a7..c400f26fde69 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.load.ll
@@ -33,8 +33,8 @@ main_body:
}
; GCN-LABEL: {{^}}tbuffer_load_immoffs_large
-; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, 61 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] offset:4095
; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_SSCALED] offset:73
+; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, 61 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] offset:4095
; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] offset:1
; GCN: s_waitcnt
define amdgpu_vs {<4 x float>, <4 x float>, <4 x float>} @tbuffer_load_immoffs_large(<4 x i32> inreg, i32 inreg %soffs) {
diff --git a/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll b/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll
index 8b27ee9e652a..9e8e3420b562 100644
--- a/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll
+++ b/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll
@@ -173,9 +173,9 @@ entry:
; GCN-NOHSA-DAG: s_mov_b32 [[OFFSET0:s[0-9]+]], 0x9a40{{$}}
; CI-NOHSA-DAG: s_mov_b32 [[OFFSET1:s[0-9]+]], 0x9a50{{$}}
; CI-NOHSA-NOT: v_add
+; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:16
; CI-NOHSA: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET1]] addr64{{$}}
; GCN-NOHSA: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET0]] addr64{{$}}
-; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:16
; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
@@ -203,10 +203,10 @@ entry:
; GCN-LABEL: {{^}}smrd_valu_ci_offset_x16:
; SI: s_mov_b32 {{s[0-9]+}}, 0x13480
-; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:16
; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:32
-; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], {{s[0-9]+}} addr64
; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:48
+; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:16
+; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], {{s[0-9]+}} addr64
; CI-NOHSA-DAG: s_mov_b32 [[OFFSET0:s[0-9]+]], 0x13480{{$}}
; CI-NOHSA-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET0]] addr64{{$}}
; CI-NOHSA-DAG: s_mov_b32 [[OFFSET1:s[0-9]+]], 0x13490{{$}}
More information about the llvm-commits
mailing list