[PATCH] D73634: [AMDGPU] Cluster FLAT instructions with both vaddr and saddr
Jay Foad via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 29 09:03:26 PST 2020
This revision was automatically updated to reflect the committed changes.
Closed by commit rGd07a7895792a: [AMDGPU] Cluster FLAT instructions with both vaddr and saddr (authored by foad).
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D73634/new/
https://reviews.llvm.org/D73634
Files:
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/test/CodeGen/AMDGPU/global-saddr.ll
llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
Index: llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
+++ llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
@@ -264,11 +264,11 @@
; CI-NEXT: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:52{{$}}
; GFX9: global_load_dword {{v[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:12
+; GFX9: global_load_dword {{v[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:28
; GFX9: global_load_dword {{v[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:44
+
; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}{{$}}
; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:20
-; GFX9: global_load_dword {{v[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:28
-
; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:36
; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:52
Index: llvm/test/CodeGen/AMDGPU/global-saddr.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/global-saddr.ll
+++ llvm/test/CodeGen/AMDGPU/global-saddr.ll
@@ -2,11 +2,11 @@
; Test for a conv2d like sequence of loads.
-; GFX9: global_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:16{{$}}
; GFX9: global_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}{{$}}
+; GFX9: global_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:16{{$}}
; GFX9: global_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:-16{{$}}
-; GFX9: global_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
; GFX9: global_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:-32{{$}}
+; GFX9: global_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
; GFX9: global_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:8{{$}}
define hidden amdgpu_kernel void @simpleSaddrs(i64 addrspace(1)* %dst_image, i64 addrspace(1)* %src_image ) {
@@ -45,9 +45,9 @@
store volatile i64 %add7, i64 addrspace(1)* %ptr9
; Test various offset boundaries.
-; GFX9: global_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:4088{{$}}
; GFX9: global_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, off offset:4088{{$}}
; GFX9: global_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:2040{{$}}
+; GFX9: global_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:4088{{$}}
%gep11 = getelementptr inbounds i64, i64 addrspace(1)* %gep, i64 511
%load11 = load i64, i64 addrspace(1)* %gep11
%gep12 = getelementptr inbounds i64, i64 addrspace(1)* %gep, i64 1023
Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -365,23 +365,14 @@
}
if (isFLAT(LdSt)) {
- const MachineOperand *VAddr = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
- if (VAddr) {
- // Can't analyze 2 offsets.
- // FIXME remove this restriction!
- if (getNamedOperand(LdSt, AMDGPU::OpName::saddr))
- return false;
-
- BaseOp = VAddr;
- } else {
- // scratch instructions have either vaddr or saddr.
- BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::saddr);
- }
-
+ // Instructions have either vaddr or saddr or both.
+ BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
+ if (BaseOp)
+ BaseOps.push_back(BaseOp);
+ BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::saddr);
+ if (BaseOp)
+ BaseOps.push_back(BaseOp);
Offset = getNamedOperand(LdSt, AMDGPU::OpName::offset)->getImm();
- if (!BaseOp->isReg())
- return false;
- BaseOps.push_back(BaseOp);
return true;
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D73634.241179.patch
Type: text/x-patch
Size: 4357 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200129/4605b9d5/attachment.bin>
More information about the llvm-commits
mailing list