[PATCH] D65557: AMDGPU/LoadStoreOptimizer: Set the correct offset when merging MMOs
Tom Stellard via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 31 22:17:30 PDT 2019
tstellar created this revision.
tstellar added a reviewer: arsenm.
Herald added subscribers: hiraditya, t-tye, tpr, dstuttard, yaxunl, nhaehnle, wdng, jvesely, kzhuravl.
Herald added a project: LLVM.
This is a follow-up to r367237. MachineFunction::getMachineMemOperand()
adds its offset parameter to the existing offset of the source MMO instead
of replacing it, so we need to set the offset to the correct value
explicitly after calling this function.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D65557
Files:
llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
llvm/test/CodeGen/AMDGPU/merge-load-store.mir
Index: llvm/test/CodeGen/AMDGPU/merge-load-store.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/merge-load-store.mir
+++ llvm/test/CodeGen/AMDGPU/merge-load-store.mir
@@ -65,7 +65,8 @@
attributes #0 = { convergent nounwind }
- define amdgpu_kernel void @merge_mmos() { ret void }
+ define amdgpu_kernel void @merge_mmos(i32 addrspace(1)* %ptr_addr1) { ret void }
+
...
---
name: mem_dependency
@@ -170,6 +171,8 @@
# CHECK: S_BUFFER_LOAD_DWORDX2_IMM %0, 0, 0, 0 :: (dereferenceable invariant load 8, align 4)
# CHECK: BUFFER_LOAD_DWORDX2_OFFSET %0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 4)
# CHECK: BUFFER_STORE_DWORDX2_OFFSET_exact killed %{{[0-9]+}}, %0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 4)
+# CHECK: BUFFER_LOAD_DWORDX2_OFFSET %0, 0, 64, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from %ir.ptr_addr1 + 64, align 4
+# CHECK: BUFFER_STORE_DWORDX2_OFFSET_exact killed %{{[0-9]+}}, %0, 0, 64, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into %ir.ptr_addr1 + 64, align 4
name: merge_mmos
tracksRegLiveness: true
body: |
@@ -183,6 +186,11 @@
%4:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %0, 0, 4, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4)
BUFFER_STORE_DWORD_OFFSET_exact %3, %0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4)
BUFFER_STORE_DWORD_OFFSET_exact %4, %0, 0, 4, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4)
+ %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %0, 0, 64, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from %ir.ptr_addr1 + 64)
+ %6:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %0, 0, 68, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from %ir.ptr_addr1 + 68)
+ BUFFER_STORE_DWORD_OFFSET_exact %5, %0, 0, 64, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into %ir.ptr_addr1 + 64)
+ BUFFER_STORE_DWORD_OFFSET_exact %6, %0, 0, 68, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into %ir.ptr_addr1 + 68)
+
S_ENDPGM 0
...
Index: llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -392,7 +392,12 @@
const MachineMemOperand *B) {
unsigned MinOffset = std::min(A->getOffset(), B->getOffset());
unsigned Size = A->getSize() + B->getSize();
- return MF.getMachineMemOperand(A, MinOffset, Size);
+ // This function adds the offset parameter to the existing offset for A,
+ // so we pass 0 here as the offset and then manually set it to the correct
+ // value after the call.
+ MachineMemOperand *MMO = MF.getMachineMemOperand(A, 0, Size);
+ MMO->setOffset(MinOffset);
+ return MMO;
}
bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI, bool Modify) {
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D65557.212727.patch
Type: text/x-patch
Size: 2961 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20190801/33ca06ee/attachment.bin>
More information about the llvm-commits
mailing list