[PATCH] D25829: AMDGPU: Fix SILoadStoreOptimizer when writes cannot be merged due to register dependencies
Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 27 01:24:35 PDT 2016
This revision was automatically updated to reflect the committed changes.
Closed by commit rL285273: AMDGPU: Fix SILoadStoreOptimizer when writes cannot be merged due register… (authored by nha).
Changed prior to commit:
https://reviews.llvm.org/D25829?vs=75308&id=75990#toc
Repository:
rL LLVM
https://reviews.llvm.org/D25829
Files:
llvm/trunk/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
llvm/trunk/test/CodeGen/AMDGPU/merge-store-usedef.ll
Index: llvm/trunk/test/CodeGen/AMDGPU/merge-store-usedef.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/merge-store-usedef.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/merge-store-usedef.ll
@@ -0,0 +1,23 @@
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
+
+; CHECK-LABEL: {{^}}test1:
+; CHECK: ds_write_b32
+; CHECK: ds_read_b32
+; CHECK: ds_write_b32
+define amdgpu_vs void @test1(i32 %v) #0 {
+ %p0 = getelementptr i32, i32 addrspace(3)* null, i32 0
+ %p1 = getelementptr i32, i32 addrspace(3)* null, i32 1
+
+ store i32 %v, i32 addrspace(3)* %p0 ; first store, to %p0
+
+ call void @llvm.SI.tbuffer.store.i32(<16 x i8> undef, i32 %v, i32 1, i32 undef, i32 undef, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
+
+ %w = load i32, i32 addrspace(3)* %p0 ; reads back the location written by the first store
+ store i32 %w, i32 addrspace(3)* %p1 ; stores the loaded value, so this store depends on the load above; the CHECK lines expect write, read, write in that order
+ ret void
+}
+
+declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) #0
+
+attributes #0 = { nounwind }
Index: llvm/trunk/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -141,6 +141,27 @@
}
}
+// If MI reads a register defined by any operand already in Defs, append MI to
+// Insts and add MI's own defs to Defs, then return true. Otherwise return false.
+static bool
+addToListsIfDependent(MachineInstr &MI,
+ SmallVectorImpl<const MachineOperand *> &Defs,
+ SmallVectorImpl<MachineInstr*> &Insts) {
+ for (const MachineOperand *Def : Defs) {
+ bool ReadDef = MI.readsVirtualRegister(Def->getReg());
+ // "I" below is the caller's instruction being considered for merging. If
+ // ReadDef is true, MI uses Def between I and the instruction that I may be
+ // merged with, so MI will need to move after the merged instructions.
+ if (ReadDef) {
+ Insts.push_back(&MI);
+ addDefsToList(MI, Defs); // so later calls also catch readers of MI's defs
+ return true; // first matching def is enough; MI is recorded only once
+ }
+ }
+
+ return false;
+}
+
static bool
canMoveInstsAcrossMemOp(MachineInstr &MemOp,
ArrayRef<MachineInstr*> InstsToMove,
@@ -224,24 +245,23 @@
// When we match I with another DS instruction we will be moving I down
// to the location of the matched instruction any uses of I will need to
// be moved down as well.
- for (const MachineOperand *Def : DefsToMove) {
- bool ReadDef = MBBI->readsVirtualRegister(Def->getReg());
- // If ReadDef is true, then there is a use of Def between I
- // and the instruction that I will potentially be merged with. We
- // will need to move this instruction after the merged instructions.
- if (ReadDef) {
- InstsToMove.push_back(&*MBBI);
- addDefsToList(*MBBI, DefsToMove);
- break;
- }
- }
+ addToListsIfDependent(*MBBI, DefsToMove, InstsToMove);
continue;
}
// Don't merge volatiles.
if (MBBI->hasOrderedMemoryRef())
return E;
+ // Handle a case like
+ // DS_WRITE_B32 addr, v, idx0
+ // w = DS_READ_B32 addr, idx0
+ // DS_WRITE_B32 addr, f(w), idx1
+ // where the DS_READ_B32 ends up in InstsToMove and therefore prevents
+ // merging of the two writes.
+ if (addToListsIfDependent(*MBBI, DefsToMove, InstsToMove))
+ continue;
+
int AddrIdx = AMDGPU::getNamedOperandIdx(I->getOpcode(), AMDGPU::OpName::addr);
const MachineOperand &AddrReg0 = I->getOperand(AddrIdx);
const MachineOperand &AddrReg1 = MBBI->getOperand(AddrIdx);
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D25829.75990.patch
Type: text/x-patch
Size: 3705 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20161027/572fee82/attachment.bin>
More information about the llvm-commits
mailing list