[PATCH] D25829: AMDGPU: Fix SILoadStoreOptimizer when writes cannot be merged due to register dependencies

Phabricator via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 27 01:24:35 PDT 2016


This revision was automatically updated to reflect the committed changes.
Closed by commit rL285273: AMDGPU: Fix SILoadStoreOptimizer when writes cannot be merged due to register… (authored by nha).

Changed prior to commit:
  https://reviews.llvm.org/D25829?vs=75308&id=75990#toc

Repository:
  rL LLVM

https://reviews.llvm.org/D25829

Files:
  llvm/trunk/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
  llvm/trunk/test/CodeGen/AMDGPU/merge-store-usedef.ll


Index: llvm/trunk/test/CodeGen/AMDGPU/merge-store-usedef.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/merge-store-usedef.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/merge-store-usedef.ll
@@ -0,0 +1,23 @@
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
+
+; CHECK-LABEL: {{^}}test1:
+; CHECK: ds_write_b32
+; CHECK: ds_read_b32
+; CHECK: ds_write_b32
+define amdgpu_vs void @test1(i32 %v) #0 {
+  %p0 = getelementptr i32, i32 addrspace(3)* null, i32 0
+  %p1 = getelementptr i32, i32 addrspace(3)* null, i32 1
+
+  store i32 %v, i32 addrspace(3)* %p0
+
+  call void @llvm.SI.tbuffer.store.i32(<16 x i8> undef, i32 %v, i32 1, i32 undef, i32 undef, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
+
+  %w = load i32, i32 addrspace(3)* %p0
+  store i32 %w, i32 addrspace(3)* %p1
+  ret void
+}
+
+declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) #0
+
+attributes #0 = { nounwind }
Index: llvm/trunk/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -141,6 +141,27 @@
   }
 }
 
+// If MI reads a virtual register defined by one of the operands in Defs,
+// append MI to Insts and MI's defs to Defs and return true; else return false.
+static bool
+addToListsIfDependent(MachineInstr &MI,
+                      SmallVectorImpl<const MachineOperand *> &Defs,
+                      SmallVectorImpl<MachineInstr*> &Insts) {
+  for (const MachineOperand *Def : Defs) {
+    bool ReadDef = MI.readsVirtualRegister(Def->getReg());
+    // MI reads a register tracked in Defs, i.e. it depends on an instruction
+    // the caller plans to move, so MI has to move too. Its defs are recorded
+    // as well so that later users of MI's results are picked up in turn.
+    if (ReadDef) {
+      Insts.push_back(&MI);
+      addDefsToList(MI, Defs);
+      return true;
+    }
+  }
+
+  return false;
+}
+
 static bool
 canMoveInstsAcrossMemOp(MachineInstr &MemOp,
                         ArrayRef<MachineInstr*> InstsToMove,
@@ -224,24 +245,23 @@
       // When we match I with another DS instruction we will be moving I down
       // to the location of the matched instruction any uses of I will need to
       // be moved down as well.
-      for (const MachineOperand *Def : DefsToMove) {
-        bool ReadDef = MBBI->readsVirtualRegister(Def->getReg());
-        // If ReadDef is true, then there is a use of Def between I
-        // and the instruction that I will potentially be merged with. We
-        // will need to move this instruction after the merged instructions.
-        if (ReadDef) {
-          InstsToMove.push_back(&*MBBI);
-          addDefsToList(*MBBI, DefsToMove);
-          break;
-        }
-      }
+      addToListsIfDependent(*MBBI, DefsToMove, InstsToMove);
       continue;
     }
 
     // Don't merge volatiles.
     if (MBBI->hasOrderedMemoryRef())
       return E;
 
+    // Handle a case like
+    //   DS_WRITE_B32 addr, v, idx0
+    //   w = DS_READ_B32 addr, idx0
+    //   DS_WRITE_B32 addr, f(w), idx1
+    // where the DS_READ_B32 ends up in InstsToMove and therefore prevents
+    // merging of the two writes.
+    if (addToListsIfDependent(*MBBI, DefsToMove, InstsToMove))
+      continue;
+
     int AddrIdx = AMDGPU::getNamedOperandIdx(I->getOpcode(), AMDGPU::OpName::addr);
     const MachineOperand &AddrReg0 = I->getOperand(AddrIdx);
     const MachineOperand &AddrReg1 = MBBI->getOperand(AddrIdx);


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D25829.75990.patch
Type: text/x-patch
Size: 3705 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20161027/572fee82/attachment.bin>


More information about the llvm-commits mailing list