[PATCH] D77245: [AMDGPU] Fix crash in SILoadStoreOptimizer

Thu Apr 2 10:50:46 PDT 2020

This revision was automatically updated to reflect the committed changes.
Closed by commit rGf2334a7ef255: [AMDGPU] Fix crash in SILoadStoreOptimizer (authored by rampitec).
Herald added a project: LLVM.

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77245/new/

https://reviews.llvm.org/D77245

Files:
  llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
  llvm/test/CodeGen/AMDGPU/merge-out-of-order-ldst.ll
  llvm/test/CodeGen/AMDGPU/merge-out-of-order-ldst.mir


Index: llvm/test/CodeGen/AMDGPU/merge-out-of-order-ldst.mir
===================================================================

--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/merge-out-of-order-ldst.mir
@@ -0,0 +1,23 @@
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -run-pass si-load-store-opt %s -o - | FileCheck -check-prefix=GCN %s
+
+# GCN-LABEL: name: out_of_order_merge
+# GCN: DS_READ2_B64_gfx9
+# GCN: DS_WRITE_B64_gfx9
+# GCN: DS_READ2_B64_gfx9
+# GCN: DS_WRITE_B64_gfx9
+# GCN: DS_WRITE_B64_gfx9
+---
+name:            out_of_order_merge
+body:             |
+  bb.0:
+    %4:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %5:vreg_64 = DS_READ_B64_gfx9 %4, 776, 0, implicit $exec :: (load 8 from `double addrspace(3)* undef`, addrspace 3)
+    %6:vreg_64 = DS_READ_B64_gfx9 %4, 784, 0, implicit $exec :: (load 8 from `double addrspace(3)* undef` + 8, addrspace 3)
+    %17:vreg_64 = DS_READ_B64_gfx9 %4, 840, 0, implicit $exec :: (load 8 from `double addrspace(3)* undef`, addrspace 3)
+    DS_WRITE_B64_gfx9 %4, %17, 8, 0, implicit $exec :: (store 8 into `double addrspace(3)* undef` + 8, addrspace 3)
+    DS_WRITE_B64_gfx9 %4, %6, 0, 0, implicit $exec :: (store 8 into `double addrspace(3)* undef`, align 16, addrspace 3)
+    %24:vreg_64 = DS_READ_B64_gfx9 %4, 928, 0, implicit $exec :: (load 8 from `double addrspace(3)* undef` + 8, addrspace 3)
+    DS_WRITE_B64_gfx9 undef %29:vgpr_32, %5, 0, 0, implicit $exec :: (store 8 into `double addrspace(3)* undef`, addrspace 3)
+    S_ENDPGM 0
+
+...
Index: llvm/test/CodeGen/AMDGPU/merge-out-of-order-ldst.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/merge-out-of-order-ldst.ll
@@ -0,0 +1,28 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+@L = external local_unnamed_addr addrspace(3) global [9 x double], align 16
+@Ldisp = external local_unnamed_addr addrspace(3) global [96 x double], align 16
+
+; Stores are reordered while loads are merged. This case used to assert while
+; scanning for a paired instruction because the scan expected the paired
+; instruction to follow the base one.
+
+; GCN-LABEL: {{^}}out_of_order_merge:
+; GCN-COUNT2: ds_read2_b64
+; GCN-COUNT3: ds_write_b64
+define amdgpu_kernel void @out_of_order_merge() {
+entry:
+  %gep1 = getelementptr inbounds [96 x double], [96 x double] addrspace(3)* @Ldisp, i32 0, i32 0
+  %gep2 = getelementptr inbounds [96 x double], [96 x double] addrspace(3)* @Ldisp, i32 0, i32 1
+  %tmp12 = load <2 x double>, <2 x double> addrspace(3)* bitcast (double addrspace(3)* getelementptr inbounds ([9 x double], [9 x double] addrspace(3)* @L, i32 0, i32 1) to <2 x double> addrspace(3)*), align 8
+  %tmp14 = extractelement <2 x double> %tmp12, i32 0
+  %tmp15 = extractelement <2 x double> %tmp12, i32 1
+  %add50.i = fadd double %tmp14, %tmp15
+  store double %add50.i, double addrspace(3)* %gep1, align 8
+  %tmp16 = load double, double addrspace(3)* getelementptr inbounds ([9 x double], [9 x double] addrspace(3)* @L, i32 1, i32 0), align 8
+  store double %tmp16, double addrspace(3)* %gep2, align 8
+  %tmp17 = load <2 x double>, <2 x double> addrspace(3)* bitcast (double addrspace(3)* getelementptr inbounds ([9 x double], [9 x double] addrspace(3)* @L, i32 2, i32 1) to <2 x double> addrspace(3)*), align 8
+  %tmp19 = extractelement <2 x double> %tmp17, i32 1
+  store double %tmp19, double addrspace(3)* undef, align 8
+  ret void
+}
Index: llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -884,8 +884,19 @@
 
   MachineBasicBlock::iterator E = std::next(Paired.I);
   MachineBasicBlock::iterator MBBI = std::next(CI.I);
+  MachineBasicBlock::iterator MBBE = CI.I->getParent()->end();
   for (; MBBI != E; ++MBBI) {
 
+    if (MBBI == MBBE) {
+      // CombineInfo::Order is a hint on the instruction ordering within the
+      // basic block. This hint suggests that CI precedes Paired, which is
+      // true most of the time. However, moveInstsAfter(), while processing a
+      // previous list, may have changed this order if it moved an instruction
+      // that also exists in some other merge list.
+      // In this case it must be dependent.
+      return false;
+    }
+
     if ((getInstClass(MBBI->getOpcode(), *TII) != InstClass) ||
         (getInstSubclass(MBBI->getOpcode(), *TII) != InstSubclass)) {
       // This is not a matching instruction, but we can keep looking as


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D77245.254565.patch
Type: text/x-patch
Size: 4648 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200402/dba18405/attachment.bin>