[PATCH] D34380: AMDGPU: Do operand folding in program order

Mon Jun 19 20:21:32 PDT 2017

arsenm created this revision.
Herald added subscribers: t-tye, tpr, dstuttard, yaxunl, nhaehnle, wdng, kzhuravl.

Before it was possible to partially fold use instructions
before the defs. After the xor is folded into a copy, the same
mov can end up in the fold list twice, so on the second attempt
it will fail expecting to see a register to fold.

      


https://reviews.llvm.org/D34380

Files:
  lib/Target/AMDGPU/SIFoldOperands.cpp
  test/CodeGen/AMDGPU/fold-operands-order.mir


Index: test/CodeGen/AMDGPU/fold-operands-order.mir
===================================================================

--- /dev/null
+++ test/CodeGen/AMDGPU/fold-operands-order.mir
@@ -0,0 +1,47 @@
+# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=hawaii -verify-machineinstrs -run-pass si-fold-operands -o - %s | FileCheck -check-prefix=GCN %s
+
+--- |
+  define amdgpu_kernel void @mov_in_use_list_2x() {
+    unreachable
+  }
+
+...
+---
+
+# Blocks should be processed in program order to make sure folds
+# aren't made in users before the def is seen.
+
+# GCN-LABEL: name: mov_in_use_list_2x{{$}}
+# GCN: %2 = V_MOV_B32_e32 0, implicit %exec
+# GCN-NEXT: %3 = COPY undef %0
+
+# GCN: %1 = V_MOV_B32_e32 0, implicit %exec
+
+
+name: mov_in_use_list_2x
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: vgpr_32, preferred-register: '' }
+  - { id: 1, class: vgpr_32, preferred-register: '' }
+  - { id: 2, class: vgpr_32, preferred-register: '' }
+  - { id: 3, class: vgpr_32, preferred-register: '' }
+liveins:
+body:             |
+  bb.0:
+    successors: %bb.2
+
+    S_BRANCH %bb.2
+
+  bb.1:
+    successors: %bb.2
+
+    %2 = COPY %1
+    %3 = V_XOR_B32_e64 killed %2, undef %0, implicit %exec
+
+  bb.2:
+    successors: %bb.1
+
+    %1 = V_MOV_B32_e32 0, implicit %exec
+    S_BRANCH %bb.1
+
+...
Index: lib/Target/AMDGPU/SIFoldOperands.cpp
===================================================================
--- lib/Target/AMDGPU/SIFoldOperands.cpp
+++ lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -13,6 +13,7 @@
 #include "AMDGPUSubtarget.h"
 #include "SIInstrInfo.h"
 #include "SIMachineFunctionInfo.h"
+#include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -923,12 +924,9 @@
   // level.
   bool IsIEEEMode = ST->enableIEEEBit(MF) || !MFI->hasNoSignedZerosFPMath();
 
-  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
-       BI != BE; ++BI) {
-
-    MachineBasicBlock &MBB = *BI;
+  for (MachineBasicBlock *MBB : depth_first(&MF)) {
     MachineBasicBlock::iterator I, Next;
-    for (I = MBB.begin(); I != MBB.end(); I = Next) {
+    for (I = MBB->begin(); I != MBB->end(); I = Next) {
       Next = std::next(I);
       MachineInstr &MI = *I;
 


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D34380.103140.patch
Type: text/x-patch
Size: 2308 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170620/c12850bd/attachment.bin>