[PATCH] D34380: AMDGPU: Do operand folding in program order
Matt Arsenault via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 19 20:21:32 PDT 2017
arsenm created this revision.
Herald added subscribers: t-tye, tpr, dstuttard, yaxunl, nhaehnle, wdng, kzhuravl.
Before it was possible to partially fold use instructions
before the defs. After the xor is folded into a copy, the same
mov can end up in the fold list twice, so on the second attempt
it will fail expecting to see a register to fold.
https://reviews.llvm.org/D34380
Files:
lib/Target/AMDGPU/SIFoldOperands.cpp
test/CodeGen/AMDGPU/fold-operands-order.mir
Index: test/CodeGen/AMDGPU/fold-operands-order.mir
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/fold-operands-order.mir
@@ -0,0 +1,47 @@
+# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=hawaii -verify-machineinstrs -run-pass si-fold-operands -o - %s | FileCheck -check-prefix=GCN %s
+
+--- |
+ define amdgpu_kernel void @mov_in_use_list_2x() {
+ unreachable
+ }
+
+...
+---
+
+# Blocks should be processed in program order to make sure folds
+# aren't made in users before the def is seen.
+
+# GCN-LABEL: name: mov_in_use_list_2x{{$}}
+# GCN: %2 = V_MOV_B32_e32 0, implicit %exec
+# GCN-NEXT: %3 = COPY undef %0
+
+# GCN: %1 = V_MOV_B32_e32 0, implicit %exec
+
+
+name: mov_in_use_list_2x
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vgpr_32, preferred-register: '' }
+ - { id: 1, class: vgpr_32, preferred-register: '' }
+ - { id: 2, class: vgpr_32, preferred-register: '' }
+ - { id: 3, class: vgpr_32, preferred-register: '' }
+liveins:
+body: |
+ bb.0:
+ successors: %bb.2
+
+ S_BRANCH %bb.2
+
+ bb.1:
+ successors: %bb.2
+
+ %2 = COPY %1
+ %3 = V_XOR_B32_e64 killed %2, undef %0, implicit %exec
+
+ bb.2:
+ successors: %bb.1
+
+ %1 = V_MOV_B32_e32 0, implicit %exec
+ S_BRANCH %bb.1
+
+...
Index: lib/Target/AMDGPU/SIFoldOperands.cpp
===================================================================
--- lib/Target/AMDGPU/SIFoldOperands.cpp
+++ lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -13,6 +13,7 @@
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
+#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -923,12 +924,9 @@
// level.
bool IsIEEEMode = ST->enableIEEEBit(MF) || !MFI->hasNoSignedZerosFPMath();
- for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
- BI != BE; ++BI) {
-
- MachineBasicBlock &MBB = *BI;
+ for (MachineBasicBlock *MBB : depth_first(&MF)) {
MachineBasicBlock::iterator I, Next;
- for (I = MBB.begin(); I != MBB.end(); I = Next) {
+ for (I = MBB->begin(); I != MBB->end(); I = Next) {
Next = std::next(I);
MachineInstr &MI = *I;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D34380.103140.patch
Type: text/x-patch
Size: 2308 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170620/c12850bd/attachment.bin>
More information about the llvm-commits
mailing list