[PATCH] D26931: AMDGPU: Materialize frame index before add
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 21 13:28:04 PST 2016
arsenm created this revision.
arsenm added a subscriber: llvm-commits.
Herald added a reviewer: tstellarAMD.
Herald added subscribers: tony-tye, yaxunl, nhaehnle, wdng, kzhuravl.
It isn't generally safe to fold the frame index
directly into the operand since it will possibly
not be an inline immediate after it is expanded.
This surprisingly seems to produce better code, since
the FI doesn't prevent folding other immediate operands.
https://reviews.llvm.org/D26931
Files:
lib/Target/AMDGPU/SIRegisterInfo.cpp
test/CodeGen/AMDGPU/local-stack-slot-bug.ll
Index: test/CodeGen/AMDGPU/local-stack-slot-bug.ll
===================================================================
--- test/CodeGen/AMDGPU/local-stack-slot-bug.ll
+++ test/CodeGen/AMDGPU/local-stack-slot-bug.ll
@@ -7,15 +7,16 @@
;
; CHECK-LABEL: {{^}}main:
+; CHECK: v_mov_b32_e32 [[BASE_FI:v[0-9]+]], 0{{$}}
+
; FIXME: add 0?
-; CHECK-DAG: s_movk_i32 [[K0:s[0-9]+]], 0x140
-; CHECK-DAG: v_add_i32_e64 [[ADD_K0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, [[K0]], 0
+; CHECK-DAG: v_add_i32_e32 [[ADD_K0:v[0-9]+]], vcc, 0x140, [[BASE_FI]]
; CHECK-DAG: v_lshlrev_b32_e32 [[BYTES:v[0-9]+]], 2, v0
; CHECK-DAG: buffer_store_dword {{v[0-9]+}}, [[ADD_K0]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
; CHECK-DAG: v_add_i32_e32 [[HI_OFF:v[0-9]+]], vcc, 0x200, [[BYTES]]
-; CHECK-DAG: v_add_i32_e32 [[LO_OFF:v[0-9]+]], vcc, 0, [[BYTES]]
+; CHECK-DAG: v_add_i32_e32 [[LO_OFF:v[0-9]+]], vcc, [[BASE_FI]], [[BYTES]]
; CHECK: buffer_load_dword {{v[0-9]+}}, [[LO_OFF]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen
; CHECK: buffer_load_dword {{v[0-9]+}}, [[HI_OFF]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen
Index: lib/Target/AMDGPU/SIRegisterInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -235,12 +235,17 @@
unsigned UnusedCarry = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ unsigned FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+
BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
.addImm(Offset);
+ BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), FIReg)
+ .addFrameIndex(FrameIdx);
+
BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_ADD_I32_e64), BaseReg)
.addReg(UnusedCarry, RegState::Define | RegState::Dead)
.addReg(OffsetReg, RegState::Kill)
- .addFrameIndex(FrameIdx);
+ .addReg(FIReg);
}
void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D26931.78770.patch
Type: text/x-patch
Size: 2045 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20161121/1444db49/attachment.bin>
More information about the llvm-commits
mailing list