[PATCH] D22556: Replace subregister uses when processing tied operands
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 12 17:03:13 PDT 2016
arsenm retitled this revision from "Replace other uses of register when processind tied operands" to "Replace subregister uses when processing tied operands".
arsenm updated the summary for this revision.
arsenm updated this revision to Diff 67942.
arsenm added a comment.
There was already code there trying to accomplish the same thing; it was just skipping operands that use subregisters, for some reason.
https://reviews.llvm.org/D22556
Files:
lib/CodeGen/TwoAddressInstructionPass.cpp
test/CodeGen/AMDGPU/indirect-addressing-si-noopt.ll
test/CodeGen/AMDGPU/indirect-addressing-si.ll
Index: test/CodeGen/AMDGPU/indirect-addressing-si.ll
===================================================================
--- test/CodeGen/AMDGPU/indirect-addressing-si.ll
+++ test/CodeGen/AMDGPU/indirect-addressing-si.ll
@@ -125,27 +125,32 @@
}
; CHECK-LABEL: {{^}}insert_w_offset:
-; CHECK: s_load_dword [[IN:s[0-9]+]]
-; CHECK: s_mov_b32 m0, [[IN]]
-; CHECK: v_movreld_b32_e32
-define void @insert_w_offset(float addrspace(1)* %out, i32 %in) {
+; CHECK-DAG: s_load_dword [[IN:s[0-9]+]]
+; CHECK-DAG: s_mov_b32 m0, [[IN]]
+; CHECK-DAG: v_mov_b32_e32 v[[ELT0:[0-9]+]], 1.0
+; CHECK-DAG: v_mov_b32_e32 v[[ELT1:[0-9]+]], 2.0
+; CHECK-DAG: v_mov_b32_e32 v[[ELT2:[0-9]+]], 0x40400000
+; CHECK-DAG: v_mov_b32_e32 v[[ELT3:[0-9]+]], 4.0
+; CHECK-DAG: v_mov_b32_e32 v[[INS:[0-9]+]], 0x40a00000
+; CHECK: v_movreld_b32_e32 v[[ELT1]], v[[INS]]
+; CHECK: buffer_store_dwordx4 v{{\[}}[[ELT0]]:[[ELT3]]{{\]}}
+define void @insert_w_offset(<4 x float> addrspace(1)* %out, i32 %in) {
entry:
%0 = add i32 %in, 1
%1 = insertelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, float 5.0, i32 %0
- %2 = extractelement <4 x float> %1, i32 2
- store float %2, float addrspace(1)* %out
+ store <4 x float> %1, <4 x float> addrspace(1)* %out
ret void
}
; CHECK-LABEL: {{^}}insert_wo_offset:
; CHECK: s_load_dword [[IN:s[0-9]+]]
; CHECK: s_mov_b32 m0, [[IN]]
-; CHECK: v_movreld_b32_e32
-define void @insert_wo_offset(float addrspace(1)* %out, i32 %in) {
+; CHECK: v_movreld_b32_e32 v[[ELT0:[0-9]+]]
+; CHECK: buffer_store_dwordx4 v{{\[}}[[ELT0]]:
+define void @insert_wo_offset(<4 x float> addrspace(1)* %out, i32 %in) {
entry:
%0 = insertelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, float 5.0, i32 %in
- %1 = extractelement <4 x float> %0, i32 2
- store float %1, float addrspace(1)* %out
+ store <4 x float> %0, <4 x float> addrspace(1)* %out
ret void
}
Index: test/CodeGen/AMDGPU/indirect-addressing-si-noopt.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/indirect-addressing-si-noopt.ll
@@ -0,0 +1,19 @@
+; RUN: llc -O0 -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
+
+; FIXME: Merge into indirect-addressing-si.ll
+
+; Make sure that TwoAddressInstructions keeps src0 as subregister sub0
+; of the tied implicit use and def of the super register.
+
+; CHECK-LABEL: {{^}}insert_wo_offset:
+; CHECK: s_load_dword [[IN:s[0-9]+]]
+; CHECK: s_mov_b32 m0, [[IN]]
+; CHECK: v_movreld_b32_e32 v[[ELT0:[0-9]+]]
+; CHECK-NEXT: buffer_store_dwordx4 v{{\[}}[[ELT0]]:
+define void @insert_wo_offset(<4 x float> addrspace(1)* %out, i32 %in) {
+entry:
+ %ins = insertelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, float 5.0, i32 %in
+ store <4 x float> %ins, <4 x float> addrspace(1)* %out
+ ret void
+}
+
Index: lib/CodeGen/TwoAddressInstructionPass.cpp
===================================================================
--- lib/CodeGen/TwoAddressInstructionPass.cpp
+++ lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -1567,14 +1567,14 @@
if (!IsEarlyClobber) {
// Replace other (un-tied) uses of regB with LastCopiedReg.
for (MachineOperand &MO : MI->operands()) {
- if (MO.isReg() && MO.getReg() == RegB && MO.getSubReg() == SubRegB &&
+ if (MO.isReg() && MO.getReg() == RegB &&
MO.isUse()) {
if (MO.isKill()) {
MO.setIsKill(false);
RemovedKillFlag = true;
}
MO.setReg(LastCopiedReg);
- MO.setSubReg(0);
+ MO.setSubReg(MO.getSubReg());
}
}
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D22556.67942.patch
Type: text/x-patch
Size: 3620 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160813/7e3bb5ab/attachment.bin>
More information about the llvm-commits
mailing list