[PATCH] D12256: AMDGPU/SI: Fold operands through REG_SEQUENCE instructions
Tom Stellard via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 21 16:14:59 PDT 2015
tstellarAMD created this revision.
tstellarAMD added a reviewer: arsenm.
tstellarAMD added a subscriber: llvm-commits.
Herald added a subscriber: arsenm.
This helps mostly when we use add instructions for address calculations
that contain immediates.
http://reviews.llvm.org/D12256
Files:
lib/Target/AMDGPU/SIFoldOperands.cpp
test/CodeGen/AMDGPU/cgp-addressing-modes.ll
test/CodeGen/AMDGPU/merge-stores.ll
test/CodeGen/AMDGPU/select64.ll
Index: test/CodeGen/AMDGPU/select64.ll
===================================================================
--- test/CodeGen/AMDGPU/select64.ll
+++ test/CodeGen/AMDGPU/select64.ll
@@ -51,12 +51,8 @@
}
; CHECK-LABEL: {{^}}v_select_i64_split_imm:
-; CHECK: s_mov_b32 [[SHI:s[0-9]+]], 63
-; CHECK: s_mov_b32 [[SLO:s[0-9]+]], 0
-; CHECK-DAG: v_mov_b32_e32 [[VHI:v[0-9]+]], [[SHI]]
-; CHECK-DAG: v_mov_b32_e32 [[VLO:v[0-9]+]], [[SLO]]
-; CHECK-DAG: v_cndmask_b32_e32 {{v[0-9]+}}, [[VLO]], {{v[0-9]+}}
-; CHECK-DAG: v_cndmask_b32_e32 {{v[0-9]+}}, [[VHI]], {{v[0-9]+}}
+; CHECK-DAG: v_cndmask_b32_e32 {{v[0-9]+}}, 0, {{v[0-9]+}}
+; CHECK-DAG: v_cndmask_b32_e32 {{v[0-9]+}}, 63, {{v[0-9]+}}
; CHECK: s_endpgm
define void @v_select_i64_split_imm(i64 addrspace(1)* %out, i32 %cond, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
%cmp = icmp ugt i32 %cond, 5
Index: test/CodeGen/AMDGPU/merge-stores.ll
===================================================================
--- test/CodeGen/AMDGPU/merge-stores.ll
+++ test/CodeGen/AMDGPU/merge-stores.ll
@@ -508,10 +508,8 @@
}
; GCN-LABEL: {{^}}merge_local_store_2_constants_i32:
-; GCN-DAG: s_movk_i32 [[SLO:s[0-9]+]], 0x1c8
-; GCN-DAG: s_movk_i32 [[SHI:s[0-9]+]], 0x7b
-; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], [[SLO]]
-; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[SHI]]
+; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0x1c8
+; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7b
; GCN: ds_write2_b32 v{{[0-9]+}}, v[[LO]], v[[HI]] offset1:1{{$}}
define void @merge_local_store_2_constants_i32(i32 addrspace(3)* %out) #0 {
%out.gep.1 = getelementptr i32, i32 addrspace(3)* %out, i32 1
Index: test/CodeGen/AMDGPU/cgp-addressing-modes.ll
===================================================================
--- test/CodeGen/AMDGPU/cgp-addressing-modes.ll
+++ test/CodeGen/AMDGPU/cgp-addressing-modes.ll
@@ -317,10 +317,8 @@
; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_i32:
; GCN: s_and_saveexec_b64
-; GCN-DAG: s_mov_b32 s{{[0-9]+}}, 3{{$}}
-; GCN-DAG: s_mov_b32 s{{[0-9]+}}, -4{{$}}
-; GCN: s_add_u32
-; GCN: s_addc_u32
+; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, -4{{$}}
+; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, 3{{$}}
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
; GCN: s_or_b64 exec, exec
define void @test_sink_constant_max_32_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
Index: lib/Target/AMDGPU/SIFoldOperands.cpp
===================================================================
--- lib/Target/AMDGPU/SIFoldOperands.cpp
+++ lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -245,6 +245,27 @@
}
}
+ // Special case for REG_SEQUENCE: We can't fold literals into
+ // REG_SEQUENCE instructions, so we have to fold them into the
+ // uses of REG_SEQUENCE.
+ if (UseMI->getOpcode() == AMDGPU::REG_SEQUENCE) {
+ unsigned RegSeqDstReg = UseMI->getOperand(0).getReg();
+ unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();
+
+ for (MachineRegisterInfo::use_iterator
+ RSUse = MRI.use_begin(RegSeqDstReg),
+ RSE = MRI.use_end(); RSUse != RSE; ++RSUse) {
+
+ MachineInstr *RSUseMI = RSUse->getParent();
+ if (RSUse->getSubReg() != RegSeqDstSubReg)
+ continue;
+
+ foldOperand(OpToFold, RSUseMI, RSUse.getOperandNo(), FoldList,
+ TII, TRI, MRI);
+ }
+ return;
+ }
+
const MCInstrDesc &UseDesc = UseMI->getDesc();
// Don't fold into target independent nodes. Target independent opcodes
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D12256.32881.patch
Type: text/x-patch
Size: 3505 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150821/ca22fa1a/attachment.bin>
More information about the llvm-commits
mailing list