[PATCH] D12225: AMDGPU: Don't reprocess instructions when splitting i64 bcnt
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 20 22:17:07 PDT 2015
arsenm created this revision.
arsenm added a reviewer: tstellarAMD.
arsenm added a subscriber: llvm-commits.
Herald added a subscriber: arsenm.
http://reviews.llvm.org/D12225
Files:
lib/Target/AMDGPU/SIInstrInfo.cpp
test/CodeGen/AMDGPU/ctpop64.ll
Index: test/CodeGen/AMDGPU/ctpop64.ll
===================================================================
--- test/CodeGen/AMDGPU/ctpop64.ll
+++ test/CodeGen/AMDGPU/ctpop64.ll
@@ -36,6 +36,25 @@
ret void
}
+; FIXME: We shouldn't emit the v_mov_b32 0
+; FUNC-LABEL: {{^}}v_ctpop_i64_user:
+; GCN: buffer_load_dwordx2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}},
+; GCN: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], 0
+; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
+; VI-NEXT: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
+; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
+; GCN-DAG: v_or_b32_e32 v[[RESULT_LO:[0-9]+]], s{{[0-9]+}}, [[RESULT]]
+; GCN-DAG: v_or_b32_e32 v[[RESULT_HI:[0-9]+]], s{{[0-9]+}}, v[[ZERO]]
+; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
+; GCN: s_endpgm
+define void @v_ctpop_i64_user(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 %s.val) nounwind {
+ %val = load i64, i64 addrspace(1)* %in, align 8
+ %ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone
+ %or = or i64 %ctpop, %s.val
+ store i64 %or, i64 addrspace(1)* %out
+ ret void
+}
+
; FUNC-LABEL: {{^}}s_ctpop_v2i64:
; GCN: s_bcnt1_i32_b64
; GCN: s_bcnt1_i32_b64
Index: lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.cpp
+++ lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2523,8 +2523,9 @@
MRI.replaceRegWith(Dest.getReg(), ResultReg);
- Worklist.push_back(First);
- Worklist.push_back(Second);
+ // We don't need to legalize operands here. src0 for etiher instruction can be
+ // an SGPR, and the second input is unused or determined here.
+ addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
}
void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist,
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D12225.32801.patch
Type: text/x-patch
Size: 1889 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150821/f5f38308/attachment.bin>
More information about the llvm-commits
mailing list