[llvm-commits] [llvm] r103971 - in /llvm/trunk: lib/CodeGen/TwoAddressInstructionPass.cpp test/CodeGen/ARM/reg_sequence.ll
Evan Cheng
evan.cheng at apple.com
Mon May 17 13:57:12 PDT 2010
Author: evancheng
Date: Mon May 17 15:57:12 2010
New Revision: 103971
URL: http://llvm.org/viewvc/llvm-project?rev=103971&view=rev
Log:
Be careful with reg_sequence coalescing so that it does not overwrite sub-register indices.
Modified:
llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp
llvm/trunk/test/CodeGen/ARM/reg_sequence.ll
Modified: llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp?rev=103971&r1=103970&r2=103971&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp (original)
+++ llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp Mon May 17 15:57:12 2010
@@ -128,6 +128,8 @@
void ProcessCopy(MachineInstr *MI, MachineBasicBlock *MBB,
SmallPtrSet<MachineInstr*, 8> &Processed);
+ void CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs, unsigned DstReg);
+
/// EliminateRegSequences - Eliminate REG_SEQUENCE instructions as part
/// of the de-ssa process. This replaces sources of REG_SEQUENCE as
/// sub-register references of the register defined by REG_SEQUENCE.
@@ -1132,7 +1134,7 @@
}
static void UpdateRegSequenceSrcs(unsigned SrcReg,
- unsigned DstReg, unsigned SrcIdx,
+ unsigned DstReg, unsigned SubIdx,
MachineRegisterInfo *MRI) {
for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg),
RE = MRI->reg_end(); RI != RE; ) {
@@ -1140,7 +1142,77 @@
++RI;
MO.setReg(DstReg);
assert(MO.getSubReg() == 0);
- MO.setSubReg(SrcIdx);
+ MO.setSubReg(SubIdx);
+ }
+}
+
+/// CoalesceExtSubRegs - If a number of sources of the REG_SEQUENCE are
+/// EXTRACT_SUBREG from the same register and to the same virtual register
+/// with different sub-register indices, attempt to combine the
+/// EXTRACT_SUBREGs and pre-coalesce them. e.g.
+/// %reg1026<def> = VLDMQ %reg1025<kill>, 260, pred:14, pred:%reg0
+/// %reg1029:6<def> = EXTRACT_SUBREG %reg1026, 6
+/// %reg1029:5<def> = EXTRACT_SUBREG %reg1026<kill>, 5
+/// Since D subregs 5, 6 can combine to a Q register, we can coalesce
+/// reg1026 to reg1029.
+void
+TwoAddressInstructionPass::CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs,
+ unsigned DstReg) {
+ SmallSet<unsigned, 4> Seen;
+ for (unsigned i = 0, e = Srcs.size(); i != e; ++i) {
+ unsigned SrcReg = Srcs[i];
+ if (!Seen.insert(SrcReg))
+ continue;
+
+ // If there are no other uses than extract_subreg which feed into
+ // the reg_sequence, then we might be able to coalesce them.
+ bool CanCoalesce = true;
+ SmallVector<unsigned, 4> SubIndices;
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UI = MRI->use_nodbg_begin(SrcReg),
+ UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
+ MachineInstr *UseMI = &*UI;
+ if (!UseMI->isExtractSubreg() ||
+ UseMI->getOperand(0).getReg() != DstReg) {
+ CanCoalesce = false;
+ break;
+ }
+ SubIndices.push_back(UseMI->getOperand(2).getImm());
+ }
+
+ if (!CanCoalesce || SubIndices.size() < 2)
+ continue;
+
+ std::sort(SubIndices.begin(), SubIndices.end());
+ unsigned NewSubIdx = 0;
+ if (TRI->canCombinedSubRegIndex(MRI->getRegClass(SrcReg), SubIndices,
+ NewSubIdx)) {
+ bool Proceed = true;
+ if (NewSubIdx)
+ for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg),
+ RE = MRI->reg_end(); RI != RE; ) {
+ MachineOperand &MO = RI.getOperand();
+ ++RI;
+ // FIXME: If the sub-registers do not combine to the whole
+ // super-register, i.e. NewSubIdx != 0, and any of the use has a
+ // sub-register index, then abort the coalescing attempt.
+ if (MO.getSubReg()) {
+ Proceed = false;
+ break;
+ }
+ MO.setReg(DstReg);
+ MO.setSubReg(NewSubIdx);
+ }
+ if (Proceed)
+ for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg),
+ RE = MRI->reg_end(); RI != RE; ) {
+ MachineOperand &MO = RI.getOperand();
+ ++RI;
+ MO.setReg(DstReg);
+ if (NewSubIdx)
+ MO.setSubReg(NewSubIdx);
+ }
+ }
}
}
@@ -1221,50 +1293,15 @@
for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) {
unsigned SrcReg = MI->getOperand(i).getReg();
- unsigned SrcIdx = MI->getOperand(i+1).getImm();
- UpdateRegSequenceSrcs(SrcReg, DstReg, SrcIdx, MRI);
+ unsigned SubIdx = MI->getOperand(i+1).getImm();
+ UpdateRegSequenceSrcs(SrcReg, DstReg, SubIdx, MRI);
}
DEBUG(dbgs() << "Eliminated: " << *MI);
MI->eraseFromParent();
// Try coalescing some EXTRACT_SUBREG instructions.
- Seen.clear();
- for (unsigned i = 0, e = RealSrcs.size(); i != e; ++i) {
- unsigned SrcReg = RealSrcs[i];
- if (!Seen.insert(SrcReg))
- continue;
-
- // If there are no other uses than extract_subreg which feed into
- // the reg_sequence, then we might be able to coalesce them.
- bool CanCoalesce = true;
- SmallVector<unsigned, 4> SubIndices;
- for (MachineRegisterInfo::use_nodbg_iterator
- UI = MRI->use_nodbg_begin(SrcReg),
- UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
- MachineInstr *UseMI = &*UI;
- if (!UseMI->isExtractSubreg() ||
- UseMI->getOperand(0).getReg() != DstReg) {
- CanCoalesce = false;
- break;
- }
- SubIndices.push_back(UseMI->getOperand(2).getImm());
- }
-
- if (!CanCoalesce)
- continue;
-
- // %reg1026<def> = VLDMQ %reg1025<kill>, 260, pred:14, pred:%reg0
- // %reg1029:6<def> = EXTRACT_SUBREG %reg1026, 6
- // %reg1029:5<def> = EXTRACT_SUBREG %reg1026<kill>, 5
- // Since D subregs 5, 6 can combine to a Q register, we can coalesce
- // reg1026 to reg1029.
- std::sort(SubIndices.begin(), SubIndices.end());
- unsigned NewSubIdx = 0;
- if (TRI->canCombinedSubRegIndex(MRI->getRegClass(SrcReg), SubIndices,
- NewSubIdx))
- UpdateRegSequenceSrcs(SrcReg, DstReg, NewSubIdx, MRI);
- }
+ CoalesceExtSubRegs(RealSrcs, DstReg);
}
RegSequences.clear();
Modified: llvm/trunk/test/CodeGen/ARM/reg_sequence.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/reg_sequence.ll?rev=103971&r1=103970&r2=103971&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/reg_sequence.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/reg_sequence.ll Mon May 17 15:57:12 2010
@@ -3,6 +3,7 @@
%struct.int16x8_t = type { <8 x i16> }
%struct.int32x4_t = type { <4 x i32> }
+%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> }
%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> }
%struct.__neon_int32x4x2_t = type { <4 x i32>, <4 x i32> }
@@ -149,12 +150,51 @@
ret <8 x i16> %tmp5
}
+define <8 x i8> @t6(i8* %A, <8 x i8>* %B) nounwind {
+; CHECK: t6:
+; CHECK: vldr.64
+; CHECK: vmov d1, d0
+; CHECK-NEXT: vld2.8 {d0[1], d1[1]}
+ %tmp1 = load <8 x i8>* %B ; <<8 x i8>> [#uses=2]
+ %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1) ; <%struct.__neon_int8x8x2_t> [#uses=2]
+ %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0 ; <<8 x i8>> [#uses=1]
+ %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1 ; <<8 x i8>> [#uses=1]
+ %tmp5 = add <8 x i8> %tmp3, %tmp4 ; <<8 x i8>> [#uses=1]
+ ret <8 x i8> %tmp5
+}
+
+define arm_apcscc void @t7(i32* %iptr, i32* %optr) nounwind {
+entry:
+; CHECK: t7:
+; CHECK: vld2.32
+; CHECK: vst2.32
+; CHECK: vld1.32 {d0, d1},
+; CHECK: vmov q1, q0
+; CHECK-NOT: vmov
+; CHECK: vuzp.32 q0, q1
+; CHECK: vst1.32
+ %0 = bitcast i32* %iptr to i8* ; <i8*> [#uses=2]
+ %1 = tail call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %0) ; <%struct.__neon_int32x4x2_t> [#uses=2]
+ %tmp57 = extractvalue %struct.__neon_int32x4x2_t %1, 0 ; <<4 x i32>> [#uses=1]
+ %tmp60 = extractvalue %struct.__neon_int32x4x2_t %1, 1 ; <<4 x i32>> [#uses=1]
+ %2 = bitcast i32* %optr to i8* ; <i8*> [#uses=2]
+ tail call void @llvm.arm.neon.vst2.v4i32(i8* %2, <4 x i32> %tmp57, <4 x i32> %tmp60)
+ %3 = tail call <4 x i32> @llvm.arm.neon.vld1.v4i32(i8* %0) ; <<4 x i32>> [#uses=1]
+ %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 0, i32 2> ; <<4 x i32>> [#uses=1]
+ tail call void @llvm.arm.neon.vst1.v4i32(i8* %2, <4 x i32> %4)
+ ret void
+}
+
+declare <4 x i32> @llvm.arm.neon.vld1.v4i32(i8*) nounwind readonly
+
declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*) nounwind readonly
declare <4 x i32> @llvm.arm.neon.vmovls.v4i32(<4 x i16>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare void @llvm.arm.neon.vst1.v4i32(i8*, <4 x i32>) nounwind
+
declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>) nounwind
declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>) nounwind
@@ -163,6 +203,8 @@
declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8*) nounwind readonly
+declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind readonly
+
declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32) nounwind readonly
declare void @llvm.arm.neon.vst2.v4i32(i8*, <4 x i32>, <4 x i32>) nounwind
More information about the llvm-commits
mailing list