[PATCH] ARM: correct liveness on ldm/stm combiner
Tim Northover
t.p.northover at gmail.com
Wed Oct 9 08:05:40 PDT 2013
When we had a sequence like:
s1 = VLDRS [r0, 1], Q0<imp-def>
s3 = VLDRS [r0, 2], Q0<imp-use,kill>, Q0<imp-def>
s0 = VLDRS [r0, 0], Q0<imp-use,kill>, Q0<imp-def>
s2 = VLDRS [r0, 4], Q0<imp-use,kill>, Q0<imp-def>
we were gathering the {s0, s1} loads below the s3 load. This is fine, but confused the verifier since now the s3 load had Q0<imp-use> with no definition above it.
This should mark such uses <undef> as well. The liveness structure at the beginning and end of the block is unaffected, and the true sN definitions should prevent any dodgy reorderings being introduced elsewhere.
I know undef isn't ideal for these kinds of things, but the alternative would seem to be removing the <imp-use> entirely, which seems even worse.
Ok to commit?
Cheers.
Tim.
http://llvm-reviews.chandlerc.com/D1868
Files:
lib/Target/ARM/ARMLoadStoreOptimizer.cpp
test/CodeGen/ARM/vldm-liveness.ll
Index: lib/Target/ARM/ARMLoadStoreOptimizer.cpp
===================================================================
--- lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -90,6 +90,10 @@
typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
typedef MemOpQueue::iterator MemOpQueueIter;
+ void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,
+ const MemOpQueue &MemOps, unsigned DefReg,
+ unsigned RangeBegin, unsigned RangeEnd);
+
bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
int Offset, unsigned Base, bool BaseKill, int Opcode,
ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
@@ -360,6 +364,62 @@
return true;
}
+/// \brief Find all instructions using a given imp-def within a range.
+///
+/// We are trying to combine a range of instructions, one of which (located at
+/// position RangeBegin) implicitly defines a register. The final LDM/STM will
+/// be placed at RangeEnd, and so any uses of this definition between RangeStart
+/// and RangeEnd must be modified to use an undefined value.
+///
+/// The live range continues until we find a second definition or one of the
+/// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so
+/// we must consider all uses and decide which are relevant in a second pass.
+void ARMLoadStoreOpt::findUsesOfImpDef(
+ SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps,
+ unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) {
+ std::map<unsigned, MachineOperand *> Uses;
+ unsigned LastLivePos = RangeEnd;
+
+ // First we find all uses of this register with Position between RangeBegin
+ // and RangeEnd, any or all of these could be uses of a definition at
+ // RangeBegin. We also record the latest position a definition at RangeBegin
+ // would be considered live.
+ for (unsigned i = 0; i < MemOps.size(); ++i) {
+ MachineInstr &MI = *MemOps[i].MBBI;
+ unsigned MIPosition = MemOps[i].Position;
+ if (MIPosition <= RangeBegin || MIPosition > RangeEnd)
+ continue;
+
+ // If this instruction defines the register, then any later use will be of
+ // that definition rather than ours.
+ if (MI.definesRegister(DefReg))
+ LastLivePos = std::min(LastLivePos, MIPosition);
+
+ MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg);
+ if (!UseOp)
+ continue;
+
+ // If this instruction kills the register then (assuming liveness is
+ // correct when we start) we don't need to think about anything after here.
+ if (UseOp->isKill())
+ LastLivePos = std::min(LastLivePos, MIPosition);
+
+ Uses[MIPosition] = UseOp;
+ }
+
+ // Now we traverse the list of all uses, and append the ones that actually use
+ // our definition to the requested list.
+ for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(),
+ E = Uses.end();
+ I != E; ++I) {
+ // List is sorted by position so once we've found one out of range there
+ // will be no more to consider.
+ if (I->first > LastLivePos)
+ break;
+ UsesOfImpDefs.push_back(I->second);
+ }
+}
+
// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
@@ -392,6 +452,7 @@
SmallVector<std::pair<unsigned, bool>, 8> Regs;
SmallVector<unsigned, 8> ImpDefs;
+ SmallVector<MachineOperand *, 8> UsesOfImpDefs;
for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
unsigned Reg = memOps[i].Reg;
// If we are inserting the merged operation after an operation that
@@ -406,6 +467,12 @@
unsigned DefReg = MO->getReg();
if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
ImpDefs.push_back(DefReg);
+
+ // There may be other uses of the definition between this instruction and
+ // the eventual LDM/STM position. These should be marked undef if the
+ // merge takes place.
+ findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position,
+ insertPos);
}
}
@@ -418,6 +485,16 @@
// Merge succeeded, update records.
Merges.push_back(prior(Loc));
+
+ // In gathering loads together, we may have moved the imp-def of a register
+ // past one of its uses. This is OK, since we know better than the rest of
+ // LLVM what's OK with ARM loads and stores; but we still have to adjust the
+ // affected uses.
+ for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(),
+ E = UsesOfImpDefs.end();
+ I != E; ++I)
+ (*I)->setIsUndef();
+
for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
// Remove kill flags from any memops that come before insertPos.
if (Regs[i-memOpsBegin].second) {
@@ -432,6 +509,7 @@
memOps[i].isKill = true;
}
MBB.erase(memOps[i].MBBI);
+
// Update this memop to refer to the merged instruction.
// We may need to move kill flags again.
memOps[i].Merged = true;
Index: test/CodeGen/ARM/vldm-liveness.ll
===================================================================
--- /dev/null
+++ test/CodeGen/ARM/vldm-liveness.ll
@@ -0,0 +1,36 @@
+; RUN: llc -mtriple thumbv7-apple-ios -verify-machineinstrs -o - %s | FileCheck %s
+
+; ARM load store optimizer was dealing with a sequence like:
+; s1 = VLDRS [r0, 1], Q0<imp-def>
+; s3 = VLDRS [r0, 2], Q0<imp-use,kill>, Q0<imp-def>
+; s0 = VLDRS [r0, 0], Q0<imp-use,kill>, Q0<imp-def>
+; s2 = VLDRS [r0, 4], Q0<imp-use,kill>, Q0<imp-def>
+;
+; It decided to combine the {s0, s1} loads into a single instruction in the
+; third position. However, this leaves the instruction defining s3 with a stray
+; imp-use of Q0, which is undefined.
+;
+; The verifier catches this, so this test just makes sure that appropriate
+; liveness flags are added.
+
+define arm_aapcs_vfpcc <4 x float> @foo(float* %ptr) {
+; CHECK-LABEL: foo:
+; CHECK: vldr s3, [r0, #8]
+; CHECK: vldmia r0, {s0, s1}
+; CHECK: vldr s2, [r0, #16]
+ %off0 = getelementptr float* %ptr, i32 0
+ %val0 = load float* %off0
+ %off1 = getelementptr float* %ptr, i32 1
+ %val1 = load float* %off1
+ %off4 = getelementptr float* %ptr, i32 4
+ %val4 = load float* %off4
+ %off2 = getelementptr float* %ptr, i32 2
+ %val2 = load float* %off2
+
+ %vec1 = insertelement <4 x float> undef, float %val0, i32 0
+ %vec2 = insertelement <4 x float> %vec1, float %val1, i32 1
+ %vec3 = insertelement <4 x float> %vec2, float %val4, i32 2
+ %vec4 = insertelement <4 x float> %vec3, float %val2, i32 3
+
+ ret <4 x float> %vec4
+}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D1868.1.patch
Type: text/x-patch
Size: 6815 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20131009/e22b6570/attachment.bin>
More information about the llvm-commits
mailing list