[llvm] r256523 - [Thumb] Fix assembler error 'cannot honor width suffix pop {lr}'
Artyom Skrobov via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 28 13:40:46 PST 2015
Author: askrobov
Date: Mon Dec 28 15:40:45 2015
New Revision: 256523
URL: http://llvm.org/viewvc/llvm-project?rev=256523&view=rev
Log:
[Thumb] Fix assembler error 'cannot honor width suffix pop {lr}'
Summary:
* avoid generating POP {LR} in Thumb1 epilogues
* combine MOV LR, Rx + BX LR -> BX Rx in a peephole optimization pass
* combine POP {LR} + B + BX LR -> POP {PC} on v5T+
Test cases by Ana Pazos
Differential Revision: http://reviews.llvm.org/D15707
Modified:
llvm/trunk/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
llvm/trunk/lib/Target/ARM/Thumb1FrameLowering.cpp
llvm/trunk/test/CodeGen/Thumb/thumb-shrink-wrapping.ll
Modified: llvm/trunk/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMLoadStoreOptimizer.cpp?rev=256523&r1=256522&r2=256523&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMLoadStoreOptimizer.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMLoadStoreOptimizer.cpp Mon Dec 28 15:40:45 2015
@@ -151,6 +151,7 @@ namespace {
bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
+ bool CombineMovBx(MachineBasicBlock &MBB);
};
char ARMLoadStoreOpt::ID = 0;
}
@@ -1825,6 +1826,30 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM
return false;
}
+bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) {
+ MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
+ if (MBBI == MBB.begin() || MBBI == MBB.end() ||
+ MBBI->getOpcode() != ARM::tBX_RET)
+ return false;
+
+ MachineBasicBlock::iterator Prev = MBBI;
+ --Prev;
+ if (Prev->getOpcode() != ARM::tMOVr || !Prev->definesRegister(ARM::LR))
+ return false;
+
+ for (auto Use : Prev->uses())
+ if (Use.isKill()) {
+ AddDefaultPred(BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX))
+ .addReg(Use.getReg(), RegState::Kill))
+ .copyImplicitOps(&*MBBI);
+ MBB.erase(MBBI);
+ MBB.erase(Prev);
+ return true;
+ }
+
+ llvm_unreachable("tMOVr doesn't kill a reg before tBX_RET?");
+}
+
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
MF = &Fn;
STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
@@ -1844,6 +1869,8 @@ bool ARMLoadStoreOpt::runOnMachineFuncti
Modified |= LoadStoreMultipleOpti(MBB);
if (STI->hasV5TOps())
Modified |= MergeReturnIntoLDM(MBB);
+ if (isThumb1)
+ Modified |= CombineMovBx(MBB);
}
Allocator.DestroyAll();
Modified: llvm/trunk/lib/Target/ARM/Thumb1FrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/Thumb1FrameLowering.cpp?rev=256523&r1=256522&r2=256523&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/Thumb1FrameLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM/Thumb1FrameLowering.cpp Mon Dec 28 15:40:45 2015
@@ -433,14 +433,16 @@ bool Thumb1FrameLowering::emitPopSpecial
auto MBBI = MBB.getFirstTerminator();
bool CanRestoreDirectly = STI.hasV5TOps() && !ArgRegsSaveSize;
if (CanRestoreDirectly) {
- if (MBBI != MBB.end())
+ if (MBBI != MBB.end() && MBBI->getOpcode() != ARM::tB)
CanRestoreDirectly = (MBBI->getOpcode() == ARM::tBX_RET ||
MBBI->getOpcode() == ARM::tPOP_RET);
else {
- assert(MBB.back().getOpcode() == ARM::tPOP);
+ auto MBBI_prev = MBBI;
+ MBBI_prev--;
+ assert(MBBI_prev->getOpcode() == ARM::tPOP);
assert(MBB.succ_size() == 1);
if ((*MBB.succ_begin())->begin()->getOpcode() == ARM::tBX_RET)
- MBBI--; // Replace the final tPOP with a tPOP_RET.
+ MBBI = MBBI_prev; // Replace the final tPOP with a tPOP_RET.
else
CanRestoreDirectly = false;
}
@@ -454,8 +456,7 @@ bool Thumb1FrameLowering::emitPopSpecial
BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP_RET)));
// Copy implicit ops and popped registers, if any.
for (auto MO: MBBI->operands())
- if (MO.isReg() && (MO.isImplicit() || MO.isDef()) &&
- MO.getReg() != ARM::LR)
+ if (MO.isReg() && (MO.isImplicit() || MO.isDef()))
MIB.addOperand(MO);
MIB.addReg(ARM::PC, RegState::Define);
// Erase the old instruction (tBX_RET or tPOP).
@@ -529,32 +530,26 @@ bool Thumb1FrameLowering::emitPopSpecial
.addReg(PopReg, RegState::Kill));
}
- bool AddBx = false;
- if (MBBI == MBB.end()) {
- MachineInstr& Pop = MBB.back();
- assert(Pop.getOpcode() == ARM::tPOP);
- Pop.RemoveOperand(Pop.findRegisterDefOperandIdx(ARM::LR));
- } else if (MBBI->getOpcode() == ARM::tPOP_RET) {
+ if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPOP_RET) {
// We couldn't use the direct restoration above, so
// perform the opposite conversion: tPOP_RET to tPOP.
MachineInstrBuilder MIB =
AddDefaultPred(
BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP)));
- unsigned Popped = 0;
+ bool Popped = false;
for (auto MO: MBBI->operands())
if (MO.isReg() && (MO.isImplicit() || MO.isDef()) &&
MO.getReg() != ARM::PC) {
MIB.addOperand(MO);
if (!MO.isImplicit())
- Popped++;
+ Popped = true;
}
// Is there anything left to pop?
if (!Popped)
MBB.erase(MIB.getInstr());
// Erase the old instruction.
MBB.erase(MBBI);
- MBBI = MBB.end();
- AddBx = true;
+ MBBI = AddDefaultPred(BuildMI(MBB, MBB.end(), dl, TII.get(ARM::tBX_RET)));
}
assert(PopReg && "Do not know how to get LR");
@@ -563,31 +558,14 @@ bool Thumb1FrameLowering::emitPopSpecial
emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize);
- if (!TemporaryReg && MBBI != MBB.end() && MBBI->getOpcode() == ARM::tBX_RET) {
- MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX))
- .addReg(PopReg, RegState::Kill);
- AddDefaultPred(MIB);
- MIB.copyImplicitOps(&*MBBI);
- // erase the old tBX_RET instruction
- MBB.erase(MBBI);
- return true;
- }
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
+ .addReg(ARM::LR, RegState::Define)
+ .addReg(PopReg, RegState::Kill));
- if (AddBx && !TemporaryReg) {
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX))
- .addReg(PopReg, RegState::Kill));
- } else {
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
- .addReg(ARM::LR, RegState::Define)
- .addReg(PopReg, RegState::Kill));
- }
- if (TemporaryReg) {
+ if (TemporaryReg)
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
.addReg(PopReg, RegState::Define)
.addReg(TemporaryReg, RegState::Kill));
- if (AddBx)
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET)));
- }
return true;
}
@@ -645,28 +623,34 @@ restoreCalleeSavedRegisters(MachineBasic
MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP));
AddDefaultPred(MIB);
- bool NumRegs = false;
+ bool NeedsPop = false;
for (unsigned i = CSI.size(); i != 0; --i) {
unsigned Reg = CSI[i-1].getReg();
- if (Reg == ARM::LR && MBB.succ_empty()) {
- // Special epilogue for vararg functions. See emitEpilogue
- if (isVarArg)
- continue;
- // ARMv4T requires BX, see emitEpilogue
- if (!STI.hasV5TOps())
+ if (Reg == ARM::LR) {
+ if (MBB.succ_empty()) {
+ // Special epilogue for vararg functions. See emitEpilogue
+ if (isVarArg)
+ continue;
+ // ARMv4T requires BX, see emitEpilogue
+ if (!STI.hasV5TOps())
+ continue;
+ Reg = ARM::PC;
+ (*MIB).setDesc(TII.get(ARM::tPOP_RET));
+ if (MI != MBB.end())
+ MIB.copyImplicitOps(&*MI);
+ MI = MBB.erase(MI);
+ } else
+ // LR may only be popped into PC, as part of return sequence.
+ // If this isn't the return sequence, we'll need emitPopSpecialFixUp
+ // to restore LR the hard way.
continue;
- Reg = ARM::PC;
- (*MIB).setDesc(TII.get(ARM::tPOP_RET));
- if (MI != MBB.end())
- MIB.copyImplicitOps(&*MI);
- MI = MBB.erase(MI);
}
MIB.addReg(Reg, getDefRegState(true));
- NumRegs = true;
+ NeedsPop = true;
}
// It's illegal to emit pop instruction without operands.
- if (NumRegs)
+ if (NeedsPop)
MBB.insert(MI, &*MIB);
else
MF.DeleteMachineInstr(MIB);
Modified: llvm/trunk/test/CodeGen/Thumb/thumb-shrink-wrapping.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb/thumb-shrink-wrapping.ll?rev=256523&r1=256522&r2=256523&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb/thumb-shrink-wrapping.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb/thumb-shrink-wrapping.ll Mon Dec 28 15:40:45 2015
@@ -159,6 +159,7 @@ declare i32 @doSomething(i32, i32*)
; DISABLE-V4T-NEXT: pop {r1}
; DISABLE-V4T-NEXT: bx r1
;
+; ENABLE-V5T-NEXT: {{LBB[0-9_]+}}: @ %if.end
; ENABLE-NEXT: bx lr
define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) {
entry:
@@ -270,7 +271,10 @@ for.end:
; Next BB.
; SUM << 3.
; CHECK: lsls [[SUM]], [[SUM]], #3
-; ENABLE-NEXT: pop {r4, lr}
+; ENABLE-V5T-NEXT: pop {r4, pc}
+; ENABLE-V4T-NEXT: pop {r4}
+; ENABLE-V4T-NEXT: pop {r1}
+; ENABLE-V4T-NEXT: bx r1
;
; Duplicated epilogue.
; DISABLE-V5T: pop {r4, pc}
@@ -285,6 +289,7 @@ for.end:
; DISABLE-V4T-NEXT: pop {r1}
; DISABLE-V4T-NEXT: bx r1
;
+; ENABLE-V5T-NEXT: {{LBB[0-9_]+}}: @ %if.end
; ENABLE-NEXT: bx lr
define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) {
entry:
@@ -350,7 +355,10 @@ declare void @somethingElse(...)
; Next BB.
; SUM << 3.
; CHECK: lsls [[SUM]], [[SUM]], #3
-; ENABLE: pop {r4, lr}
+; ENABLE-V5T-NEXT: pop {r4, pc}
+; ENABLE-V4T-NEXT: pop {r4}
+; ENABLE-V4T-NEXT: pop {r1}
+; ENABLE-V4T-NEXT: bx r1
;
; Duplicated epilogue.
; DISABLE-V5T: pop {r4, pc}
@@ -365,6 +373,7 @@ declare void @somethingElse(...)
; DISABLE-V4T-NEXT: pop {r1}
; DISABLE-V4T-NEXT: bx r1
;
+; ENABLE-V5T-NEXT: {{LBB[0-9_]+}}: @ %if.end
; ENABLE-NEXT: bx lr
define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) #0 {
entry:
@@ -431,7 +440,10 @@ entry:
;
; Next BB.
; CHECK: movs r0, #0
-; ENABLE-NEXT: pop {r4, lr}
+; ENABLE-V5T-NEXT: pop {r4, pc}
+; ENABLE-V4T-NEXT: pop {r4}
+; ENABLE-V4T-NEXT: pop {r1}
+; ENABLE-V4T-NEXT: bx r1
;
; Duplicated epilogue.
; DISABLE-V5T-NEXT: pop {r4, pc}
@@ -446,6 +458,7 @@ entry:
; DISABLE-V4T-NEXT: pop {r1}
; DISABLE-V4T-NEXT: bx r1
;
+; ENABLE-V5T-NEXT: {{LBB[0-9_]+}}: @ %if.end
; ENABLE-NEXT: bx lr
define i32 @inlineAsm(i32 %cond, i32 %N) {
entry:
@@ -506,7 +519,10 @@ if.end:
; CHECK-NEXT: lsls r0, r0, #3
;
; ENABLE-NEXT: add sp, #16
-; ENABLE-NEXT: pop {[[TMP]], lr}
+; ENABLE-V5T-NEXT: pop {[[TMP]], pc}
+; ENABLE-V4T-NEXT: pop {[[TMP]]}
+; ENABLE-V4T-NEXT: pop {r1}
+; ENABLE-V4T-NEXT: bx r1
;
; Duplicated epilogue.
; DISABLE-V5T-NEXT: add sp, #16
@@ -518,6 +534,7 @@ if.end:
; CHECK: lsls r0, r1, #1
;
; Epilogue code.
+; ENABLE-V5T-NEXT: {{LBB[0-9_]+}}: @ %if.end
; ENABLE-NEXT: bx lr
;
; DISABLE-V4T-NEXT: [[END_LABEL]]: @ %if.end
@@ -586,4 +603,89 @@ if.end:
declare void @abort() #0
+define i32 @b_to_bx(i32 %value) {
+; CHECK-LABEL: b_to_bx:
+; DISABLE: push {r7, lr}
+; CHECK: cmp r1, #49
+; CHECK-NEXT: bgt [[ELSE_LABEL:LBB[0-9_]+]]
+; ENABLE: push {r7, lr}
+
+; CHECK: bl
+; DISABLE-V5-NEXT: pop {r7, pc}
+; DISABLE-V4T-NEXT: b [[END_LABEL:LBB[0-9_]+]]
+
+; ENABLE-V5-NEXT: pop {r7, pc}
+; ENABLE-V4-NEXT: pop {r7}
+; ENABLE-V4-NEXT: pop {r1}
+; ENABLE-V4-NEXT: bx r1
+
+; CHECK: [[ELSE_LABEL]]: @ %if.else
+; CHECK-NEXT: lsls r0, r1, #1
+; DISABLE-V5-NEXT: pop {r7, pc}
+; DISABLE-V4T-NEXT: [[END_LABEL]]: @ %if.end
+; DISABLE-V4T-NEXT: pop {r7}
+; DISABLE-V4T-NEXT: pop {r1}
+; DISABLE-V4T-NEXT: bx r1
+
+; ENABLE-V5T-NEXT: {{LBB[0-9_]+}}: @ %if.end
+; ENABLE-NEXT: bx lr
+
+entry:
+ %cmp = icmp slt i32 %value, 50
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+ %div = sdiv i32 5000, %value
+ br label %if.end
+
+if.else:
+ %mul = shl nsw i32 %value, 1
+ br label %if.end
+
+if.end:
+ %value.addr.0 = phi i32 [ %div, %if.then ], [ %mul, %if.else ]
+ ret i32 %value.addr.0
+}
+
+define i1 @beq_to_bx(i32* %y, i32 %head) {
+; CHECK-LABEL: beq_to_bx:
+; DISABLE: push {r4, lr}
+; CHECK: cmp r2, #0
+; CHECK-NEXT: beq [[EXIT_LABEL:LBB[0-9_]+]]
+; ENABLE: push {r4, lr}
+
+; CHECK: tst r3, r4
+; ENABLE-NEXT: pop {r4}
+; ENABLE-NEXT: pop {r3}
+; ENABLE-NEXT: mov lr, r3
+; CHECK-NEXT: beq [[EXIT_LABEL]]
+
+; CHECK: str r1, [r2]
+; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: [[EXIT_LABEL]]: @ %cleanup
+; ENABLE-NEXT: bx lr
+; DISABLE-V5-NEXT: pop {r4, pc}
+; DISABLE-V4T-NEXT: pop {r4}
+; DISABLE-V4T-NEXT: pop {r1}
+; DISABLE-V4T-NEXT: bx r1
+
+entry:
+ %cmp = icmp eq i32* %y, null
+ br i1 %cmp, label %cleanup, label %if.end
+
+if.end:
+ %z = load i32, i32* %y, align 4
+ %and = and i32 %z, 2
+ %cmp2 = icmp eq i32 %and, 0
+ br i1 %cmp2, label %cleanup, label %if.end4
+
+if.end4:
+ store i32 %head, i32* %y, align 4
+ br label %cleanup
+
+cleanup:
+ %retval.0 = phi i1 [ 0, %if.end4 ], [ 1, %entry ], [ 1, %if.end ]
+ ret i1 %retval.0
+}
+
attributes #0 = { noreturn nounwind }
More information about the llvm-commits
mailing list