[llvm] r364288 - [ARM] DLS/LE low-overhead loop code generation
Sam Parker via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 25 03:45:52 PDT 2019
Author: sam_parker
Date: Tue Jun 25 03:45:51 2019
New Revision: 364288
URL: http://llvm.org/viewvc/llvm-project?rev=364288&view=rev
Log:
[ARM] DLS/LE low-overhead loop code generation
Introduce three pseudo instructions to be used during DAG ISel to
represent v8.1-m low-overhead loops. One maps to set_loop_iterations
while loop_decrement_reg is lowered to two, so that we can separate
the decrement and branching operations. The pseudo instructions are
expanded pre-emission, where we can still decide whether we actually
want to generate a low-overhead loop, in a new pass:
ARMLowOverheadLoops. The pass currently bails, reverting to a sub,
icmp and br, in the cases where a call or stack spill/restore happens
between the decrement and branching instructions, or if the loop is
too large.
Differential Revision: https://reviews.llvm.org/D63476
Added:
llvm/trunk/lib/Target/ARM/ARMLowOverheadLoops.cpp
llvm/trunk/test/Transforms/HardwareLoops/ARM/cond-mov.mir
llvm/trunk/test/Transforms/HardwareLoops/ARM/massive.mir
llvm/trunk/test/Transforms/HardwareLoops/ARM/multiblock-massive.mir
llvm/trunk/test/Transforms/HardwareLoops/ARM/revert-after-call.mir
llvm/trunk/test/Transforms/HardwareLoops/ARM/revert-after-spill.mir
llvm/trunk/test/Transforms/HardwareLoops/ARM/size-limit.mir
llvm/trunk/test/Transforms/HardwareLoops/ARM/switch.mir
Modified:
llvm/trunk/lib/Target/ARM/ARM.h
llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp
llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td
llvm/trunk/lib/Target/ARM/ARMTargetMachine.cpp
llvm/trunk/lib/Target/ARM/CMakeLists.txt
llvm/trunk/test/CodeGen/ARM/O3-pipeline.ll
llvm/trunk/test/Transforms/HardwareLoops/ARM/calls.ll
llvm/trunk/test/Transforms/HardwareLoops/ARM/simple-do.ll
llvm/trunk/test/Transforms/HardwareLoops/ARM/structure.ll
Modified: llvm/trunk/lib/Target/ARM/ARM.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARM.h?rev=364288&r1=364287&r2=364288&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARM.h (original)
+++ llvm/trunk/lib/Target/ARM/ARM.h Tue Jun 25 03:45:51 2019
@@ -35,7 +35,7 @@ class MachineInstr;
class MCInst;
class PassRegistry;
-
+FunctionPass *createARMLowOverheadLoopsPass();
Pass *createARMParallelDSPPass();
FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
CodeGenOpt::Level OptLevel);
@@ -66,6 +66,7 @@ void initializeARMExpandPseudoPass(PassR
void initializeThumb2SizeReducePass(PassRegistry &);
void initializeThumb2ITBlockPass(PassRegistry &);
void initializeMVEVPTBlockPass(PassRegistry &);
+void initializeARMLowOverheadLoopsPass(PassRegistry &);
} // end namespace llvm
Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp?rev=364288&r1=364287&r2=364288&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Tue Jun 25 03:45:51 2019
@@ -2986,6 +2986,36 @@ void ARMDAGToDAGISel::Select(SDNode *N)
unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue();
if (InFlag.getOpcode() == ARMISD::CMPZ) {
+ if (InFlag.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
+ SDValue Int = InFlag.getOperand(0);
+ uint64_t ID = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue();
+
+ // Handle low-overhead loops.
+ if (ID == Intrinsic::loop_decrement_reg) {
+ SDValue Elements = Int.getOperand(2);
+ SDValue Size = CurDAG->getTargetConstant(
+ cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl,
+ MVT::i32);
+
+ SDValue Args[] = { Elements, Size, Int.getOperand(0) };
+ SDNode *LoopDec =
+ CurDAG->getMachineNode(ARM::t2LoopDec, dl,
+ CurDAG->getVTList(MVT::i32, MVT::Other),
+ Args);
+ ReplaceUses(Int.getNode(), LoopDec);
+
+ SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain };
+ SDNode *LoopEnd =
+ CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs);
+
+ ReplaceUses(N, LoopEnd);
+ CurDAG->RemoveDeadNode(N);
+ CurDAG->RemoveDeadNode(InFlag.getNode());
+ CurDAG->RemoveDeadNode(Int.getNode());
+ return;
+ }
+ }
+
bool SwitchEQNEToPLMI;
SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
InFlag = N->getOperand(4);
Modified: llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td?rev=364288&r1=364287&r2=364288&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td Tue Jun 25 03:45:51 2019
@@ -5135,6 +5135,7 @@ class t2LOL<dag oops, dag iops, string a
let Predicates = [IsThumb2, HasV8_1MMainline, HasLOB];
}
+let isNotDuplicable = 1 in {
def t2WLS : t2LOL<(outs GPRlr:$LR),
(ins rGPR:$Rn, wlslabel_u11:$label),
"wls", "$LR, $Rn, $label"> {
@@ -5178,6 +5179,21 @@ def t2LE : t2LOL<(outs ), (ins lelabel_u
let Inst{10-1} = label{10-1};
}
+def t2DoLoopStart :
+ t2PseudoInst<(outs), (ins rGPR:$elts), 4, IIC_Br,
+ [(int_set_loop_iterations rGPR:$elts)]>, Sched<[WriteBr]>;
+
+def t2LoopDec :
+ t2PseudoInst<(outs GPRlr:$Rm), (ins GPRlr:$Rn, imm0_7:$size),
+ 4, IIC_Br, []>, Sched<[WriteBr]>;
+
+let isBranch = 1, isTerminator = 1, hasSideEffects = 1 in
+def t2LoopEnd :
+ t2PseudoInst<(outs), (ins GPRlr:$elts, brtarget:$target),
+ 8, IIC_Br, []>, Sched<[WriteBr]>;
+
+} // end isNotDuplicable
+
class CS<string iname, bits<4> opcode, list<dag> pattern=[]>
: V8_1MI<(outs rGPR:$Rd), (ins GPRwithZR:$Rn, GPRwithZR:$Rm, pred_noal:$fcond),
AddrModeNone, NoItinerary, iname, "$Rd, $Rn, $Rm, $fcond", "", pattern> {
Added: llvm/trunk/lib/Target/ARM/ARMLowOverheadLoops.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMLowOverheadLoops.cpp?rev=364288&view=auto
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMLowOverheadLoops.cpp (added)
+++ llvm/trunk/lib/Target/ARM/ARMLowOverheadLoops.cpp Tue Jun 25 03:45:51 2019
@@ -0,0 +1,295 @@
+//===-- ARMLowOverheadLoops.cpp - CodeGen Low-overhead Loops ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// Finalize v8.1-m low-overhead loops by converting the associated pseudo
+/// instructions into machine operations.
+/// The expectation is that the loop contains three pseudo instructions:
+/// - t2*LoopStart - placed in the preheader or pre-preheader. The do-loop
+/// form should be in the preheader, whereas the while form should be in the
+/// preheader's only predecessor. TODO: Could DoLoopStart get moved into the
+/// pre-preheader?
+/// - t2LoopDec - placed within the loop body.
+/// - t2LoopEnd - the loop latch terminator.
+///
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMBasicBlockInfo.h"
+#include "ARMSubtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "arm-low-overhead-loops"
+#define ARM_LOW_OVERHEAD_LOOPS_NAME "ARM Low Overhead Loops pass"
+
+namespace {
+
+/// Machine function pass that turns the low-overhead loop pseudo instructions
+/// (t2DoLoopStart, t2LoopDec, t2LoopEnd) into real machine instructions.
+/// It needs MachineLoopInfo, declares that it preserves the CFG, and only
+/// runs on functions that no longer contain virtual registers.
+class ARMLowOverheadLoops : public MachineFunctionPass {
+  // Per-function state, (re)initialised by runOnMachineFunction.
+  const ARMBaseInstrInfo *TII = nullptr;
+  MachineRegisterInfo *MRI = nullptr;
+  std::unique_ptr<ARMBasicBlockUtils> BBUtils;
+
+public:
+  static char ID;
+
+  ARMLowOverheadLoops() : MachineFunctionPass(ID) {}
+
+  // --- Pass description -----------------------------------------------------
+
+  StringRef getPassName() const override { return ARM_LOW_OVERHEAD_LOOPS_NAME; }
+
+  MachineFunctionProperties getRequiredProperties() const override {
+    MachineFunctionProperties Required;
+    Required.set(MachineFunctionProperties::Property::NoVRegs);
+    return Required;
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addRequired<MachineLoopInfo>();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  // --- Transformation -------------------------------------------------------
+
+  /// Visit every top-level loop of \p MF; returns true if anything changed.
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  /// Handle \p ML and, before it, all loops nested inside it.
+  bool ProcessLoop(MachineLoop *ML);
+
+  /// Replace the three pseudos of one loop, either with DLS/LE or, when
+  /// \p Revert is set, with a sub/cmp/branch sequence.
+  void Expand(MachineLoop *ML, MachineInstr *Start, MachineInstr *Dec,
+              MachineInstr *End, bool Revert);
+};
+
+} // end anonymous namespace
+
+// Pass identifier handed to the MachineFunctionPass constructor.
+char ARMLowOverheadLoops::ID = 0;
+
+// Register the pass under the DEBUG_TYPE name ("arm-low-overhead-loops"),
+// which is the name the MIR tests pass to -run-pass.
+INITIALIZE_PASS(ARMLowOverheadLoops,
+                DEBUG_TYPE,
+                ARM_LOW_OVERHEAD_LOOPS_NAME,
+                false,
+                false)
+
+/// Pass entry point: caches the per-function helpers (register info,
+/// instruction info, basic-block size information) and then processes each
+/// top-level loop reported by MachineLoopInfo.
+/// \returns true if any loop in \p MF was rewritten.
+bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &MF) {
+  // The subtarget feature check below is currently commented out, so the pass
+  // runs regardless of whether LOB is available.
+  //if (!static_cast<const ARMSubtarget&>(MF.getSubtarget()).hasLOB())
+  //return false;
+
+  LLVM_DEBUG(dbgs() << "ARM Loops on " << MF.getName() << " ------------- \n");
+
+  MRI = &MF.getRegInfo();
+  TII = static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
+
+  // Block sizes feed the branch-range check performed in ProcessLoop.
+  // NOTE(review): only sizes are computed here; confirm that block offsets do
+  // not also need to be computed before isBBInRange is queried.
+  BBUtils.reset(new ARMBasicBlockUtils(MF));
+  BBUtils->computeAllBlockSizes();
+
+  MachineLoopInfo &Loops = getAnalysis<MachineLoopInfo>();
+  bool MadeChange = false;
+  for (MachineLoop *Candidate : Loops) {
+    // Nested loops are reached through their outermost parent.
+    if (Candidate->getParentLoop())
+      continue;
+    MadeChange |= ProcessLoop(Candidate);
+  }
+  return MadeChange;
+}
+
+/// Find the low-overhead loop pseudo instructions that belong to \p ML and
+/// expand them. Loops nested in \p ML are processed first.
+///
+/// The t2DoLoopStart is searched for in the loop preheader; t2LoopDec and
+/// t2LoopEnd are searched for in the loop's blocks. If none of the three is
+/// present the loop is left alone. If only some of them are present, or the
+/// t2LoopEnd does not branch to the loop header, a fatal error is reported.
+///
+/// The loop is reverted to a normal sub/cmp/branch loop when, from the
+/// t2LoopDec onwards, a call or a load/store whose first operand is LR is
+/// seen, or when the branch back to the header is out of range.
+///
+/// \returns true if this loop or any nested loop was modified.
+bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
+
+  bool Changed = false;
+
+  // Process inner loops first.
+  for (auto I = ML->begin(), E = ML->end(); I != E; ++I)
+    Changed |= ProcessLoop(*I);
+
+  LLVM_DEBUG(dbgs() << "ARM Loops: Processing " << *ML);
+
+  auto IsLoopStart = [](MachineInstr &MI) {
+    return MI.getOpcode() == ARM::t2DoLoopStart;
+  };
+
+  // Return the first loop-start pseudo in MBB, or nullptr if there is none.
+  auto SearchForStart =
+    [&IsLoopStart](MachineBasicBlock *MBB) -> MachineInstr* {
+    for (auto &MI : *MBB) {
+      if (IsLoopStart(MI))
+        return &MI;
+    }
+    return nullptr;
+  };
+
+  MachineInstr *Start = nullptr;
+  MachineInstr *Dec = nullptr;
+  MachineInstr *End = nullptr;
+  bool Revert = false;
+
+  if (auto *Preheader = ML->getLoopPreheader())
+    Start = SearchForStart(Preheader);
+
+  // Find the low-overhead loop components and decide whether or not to fall
+  // back to a normal loop.
+  for (auto *MBB : reverse(ML->getBlocks())) {
+    for (auto &MI : *MBB) {
+      if (MI.getOpcode() == ARM::t2LoopDec)
+        Dec = &MI;
+      else if (MI.getOpcode() == ARM::t2LoopEnd)
+        End = &MI;
+
+      // Nothing before the decrement can interfere with it.
+      if (!Dec)
+        continue;
+
+      // TODO: Though the call will require LE to execute again, does this
+      // mean we should revert? Always executing LE hopefully should be faster
+      // than performing a sub,cmp,br or even subs,br.
+      if (MI.getDesc().isCall())
+        Revert = true;
+
+      // If we find that we load/store LR between LoopDec and LoopEnd, expect
+      // that the decremented value has been spilled to the stack. Because
+      // this value isn't actually going to be produced until the latch, by LE,
+      // we would need to generate a real sub. The value is also likely to be
+      // reloaded for use of LoopEnd - in which case we'd need to perform
+      // an add because it gets negated again by LE! The other option is to
+      // then generate the other form of LE which doesn't perform the sub.
+      //
+      // Revert is only ever set here, never cleared: a later memory access
+      // that does not involve LR must not cancel a revert requested by an
+      // earlier call or LR spill/reload. The operand count is checked so that
+      // an operand-less instruction cannot trip the operand accessor.
+      if ((MI.mayLoad() || MI.mayStore()) && MI.getNumOperands() > 0 &&
+          MI.getOperand(0).isReg() && MI.getOperand(0).getReg() == ARM::LR)
+        Revert = true;
+    }
+
+    // Once both components are known and we already have to revert, looking
+    // at further blocks cannot change the outcome.
+    if (Dec && End && Revert)
+      break;
+  }
+
+  if (Start || Dec || End) {
+    // A partially formed low-overhead loop cannot be expanded either way.
+    if (!Start || !Dec || !End)
+      report_fatal_error("Failed to find all loop components");
+  } else {
+    LLVM_DEBUG(dbgs() << "ARM Loops: Not a low-overhead loop.\n");
+    return Changed;
+  }
+
+  if (!End->getOperand(1).isMBB() ||
+      End->getOperand(1).getMBB() != ML->getHeader())
+    report_fatal_error("Expected LoopEnd to target Loop Header");
+
+  // The LE instruction has 12 bits for the label offset.
+  if (!BBUtils->isBBInRange(End, ML->getHeader(), 4096)) {
+    LLVM_DEBUG(dbgs() << "ARM Loops: Too large for a low-overhead loop!\n");
+    Revert = true;
+  }
+
+  LLVM_DEBUG(dbgs() << "ARM Loops:\n - Found Loop Start: " << *Start
+                    << " - Found Loop Dec: " << *Dec
+                    << " - Found Loop End: " << *End);
+
+  Expand(ML, Start, Dec, End, Revert);
+  return true;
+}
+
+/// Replace the pseudo instructions of one low-overhead loop.
+///
+/// \param ML     The loop being expanded.
+/// \param Start  The t2DoLoopStart pseudo found in the preheader.
+/// \param Dec    The t2LoopDec pseudo found in the loop.
+/// \param End    The t2LoopEnd pseudo found in the loop.
+/// \param Revert When false, \p Start becomes t2DLS and \p Dec / \p End are
+///               combined into t2LEUpdate. When true, \p Start is simply
+///               removed and \p Dec / \p End become t2SUBri, t2CMPri and
+///               t2Bcc.
+///
+/// All three pseudo instructions are erased in either case.
+void ARMLowOverheadLoops::Expand(MachineLoop *ML, MachineInstr *Start,
+                                 MachineInstr *Dec, MachineInstr *End,
+                                 bool Revert) {
+
+  auto ExpandLoopStart = [this](MachineLoop *ML, MachineInstr *Start) {
+    // The trip count should already be held in LR since the instructions
+    // within the loop can only read and write to LR. So, there should be a
+    // mov to setup the count. WLS/DLS perform this move, so find the original
+    // and delete it - inserting WLS/DLS in its place.
+    MachineBasicBlock *MBB = Start->getParent();
+    MachineInstr *InsertPt = Start;
+    for (auto &I : MRI->def_instructions(ARM::LR)) {
+      if (I.getParent() != MBB)
+        continue;
+
+      // Only handle move reg: if the trip count isn't already in a register
+      // it will need moving into one before the setup instruction anyway.
+      // This is tested before any operand is inspected, because other
+      // definitions of LR in this block need not have the operands read below.
+      if (!I.getDesc().isMoveReg() || I.getNumOperands() < 3 ||
+          !I.getOperand(1).isIdenticalTo(Start->getOperand(0)))
+        continue;
+
+      // Always execute.
+      if (!I.getOperand(2).isImm() || I.getOperand(2).getImm() != ARMCC::AL)
+        continue;
+
+      InsertPt = &I;
+      break;
+    }
+
+    MachineInstrBuilder MIB =
+      BuildMI(*MBB, InsertPt, InsertPt->getDebugLoc(), TII->get(ARM::t2DLS));
+    // The mov is subsumed by DLS; Start itself is erased below.
+    if (InsertPt != Start)
+      InsertPt->eraseFromParent();
+
+    MIB.addDef(ARM::LR);
+    MIB.add(Start->getOperand(0));
+    LLVM_DEBUG(dbgs() << "ARM Loops: Inserted DLS: " << *MIB);
+    Start->eraseFromParent();
+  };
+
+  // Combine the LoopDec and LoopEnd instructions into LE(TP).
+  auto ExpandLoopEnd = [this](MachineLoop *ML, MachineInstr *Dec,
+                              MachineInstr *End) {
+    MachineBasicBlock *MBB = End->getParent();
+    MachineInstrBuilder MIB = BuildMI(*MBB, End, End->getDebugLoc(),
+                                      TII->get(ARM::t2LEUpdate));
+    MIB.addDef(ARM::LR);
+    MIB.add(End->getOperand(0));
+    MIB.add(End->getOperand(1));
+    LLVM_DEBUG(dbgs() << "ARM Loops: Inserted LE: " << *MIB);
+
+    // If there is a branch after loop end, which branches to the fallthrough
+    // block, remove the branch.
+    MachineBasicBlock *Latch = End->getParent();
+    MachineInstr *Terminator = &Latch->instr_back();
+    if (End != Terminator) {
+      // getExitBlock() yields no block when the loop does not have a single
+      // exit block; in that case leave the trailing branch untouched.
+      MachineBasicBlock *Exit = ML->getExitBlock();
+      if (Exit && Latch->isLayoutSuccessor(Exit)) {
+        LLVM_DEBUG(dbgs() << "ARM Loops: Removing loop exit branch: "
+                          << *Terminator);
+        Terminator->eraseFromParent();
+      }
+    }
+    End->eraseFromParent();
+    Dec->eraseFromParent();
+  };
+
+  // Generate a subs, or sub and cmp, and a branch instead of an LE.
+  // TODO: Check flags so that we can possibly generate a subs.
+  //
+  // A predicate is a (condition code, register) operand pair: instructions
+  // that always execute carry ARMCC::AL with no register, while the
+  // conditional branch reads CPSR.
+  auto ExpandBranch = [this](MachineInstr *Dec, MachineInstr *End) {
+    LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to sub, cmp, br.\n");
+    // Create sub
+    MachineBasicBlock *MBB = Dec->getParent();
+    MachineInstrBuilder MIB = BuildMI(*MBB, Dec, Dec->getDebugLoc(),
+                                      TII->get(ARM::t2SUBri));
+    MIB.addDef(ARM::LR);
+    MIB.add(Dec->getOperand(1));
+    MIB.add(Dec->getOperand(2));
+    MIB.addImm(ARMCC::AL);   // predicate: condition code
+    MIB.addReg(0);           // predicate: register
+    MIB.addReg(0);           // optional flag-setting (cc_out) operand: none
+
+    // Create cmp
+    MBB = End->getParent();
+    MIB = BuildMI(*MBB, End, End->getDebugLoc(), TII->get(ARM::t2CMPri));
+    MIB.addReg(ARM::LR);
+    MIB.addImm(0);
+    MIB.addImm(ARMCC::AL);   // predicate: condition code
+    MIB.addReg(0);           // predicate: register
+
+    // Create bne
+    MIB = BuildMI(*MBB, End, End->getDebugLoc(), TII->get(ARM::t2Bcc));
+    MIB.add(End->getOperand(1));  // branch target
+    MIB.addImm(ARMCC::NE);        // condition code
+    MIB.addReg(ARM::CPSR);        // predicate register read by the condition
+    End->eraseFromParent();
+    Dec->eraseFromParent();
+  };
+
+  if (Revert) {
+    // No loop setup is needed for an ordinary loop.
+    Start->eraseFromParent();
+    ExpandBranch(Dec, End);
+  } else {
+    ExpandLoopStart(ML, Start);
+    ExpandLoopEnd(ML, Dec, End);
+  }
+}
+
+/// Factory for the low-overhead loops pass; returns a newly allocated
+/// instance.
+FunctionPass *llvm::createARMLowOverheadLoopsPass() {
+  FunctionPass *LowOverheadLoops = new ARMLowOverheadLoops();
+  return LowOverheadLoops;
+}
Modified: llvm/trunk/lib/Target/ARM/ARMTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMTargetMachine.cpp?rev=364288&r1=364287&r2=364288&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMTargetMachine.cpp Tue Jun 25 03:45:51 2019
@@ -96,6 +96,7 @@ extern "C" void LLVMInitializeARMTarget(
initializeARMExpandPseudoPass(Registry);
initializeThumb2SizeReducePass(Registry);
initializeMVEVPTBlockPass(Registry);
+ initializeARMLowOverheadLoopsPass(Registry);
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -446,6 +447,9 @@ bool ARMPassConfig::addPreISel() {
MergeExternalByDefault));
}
+ if (TM->getOptLevel() != CodeGenOpt::None)
+ addPass(createHardwareLoopsPass());
+
return false;
}
@@ -526,4 +530,5 @@ void ARMPassConfig::addPreEmitPass() {
addPass(createARMOptimizeBarriersPass());
addPass(createARMConstantIslandPass());
+ addPass(createARMLowOverheadLoopsPass());
}
Modified: llvm/trunk/lib/Target/ARM/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/CMakeLists.txt?rev=364288&r1=364287&r2=364288&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/CMakeLists.txt (original)
+++ llvm/trunk/lib/Target/ARM/CMakeLists.txt Tue Jun 25 03:45:51 2019
@@ -39,6 +39,7 @@ add_llvm_target(ARMCodeGen
ARMLegalizerInfo.cpp
ARMParallelDSP.cpp
ARMLoadStoreOptimizer.cpp
+ ARMLowOverheadLoops.cpp
ARMMCInstLower.cpp
ARMMachineFunctionInfo.cpp
ARMMacroFusion.cpp
Modified: llvm/trunk/test/CodeGen/ARM/O3-pipeline.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/O3-pipeline.ll?rev=364288&r1=364287&r2=364288&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/O3-pipeline.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/O3-pipeline.ll Tue Jun 25 03:45:51 2019
@@ -49,6 +49,10 @@
; CHECK-NEXT: Dominator Tree Construction
; CHECK-NEXT: Exception handling preparation
; CHECK-NEXT: Merge internal globals
+; CHECK-NEXT: Dominator Tree Construction
+; CHECK-NEXT: Natural Loop Information
+; CHECK-NEXT: Scalar Evolution Analysis
+; CHECK-NEXT: Hardware Loop Insertion
; CHECK-NEXT: Safe Stack instrumentation pass
; CHECK-NEXT: Insert stack protectors
; CHECK-NEXT: Module Verifier
@@ -138,6 +142,9 @@
; CHECK-NEXT: Unpack machine instruction bundles
; CHECK-NEXT: optimise barriers pass
; CHECK-NEXT: ARM constant island placement and branch shortening pass
+; CHECK-NEXT: MachineDominator Tree Construction
+; CHECK-NEXT: Machine Natural Loop Construction
+; CHECK-NEXT: ARM Low Overhead Loops pass
; CHECK-NEXT: Contiguously Lay Out Funclets
; CHECK-NEXT: StackMap Liveness Analysis
; CHECK-NEXT: Live DEBUG_VALUE analysis
Modified: llvm/trunk/test/Transforms/HardwareLoops/ARM/calls.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HardwareLoops/ARM/calls.ll?rev=364288&r1=364287&r2=364288&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/HardwareLoops/ARM/calls.ll (original)
+++ llvm/trunk/test/Transforms/HardwareLoops/ARM/calls.ll Tue Jun 25 03:45:51 2019
@@ -3,7 +3,7 @@
; RUN: opt -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+fp-armv8,+fullfp16 -hardware-loops -disable-arm-loloops=false %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP64
; RUN: opt -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -hardware-loops -disable-arm-loloops=false %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVE
; RUN: opt -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve.fp -hardware-loops -disable-arm-loloops=false %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVEFP
-
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+lob,+mve.fp -disable-arm-loloops=false %s -o - | FileCheck %s --check-prefix=CHECK-LLC
; CHECK-LABEL: skip_call
; CHECK-NOT: call void @llvm.set.loop.iterations
@@ -41,6 +41,15 @@ while.end:
; CHECK: [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0
; CHECK: br i1 [[CMP]], label %loop, label %exit
+; CHECK-LLC-LABEL: test_target_specific:
+; CHECK-LLC: mov.w lr, #50
+; CHECK-LLC: dls lr, lr
+; CHECK-LLC-NOT: mov lr,
+; CHECK-LLC: [[LOOP_HEADER:\.LBB[0-9_]+]]:
+; CHECK-LLC: le lr, [[LOOP_HEADER]]
+; CHECK-LLC-NOT: b .
+; CHECK-LLC: @ %exit
+
define i32 @test_target_specific(i32* %a, i32* %b) {
entry:
br label %loop
@@ -86,6 +95,17 @@ exit:
; CHECK-MVE-NOT: call void @llvm.set.loop.iterations
; CHECK-FP: call void @llvm.set.loop.iterations.i32(i32 100)
; CHECK-MVEFP: call void @llvm.set.loop.iterations.i32(i32 100)
+
+; CHECK-LLC-LABEL: test_fabs:
+; CHECK-LLC: mov.w lr, #100
+; CHECK-LLC: dls lr, lr
+; CHECK-LLC-NOT: mov lr,
+; CHECK-LLC: [[LOOP_HEADER:\.LBB[0-9_]+]]:
+; CHECK-LLC-NOT: bl
+; CHECK-LLC: le lr, [[LOOP_HEADER]]
+; CHECK-LLC-NOT: b .
+; CHECK-LLC: @ %exit
+
define float @test_fabs(float* %a) {
entry:
br label %loop
Added: llvm/trunk/test/Transforms/HardwareLoops/ARM/cond-mov.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HardwareLoops/ARM/cond-mov.mir?rev=364288&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/HardwareLoops/ARM/cond-mov.mir (added)
+++ llvm/trunk/test/Transforms/HardwareLoops/ARM/cond-mov.mir Tue Jun 25 03:45:51 2019
@@ -0,0 +1,115 @@
+# RUN: llc -mtriple=thumbv8.1m.main -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s
+# CHECK: $lr = tMOVr $r0, 13, $noreg
+# CHECK: $lr = t2DLS killed $r0
+# CHECK: $lr = t2LEUpdate renamable $lr, %bb.1
+
+--- |
+ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+ target triple = "thumbv8.1m.main"
+
+ define i32 @do_copy(i32 %n, i32* nocapture %p, i32* nocapture readonly %q) {
+ entry:
+ %scevgep = getelementptr i32, i32* %q, i32 -1
+ %scevgep3 = getelementptr i32, i32* %p, i32 -1
+ call void @llvm.set.loop.iterations.i32(i32 %n)
+ br label %while.body
+
+ while.body:
+ %lsr.iv4 = phi i32* [ %scevgep5, %while.body ], [ %scevgep3, %entry ]
+ %lsr.iv = phi i32* [ %scevgep1, %while.body ], [ %scevgep, %entry ]
+ %0 = phi i32 [ %n, %entry ], [ %2, %while.body ]
+ %scevgep2 = getelementptr i32, i32* %lsr.iv, i32 1
+ %scevgep6 = getelementptr i32, i32* %lsr.iv4, i32 1
+ %1 = load i32, i32* %scevgep2, align 4
+ store i32 %1, i32* %scevgep6, align 4
+ %scevgep1 = getelementptr i32, i32* %lsr.iv, i32 1
+ %scevgep5 = getelementptr i32, i32* %lsr.iv4, i32 1
+ %2 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
+ %3 = icmp ne i32 %2, 0
+ br i1 %3, label %while.body, label %while.end
+
+ while.end:
+ ret i32 0
+ }
+
+ declare void @llvm.set.loop.iterations.i32(i32) #0
+ declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0
+ declare void @llvm.stackprotector(i8*, i8**) #1
+
+ attributes #0 = { noduplicate nounwind }
+ attributes #1 = { nounwind }
+
+...
+---
+name: do_copy
+alignment: 1
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+registers: []
+liveins:
+ - { reg: '$r0', virtual-reg: '' }
+ - { reg: '$r1', virtual-reg: '' }
+ - { reg: '$r2', virtual-reg: '' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 8
+ offsetAdjustment: 0
+ maxAlignment: 4
+ adjustsStack: false
+ hasCalls: false
+ stackProtector: ''
+ maxCallFrameSize: 0
+ cvBytesOfCalleeSavedRegisters: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ localFrameSize: 0
+ savePoint: ''
+ restorePoint: ''
+fixedStack: []
+stack:
+ - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4,
+ stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+ - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
+ stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+constants: []
+machineFunctionInfo: {}
+body: |
+ bb.0.entry:
+ successors: %bb.1(0x80000000)
+ liveins: $r0, $r1, $r2, $r7, $lr
+
+ $sp = frame-setup t2STMDB_UPD $sp, 14, $noreg, killed $r7, killed $lr
+ frame-setup CFI_INSTRUCTION def_cfa_offset 8
+ frame-setup CFI_INSTRUCTION offset $lr, -4
+ frame-setup CFI_INSTRUCTION offset $r7, -8
+ $lr = tMOVr $r0, 13, $noreg
+ t2DoLoopStart killed $r0
+ renamable $r0 = t2SUBri killed renamable $r1, 4, 14, $noreg, $noreg
+ renamable $r1 = t2SUBri killed renamable $r2, 4, 14, $noreg, $noreg
+
+ bb.1.while.body:
+ successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+ liveins: $lr, $r0, $r1
+
+ renamable $r2, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep2)
+ early-clobber renamable $r0 = t2STR_PRE killed renamable $r2, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep6)
+ renamable $lr = t2LoopDec killed renamable $lr, 1
+ t2LoopEnd renamable $lr, %bb.1
+ t2B %bb.2, 14, $noreg
+
+ bb.2.while.end:
+ $r0 = t2MOVi 0, 14, $noreg, $noreg
+ $sp = t2LDMIA_RET $sp, 14, $noreg, def $r7, def $pc, implicit killed $r0
+
+...
Added: llvm/trunk/test/Transforms/HardwareLoops/ARM/massive.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HardwareLoops/ARM/massive.mir?rev=364288&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/HardwareLoops/ARM/massive.mir (added)
+++ llvm/trunk/test/Transforms/HardwareLoops/ARM/massive.mir Tue Jun 25 03:45:51 2019
@@ -0,0 +1,145 @@
+# RUN: llc -mtriple=armv8.1m.main -mattr=+lob -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s
+# CHECK: for.body:
+# CHECK-NOT: t2DLS
+# CHECK-NOT: t2LEUpdate
+
+--- |
+ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+ target triple = "thumbv8.1m.main-unknown-unknown"
+
+ ; Function Attrs: norecurse nounwind
+ define dso_local arm_aapcscc void @massive(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) local_unnamed_addr {
+ entry:
+ %cmp8 = icmp eq i32 %N, 0
+ br i1 %cmp8, label %for.cond.cleanup, label %for.body.preheader
+
+ for.body.preheader: ; preds = %entry
+ %scevgep = getelementptr i32, i32* %a, i32 -1
+ %scevgep4 = getelementptr i32, i32* %c, i32 -1
+ %scevgep8 = getelementptr i32, i32* %b, i32 -1
+ call void @llvm.set.loop.iterations.i32(i32 %N)
+ br label %for.body
+
+ for.cond.cleanup: ; preds = %for.body, %entry
+ ret void
+
+ for.body: ; preds = %for.body, %for.body.preheader
+ %lsr.iv9 = phi i32* [ %scevgep8, %for.body.preheader ], [ %scevgep10, %for.body ]
+ %lsr.iv5 = phi i32* [ %scevgep4, %for.body.preheader ], [ %scevgep6, %for.body ]
+ %lsr.iv1 = phi i32* [ %scevgep, %for.body.preheader ], [ %scevgep2, %for.body ]
+ %0 = phi i32 [ %N, %for.body.preheader ], [ %3, %for.body ]
+ %size = call i32 @llvm.arm.space(i32 4096, i32 undef)
+ %scevgep11 = getelementptr i32, i32* %lsr.iv9, i32 1
+ %1 = load i32, i32* %scevgep11, align 4, !tbaa !3
+ %scevgep7 = getelementptr i32, i32* %lsr.iv5, i32 1
+ %2 = load i32, i32* %scevgep7, align 4, !tbaa !3
+ %mul = mul nsw i32 %2, %1
+ %scevgep3 = getelementptr i32, i32* %lsr.iv1, i32 1
+ store i32 %mul, i32* %scevgep3, align 4, !tbaa !3
+ %scevgep2 = getelementptr i32, i32* %lsr.iv1, i32 1
+ %scevgep6 = getelementptr i32, i32* %lsr.iv5, i32 1
+ %scevgep10 = getelementptr i32, i32* %lsr.iv9, i32 1
+ %3 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
+ %4 = icmp ne i32 %3, 0
+ br i1 %4, label %for.body, label %for.cond.cleanup
+ }
+
+ declare i32 @llvm.arm.space(i32, i32) #1
+ declare void @llvm.set.loop.iterations.i32(i32) #2
+ declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #2
+
+ attributes #1 = { nounwind }
+ attributes #2 = { noduplicate nounwind }
+
+ !llvm.module.flags = !{!0, !1}
+ !llvm.ident = !{!2}
+
+ !0 = !{i32 1, !"wchar_size", i32 4}
+ !1 = !{i32 1, !"min_enum_size", i32 4}
+ !2 = !{!"clang version 9.0.0 (http://llvm.org/git/clang.git a9c7c0fc5d468f3d18a5c6beb697ab0d5be2ff4c) (http://llvm.org/git/llvm.git f34bff0c141a04a5182d57e2cfb1e4bc582c81b0)"}
+ !3 = !{!4, !4, i64 0}
+ !4 = !{!"int", !5, i64 0}
+ !5 = !{!"omnipotent char", !6, i64 0}
+ !6 = !{!"Simple C/C++ TBAA"}
+
+...
+---
+name: massive
+alignment: 1
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: false
+hasWinCFI: false
+registers: []
+liveins:
+ - { reg: '$r0', virtual-reg: '' }
+ - { reg: '$r1', virtual-reg: '' }
+ - { reg: '$r2', virtual-reg: '' }
+ - { reg: '$r3', virtual-reg: '' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 8
+ offsetAdjustment: 0
+ maxAlignment: 4
+ adjustsStack: false
+ hasCalls: false
+ stackProtector: ''
+ maxCallFrameSize: 0
+ cvBytesOfCalleeSavedRegisters: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ localFrameSize: 0
+ savePoint: ''
+ restorePoint: ''
+fixedStack: []
+stack:
+ - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4,
+ stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+ - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
+ stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+constants: []
+machineFunctionInfo: {}
+body: |
+ bb.0.entry:
+ successors: %bb.1(0x80000000)
+
+ frame-setup tPUSH 14, $noreg, $r7, killed $lr, implicit-def $sp, implicit $sp
+ frame-setup CFI_INSTRUCTION def_cfa_offset 8
+ frame-setup CFI_INSTRUCTION offset $lr, -4
+ frame-setup CFI_INSTRUCTION offset $r7, -8
+ $r7 = frame-setup tMOVr $sp, 14, $noreg
+ frame-setup CFI_INSTRUCTION def_cfa_register $r7
+ tCMPi8 $r3, 0, 14, $noreg, implicit-def $cpsr
+ t2IT 0, 8, implicit-def $itstate
+ tPOP_RET 0, killed $cpsr, def $r7, def $pc, implicit killed $itstate
+ renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14, $noreg
+ renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14, $noreg
+ renamable $r0, dead $cpsr = tSUBi8 killed renamable $r0, 4, 14, $noreg
+ $lr = tMOVr $r3, 14, $noreg
+ t2DoLoopStart killed $r3
+
+ bb.1.for.body:
+ successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+
+ dead renamable $r3 = SPACE 4096, undef renamable $r0
+ renamable $r12, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep11, !tbaa !3)
+ renamable $r3, renamable $r2 = t2LDR_PRE killed renamable $r2, 4, 14, $noreg :: (load 4 from %ir.scevgep7, !tbaa !3)
+ renamable $r3 = nsw t2MUL killed renamable $r3, killed renamable $r12, 14, $noreg
+ early-clobber renamable $r0 = t2STR_PRE killed renamable $r3, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep3, !tbaa !3)
+ renamable $lr = t2LoopDec killed renamable $lr, 1
+ t2LoopEnd renamable $lr, %bb.1
+ tB %bb.2, 14, $noreg
+
+ bb.2.for.cond.cleanup:
+ tPOP_RET 14, $noreg, def $r7, def $pc
+
+...
Added: llvm/trunk/test/Transforms/HardwareLoops/ARM/multiblock-massive.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HardwareLoops/ARM/multiblock-massive.mir?rev=364288&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/HardwareLoops/ARM/multiblock-massive.mir (added)
+++ llvm/trunk/test/Transforms/HardwareLoops/ARM/multiblock-massive.mir Tue Jun 25 03:45:51 2019
@@ -0,0 +1,160 @@
+# RUN: llc -mtriple=armv8.1m.main -mattr=+lob -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s
+# CHECK: for.body:
+# CHECK-NOT: t2DLS
+# CHECK-NOT: t2LEUpdate
+
+--- |
+ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+ target triple = "thumbv8.1m.main-unknown-unknown"
+
+ define dso_local arm_aapcscc void @size_limit(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) local_unnamed_addr {
+ entry:
+ %cmp8 = icmp eq i32 %N, 0
+ br i1 %cmp8, label %for.cond.cleanup, label %for.body.preheader
+
+ for.body.preheader: ; preds = %entry
+ br label %for.body
+
+ for.cond.cleanup: ; preds = %for.end, %entry
+ ret void
+
+ for.body: ; preds = %for.body.preheader, %for.end
+ %lsr.iv4 = phi i32* [ %b, %for.body.preheader ], [ %scevgep5, %for.end ]
+ %lsr.iv2 = phi i32* [ %c, %for.body.preheader ], [ %scevgep3, %for.end ]
+ %lsr.iv1 = phi i32* [ %a, %for.body.preheader ], [ %scevgep, %for.end ]
+ %lsr.iv = phi i32 [ %N, %for.body.preheader ], [ %lsr.iv.next, %for.end ]
+ %size = call i32 @llvm.arm.space(i32 3072, i32 undef)
+ %0 = load i32, i32* %lsr.iv4, align 4, !tbaa !3
+ %1 = load i32, i32* %lsr.iv2, align 4, !tbaa !3
+ %mul = mul nsw i32 %1, %0
+ store i32 %mul, i32* %lsr.iv1, align 4, !tbaa !3
+ %cmp = icmp ne i32 %0, 0
+ br i1 %cmp, label %middle.block, label %for.end
+
+ middle.block: ; preds = %for.body
+ %div = udiv i32 %1, %0
+ store i32 %div, i32* %lsr.iv1, align 4, !tbaa !3
+ %size.1 = call i32 @llvm.arm.space(i32 1024, i32 undef)
+ br label %for.end
+
+ for.end: ; preds = %middle.block, %for.body
+ %lsr.iv.next = add i32 %lsr.iv, -1
+ %scevgep = getelementptr i32, i32* %lsr.iv1, i32 1
+ %scevgep3 = getelementptr i32, i32* %lsr.iv2, i32 1
+ %scevgep5 = getelementptr i32, i32* %lsr.iv4, i32 1
+ %exitcond = icmp eq i32 %lsr.iv.next, 0
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+ }
+
+ declare i32 @llvm.arm.space(i32, i32) #1
+ attributes #1 = { nounwind }
+
+ !llvm.module.flags = !{!0, !1}
+ !llvm.ident = !{!2}
+
+ !0 = !{i32 1, !"wchar_size", i32 4}
+ !1 = !{i32 1, !"min_enum_size", i32 4}
+ !2 = !{!"clang version 9.0.0 (http://llvm.org/git/clang.git a9c7c0fc5d468f3d18a5c6beb697ab0d5be2ff4c) (http://llvm.org/git/llvm.git f34bff0c141a04a5182d57e2cfb1e4bc582c81b0)"}
+ !3 = !{!4, !4, i64 0}
+ !4 = !{!"int", !5, i64 0}
+ !5 = !{!"omnipotent char", !6, i64 0}
+ !6 = !{!"Simple C/C++ TBAA"}
+
+...
+---
+name: size_limit
+alignment: 1
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: false
+hasWinCFI: false
+registers: []
+liveins:
+ - { reg: '$r0', virtual-reg: '' }
+ - { reg: '$r1', virtual-reg: '' }
+ - { reg: '$r2', virtual-reg: '' }
+ - { reg: '$r3', virtual-reg: '' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 16
+ offsetAdjustment: -8
+ maxAlignment: 4
+ adjustsStack: false
+ hasCalls: false
+ stackProtector: ''
+ maxCallFrameSize: 0
+ cvBytesOfCalleeSavedRegisters: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ localFrameSize: 0
+ savePoint: ''
+ restorePoint: ''
+fixedStack: []
+stack:
+ - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4,
+ stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+ - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
+ stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+ - { id: 2, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4,
+ stack-id: default, callee-saved-register: '$r6', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+ - { id: 3, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4,
+ stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+constants: []
+machineFunctionInfo: {}
+body: |
+ bb.0.entry:
+ successors: %bb.1(0x30000000), %bb.3(0x50000000)
+
+ frame-setup tPUSH 14, $noreg, killed $r4, killed $r6, $r7, killed $lr, implicit-def $sp, implicit $sp
+ frame-setup CFI_INSTRUCTION def_cfa_offset 16
+ frame-setup CFI_INSTRUCTION offset $lr, -4
+ frame-setup CFI_INSTRUCTION offset $r7, -8
+ frame-setup CFI_INSTRUCTION offset $r6, -12
+ frame-setup CFI_INSTRUCTION offset $r4, -16
+ $r7 = frame-setup tADDrSPi $sp, 2, 14, $noreg
+ frame-setup CFI_INSTRUCTION def_cfa $r7, 8
+ tCBNZ $r3, %bb.3
+
+ bb.1.for.cond.cleanup:
+ tPOP_RET 14, $noreg, def $r4, def $r6, def $r7, def $pc
+
+ bb.2.for.end:
+ successors: %bb.1(0x04000000), %bb.3(0x7c000000)
+
+ renamable $r1, dead $cpsr = tADDi8 killed renamable $r1, 4, 14, $noreg
+ renamable $r2, dead $cpsr = tADDi8 killed renamable $r2, 4, 14, $noreg
+ renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 4, 14, $noreg
+ renamable $r3, $cpsr = tSUBi8 killed renamable $r3, 1, 14, $noreg
+ tBcc %bb.1, 0, killed $cpsr
+
+ bb.3.for.body:
+ successors: %bb.4(0x50000000), %bb.2(0x30000000)
+
+ dead renamable $r12 = SPACE 3072, undef renamable $r0
+ renamable $r12 = t2LDRi12 renamable $r1, 0, 14, $noreg :: (load 4 from %ir.lsr.iv4, !tbaa !3)
+ renamable $lr = t2LDRi12 renamable $r2, 0, 14, $noreg :: (load 4 from %ir.lsr.iv2, !tbaa !3)
+ t2CMPri renamable $r12, 0, 14, $noreg, implicit-def $cpsr
+ renamable $r4 = nsw t2MUL renamable $lr, renamable $r12, 14, $noreg
+ tSTRi killed renamable $r4, renamable $r0, 0, 14, $noreg :: (store 4 into %ir.lsr.iv1, !tbaa !3)
+ t2Bcc %bb.2, 0, killed $cpsr
+
+ bb.4.middle.block:
+ successors: %bb.2(0x80000000)
+
+ renamable $r4 = t2UDIV killed renamable $lr, killed renamable $r12, 14, $noreg
+ tSTRi killed renamable $r4, renamable $r0, 0, 14, $noreg :: (store 4 into %ir.lsr.iv1, !tbaa !3)
+ dead renamable $r4 = SPACE 1024, undef renamable $r0
+ t2B %bb.2, 14, $noreg
+
+...
Added: llvm/trunk/test/Transforms/HardwareLoops/ARM/revert-after-call.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HardwareLoops/ARM/revert-after-call.mir?rev=364288&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/HardwareLoops/ARM/revert-after-call.mir (added)
+++ llvm/trunk/test/Transforms/HardwareLoops/ARM/revert-after-call.mir Tue Jun 25 03:45:51 2019
@@ -0,0 +1,141 @@
+# RUN: llc -mtriple=thumbv8.1m.main %s -o - | FileCheck %s
+
+# CHECK: .LBB0_2:
+# CHECK: sub.w lr, lr, #1
+# CHECK: mov [[TMP:r[0-9]+]], lr
+# CHECK: bl bar
+# CHECK: mov lr, [[TMP]]
+# CHECK: cmp.w lr, #0
+# CHECK: bne{{.*}} .LBB0_2
+
+--- |
+ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+ target triple = "thumbv8.1m.main-arm-none-eabi"
+
+ define i32 @skip_call(i32 %n) #0 {
+ entry:
+ %cmp6 = icmp eq i32 %n, 0
+ br i1 %cmp6, label %while.end, label %while.body.preheader
+
+ while.body.preheader: ; preds = %entry
+ call void @llvm.set.loop.iterations.i32(i32 %n)
+ br label %while.body
+
+ while.body: ; preds = %while.body, %while.body.preheader
+ %res.07 = phi i32 [ %add, %while.body ], [ 0, %while.body.preheader ]
+ %0 = phi i32 [ %n, %while.body.preheader ], [ %1, %while.body ]
+ %call = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)()
+ %add = add nsw i32 %call, %res.07
+ %1 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
+ %2 = icmp ne i32 %1, 0
+ br i1 %2, label %while.body, label %while.end
+
+ while.end: ; preds = %while.body, %entry
+ %res.0.lcssa = phi i32 [ 0, %entry ], [ %add, %while.body ]
+ ret i32 %res.0.lcssa
+ }
+
+ declare i32 @bar(...) local_unnamed_addr #0
+ declare void @llvm.set.loop.iterations.i32(i32) #1
+ declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1
+ declare void @llvm.stackprotector(i8*, i8**) #2
+
+ attributes #0 = { "target-features"="+mve.fp" }
+ attributes #1 = { noduplicate nounwind }
+ attributes #2 = { nounwind }
+
+...
+---
+name: skip_call
+alignment: 1
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+registers: []
+liveins:
+ - { reg: '$r0', virtual-reg: '' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 16
+ offsetAdjustment: 0
+ maxAlignment: 4
+ adjustsStack: true
+ hasCalls: true
+ stackProtector: ''
+ maxCallFrameSize: 0
+ cvBytesOfCalleeSavedRegisters: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ localFrameSize: 0
+ savePoint: ''
+ restorePoint: ''
+fixedStack: []
+stack:
+ - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4,
+ stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+ - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
+ stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+ - { id: 2, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4,
+ stack-id: default, callee-saved-register: '$r5', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+ - { id: 3, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4,
+ stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+constants: []
+machineFunctionInfo: {}
+body: |
+ bb.0.entry:
+ successors: %bb.1(0x30000000), %bb.3(0x50000000)
+ liveins: $r0, $r4, $r5, $r7, $lr
+
+ $sp = frame-setup t2STMDB_UPD $sp, 14, $noreg, killed $r4, killed $r5, killed $r7, killed $lr
+ frame-setup CFI_INSTRUCTION def_cfa_offset 16
+ frame-setup CFI_INSTRUCTION offset $lr, -4
+ frame-setup CFI_INSTRUCTION offset $r7, -8
+ frame-setup CFI_INSTRUCTION offset $r5, -12
+ frame-setup CFI_INSTRUCTION offset $r4, -16
+ t2CMPri $r0, 0, 14, $noreg, implicit-def $cpsr
+ t2Bcc %bb.1, 0, killed $cpsr
+
+ bb.3.while.body.preheader:
+ successors: %bb.4(0x80000000)
+ liveins: $r0
+
+ $lr = tMOVr $r0, 14, $noreg
+ renamable $r4 = t2MOVi 0, 14, $noreg, $noreg
+ t2DoLoopStart killed $r0
+
+ bb.4.while.body:
+ successors: %bb.4(0x7c000000), %bb.2(0x04000000)
+ liveins: $lr, $r4
+
+ renamable $lr = t2LoopDec killed renamable $lr, 1 ; loop-counter decrement pseudo; counter lives in lr
+ $r5 = tMOVr killed $lr, 14, $noreg ; counter is copied to r5 ahead of the call
+ tBL 14, $noreg, @bar, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $r0 ; call defines lr between LoopDec and LoopEnd
+ $lr = tMOVr killed $r5, 14, $noreg ; counter is copied back into lr after the call
+ renamable $r4 = nsw t2ADDrr killed renamable $r0, killed renamable $r4, 14, $noreg, $noreg
+ t2LoopEnd renamable $lr, %bb.4 ; CHECK lines at the top of the file expect cmp.w/bne here rather than a low-overhead-loop branch
+ t2B %bb.2, 14, $noreg
+
+ bb.2.while.end:
+ liveins: $r4
+
+ $r0 = tMOVr killed $r4, 14, $noreg
+ $sp = t2LDMIA_RET $sp, 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0
+
+ bb.1:
+ renamable $r4 = t2MOVi 0, 14, $noreg, $noreg
+ $r0 = tMOVr killed $r4, 14, $noreg
+ $sp = t2LDMIA_RET $sp, 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0
+
+...
Added: llvm/trunk/test/Transforms/HardwareLoops/ARM/revert-after-spill.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HardwareLoops/ARM/revert-after-spill.mir?rev=364288&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/HardwareLoops/ARM/revert-after-spill.mir (added)
+++ llvm/trunk/test/Transforms/HardwareLoops/ARM/revert-after-spill.mir Tue Jun 25 03:45:51 2019
@@ -0,0 +1,139 @@
+# RUN: llc -mtriple=thumbv8.1m.main %s -o - | FileCheck %s
+
+# CHECK: .LBB0_2:
+# CHECK: sub.w lr, lr, #1
+# CHECK: str.w lr, [sp, #12]
+# CHECK: ldr.w lr, [sp, #12]
+# CHECK: cmp.w lr, #0
+# CHECK: bne{{.*}} .LBB0_2
+
+--- |
+ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+ target triple = "thumbv8.1m.main-arm-none-eabi"
+
+ define i32 @skip_spill(i32 %n) #0 {
+ entry:
+ %cmp6 = icmp eq i32 %n, 0
+ br i1 %cmp6, label %while.end, label %while.body.preheader
+
+ while.body.preheader: ; preds = %entry
+ call void @llvm.set.loop.iterations.i32(i32 %n)
+ br label %while.body
+
+ while.body: ; preds = %while.body, %while.body.preheader
+ %res.07 = phi i32 [ %add, %while.body ], [ 0, %while.body.preheader ]
+ %0 = phi i32 [ %n, %while.body.preheader ], [ %1, %while.body ]
+ %call = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)()
+ %add = add nsw i32 %call, %res.07
+ %1 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
+ %2 = icmp ne i32 %1, 0
+ br i1 %2, label %while.body, label %while.end
+
+ while.end: ; preds = %while.body, %entry
+ %res.0.lcssa = phi i32 [ 0, %entry ], [ %add, %while.body ]
+ ret i32 %res.0.lcssa
+ }
+
+ declare i32 @bar(...) local_unnamed_addr #0
+ declare void @llvm.set.loop.iterations.i32(i32) #1
+ declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1
+ declare void @llvm.stackprotector(i8*, i8**) #2
+
+ attributes #0 = { "target-features"="+mve.fp" }
+ attributes #1 = { noduplicate nounwind }
+ attributes #2 = { nounwind }
+
+...
+---
+name: skip_spill
+alignment: 1
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+registers: []
+liveins:
+ - { reg: '$r0', virtual-reg: '' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 16
+ offsetAdjustment: 0
+ maxAlignment: 4
+ adjustsStack: true
+ hasCalls: true
+ stackProtector: ''
+ maxCallFrameSize: 0
+ cvBytesOfCalleeSavedRegisters: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ localFrameSize: 0
+ savePoint: ''
+ restorePoint: ''
+fixedStack: []
+stack:
+ - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4,
+ stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+ - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
+ stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+ - { id: 2, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4,
+ stack-id: default, callee-saved-register: '$r5', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+ - { id: 3, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4,
+ stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+constants: []
+machineFunctionInfo: {}
+body: |
+ bb.0.entry:
+ successors: %bb.1(0x30000000), %bb.3(0x50000000)
+ liveins: $r0, $r4, $r5, $r7, $lr
+
+ $sp = frame-setup t2STMDB_UPD $sp, 14, $noreg, killed $r4, killed $r5, killed $r7, killed $lr
+ frame-setup CFI_INSTRUCTION def_cfa_offset 16
+ frame-setup CFI_INSTRUCTION offset $lr, -4
+ frame-setup CFI_INSTRUCTION offset $r7, -8
+ frame-setup CFI_INSTRUCTION offset $r5, -12
+ frame-setup CFI_INSTRUCTION offset $r4, -16
+ t2CMPri $r0, 0, 14, $noreg, implicit-def $cpsr
+ t2Bcc %bb.1, 0, killed $cpsr
+
+ bb.3.while.body.preheader:
+ successors: %bb.4(0x80000000)
+ liveins: $r0
+
+ $lr = tMOVr $r0, 14, $noreg
+ renamable $r4 = t2MOVi 0, 14, $noreg, $noreg
+ t2DoLoopStart killed $r0
+
+ bb.4.while.body:
+ successors: %bb.4(0x7c000000), %bb.2(0x04000000)
+ liveins: $lr, $r4
+
+ renamable $lr = t2LoopDec killed renamable $lr, 1 ; loop-counter decrement pseudo; counter lives in lr
+ t2STRi12 $lr, %stack.0, 0, 14, $noreg :: (store 4) ; counter is stored to %stack.0 between LoopDec and LoopEnd
+ $lr = t2LDRi12 %stack.0, 0, 14, $noreg :: (load 4) ; counter is reloaded from %stack.0 into lr
+ renamable $r4 = nsw t2ADDrr renamable $lr, killed renamable $r4, 14, $noreg, $noreg
+ t2LoopEnd renamable $lr, %bb.4 ; CHECK lines at the top of the file expect cmp.w/bne here rather than a low-overhead-loop branch
+ t2B %bb.2, 14, $noreg
+
+ bb.2.while.end:
+ liveins: $r4
+
+ $r0 = tMOVr killed $r4, 14, $noreg
+ $sp = t2LDMIA_RET $sp, 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0
+
+ bb.1:
+ renamable $r4 = t2MOVi 0, 14, $noreg, $noreg
+ $r0 = tMOVr killed $r4, 14, $noreg
+ $sp = t2LDMIA_RET $sp, 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0
+
+...
Modified: llvm/trunk/test/Transforms/HardwareLoops/ARM/simple-do.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HardwareLoops/ARM/simple-do.ll?rev=364288&r1=364287&r2=364288&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/HardwareLoops/ARM/simple-do.ll (original)
+++ llvm/trunk/test/Transforms/HardwareLoops/ARM/simple-do.ll Tue Jun 25 03:45:51 2019
@@ -1,6 +1,7 @@
; RUN: opt -mtriple=thumbv8.1m.main-arm-none-eabi -hardware-loops -disable-arm-loloops=false %s -S -o - | FileCheck %s
; RUN: opt -mtriple=thumbv8.1m.main-arm-none-eabi -hardware-loops -disable-arm-loloops=true %s -S -o - | FileCheck %s --check-prefix=DISABLED
; RUN: opt -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=-lob -hardware-loops %s -S -o - | FileCheck %s --check-prefix=DISABLED
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -disable-arm-loloops=false %s -o - | FileCheck %s --check-prefix=CHECK-LLC
; DISABLED-NOT: llvm.set.loop.iterations
; DISABLED-NOT: llvm.loop.decrement
@@ -15,6 +16,15 @@
; CHECK: [[LOOP_DEC]] = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 [[REM]], i32 1)
; CHECK: [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0
; CHECK: br i1 [[CMP]], label %while.body, label %while.end
+
+; CHECK-LLC-LABEL:do_copy:
+; CHECK-LLC-NOT: mov lr, r0
+; CHECK-LLC: dls lr, r0
+; CHECK-LLC-NOT: mov lr, r0
+; CHECK-LLC: [[LOOP_HEADER:\.LBB[0-9_]+]]:
+; CHECK-LLC: le lr, [[LOOP_HEADER]]
+; CHECK-LLC-NOT: b [[LOOP_EXIT:\.LBB[0-9._]+]]
+; CHECK-LLC: @ %while.end
define i32 @do_copy(i32 %n, i32* nocapture %p, i32* nocapture readonly %q) {
entry:
br label %while.body
@@ -45,6 +55,14 @@ while.end:
; CHECK: [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0
; CHECK: br i1 [[CMP]], label %while.body, label %while.end.loopexit
+; CHECK-LLC-LABEL:do_inc1:
+; CHECK-LLC: dls lr,
+; CHECK-LLC-NOT: mov lr,
+; CHECK-LLC: [[LOOP_HEADER:\.LBB[0-9_]+]]:
+; CHECK-LLC: le lr, [[LOOP_HEADER]]
+; CHECK-LLC-NOT: b [[LOOP_EXIT:\.LBB[0-9_]+]]
+; CHECK-LLC: [[LOOP_EXIT:\.LBB[0-9_]+]]:
+
define i32 @do_inc1(i32 %n) {
entry:
%cmp7 = icmp eq i32 %n, 0
@@ -84,6 +102,16 @@ while.end:
; CHECK: [[LOOP_DEC]] = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 [[REM]], i32 1)
; CHECK: [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0
; CHECK: br i1 [[CMP]], label %while.body, label %while.end.loopexit
+
+; CHECK-LLC-LABEL: do_inc2:
+; CHECK-LLC-NOT: mov lr,
+; CHECK-LLC: dls lr,
+; CHECK-LLC-NOT: mov lr,
+; CHECK-LLC: [[LOOP_HEADER:\.LBB[0-9._]+]]:
+; CHECK-LLC: le lr, [[LOOP_HEADER]]
+; CHECK-LLC-NOT: b [[LOOP_EXIT:\.LBB[0-9._]+]]
+; CHECK-LLC: [[LOOP_EXIT:\.LBB[0-9_]+]]:
+
define i32 @do_inc2(i32 %n) {
entry:
%cmp7 = icmp sgt i32 %n, 0
@@ -127,6 +155,15 @@ while.end:
; CHECK: [[LOOP_DEC]] = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 [[REM]], i32 1)
; CHECK: [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0
; CHECK: br i1 [[CMP]], label %while.body, label %while.end.loopexit
+
+; CHECK-LLC-LABEL: do_dec2:
+; CHECK-LLC-NOT: mov lr,
+; CHECK-LLC: dls lr,
+; CHECK-LLC-NOT: mov lr,
+; CHECK-LLC: [[LOOP_HEADER:\.LBB[0-9_]+]]:
+; CHECK-LLC: le lr, [[LOOP_HEADER]]
+; CHECK-LLC-NOT: b .
+; CHECK-LLC: @ %while.end
define i32 @do_dec2(i32 %n) {
entry:
%cmp6 = icmp sgt i32 %n, 0
Added: llvm/trunk/test/Transforms/HardwareLoops/ARM/size-limit.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HardwareLoops/ARM/size-limit.mir?rev=364288&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/HardwareLoops/ARM/size-limit.mir (added)
+++ llvm/trunk/test/Transforms/HardwareLoops/ARM/size-limit.mir Tue Jun 25 03:45:51 2019
@@ -0,0 +1,155 @@
+# RUN: llc -mtriple=armv8.1m.main -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s
+# CHECK: entry:
+# CHECK: $lr = t2DLS
+# CHECK: for.body:
+# CHECK: $lr = t2LEUpdate renamable $lr
+
+--- |
+ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+ target triple = "thumbv8.1m.main-unknown-unknown"
+
+ ; Function Attrs: norecurse nounwind
+ define dso_local arm_aapcscc void @size_limit(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) local_unnamed_addr #0 {
+ entry:
+ %cmp8 = icmp eq i32 %N, 0
+ br i1 %cmp8, label %for.cond.cleanup, label %for.body.preheader
+
+ for.body.preheader: ; preds = %entry
+ %scevgep = getelementptr i32, i32* %a, i32 -1
+ %scevgep4 = getelementptr i32, i32* %c, i32 -1
+ %scevgep8 = getelementptr i32, i32* %b, i32 -1
+ call void @llvm.set.loop.iterations.i32(i32 %N)
+ br label %for.body
+
+ for.cond.cleanup: ; preds = %for.body, %entry
+ ret void
+
+ for.body: ; preds = %for.body, %for.body.preheader
+ %lsr.iv9 = phi i32* [ %scevgep8, %for.body.preheader ], [ %scevgep10, %for.body ]
+ %lsr.iv5 = phi i32* [ %scevgep4, %for.body.preheader ], [ %scevgep6, %for.body ]
+ %lsr.iv1 = phi i32* [ %scevgep, %for.body.preheader ], [ %scevgep2, %for.body ]
+ %0 = phi i32 [ %N, %for.body.preheader ], [ %3, %for.body ]
+ %size = call i32 @llvm.arm.space(i32 4072, i32 undef)
+ %scevgep11 = getelementptr i32, i32* %lsr.iv9, i32 1
+ %1 = load i32, i32* %scevgep11, align 4, !tbaa !3
+ %scevgep7 = getelementptr i32, i32* %lsr.iv5, i32 1
+ %2 = load i32, i32* %scevgep7, align 4, !tbaa !3
+ %mul = mul nsw i32 %2, %1
+ %scevgep3 = getelementptr i32, i32* %lsr.iv1, i32 1
+ store i32 %mul, i32* %scevgep3, align 4, !tbaa !3
+ %scevgep2 = getelementptr i32, i32* %lsr.iv1, i32 1
+ %scevgep6 = getelementptr i32, i32* %lsr.iv5, i32 1
+ %scevgep10 = getelementptr i32, i32* %lsr.iv9, i32 1
+ %3 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
+ %4 = icmp ne i32 %3, 0
+ br i1 %4, label %for.body, label %for.cond.cleanup
+ }
+
+ ; Function Attrs: nounwind
+ declare i32 @llvm.arm.space(i32, i32) #1
+
+ ; Function Attrs: noduplicate nounwind
+ declare void @llvm.set.loop.iterations.i32(i32) #2
+
+ ; Function Attrs: noduplicate nounwind
+ declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #2
+
+ ; Function Attrs: nounwind
+ declare void @llvm.stackprotector(i8*, i8**) #1
+
+ attributes #0 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv8.1-m.main,+hwdiv,+ras,+soft-float,+strict-align,+thumb-mode,-crypto,-d32,-dotprod,-fp-armv8,-fp-armv8d16,-fp-armv8d16sp,-fp-armv8sp,-fp16,-fp16fml,-fp64,-fpregs,-fullfp16,-neon,-vfp2,-vfp2d16,-vfp2d16sp,-vfp2sp,-vfp3,-vfp3d16,-vfp3d16sp,-vfp3sp,-vfp4,-vfp4d16,-vfp4d16sp,-vfp4sp" "unsafe-fp-math"="false" "use-soft-float"="true" }
+ attributes #1 = { nounwind }
+ attributes #2 = { noduplicate nounwind }
+
+ !llvm.module.flags = !{!0, !1}
+ !llvm.ident = !{!2}
+
+ !0 = !{i32 1, !"wchar_size", i32 4}
+ !1 = !{i32 1, !"min_enum_size", i32 4}
+ !2 = !{!"clang version 9.0.0 (http://llvm.org/git/clang.git a9c7c0fc5d468f3d18a5c6beb697ab0d5be2ff4c) (http://llvm.org/git/llvm.git f34bff0c141a04a5182d57e2cfb1e4bc582c81b0)"}
+ !3 = !{!4, !4, i64 0}
+ !4 = !{!"int", !5, i64 0}
+ !5 = !{!"omnipotent char", !6, i64 0}
+ !6 = !{!"Simple C/C++ TBAA"}
+
+...
+---
+name: size_limit
+alignment: 1
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: false
+hasWinCFI: false
+registers: []
+liveins:
+ - { reg: '$r0', virtual-reg: '' }
+ - { reg: '$r1', virtual-reg: '' }
+ - { reg: '$r2', virtual-reg: '' }
+ - { reg: '$r3', virtual-reg: '' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 8
+ offsetAdjustment: 0
+ maxAlignment: 4
+ adjustsStack: false
+ hasCalls: false
+ stackProtector: ''
+ maxCallFrameSize: 0
+ cvBytesOfCalleeSavedRegisters: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ localFrameSize: 0
+ savePoint: ''
+ restorePoint: ''
+fixedStack: []
+stack:
+ - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4,
+ stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+ - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
+ stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+constants: []
+machineFunctionInfo: {}
+body: |
+ bb.0.entry:
+ successors: %bb.1(0x80000000)
+
+ frame-setup tPUSH 14, $noreg, $r7, killed $lr, implicit-def $sp, implicit $sp
+ frame-setup CFI_INSTRUCTION def_cfa_offset 8
+ frame-setup CFI_INSTRUCTION offset $lr, -4
+ frame-setup CFI_INSTRUCTION offset $r7, -8
+ $r7 = frame-setup tMOVr $sp, 14, $noreg
+ frame-setup CFI_INSTRUCTION def_cfa_register $r7
+ tCMPi8 $r3, 0, 14, $noreg, implicit-def $cpsr
+ t2IT 0, 8, implicit-def $itstate
+ tPOP_RET 0, killed $cpsr, def $r7, def $pc, implicit killed $itstate
+ renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14, $noreg
+ renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14, $noreg
+ renamable $r0, dead $cpsr = tSUBi8 killed renamable $r0, 4, 14, $noreg
+ $lr = tMOVr $r3, 14, $noreg
+ t2DoLoopStart killed $r3
+
+ bb.1.for.body:
+ successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+
+ dead renamable $r3 = SPACE 4072, undef renamable $r0 ; presumably pads the loop body by 4072 bytes (test is named size-limit) - TODO confirm; CHECK lines still expect t2DLS/t2LEUpdate
+ renamable $r12, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep11, !tbaa !3)
+ renamable $r3, renamable $r2 = t2LDR_PRE killed renamable $r2, 4, 14, $noreg :: (load 4 from %ir.scevgep7, !tbaa !3)
+ renamable $r3 = nsw t2MUL killed renamable $r3, killed renamable $r12, 14, $noreg
+ early-clobber renamable $r0 = t2STR_PRE killed renamable $r3, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep3, !tbaa !3)
+ renamable $lr = t2LoopDec killed renamable $lr, 1 ; decrement and branch pseudos are adjacent: nothing touches lr in between
+ t2LoopEnd renamable $lr, %bb.1 ; CHECK lines expect this pair to become "$lr = t2LEUpdate renamable $lr"
+ tB %bb.2, 14, $noreg
+
+ bb.2.for.cond.cleanup:
+ tPOP_RET 14, $noreg, def $r7, def $pc
+
+...
Modified: llvm/trunk/test/Transforms/HardwareLoops/ARM/structure.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HardwareLoops/ARM/structure.ll?rev=364288&r1=364287&r2=364288&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/HardwareLoops/ARM/structure.ll (original)
+++ llvm/trunk/test/Transforms/HardwareLoops/ARM/structure.ll Tue Jun 25 03:45:51 2019
@@ -1,4 +1,6 @@
; RUN: opt -mtriple=thumbv8.1m.main-arm-none-eabi -hardware-loops -disable-arm-loloops=false %s -S -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -disable-arm-loloops=false %s -o - | FileCheck %s --check-prefix=CHECK-LLC
+; RUN: opt -mtriple=thumbv8.1m.main -loop-unroll -unroll-remainder=false -S < %s | llc -mtriple=thumbv8.1m.main -disable-arm-loloops=false | FileCheck %s --check-prefix=CHECK-UNROLL
; CHECK-LABEL: early_exit
; CHECK-NOT: llvm.set.loop.iterations
@@ -43,6 +45,16 @@ do.end:
; CHECK-NOT: [[LOOP_DEC1:%[^ ]+]] = call i1 @llvm.loop.decrement.i32(i32 1)
; CHECK-NOT: br i1 [[LOOP_DEC1]], label %while.cond1.preheader.us, label %while.end7
+
+; CHECK-LLC-LABEL: nested:
+; CHECK-LLC-NOT: mov lr, r1
+; CHECK-LLC: dls lr, r1
+; CHECK-LLC-NOT: mov lr, r1
+; CHECK-LLC: [[LOOP_HEADER:\.LBB[0-9._]+]]:
+; CHECK-LLC: le lr, [[LOOP_HEADER]]
+; CHECK-LLC-NOT: b [[LOOP_EXIT:\.LBB[0-9._]+]]
+; CHECK-LLC: [[LOOP_EXIT:\.LBB[0-9._]+]]:
+
define void @nested(i32* nocapture %A, i32 %N) {
entry:
%cmp20 = icmp eq i32 %N, 0
@@ -210,6 +222,171 @@ exit:
ret void
}
+; CHECK-LABEL: search
+; CHECK: for.body.preheader:
+; CHECK: call void @llvm.set.loop.iterations.i32(i32 %N)
+; CHECK: br label %for.body
+; CHECK: for.body:
+; CHECK: for.inc:
+; CHECK: [[LOOP_DEC:%[^ ]+]] = call i32 @llvm.loop.decrement.reg.i32.i32.i32
+; CHECK: [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0
+; CHECK: br i1 [[CMP]], label %for.body, label %for.cond.cleanup
+define i32 @search(i8* nocapture readonly %c, i32 %N) {
+entry:
+ %cmp11 = icmp eq i32 %N, 0
+ br i1 %cmp11, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+ %found.0.lcssa = phi i32 [ 0, %entry ], [ %found.1, %for.inc ]
+ %spaces.0.lcssa = phi i32 [ 0, %entry ], [ %spaces.1, %for.inc ]
+ %sub = sub nsw i32 %found.0.lcssa, %spaces.0.lcssa
+ ret i32 %sub
+
+for.body:
+ %i.014 = phi i32 [ %inc3, %for.inc ], [ 0, %entry ]
+ %spaces.013 = phi i32 [ %spaces.1, %for.inc ], [ 0, %entry ]
+ %found.012 = phi i32 [ %found.1, %for.inc ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i8, i8* %c, i32 %i.014
+ %0 = load i8, i8* %arrayidx, align 1
+ switch i8 %0, label %for.inc [
+ i8 108, label %sw.bb
+ i8 111, label %sw.bb
+ i8 112, label %sw.bb
+ i8 32, label %sw.bb1
+ ]
+
+sw.bb: ; preds = %for.body, %for.body, %for.body
+ %inc = add nsw i32 %found.012, 1
+ br label %for.inc
+
+sw.bb1: ; preds = %for.body
+ %inc2 = add nsw i32 %spaces.013, 1
+ br label %for.inc
+
+for.inc: ; preds = %sw.bb, %sw.bb1, %for.body
+ %found.1 = phi i32 [ %found.012, %for.body ], [ %found.012, %sw.bb1 ], [ %inc, %sw.bb ]
+ %spaces.1 = phi i32 [ %spaces.013, %for.body ], [ %inc2, %sw.bb1 ], [ %spaces.013, %sw.bb ]
+ %inc3 = add nuw i32 %i.014, 1
+ %exitcond = icmp eq i32 %inc3, %N
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: unroll_inc_int
+; CHECK: call void @llvm.set.loop.iterations.i32(i32 %N)
+; CHECK: call i32 @llvm.loop.decrement.reg.i32.i32.i32(
+
+; TODO: We should be able to support the unrolled loop body.
+; CHECK-UNROLL-LABEL: unroll_inc_int:
+; CHECK-UNROLL: [[PREHEADER:\.LBB[0-9_]+]]: @ %for.body.preheader
+; CHECK-UNROLL-NOT: dls
+; CHECK-UNROLL: [[LOOP:\.LBB[0-9_]+]]: @ %for.body
+; CHECK-UNROLL-NOT: le lr, [[LOOP]]
+; CHECK-UNROLL: bne [[LOOP]]
+; CHECK-UNROLL: %for.body.epil.preheader
+; CHECK-UNROLL: dls lr
+; CHECK-UNROLL: %for.body.epil
+; CHECK-UNROLL: le lr
+
+define void @unroll_inc_int(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) {
+entry:
+ %cmp8 = icmp sgt i32 %N, 0
+ br i1 %cmp8, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+ ret void
+
+for.body:
+ %i.09 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.09
+ %0 = load i32, i32* %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds i32, i32* %c, i32 %i.09
+ %1 = load i32, i32* %arrayidx1, align 4
+ %mul = mul nsw i32 %1, %0
+ %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 %i.09
+ store i32 %mul, i32* %arrayidx2, align 4
+ %inc = add nuw nsw i32 %i.09, 1
+ %exitcond = icmp eq i32 %inc, %N
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: unroll_inc_unsigned
+; CHECK: call void @llvm.set.loop.iterations.i32(i32 %N)
+; CHECK: call i32 @llvm.loop.decrement.reg.i32.i32.i32(
+
+; CHECK-LLC-LABEL: unroll_inc_unsigned:
+; CHECK-LLC: dls lr, [[COUNT:r[0-9]+]]
+; CHECK-LLC: le lr
+
+; TODO: We should be able to support the unrolled loop body.
+; CHECK-UNROLL-LABEL: unroll_inc_unsigned:
+; CHECK-UNROLL: [[PREHEADER:\.LBB[0-9_]+]]: @ %for.body.preheader
+; CHECK-UNROLL-NOT: dls
+; CHECK-UNROLL: [[LOOP:\.LBB[0-9_]+]]: @ %for.body
+; CHECK-UNROLL-NOT: le lr, [[LOOP]]
+; CHECK-UNROLL: bne [[LOOP]]
+; CHECK-UNROLL: %for.body.epil.preheader
+; CHECK-UNROLL: dls lr
+; CHECK-UNROLL: %for.body.epil
+; CHECK-UNROLL: le lr
+define void @unroll_inc_unsigned(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) {
+entry:
+ %cmp8 = icmp eq i32 %N, 0
+ br i1 %cmp8, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+ ret void
+
+for.body:
+ %i.09 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.09
+ %0 = load i32, i32* %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds i32, i32* %c, i32 %i.09
+ %1 = load i32, i32* %arrayidx1, align 4
+ %mul = mul nsw i32 %1, %0
+ %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 %i.09
+ store i32 %mul, i32* %arrayidx2, align 4
+ %inc = add nuw i32 %i.09, 1
+ %exitcond = icmp eq i32 %inc, %N
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: unroll_dec_int
+; CHECK: call void @llvm.set.loop.iterations.i32(i32 %N)
+; CHECK: call i32 @llvm.loop.decrement.reg.i32.i32.i32(
+
+; TODO: An unnecessary register is being used to hold COUNT; lr should just
+; be used instead.
+; CHECK-LLC-LABEL: unroll_dec_int:
+; CHECK-LLC: dls lr, [[COUNT:r[0-9]+]]
+; CHECK-LLC: subs [[COUNT]], #1
+; CHECK-LLC: le lr
+
+; CHECK-UNROLL-LABEL: unroll_dec_int
+; CHECK-UNROLL: dls lr
+; CHECK-UNROLL: le lr
+; CHECK-UNROLL: dls lr
+; CHECK-UNROLL: le lr
+define void @unroll_dec_int(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) {
+entry:
+ %cmp8 = icmp sgt i32 %N, 0
+ br i1 %cmp8, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+ ret void
+
+for.body:
+ %i.09 = phi i32 [ %dec, %for.body ], [ %N, %entry ]
+ %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.09
+ %0 = load i32, i32* %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds i32, i32* %c, i32 %i.09
+ %1 = load i32, i32* %arrayidx1, align 4
+ %mul = mul nsw i32 %1, %0
+ %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 %i.09
+ store i32 %mul, i32* %arrayidx2, align 4
+ %dec = add nsw i32 %i.09, -1
+ %cmp = icmp sgt i32 %dec, 0
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+}
declare void @llvm.set.loop.iterations.i32(i32) #0
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0
Added: llvm/trunk/test/Transforms/HardwareLoops/ARM/switch.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HardwareLoops/ARM/switch.mir?rev=364288&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/HardwareLoops/ARM/switch.mir (added)
+++ llvm/trunk/test/Transforms/HardwareLoops/ARM/switch.mir Tue Jun 25 03:45:51 2019
@@ -0,0 +1,200 @@
+# RUN: llc -mtriple=thumbv8.1m.main %s -run-pass=arm-low-overhead-loops -o - | FileCheck %s
+# NOTE(review): the checks name bb.1/bb.6/%bb.2 while the input below says bb.3/bb.8/%bb.4;
+# presumably the blocks are renumbered in layout order when the MIR is re-printed - confirm.
+# CHECK: bb.1.for.body.preheader:
+# CHECK: $lr = t2DLS
+# CHECK-NOT: t2LoopDec
+# CHECK: bb.6.for.inc:
+# CHECK: $lr = t2LEUpdate renamable $lr, %bb.2
+
+--- |
+ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+ target triple = "thumbv8.1m.main-unknown-unknown"
+
+ ; Function Attrs: norecurse nounwind readonly
+ define dso_local arm_aapcscc i32 @search(i8* nocapture readonly %c, i32 %N) local_unnamed_addr #0 { ; returns (#bytes equal to 108, 111 or 112) - (#bytes equal to 32) over N bytes at %c
+ entry:
+ %cmp11 = icmp eq i32 %N, 0 ; N == 0 skips the loop and returns 0 - 0
+ br i1 %cmp11, label %for.cond.cleanup, label %for.body.preheader
+
+ for.body.preheader:
+ call void @llvm.set.loop.iterations.i32(i32 %N)
+ br label %for.body
+
+ for.cond.cleanup:
+ %found.0.lcssa = phi i32 [ 0, %entry ], [ %found.1, %for.inc ]
+ %spaces.0.lcssa = phi i32 [ 0, %entry ], [ %spaces.1, %for.inc ]
+ %sub = sub nsw i32 %found.0.lcssa, %spaces.0.lcssa
+ ret i32 %sub
+
+ for.body:
+ %lsr.iv1 = phi i8* [ %c, %for.body.preheader ], [ %scevgep, %for.inc ] ; pointer to the current byte
+ %spaces.013 = phi i32 [ %spaces.1, %for.inc ], [ 0, %for.body.preheader ]
+ %found.012 = phi i32 [ %found.1, %for.inc ], [ 0, %for.body.preheader ]
+ %0 = phi i32 [ %N, %for.body.preheader ], [ %3, %for.inc ] ; loop counter: N on entry, then the intrinsic's result
+ %1 = load i8, i8* %lsr.iv1, align 1
+ %2 = zext i8 %1 to i32
+ switch i32 %2, label %for.inc [
+ i32 108, label %sw.bb
+ i32 111, label %sw.bb
+ i32 112, label %sw.bb
+ i32 32, label %sw.bb1
+ ]
+
+ sw.bb:
+ %inc = add nsw i32 %found.012, 1 ; one more "found" byte
+ br label %for.inc
+
+ sw.bb1:
+ %inc2 = add nsw i32 %spaces.013, 1 ; one more "space" byte
+ br label %for.inc
+
+ for.inc:
+ %found.1 = phi i32 [ %found.012, %for.body ], [ %found.012, %sw.bb1 ], [ %inc, %sw.bb ]
+ %spaces.1 = phi i32 [ %spaces.013, %for.body ], [ %inc2, %sw.bb1 ], [ %spaces.013, %sw.bb ]
+ %scevgep = getelementptr i8, i8* %lsr.iv1, i32 1 ; advance to the next byte
+ %3 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1) ; updated counter; its result feeds the %0 phi
+ %4 = icmp ne i32 %3, 0 ; loop again while the updated counter is non-zero
+ br i1 %4, label %for.body, label %for.cond.cleanup
+ }
+
+ declare void @llvm.set.loop.iterations.i32(i32) #1
+ declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1
+ declare void @llvm.stackprotector(i8*, i8**) #2
+
+ attributes #0 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv8.1-m.main,+hwdiv,+ras,+soft-float,+strict-align,+thumb-mode,-crypto,-d32,-dotprod,-fp-armv8,-fp-armv8d16,-fp-armv8d16sp,-fp-armv8sp,-fp16,-fp16fml,-fp64,-fpregs,-fullfp16,-neon,-vfp2,-vfp2d16,-vfp2d16sp,-vfp2sp,-vfp3,-vfp3d16,-vfp3d16sp,-vfp3sp,-vfp4,-vfp4d16,-vfp4d16sp,-vfp4sp" "unsafe-fp-math"="false" "use-soft-float"="true" }
+ attributes #1 = { noduplicate nounwind }
+ attributes #2 = { nounwind }
+
+...
+---
+name: search # machine function for @search in the embedded IR module of this file
+alignment: 1
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+registers: []
+liveins:
+ - { reg: '$r0', virtual-reg: '' }
+ - { reg: '$r1', virtual-reg: '' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 16
+ offsetAdjustment: -8
+ maxAlignment: 4
+ adjustsStack: false
+ hasCalls: false
+ stackProtector: ''
+ maxCallFrameSize: 0
+ cvBytesOfCalleeSavedRegisters: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ localFrameSize: 0
+ savePoint: ''
+ restorePoint: ''
+fixedStack: []
+stack:
+ - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4,
+ stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+ - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
+ stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+ - { id: 2, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4,
+ stack-id: default, callee-saved-register: '$r6', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+ - { id: 3, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4,
+ stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+constants: []
+machineFunctionInfo: {}
+body: |
+ bb.0.entry:
+ successors: %bb.1(0x30000000), %bb.3(0x50000000)
+ liveins: $r0, $r1, $r4, $r6, $lr
+
+ $sp = frame-setup t2STMDB_UPD $sp, 14, $noreg, killed $r4, killed $r6, $r7, killed $lr
+ frame-setup CFI_INSTRUCTION def_cfa_offset 16
+ frame-setup CFI_INSTRUCTION offset $lr, -4
+ frame-setup CFI_INSTRUCTION offset $r7, -8
+ frame-setup CFI_INSTRUCTION offset $r6, -12
+ frame-setup CFI_INSTRUCTION offset $r4, -16
+ $r7 = frame-setup t2ADDri $sp, 8, 14, $noreg, $noreg
+ frame-setup CFI_INSTRUCTION def_cfa $r7, 8
+ t2CMPri $r1, 0, 14, $noreg, implicit-def $cpsr ; presumably the IR's N == 0 early-exit test, with $r1 carrying %N - confirm
+ t2Bcc %bb.1, 0, killed $cpsr
+
+ bb.3.for.body.preheader:
+ successors: %bb.4(0x80000000)
+ liveins: $r0, $r1
+
+ $lr = tMOVr $r1, 14, $noreg ; lr receives the value that t2DoLoopStart takes as its operand
+ t2DoLoopStart killed $r1 ; loop-start pseudo; the checks at the top of this file expect "$lr = t2DLS" in this block
+ renamable $r1 = t2MOVi 0, 14, $noreg, $noreg
+ renamable $r12 = t2MOVi 1, 14, $noreg, $noreg
+ renamable $r2 = t2MOVi 0, 14, $noreg, $noreg
+
+ bb.4.for.body:
+ successors: %bb.5(0x26666665), %bb.6(0x5999999b)
+ liveins: $lr, $r0, $r1, $r2, $r12
+
+ renamable $r3 = t2LDRBi12 renamable $r0, 0, 14, $noreg :: (load 1 from %ir.lsr.iv1)
+ renamable $r4 = t2SUBri renamable $r3, 108, 14, $noreg, $noreg
+ renamable $lr = t2LoopDec killed renamable $lr, 1 ; decrement pseudo, in a different block from the t2LoopEnd below; the checks expect no t2LoopDec in the output loop
+ t2CMPri renamable $r4, 4, 14, $noreg, implicit-def $cpsr
+ t2Bcc %bb.5, 8, killed $cpsr
+
+ bb.6.for.body:
+ successors: %bb.7(0x6db6db6e), %bb.5(0x12492492)
+ liveins: $lr, $r0, $r1, $r2, $r3, $r4, $r12
+
+ renamable $r4 = t2LSLrr renamable $r12, killed renamable $r4, 14, $noreg, $noreg
+ t2TSTri killed renamable $r4, 25, 14, $noreg, implicit-def $cpsr
+ t2Bcc %bb.5, 0, killed $cpsr
+
+ bb.7.sw.bb:
+ successors: %bb.8(0x80000000)
+ liveins: $lr, $r0, $r1, $r2, $r12
+
+ renamable $r2 = nsw t2ADDri killed renamable $r2, 1, 14, $noreg, $noreg
+ t2B %bb.8, 14, $noreg
+
+ bb.5.for.body:
+ successors: %bb.8(0x80000000)
+ liveins: $lr, $r0, $r1, $r2, $r3, $r12
+
+ t2CMPri killed renamable $r3, 32, 14, $noreg, implicit-def $cpsr
+ BUNDLE implicit-def dead $itstate, implicit-def $r1, implicit killed $r1, implicit killed $cpsr {
+ t2IT 0, 8, implicit-def $itstate
+ renamable $r1 = nsw t2ADDri killed renamable $r1, 1, 0, killed $cpsr, $noreg, implicit $r1, implicit internal killed $itstate
+ }
+
+ bb.8.for.inc:
+ successors: %bb.4(0x7c000000), %bb.2(0x04000000)
+ liveins: $lr, $r0, $r1, $r2, $r12
+
+ renamable $r0 = t2ADDri killed renamable $r0, 1, 14, $noreg, $noreg
+ t2LoopEnd renamable $lr, %bb.4 ; loop-end pseudo targeting the loop header; the checks expect "$lr = t2LEUpdate renamable $lr" in its place
+ t2B %bb.2, 14, $noreg
+
+ bb.2.for.cond.cleanup:
+ liveins: $r1, $r2
+
+ renamable $r0 = nsw t2SUBrr killed renamable $r2, killed renamable $r1, 14, $noreg, $noreg
+ $sp = t2LDMIA_RET $sp, 14, $noreg, def $r4, def $r6, def $r7, def $pc, implicit killed $r0
+
+ bb.1:
+ renamable $r2 = t2MOVi 0, 14, $noreg, $noreg
+ renamable $r1 = t2MOVi 0, 14, $noreg, $noreg
+ renamable $r0 = nsw t2SUBrr killed renamable $r2, killed renamable $r1, 14, $noreg, $noreg
+ $sp = t2LDMIA_RET $sp, 14, $noreg, def $r4, def $r6, def $r7, def $pc, implicit killed $r0
+
+...
More information about the llvm-commits
mailing list