[llvm] r237266 - [Hexagon] Generate loop1 instruction for nested loops
Brendon Cahoon
bcahoon at codeaurora.org
Wed May 13 10:56:04 PDT 2015
Author: bcahoon
Date: Wed May 13 12:56:03 2015
New Revision: 237266
URL: http://llvm.org/viewvc/llvm-project?rev=237266&view=rev
Log:
[Hexagon] Generate loop1 instruction for nested loops
loop1 is for the outer loop and loop0 is for the inner loop.
Differential Revision: http://reviews.llvm.org/D9680
Added:
llvm/trunk/test/CodeGen/Hexagon/hwloop-loop1.ll
Modified:
llvm/trunk/lib/Target/Hexagon/HexagonHardwareLoops.cpp
Modified: llvm/trunk/lib/Target/Hexagon/HexagonHardwareLoops.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonHardwareLoops.cpp?rev=237266&r1=237265&r2=237266&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonHardwareLoops.cpp (original)
+++ llvm/trunk/lib/Target/Hexagon/HexagonHardwareLoops.cpp Wed May 13 12:56:03 2015
@@ -159,7 +159,7 @@ namespace {
MachineOperand *InitialValue,
const MachineOperand *Endvalue,
int64_t IVBump) const;
-
+
/// \brief Analyze the statements in a loop to determine if the loop
/// has a computable trip count and, if so, return a value that represents
/// the trip count expression.
@@ -179,15 +179,16 @@ namespace {
/// \brief Return true if the instruction is not valid within a hardware
/// loop.
- bool isInvalidLoopOperation(const MachineInstr *MI) const;
+ bool isInvalidLoopOperation(const MachineInstr *MI,
+ bool IsInnerHWLoop) const;
/// \brief Return true if the loop contains an instruction that inhibits
/// using the hardware loop.
- bool containsInvalidInstruction(MachineLoop *L) const;
+ bool containsInvalidInstruction(MachineLoop *L, bool IsInnerHWLoop) const;
/// \brief Given a loop, check if we can convert it to a hardware loop.
/// If so, then perform the conversion and return true.
- bool convertToHardwareLoop(MachineLoop *L);
+ bool convertToHardwareLoop(MachineLoop *L, bool &L0used, bool &L1used);
/// \brief Return true if the instruction is now dead.
bool isDead(const MachineInstr *MI,
@@ -307,18 +308,10 @@ INITIALIZE_PASS_DEPENDENCY(MachineLoopIn
INITIALIZE_PASS_END(HexagonHardwareLoops, "hwloops",
"Hexagon Hardware Loops", false, false)
-
-/// \brief Returns true if the instruction is a hardware loop instruction.
-static bool isHardwareLoop(const MachineInstr *MI) {
- return MI->getOpcode() == Hexagon::J2_loop0r ||
- MI->getOpcode() == Hexagon::J2_loop0i;
-}
-
FunctionPass *llvm::createHexagonHardwareLoops() {
return new HexagonHardwareLoops();
}
-
bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n");
@@ -329,12 +322,12 @@ bool HexagonHardwareLoops::runOnMachineF
MDT = &getAnalysis<MachineDominatorTree>();
TII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
- for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end();
- I != E; ++I) {
- MachineLoop *L = *I;
- if (!L->getParentLoop())
- Changed |= convertToHardwareLoop(L);
- }
+ for (auto &L : *MLI)
+ if (!L->getParentLoop()) {
+ bool L0Used = false;
+ bool L1Used = false;
+ Changed |= convertToHardwareLoop(L, L0Used, L1Used);
+ }
return Changed;
}
@@ -467,27 +460,27 @@ HexagonHardwareLoops::getComparisonKind(
case Hexagon::C2_cmpeqi:
case Hexagon::C2_cmpeq:
case Hexagon::C2_cmpeqp:
- Cmp = Comparison::Kind::EQ;
+ Cmp = Comparison::EQ;
break;
case Hexagon::C4_cmpneq:
case Hexagon::C4_cmpneqi:
- Cmp = Comparison::Kind::NE;
+ Cmp = Comparison::NE;
break;
case Hexagon::C4_cmplte:
- Cmp = Comparison::Kind::LEs;
+ Cmp = Comparison::LEs;
break;
case Hexagon::C4_cmplteu:
- Cmp = Comparison::Kind::LEu;
+ Cmp = Comparison::LEu;
break;
case Hexagon::C2_cmpgtui:
case Hexagon::C2_cmpgtu:
case Hexagon::C2_cmpgtup:
- Cmp = Comparison::Kind::GTu;
+ Cmp = Comparison::GTu;
break;
case Hexagon::C2_cmpgti:
case Hexagon::C2_cmpgt:
case Hexagon::C2_cmpgtp:
- Cmp = Comparison::Kind::GTs;
+ Cmp = Comparison::GTs;
break;
default:
return (Comparison::Kind)0;
@@ -749,7 +742,7 @@ CountValue *HexagonHardwareLoops::comput
MachineBasicBlock::iterator InsertPos = PH->getFirstTerminator();
DebugLoc DL;
if (InsertPos != PH->end())
- InsertPos->getDebugLoc();
+ DL = InsertPos->getDebugLoc();
// If Start is an immediate and End is a register, the trip count
// will be "reg - imm". Hexagon's "subtract immediate" instruction
@@ -828,7 +821,7 @@ CountValue *HexagonHardwareLoops::comput
const MCInstrDesc &SubD = RegToReg ? TII->get(Hexagon::A2_sub) :
(RegToImm ? TII->get(Hexagon::A2_subri) :
TII->get(Hexagon::A2_addi));
- if (RegToReg || RegToImm) {
+ if (RegToReg || RegToImm) {
unsigned SubR = MRI->createVirtualRegister(IntRC);
MachineInstrBuilder SubIB =
BuildMI(*PH, InsertPos, DL, SubD, SubR);
@@ -902,51 +895,50 @@ CountValue *HexagonHardwareLoops::comput
return new CountValue(CountValue::CV_Register, CountR, CountSR);
}
-
/// \brief Return true if the operation is invalid within hardware loop.
-bool HexagonHardwareLoops::isInvalidLoopOperation(
- const MachineInstr *MI) const {
+bool HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI,
+ bool IsInnerHWLoop) const {
// Call is not allowed because the callee may use a hardware loop except for
// the case when the call never returns.
if (MI->getDesc().isCall() && MI->getOpcode() != Hexagon::CALLv3nr)
return true;
- // do not allow nested hardware loops
- if (isHardwareLoop(MI))
- return true;
-
- // check if the instruction defines a hardware loop register
+ // Check if the instruction defines a hardware loop register.
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || !MO.isDef())
continue;
unsigned R = MO.getReg();
- if (R == Hexagon::LC0 || R == Hexagon::LC1 ||
- R == Hexagon::SA0 || R == Hexagon::SA1)
+ if (IsInnerHWLoop && (R == Hexagon::LC0 || R == Hexagon::SA0 ||
+ R == Hexagon::LC1 || R == Hexagon::SA1))
+ return true;
+ if (!IsInnerHWLoop && (R == Hexagon::LC1 || R == Hexagon::SA1))
return true;
}
return false;
}
-
-/// \brief - Return true if the loop contains an instruction that inhibits
-/// the use of the hardware loop function.
-bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L) const {
+/// \brief Return true if the loop contains an instruction that inhibits
+/// the use of the hardware loop instruction.
+bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L,
+ bool IsInnerHWLoop) const {
const std::vector<MachineBasicBlock *> &Blocks = L->getBlocks();
+ DEBUG(dbgs() << "\nhw_loop head, BB#" << Blocks[0]->getNumber(););
for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
MachineBasicBlock *MBB = Blocks[i];
for (MachineBasicBlock::iterator
MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) {
const MachineInstr *MI = &*MII;
- if (isInvalidLoopOperation(MI))
+ if (isInvalidLoopOperation(MI, IsInnerHWLoop)) {
+ DEBUG(dbgs()<< "\nCannot convert to hw_loop due to:"; MI->dump(););
return true;
+ }
}
}
return false;
}
-
/// \brief Returns true if the instruction is dead. This was essentially
/// copied from DeadMachineInstructionElim::isDead, but with special cases
/// for inline asm, physical registers and instructions with side effects
@@ -1041,19 +1033,47 @@ void HexagonHardwareLoops::removeIfDead(
///
/// The code makes several assumptions about the representation of the loop
/// in llvm.
-bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
+bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L,
+ bool &RecL0used,
+ bool &RecL1used) {
// This is just for sanity.
assert(L->getHeader() && "Loop without a header?");
bool Changed = false;
+ bool L0Used = false;
+ bool L1Used = false;
+
// Process nested loops first.
- for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
- Changed |= convertToHardwareLoop(*I);
+ for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
+ Changed |= convertToHardwareLoop(*I, RecL0used, RecL1used);
+ L0Used |= RecL0used;
+ L1Used |= RecL1used;
+ }
// If a nested loop has been converted, then we can't convert this loop.
- if (Changed)
+ if (Changed && L0Used && L1Used)
return Changed;
+ unsigned LOOP_i;
+ unsigned LOOP_r;
+ unsigned ENDLOOP;
+
+ // Flag used to track loopN instruction:
+ // 1 - Hardware loop is being generated for the inner most loop.
+ // 0 - Hardware loop is being generated for the outer loop.
+ unsigned IsInnerHWLoop = 1;
+
+ if (L0Used) {
+ LOOP_i = Hexagon::J2_loop1i;
+ LOOP_r = Hexagon::J2_loop1r;
+ ENDLOOP = Hexagon::ENDLOOP1;
+ IsInnerHWLoop = 0;
+ } else {
+ LOOP_i = Hexagon::J2_loop0i;
+ LOOP_r = Hexagon::J2_loop0r;
+ ENDLOOP = Hexagon::ENDLOOP0;
+ }
+
#ifndef NDEBUG
// Stop trying after reaching the limit (if any).
int Limit = HWLoopLimit;
@@ -1065,10 +1085,10 @@ bool HexagonHardwareLoops::convertToHard
#endif
// Does the loop contain any invalid instructions?
- if (containsInvalidInstruction(L))
+ if (containsInvalidInstruction(L, IsInnerHWLoop))
return false;
- MachineBasicBlock *LastMBB = L->getExitingBlock();
+ MachineBasicBlock *LastMBB = getExitingBlock(L);
// Don't generate hw loop if the loop has more than one exit.
if (!LastMBB)
return false;
@@ -1141,8 +1161,7 @@ bool HexagonHardwareLoops::convertToHard
BuildMI(*Preheader, InsertPos, DL, TII->get(TargetOpcode::COPY), CountReg)
.addReg(TripCount->getReg(), 0, TripCount->getSubReg());
// Add the Loop instruction to the beginning of the loop.
- BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::J2_loop0r))
- .addMBB(LoopStart)
+ BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r)).addMBB(LoopStart)
.addReg(CountReg);
} else {
assert(TripCount->isImm() && "Expecting immediate value for trip count");
@@ -1150,14 +1169,14 @@ bool HexagonHardwareLoops::convertToHard
// if the immediate fits in the instructions. Otherwise, we need to
// create a new virtual register.
int64_t CountImm = TripCount->getImm();
- if (!TII->isValidOffset(Hexagon::J2_loop0i, CountImm)) {
+ if (!TII->isValidOffset(LOOP_i, CountImm)) {
unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::A2_tfrsi), CountReg)
.addImm(CountImm);
- BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::J2_loop0r))
+ BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r))
.addMBB(LoopStart).addReg(CountReg);
} else
- BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::J2_loop0i))
+ BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_i))
.addMBB(LoopStart).addImm(CountImm);
}
@@ -1171,8 +1190,7 @@ bool HexagonHardwareLoops::convertToHard
// Replace the loop branch with an endloop instruction.
DebugLoc LastIDL = LastI->getDebugLoc();
- BuildMI(*LastMBB, LastI, LastIDL,
- TII->get(Hexagon::ENDLOOP0)).addMBB(LoopStart);
+ BuildMI(*LastMBB, LastI, LastIDL, TII->get(ENDLOOP)).addMBB(LoopStart);
// The loop ends with either:
// - a conditional branch followed by an unconditional branch, or
@@ -1200,6 +1218,15 @@ bool HexagonHardwareLoops::convertToHard
removeIfDead(OldInsts[i]);
++NumHWLoops;
+
+ // Set RecL1used and RecL0used only after hardware loop has been
+ // successfully generated. Doing it earlier can cause wrong loop instruction
+ // to be used.
+ if (L0Used) // Loop0 was already used. So, the correct loop must be loop1.
+ RecL1used = true;
+ else
+ RecL0used = true;
+
return true;
}
@@ -1533,7 +1560,7 @@ MachineBasicBlock *HexagonHardwareLoops:
if (Header->pred_size() > 2) {
// Ensure that the header has only two predecessors: the preheader and
// the loop latch. Any additional predecessors of the header should
- // join at the newly created preheader. Inspect all PHI nodes from the
+ // join at the newly created preheader. Inspect all PHI nodes from the
// header and create appropriate corresponding PHI nodes in the preheader.
for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
Added: llvm/trunk/test/CodeGen/Hexagon/hwloop-loop1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/hwloop-loop1.ll?rev=237266&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Hexagon/hwloop-loop1.ll (added)
+++ llvm/trunk/test/CodeGen/Hexagon/hwloop-loop1.ll Wed May 13 12:56:03 2015
@@ -0,0 +1,68 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+;
+; Generate loop1 instruction for double loop sequence.
+
+; CHECK: loop0(.LBB{{.}}_{{.}}, #100)
+; CHECK: endloop0
+; CHECK: loop1(.LBB{{.}}_{{.}}, #100)
+; CHECK: loop0(.LBB{{.}}_{{.}}, #100)
+; CHECK: endloop0
+; CHECK: endloop1
+
+define i32 @main() #0 {
+entry:
+ %array = alloca [100 x i32], align 8
+ %doublearray = alloca [100 x [100 x i32]], align 8
+ %0 = bitcast [100 x i32]* %array to i8*
+ call void @llvm.lifetime.start(i64 400, i8* %0) #1
+ %1 = bitcast [100 x [100 x i32]]* %doublearray to i8*
+ call void @llvm.lifetime.start(i64 40000, i8* %1) #1
+ %arrayidx1 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %doublearray, i32 0, i32 10, i32 10
+ %arrayidx2.gep = getelementptr [100 x i32], [100 x i32]* %array, i32 0, i32 0
+ br label %for.body
+
+for.body:
+ %2 = phi i32 [ undef, %entry ], [ %.pre, %for.body.for.body_crit_edge ]
+ %sum.031 = phi i32 [ undef, %entry ], [ %add, %for.body.for.body_crit_edge ]
+ %arrayidx2.phi = phi i32* [ %arrayidx2.gep, %entry ], [ %arrayidx2.inc, %for.body.for.body_crit_edge ]
+ %i.030 = phi i32 [ 1, %entry ], [ %phitmp, %for.body.for.body_crit_edge ]
+ %add = add nsw i32 %2, %sum.031
+ %exitcond33 = icmp eq i32 %i.030, 100
+ %arrayidx2.inc = getelementptr i32, i32* %arrayidx2.phi, i32 1
+ br i1 %exitcond33, label %for.cond7.preheader.preheader, label %for.body.for.body_crit_edge
+
+for.cond7.preheader.preheader:
+ br label %for.cond7.preheader
+
+for.body.for.body_crit_edge:
+ %.pre = load i32, i32* %arrayidx2.inc, align 4
+ %phitmp = add i32 %i.030, 1
+ br label %for.body
+
+for.cond7.preheader:
+ %i.129 = phi i32 [ %inc16, %for.inc15 ], [ 0, %for.cond7.preheader.preheader ]
+ br label %for.body9
+
+for.body9:
+ %j.028 = phi i32 [ 0, %for.cond7.preheader ], [ %inc13, %for.body9 ]
+ %arrayidx11 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %doublearray, i32 0, i32 %i.129, i32 %j.028
+ store i32 %add, i32* %arrayidx11, align 4
+ %inc13 = add nsw i32 %j.028, 1
+ %exitcond = icmp eq i32 %inc13, 100
+ br i1 %exitcond, label %for.inc15, label %for.body9
+
+for.inc15:
+ %inc16 = add nsw i32 %i.129, 1
+ %exitcond32 = icmp eq i32 %inc16, 100
+ br i1 %exitcond32, label %for.end17, label %for.cond7.preheader
+
+for.end17:
+ %3 = load i32, i32* %arrayidx1, align 8
+ call void @llvm.lifetime.end(i64 40000, i8* %1) #1
+ call void @llvm.lifetime.end(i64 400, i8* %0) #1
+ ret i32 %3
+}
+
+declare void @llvm.lifetime.start(i64, i8* nocapture) #1
+
+declare void @llvm.lifetime.end(i64, i8* nocapture) #1
More information about the llvm-commits mailing list