[llvm] r243555 - Roll forward r242871
Jingyue Wu
jingyue at google.com
Wed Jul 29 11:59:09 PDT 2015
Author: jingyue
Date: Wed Jul 29 13:59:09 2015
New Revision: 243555
URL: http://llvm.org/viewvc/llvm-project?rev=243555&view=rev
Log:
Roll forward r242871
r242871 missed one place that should be guarded with
TargetRegisterInfo::isPhysicalRegister. This patch fixes that.
Added:
llvm/trunk/test/CodeGen/NVPTX/branch-fold.ll
Modified:
llvm/trunk/lib/CodeGen/BranchFolding.cpp
llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.cpp
Modified: llvm/trunk/lib/CodeGen/BranchFolding.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/BranchFolding.cpp?rev=243555&r1=243554&r2=243555&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/BranchFolding.cpp (original)
+++ llvm/trunk/lib/CodeGen/BranchFolding.cpp Wed Jul 29 13:59:09 2015
@@ -12,7 +12,8 @@
// it then removes.
//
// Note that this pass must be run after register allocation, it cannot handle
-// SSA form.
+// SSA form. It also must handle virtual registers for targets that emit virtual
+// ISA (e.g. NVPTX).
//
//===----------------------------------------------------------------------===//
@@ -150,9 +151,13 @@ bool BranchFolder::OptimizeImpDefsBlock(
if (!I->isImplicitDef())
break;
unsigned Reg = I->getOperand(0).getReg();
- for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
- SubRegs.isValid(); ++SubRegs)
- ImpDefRegs.insert(*SubRegs);
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
+ SubRegs.isValid(); ++SubRegs)
+ ImpDefRegs.insert(*SubRegs);
+ } else {
+ ImpDefRegs.insert(Reg);
+ }
++I;
}
if (ImpDefRegs.empty())
@@ -1573,6 +1578,17 @@ static MachineBasicBlock *findFalseBlock
return nullptr;
}
+template <class Container>
+static void addRegAndItsAliases(unsigned Reg, const TargetRegisterInfo *TRI,
+ Container &Set) {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ Set.insert(*AI);
+ } else {
+ Set.insert(Reg);
+ }
+}
+
/// findHoistingInsertPosAndDeps - Find the location to move common instructions
/// in successors to. The location is usually just before the terminator,
/// however if the terminator is a conditional branch and its previous
@@ -1598,8 +1614,7 @@ MachineBasicBlock::iterator findHoisting
if (!Reg)
continue;
if (MO.isUse()) {
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- Uses.insert(*AI);
+ addRegAndItsAliases(Reg, TRI, Uses);
} else {
if (!MO.isDead())
// Don't try to hoist code in the rare case the terminator defines a
@@ -1608,8 +1623,7 @@ MachineBasicBlock::iterator findHoisting
// If the terminator defines a register, make sure we don't hoist
// the instruction whose def might be clobbered by the terminator.
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- Defs.insert(*AI);
+ addRegAndItsAliases(Reg, TRI, Defs);
}
}
@@ -1665,15 +1679,15 @@ MachineBasicBlock::iterator findHoisting
if (!Reg)
continue;
if (MO.isUse()) {
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- Uses.insert(*AI);
+ addRegAndItsAliases(Reg, TRI, Uses);
} else {
if (Uses.erase(Reg)) {
- for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
- Uses.erase(*SubRegs); // Use sub-registers to be conservative
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ Uses.erase(*SubRegs); // Use sub-registers to be conservative
+ }
}
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- Defs.insert(*AI);
+ addRegAndItsAliases(Reg, TRI, Defs);
}
}
@@ -1800,8 +1814,12 @@ bool BranchFolder::HoistCommonCodeInSucc
unsigned Reg = MO.getReg();
if (!Reg || !LocalDefsSet.count(Reg))
continue;
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- LocalDefsSet.erase(*AI);
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ LocalDefsSet.erase(*AI);
+ } else {
+ LocalDefsSet.erase(Reg);
+ }
}
// Track local defs so we can update liveins.
@@ -1813,8 +1831,7 @@ bool BranchFolder::HoistCommonCodeInSucc
if (!Reg)
continue;
LocalDefs.push_back(Reg);
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- LocalDefsSet.insert(*AI);
+ addRegAndItsAliases(Reg, TRI, LocalDefsSet);
}
HasDups = true;
Modified: llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.cpp?rev=243555&r1=243554&r2=243555&view=diff
==============================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.cpp Wed Jul 29 13:59:09 2015
@@ -174,7 +174,6 @@ void NVPTXPassConfig::addIRPasses() {
// NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp).
disablePass(&PrologEpilogCodeInserterID);
disablePass(&MachineCopyPropagationID);
- disablePass(&BranchFolderPassID);
disablePass(&TailDuplicateID);
addPass(createNVPTXImageOptimizerPass());
Added: llvm/trunk/test/CodeGen/NVPTX/branch-fold.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/branch-fold.ll?rev=243555&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/NVPTX/branch-fold.ll (added)
+++ llvm/trunk/test/CodeGen/NVPTX/branch-fold.ll Wed Jul 29 13:59:09 2015
@@ -0,0 +1,40 @@
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 -disable-cgp | FileCheck %s
+; Disable CGP which also folds branches, so that only BranchFolding is under
+; the spotlight.
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+define void @foo(i32 %x, float* %output) {
+; CHECK-LABEL: .visible .func foo(
+; CHECK-NOT: bra.uni
+; CHECK-NOT: LBB0_
+ %1 = icmp eq i32 %x, 1
+ br i1 %1, label %then, label %else
+
+then:
+ br label %merge
+
+else:
+ br label %merge
+
+merge:
+ store float 2.0, float* %output
+ ret void
+}
+
+; PR24299. no crash
+define ptx_kernel void @hoge() #0 {
+; CHECK-LABEL: .visible .entry hoge(
+bb:
+ br i1 undef, label %bb1, label %bb4
+
+bb1: ; preds = %bb1, %bb
+ %tmp = phi i64 [ %tmp2, %bb1 ], [ undef, %bb ]
+ %tmp2 = add nsw i64 %tmp, 1
+ %tmp3 = icmp sle i64 %tmp, 0
+ br i1 %tmp3, label %bb1, label %bb4
+
+bb4: ; preds = %bb4, %bb1, %bb
+ br label %bb4
+}
More information about the llvm-commits mailing list