[llvm] r326703 - Pass Divergence Analysis data to Selection DAG to drive divergence
Alexander Timofeev via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 5 07:12:22 PST 2018
Author: alex-t
Date: Mon Mar 5 07:12:21 2018
New Revision: 326703
URL: http://llvm.org/viewvc/llvm-project?rev=326703&view=rev
Log:
Pass Divergence Analysis data to Selection DAG to drive divergence
dependent instruction selection.
Differential revision: https://reviews.llvm.org/D35267
Modified:
llvm/trunk/include/llvm/Analysis/DivergenceAnalysis.h
llvm/trunk/include/llvm/CodeGen/FunctionLoweringInfo.h
llvm/trunk/include/llvm/CodeGen/SelectionDAG.h
llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h
llvm/trunk/include/llvm/CodeGen/TargetLowering.h
llvm/trunk/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h
llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/SMInstructions.td
llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
llvm/trunk/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
Modified: llvm/trunk/include/llvm/Analysis/DivergenceAnalysis.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/DivergenceAnalysis.h?rev=326703&r1=326702&r2=326703&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Analysis/DivergenceAnalysis.h (original)
+++ llvm/trunk/include/llvm/Analysis/DivergenceAnalysis.h Mon Mar 5 07:12:21 2018
@@ -13,6 +13,8 @@
// better decisions.
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_ANALYSIS_DIVERGENCE_ANALYSIS_H
+#define LLVM_ANALYSIS_DIVERGENCE_ANALYSIS_H
#include "llvm/ADT/DenseSet.h"
#include "llvm/IR/Function.h"
@@ -46,3 +48,5 @@ private:
DenseSet<const Value *> DivergentValues;
};
} // End llvm namespace
+
+#endif //LLVM_ANALYSIS_DIVERGENCE_ANALYSIS_H
\ No newline at end of file
Modified: llvm/trunk/include/llvm/CodeGen/FunctionLoweringInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/FunctionLoweringInfo.h?rev=326703&r1=326702&r2=326703&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/FunctionLoweringInfo.h (original)
+++ llvm/trunk/include/llvm/CodeGen/FunctionLoweringInfo.h Mon Mar 5 07:12:21 2018
@@ -118,6 +118,17 @@ public:
/// cross-basic-block values.
DenseMap<const Value *, unsigned> ValueMap;
+ /// VirtReg2Value map is needed by the Divergence Analysis driven
+ /// instruction selection. It is the inverse of ValueMap and is computed
+ /// lazily, on demand. It is used to get the Value corresponding
+ /// to a live-in virtual register and is queried from
+ /// TargetLowering::isSDNodeSourceOfDivergence.
+ DenseMap<unsigned, const Value*> VirtReg2Value;
+
+ /// This method is called from TargetLowering::isSDNodeSourceOfDivergence
+ /// to get the Value corresponding to the live-in virtual register.
+ const Value * getValueFromVirtualReg(unsigned Vreg);
+
/// Track virtual registers created for exception pointers.
DenseMap<const Value *, unsigned> CatchPadExceptionPointers;
Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAG.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/SelectionDAG.h?rev=326703&r1=326702&r2=326703&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/SelectionDAG.h (original)
+++ llvm/trunk/include/llvm/CodeGen/SelectionDAG.h Mon Mar 5 07:12:21 2018
@@ -28,8 +28,10 @@
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineValueType.h"
@@ -217,6 +219,9 @@ class SelectionDAG {
LLVMContext *Context;
CodeGenOpt::Level OptLevel;
+ DivergenceAnalysis * DA = nullptr;
+ FunctionLoweringInfo * FLI = nullptr;
+
/// The function-level optimization remark emitter. Used to emit remarks
/// whenever manipulating the DAG.
OptimizationRemarkEmitter *ORE;
@@ -346,19 +351,7 @@ private:
.getRawSubclassData();
}
- void createOperands(SDNode *Node, ArrayRef<SDValue> Vals) {
- assert(!Node->OperandList && "Node already has operands");
- SDUse *Ops = OperandRecycler.allocate(
- ArrayRecycler<SDUse>::Capacity::get(Vals.size()), OperandAllocator);
-
- for (unsigned I = 0; I != Vals.size(); ++I) {
- Ops[I].setUser(Node);
- Ops[I].setInitial(Vals[I]);
- }
- Node->NumOperands = Vals.size();
- Node->OperandList = Ops;
- checkForCycles(Node);
- }
+ void createOperands(SDNode *Node, ArrayRef<SDValue> Vals);
void removeOperands(SDNode *Node) {
if (!Node->OperandList)
@@ -369,7 +362,7 @@ private:
Node->NumOperands = 0;
Node->OperandList = nullptr;
}
-
+ void CreateTopologicalOrder(std::vector<SDNode*>& Order);
public:
explicit SelectionDAG(const TargetMachine &TM, CodeGenOpt::Level);
SelectionDAG(const SelectionDAG &) = delete;
@@ -378,7 +371,12 @@ public:
/// Prepare this SelectionDAG to process code in the given MachineFunction.
void init(MachineFunction &NewMF, OptimizationRemarkEmitter &NewORE,
- Pass *PassPtr, const TargetLibraryInfo *LibraryInfo);
+ Pass *PassPtr, const TargetLibraryInfo *LibraryInfo,
+ DivergenceAnalysis * DA);
+
+  /// Remember \p FuncInfo; it is the FunctionLoweringInfo handed to the
+  /// target's isSDNodeSourceOfDivergence hook when divergence is computed.
+  void setFunctionLoweringInfo(FunctionLoweringInfo *FuncInfo) {
+    this->FLI = FuncInfo;
+  }
/// Clear state and free memory necessary to make this
/// SelectionDAG ready to process a new block.
@@ -463,6 +461,8 @@ public:
return Root;
}
+ void VerifyDAGDiverence();
+
/// This iterates over the nodes in the SelectionDAG, folding
/// certain types of nodes together, or eliminating superfluous nodes. The
/// Level argument controls whether Combine is allowed to produce nodes and
@@ -1128,6 +1128,9 @@ public:
SDValue Op3, SDValue Op4, SDValue Op5);
SDNode *UpdateNodeOperands(SDNode *N, ArrayRef<SDValue> Ops);
+ // Propagates the change in divergence to users
+ void updateDivergence(SDNode * N);
+
/// These are used for target selectors to *mutate* the
/// specified node to have the specified return type, Target opcode, and
/// operands. Note that target opcodes are stored as
Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h?rev=326703&r1=326702&r2=326703&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h (original)
+++ llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h Mon Mar 5 07:12:21 2018
@@ -466,11 +466,13 @@ protected:
friend class SDNode;
friend class MemIntrinsicSDNode;
friend class MemSDNode;
+ friend class SelectionDAG;
uint16_t HasDebugValue : 1;
uint16_t IsMemIntrinsic : 1;
+ uint16_t IsDivergent : 1;
};
- enum { NumSDNodeBits = 2 };
+ enum { NumSDNodeBits = 3 };
class ConstantSDNodeBitfields {
friend class ConstantSDNode;
@@ -662,6 +664,8 @@ public:
bool getHasDebugValue() const { return SDNodeBits.HasDebugValue; }
void setHasDebugValue(bool b) { SDNodeBits.HasDebugValue = b; }
+ bool isDivergent() const { return SDNodeBits.IsDivergent; }
+
/// Return true if there are no uses of this node.
bool use_empty() const { return UseList == nullptr; }
Modified: llvm/trunk/include/llvm/CodeGen/TargetLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/TargetLowering.h?rev=326703&r1=326702&r2=326703&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/TargetLowering.h (original)
+++ llvm/trunk/include/llvm/CodeGen/TargetLowering.h Mon Mar 5 07:12:21 2018
@@ -29,6 +29,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineValueType.h"
@@ -2562,6 +2563,16 @@ public:
bool isPositionIndependent() const;
+  /// Target hook: return true if \p N is by itself a source of divergence.
+  /// \p FLI and \p DA are passed through for the implementation to consult.
+  /// The default implementation reports no node as a source of divergence.
+  virtual bool isSDNodeSourceOfDivergence(const SDNode *N,
+                                          FunctionLoweringInfo *FLI,
+                                          DivergenceAnalysis *DA) const {
+    return false;
+  }
+
+  /// Target hook: return true if \p N must be treated as uniform.
+  /// The default implementation reports no node as always uniform.
+  virtual bool isSDNodeAlwaysUniform(const SDNode *N) const {
+    return false;
+  }
+
/// Returns true by value, base pointer and offset pointer and addressing mode
/// by reference if the node's address can be legally represented as
/// pre-indexed load / store address.
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp?rev=326703&r1=326702&r2=326703&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp Mon Mar 5 07:12:21 2018
@@ -547,3 +547,13 @@ FunctionLoweringInfo::getOrCreateSwiftEr
}
return std::make_pair(It->second, false);
}
+
+/// Return the IR Value whose virtual register is \p Vreg, or nullptr when no
+/// entry of ValueMap maps to that register.
+///
+/// VirtReg2Value is the inverse of ValueMap. It is filled lazily, whenever
+/// this method finds the cache empty.
+const Value *
+FunctionLoweringInfo::getValueFromVirtualReg(unsigned Vreg) {
+  if (VirtReg2Value.empty()) {
+    for (auto &P : ValueMap)
+      VirtReg2Value[P.second] = P.first;
+  }
+  // Use lookup() rather than operator[]: a miss must not insert a null entry.
+  // Such an entry would make the cache non-empty and so suppress the lazy
+  // fill above if ValueMap had still been empty at the time of the miss.
+  return VirtReg2Value.lookup(Vreg);
+}
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=326703&r1=326702&r2=326703&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Mon Mar 5 07:12:21 2018
@@ -950,7 +950,8 @@ SelectionDAG::SelectionDAG(const TargetM
void SelectionDAG::init(MachineFunction &NewMF,
OptimizationRemarkEmitter &NewORE,
- Pass *PassPtr, const TargetLibraryInfo *LibraryInfo) {
+ Pass *PassPtr, const TargetLibraryInfo *LibraryInfo,
+ DivergenceAnalysis * Divergence) {
MF = &NewMF;
SDAGISelPass = PassPtr;
ORE = &NewORE;
@@ -958,6 +959,7 @@ void SelectionDAG::init(MachineFunction
TSI = getSubtarget().getSelectionDAGInfo();
LibInfo = LibraryInfo;
Context = &MF->getFunction().getContext();
+ DA = Divergence;
}
SelectionDAG::~SelectionDAG() {
@@ -1713,6 +1715,7 @@ SDValue SelectionDAG::getRegister(unsign
return SDValue(E, 0);
auto *N = newSDNode<RegisterSDNode>(RegNo, VT);
+ N->SDNodeBits.IsDivergent = TLI->isSDNodeSourceOfDivergence(N, FLI, DA);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
@@ -6699,6 +6702,7 @@ SDNode *SelectionDAG::UpdateNodeOperands
if (N->OperandList[1] != Op2)
N->OperandList[1].set(Op2);
+ updateDivergence(N);
// If this gets put into a CSE map, add it.
if (InsertPos) CSEMap.InsertNode(N, InsertPos);
return N;
@@ -7340,8 +7344,9 @@ void SelectionDAG::ReplaceAllUsesWith(SD
SDUse &Use = UI.getUse();
++UI;
Use.set(To);
+ if (To->isDivergent() != From->isDivergent())
+ updateDivergence(User);
} while (UI != UE && *UI == User);
-
// Now that we have modified User, add it back to the CSE maps. If it
// already exists there, recursively merge the results together.
AddModifiedNodeToCSEMaps(User);
@@ -7395,6 +7400,8 @@ void SelectionDAG::ReplaceAllUsesWith(SD
SDUse &Use = UI.getUse();
++UI;
Use.setNode(To);
+ if (To->isDivergent() != From->isDivergent())
+ updateDivergence(User);
} while (UI != UE && *UI == User);
// Now that we have modified User, add it back to the CSE maps. If it
@@ -7439,8 +7446,9 @@ void SelectionDAG::ReplaceAllUsesWith(SD
const SDValue &ToOp = To[Use.getResNo()];
++UI;
Use.set(ToOp);
+ if (To->getNode()->isDivergent() != From->isDivergent())
+ updateDivergence(User);
} while (UI != UE && *UI == User);
-
// Now that we have modified User, add it back to the CSE maps. If it
// already exists there, recursively merge the results together.
AddModifiedNodeToCSEMaps(User);
@@ -7498,8 +7506,9 @@ void SelectionDAG::ReplaceAllUsesOfValue
++UI;
Use.set(To);
+ if (To->isDivergent() != From->isDivergent())
+ updateDivergence(User);
} while (UI != UE && *UI == User);
-
// We are iterating over all uses of the From node, so if a use
// doesn't use the specific value, no changes are made.
if (!UserRemovedFromCSEMaps)
@@ -7532,6 +7541,70 @@ namespace {
} // end anonymous namespace
+/// Recompute the IsDivergent bit of \p N from the target hook and from its
+/// non-chain operands; if the bit flipped, do the same for every user of N.
+void SelectionDAG::updateDivergence(SDNode *N) {
+  // Nodes the target declares always uniform keep their current bit.
+  if (TLI->isSDNodeAlwaysUniform(N))
+    return;
+
+  bool NewBit = TLI->isSDNodeSourceOfDivergence(N, FLI, DA);
+  for (auto &Op : N->ops()) {
+    // Operands of type MVT::Other are not taken into account.
+    if (Op.Val.getValueType() == MVT::Other)
+      continue;
+    NewBit |= Op.getNode()->isDivergent();
+  }
+
+  // Nothing changed, so the users are unaffected.
+  if (N->SDNodeBits.IsDivergent == NewBit)
+    return;
+
+  N->SDNodeBits.IsDivergent = NewBit;
+  for (auto U : N->uses())
+    updateDivergence(U);
+}
+
+
+/// Append the nodes of the DAG to \p Order so that a node is appended only
+/// after all of its operand uses have been accounted for: nodes without
+/// operands seed the list, and each user is appended once its remaining
+/// operand count drops to zero.
+void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode *> &Order) {
+  DenseMap<SDNode *, unsigned> Degree;
+  Order.reserve(AllNodes.size());
+  for (auto &N : allnodes()) {
+    unsigned NOps = N.getNumOperands();
+    Degree[&N] = NOps;
+    if (0 == NOps)
+      Order.push_back(&N);
+  }
+  // Order grows while it is being walked, so index it instead of holding an
+  // iterator: an index stays valid even if push_back has to reallocate.
+  for (size_t I = 0; I != Order.size(); ++I) {
+    SDNode *N = Order[I];
+    for (auto U : N->uses()) {
+      unsigned &UnsortedOps = Degree[U];
+      if (0 == --UnsortedOps)
+        Order.push_back(U);
+    }
+  }
+}
+
+/// Recompute divergence for every node from scratch, in topological order,
+/// and assert that the result matches the IsDivergent bit stored on each
+/// node. The only observable effect is the assertion, so the whole body is
+/// compiled out when NDEBUG is defined and the function then does nothing.
+void SelectionDAG::VerifyDAGDiverence() {
+#ifndef NDEBUG
+  std::vector<SDNode *> TopoOrder;
+  CreateTopologicalOrder(TopoOrder);
+  const TargetLowering &TLI = getTargetLoweringInfo();
+  DenseMap<const SDNode *, bool> DivergenceMap;
+  for (auto &N : allnodes())
+    DivergenceMap[&N] = false;
+  for (auto N : TopoOrder) {
+    bool IsSDNodeDivergent = TLI.isSDNodeSourceOfDivergence(N, FLI, DA);
+    for (auto &Op : N->ops()) {
+      // Operands of type MVT::Other (chains) do not contribute.
+      if (Op.Val.getValueType() != MVT::Other)
+        IsSDNodeDivergent |= DivergenceMap[Op.getNode()];
+    }
+    if (IsSDNodeDivergent && !TLI.isSDNodeAlwaysUniform(N))
+      DivergenceMap[N] = true;
+  }
+  for (auto &N : allnodes()) {
+    assert(DivergenceMap[&N] == N.isDivergent() &&
+           "Divergence bit inconsistency detected\n");
+  }
+#endif
+}
+
+
/// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving
/// uses of other values produced by From.getNode() alone. The same value
/// may appear in both the From and To list. The Deleted vector is
@@ -8337,6 +8410,26 @@ SDNode *SelectionDAG::isConstantFPBuildV
return nullptr;
}
+/// Allocate the operand array of \p Node, fill it from \p Vals, and compute
+/// the node's initial IsDivergent bit from its non-chain operands and the
+/// target hooks.
+void SelectionDAG::createOperands(SDNode *Node, ArrayRef<SDValue> Vals) {
+  assert(!Node->OperandList && "Node already has operands");
+  SDUse *Ops = OperandRecycler.allocate(
+      ArrayRecycler<SDUse>::Capacity::get(Vals.size()), OperandAllocator);
+
+  bool AnyDivergentOp = false;
+  for (unsigned I = 0; I != Vals.size(); ++I) {
+    SDUse &U = Ops[I];
+    U.setUser(Node);
+    U.setInitial(Vals[I]);
+    // Skip Chain. It does not carry divergence.
+    if (U.Val.getValueType() == MVT::Other)
+      continue;
+    if (!AnyDivergentOp)
+      AnyDivergentOp = U.getNode()->isDivergent();
+  }
+  Node->NumOperands = Vals.size();
+  Node->OperandList = Ops;
+
+  // The source-of-divergence hook is queried once the operands are attached.
+  const bool IsSource = TLI->isSDNodeSourceOfDivergence(Node, FLI, DA);
+  if (!TLI->isSDNodeAlwaysUniform(Node))
+    Node->SDNodeBits.IsDivergent = AnyDivergentOp || IsSource;
+  checkForCycles(Node);
+}
+
#ifndef NDEBUG
static void checkForCyclesHelper(const SDNode *N,
SmallPtrSetImpl<const SDNode*> &Visited,
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp?rev=326703&r1=326702&r2=326703&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp Mon Mar 5 07:12:21 2018
@@ -629,6 +629,8 @@ void SDNode::print_details(raw_ostream &
if (getNodeId() != -1)
OS << " [ID=" << getNodeId() << ']';
+ if (!(isa<ConstantSDNode>(this) || (isa<ConstantFPSDNode>(this))))
+ OS << "# D:" << isDivergent();
if (!G)
return;
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp?rev=326703&r1=326702&r2=326703&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp Mon Mar 5 07:12:21 2018
@@ -29,6 +29,7 @@
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
@@ -329,6 +330,7 @@ void SelectionDAGISel::getAnalysisUsage(
AU.addPreserved<StackProtector>();
AU.addPreserved<GCModuleInfo>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
if (UseMBPI && OptLevel != CodeGenOpt::None)
AU.addRequired<BranchProbabilityInfoWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
@@ -414,7 +416,8 @@ bool SelectionDAGISel::runOnMachineFunct
SplitCriticalSideEffectEdges(const_cast<Function &>(Fn), DT, LI);
- CurDAG->init(*MF, *ORE, this, LibInfo);
+ CurDAG->init(*MF, *ORE, this, LibInfo,
+ getAnalysisIfAvailable<DivergenceAnalysis>());
FuncInfo->set(Fn, *MF, CurDAG);
// Now get the optional analyzes if we want to.
@@ -711,6 +714,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG
int BlockNumber = -1;
(void)BlockNumber;
bool MatchFilterBB = false; (void)MatchFilterBB;
+ TargetTransformInfo &TTI =
+ getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*FuncInfo->Fn);
// Pre-type legalization allow creation of any node types.
CurDAG->NewNodesMustHaveLegalTypes = false;
@@ -744,6 +749,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG
CurDAG->Combine(BeforeLegalizeTypes, AA, OptLevel);
}
+ if (TTI.hasBranchDivergence())
+ CurDAG->VerifyDAGDiverence();
+
DEBUG(dbgs() << "Optimized lowered selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
<< "'\n";
@@ -761,6 +769,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG
Changed = CurDAG->LegalizeTypes();
}
+ if (TTI.hasBranchDivergence())
+ CurDAG->VerifyDAGDiverence();
+
DEBUG(dbgs() << "Type-legalized selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
<< "'\n";
@@ -780,6 +791,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG
CurDAG->Combine(AfterLegalizeTypes, AA, OptLevel);
}
+ if (TTI.hasBranchDivergence())
+ CurDAG->VerifyDAGDiverence();
+
DEBUG(dbgs() << "Optimized type-legalized selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
<< "'\n";
@@ -823,6 +837,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
<< "'\n";
CurDAG->dump());
+
+ if (TTI.hasBranchDivergence())
+ CurDAG->VerifyDAGDiverence();
}
if (ViewLegalizeDAGs && MatchFilterBB)
@@ -834,6 +851,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG
CurDAG->Legalize();
}
+ if (TTI.hasBranchDivergence())
+ CurDAG->VerifyDAGDiverence();
+
DEBUG(dbgs() << "Legalized selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
<< "'\n";
@@ -849,6 +869,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG
CurDAG->Combine(AfterLegalizeDAG, AA, OptLevel);
}
+ if (TTI.hasBranchDivergence())
+ CurDAG->VerifyDAGDiverence();
+
DEBUG(dbgs() << "Optimized legalized selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
<< "'\n";
@@ -1401,6 +1424,8 @@ void SelectionDAGISel::SelectAllBasicBlo
FuncInfo->MBB = FuncInfo->MBBMap[&Fn.getEntryBlock()];
FuncInfo->InsertPt = FuncInfo->MBB->begin();
+ CurDAG->setFunctionLoweringInfo(FuncInfo);
+
if (!FastIS) {
LowerArguments(Fn);
} else {
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp?rev=326703&r1=326702&r2=326703&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp Mon Mar 5 07:12:21 2018
@@ -27,6 +27,7 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
@@ -83,6 +84,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AMDGPUArgumentUsageInfo>();
+ AU.addRequired<DivergenceAnalysis>();
SelectionDAGISel::getAnalysisUsage(AU);
}
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp?rev=326703&r1=326702&r2=326703&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp Mon Mar 5 07:12:21 2018
@@ -25,6 +25,7 @@
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
+#include "Utils/AMDGPUBaseInfo.h"
#include "R600MachineFunctionInfo.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
@@ -748,6 +749,101 @@ bool AMDGPUTargetLowering::isCheapToSpec
return true;
}
+/// Return true for nodes that are treated as uniform regardless of their
+/// operands: EntryToken and TokenFactor, the amdgcn readfirstlane and
+/// readlane intrinsics, and loads whose memory operand is in the
+/// CONSTANT_ADDRESS_32BIT address space.
+bool AMDGPUTargetLowering::isSDNodeAlwaysUniform(const SDNode *N) const {
+  switch (N->getOpcode()) {
+  case ISD::EntryToken:
+  case ISD::TokenFactor:
+    return true;
+  case ISD::INTRINSIC_WO_CHAIN: {
+    // Operand 0 of INTRINSIC_WO_CHAIN holds the intrinsic ID.
+    unsigned IntrID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+    return IntrID == Intrinsic::amdgcn_readfirstlane ||
+           IntrID == Intrinsic::amdgcn_readlane;
+  }
+  case ISD::LOAD: {
+    // cast<>, not dyn_cast<>: the result is dereferenced unconditionally, so
+    // a type mismatch should assert instead of yielding a null pointer.
+    const LoadSDNode *L = cast<LoadSDNode>(N);
+    return L->getMemOperand()->getAddrSpace() ==
+           Subtarget->getAMDGPUAS().CONSTANT_ADDRESS_32BIT;
+  }
+  default:
+    break;
+  }
+  return false;
+}
+
+/// Return true if \p N itself introduces divergence, independent of its
+/// operands.
+///
+/// - Register / CopyFromReg: a physical register is divergent iff it is a
+///   VGPR. A live-in virtual register is divergent when its physical live-in
+///   is T0_X/T0_Y/T0_Z or VGPR0/VGPR1/VGPR2, or when the function does not
+///   use an entry-function calling convention. Otherwise the answer comes
+///   from \p DA for the IR value mapped to the register, and is true when
+///   \p DA is null.
+/// - LOAD: divergent when the memory operand is in PRIVATE_ADDRESS.
+/// - CALLSEQ_END: always divergent.
+/// - INTRINSIC_WO_CHAIN / INTRINSIC_W_CHAIN: decided by
+///   AMDGPU::isIntrinsicSourceOfDivergence on the intrinsic ID.
+///
+/// \p FLI is dereferenced for register nodes and must not be null there.
+bool AMDGPUTargetLowering::isSDNodeSourceOfDivergence(
+    const SDNode *N, FunctionLoweringInfo *FLI, DivergenceAnalysis *DA) const {
+  switch (N->getOpcode()) {
+  case ISD::Register:
+  case ISD::CopyFromReg: {
+    // For CopyFromReg the register node is operand 1.
+    const RegisterSDNode *R = nullptr;
+    if (N->getOpcode() == ISD::Register)
+      R = dyn_cast<RegisterSDNode>(N);
+    else
+      R = dyn_cast<RegisterSDNode>(N->getOperand(1));
+    if (!R)
+      break;
+
+    const MachineFunction *MF = FLI->MF;
+    const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
+    const MachineRegisterInfo &MRI = MF->getRegInfo();
+    const SIRegisterInfo &TRI = ST.getInstrInfo()->getRegisterInfo();
+    unsigned Reg = R->getReg();
+    if (TRI.isPhysicalRegister(Reg))
+      return TRI.isVGPR(MRI, Reg);
+
+    if (MRI.isLiveIn(Reg)) {
+      // Query the physical live-in once instead of once per comparison.
+      const unsigned PhysReg = MRI.getLiveInPhysReg(Reg);
+      // workitem.id.x workitem.id.y workitem.id.z
+      if (PhysReg == AMDGPU::T0_X || PhysReg == AMDGPU::T0_Y ||
+          PhysReg == AMDGPU::T0_Z || PhysReg == AMDGPU::VGPR0 ||
+          PhysReg == AMDGPU::VGPR1 || PhysReg == AMDGPU::VGPR2)
+        return true;
+      // Formal arguments of non-entry functions
+      // are conservatively considered divergent
+      if (!AMDGPU::isEntryFunctionCC(FLI->Fn->getCallingConv()))
+        return true;
+    }
+    return !DA || DA->isDivergent(FLI->getValueFromVirtualReg(Reg));
+  }
+  case ISD::LOAD: {
+    // cast<>, not dyn_cast<>: the result is dereferenced unconditionally.
+    const LoadSDNode *L = cast<LoadSDNode>(N);
+    if (L->getMemOperand()->getAddrSpace() ==
+        Subtarget->getAMDGPUAS().PRIVATE_ADDRESS)
+      return true;
+    break;
+  }
+  case ISD::CALLSEQ_END:
+    return true;
+  case ISD::INTRINSIC_WO_CHAIN:
+    // The intrinsic ID is operand 0 when there is no chain operand.
+    return AMDGPU::isIntrinsicSourceOfDivergence(
+        cast<ConstantSDNode>(N->getOperand(0))->getZExtValue());
+  case ISD::INTRINSIC_W_CHAIN:
+    // With a chain, the intrinsic ID is operand 1.
+    return AMDGPU::isIntrinsicSourceOfDivergence(
+        cast<ConstantSDNode>(N->getOperand(1))->getZExtValue());
+  }
+  return false;
+}
+
//===---------------------------------------------------------------------===//
// Target Properties
//===---------------------------------------------------------------------===//
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h?rev=326703&r1=326702&r2=326703&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h Mon Mar 5 07:12:21 2018
@@ -168,6 +168,9 @@ public:
bool isCheapToSpeculateCttz() const override;
bool isCheapToSpeculateCtlz() const override;
+  /// AMDGPU implementation of the source-of-divergence hook; marked
+  /// override like the neighbouring overrides so that a signature mismatch
+  /// with the base class is diagnosed at compile time.
+  bool isSDNodeSourceOfDivergence(const SDNode *N, FunctionLoweringInfo *FLI,
+                                  DivergenceAnalysis *DA) const override;
+  /// AMDGPU implementation of the always-uniform hook.
+  bool isSDNodeAlwaysUniform(const SDNode *N) const override;
static CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg);
static CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg);
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp?rev=326703&r1=326702&r2=326703&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp Mon Mar 5 07:12:21 2018
@@ -17,6 +17,7 @@
#include "AMDGPUTargetTransformInfo.h"
#include "AMDGPUSubtarget.h"
+#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -464,55 +465,7 @@ int AMDGPUTTIImpl::getVectorInstrCost(un
}
}
-static bool isIntrinsicSourceOfDivergence(const IntrinsicInst *I) {
- switch (I->getIntrinsicID()) {
- case Intrinsic::amdgcn_workitem_id_x:
- case Intrinsic::amdgcn_workitem_id_y:
- case Intrinsic::amdgcn_workitem_id_z:
- case Intrinsic::amdgcn_interp_mov:
- case Intrinsic::amdgcn_interp_p1:
- case Intrinsic::amdgcn_interp_p2:
- case Intrinsic::amdgcn_mbcnt_hi:
- case Intrinsic::amdgcn_mbcnt_lo:
- case Intrinsic::r600_read_tidig_x:
- case Intrinsic::r600_read_tidig_y:
- case Intrinsic::r600_read_tidig_z:
- case Intrinsic::amdgcn_atomic_inc:
- case Intrinsic::amdgcn_atomic_dec:
- case Intrinsic::amdgcn_ds_fadd:
- case Intrinsic::amdgcn_ds_fmin:
- case Intrinsic::amdgcn_ds_fmax:
- case Intrinsic::amdgcn_image_atomic_swap:
- case Intrinsic::amdgcn_image_atomic_add:
- case Intrinsic::amdgcn_image_atomic_sub:
- case Intrinsic::amdgcn_image_atomic_smin:
- case Intrinsic::amdgcn_image_atomic_umin:
- case Intrinsic::amdgcn_image_atomic_smax:
- case Intrinsic::amdgcn_image_atomic_umax:
- case Intrinsic::amdgcn_image_atomic_and:
- case Intrinsic::amdgcn_image_atomic_or:
- case Intrinsic::amdgcn_image_atomic_xor:
- case Intrinsic::amdgcn_image_atomic_inc:
- case Intrinsic::amdgcn_image_atomic_dec:
- case Intrinsic::amdgcn_image_atomic_cmpswap:
- case Intrinsic::amdgcn_buffer_atomic_swap:
- case Intrinsic::amdgcn_buffer_atomic_add:
- case Intrinsic::amdgcn_buffer_atomic_sub:
- case Intrinsic::amdgcn_buffer_atomic_smin:
- case Intrinsic::amdgcn_buffer_atomic_umin:
- case Intrinsic::amdgcn_buffer_atomic_smax:
- case Intrinsic::amdgcn_buffer_atomic_umax:
- case Intrinsic::amdgcn_buffer_atomic_and:
- case Intrinsic::amdgcn_buffer_atomic_or:
- case Intrinsic::amdgcn_buffer_atomic_xor:
- case Intrinsic::amdgcn_buffer_atomic_cmpswap:
- case Intrinsic::amdgcn_ps_live:
- case Intrinsic::amdgcn_ds_swizzle:
- return true;
- default:
- return false;
- }
-}
+
static bool isArgPassedInSGPR(const Argument *A) {
const Function *F = A->getParent();
@@ -563,7 +516,7 @@ bool AMDGPUTTIImpl::isSourceOfDivergence
return true;
if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V))
- return isIntrinsicSourceOfDivergence(Intrinsic);
+ return AMDGPU::isIntrinsicSourceOfDivergence(Intrinsic->getIntrinsicID());
// Assume all function calls are a source of divergence.
if (isa<CallInst>(V) || isa<InvokeInst>(V))
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=326703&r1=326702&r2=326703&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Mon Mar 5 07:12:21 2018
@@ -5372,7 +5372,7 @@ SDValue SITargetLowering::LowerLOAD(SDVa
unsigned NumElements = MemVT.getVectorNumElements();
if (AS == AMDGPUASI.CONSTANT_ADDRESS ||
AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT) {
- if (isMemOpUniform(Load))
+ if (!Op->isDivergent())
return SDValue();
// Non-uniform loads will be selected to MUBUF instructions, so they
// have the same legalization requirements as global and private
@@ -5382,7 +5382,7 @@ SDValue SITargetLowering::LowerLOAD(SDVa
if (AS == AMDGPUASI.CONSTANT_ADDRESS ||
AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT ||
AS == AMDGPUASI.GLOBAL_ADDRESS) {
- if (Subtarget->getScalarizeGlobalBehavior() && isMemOpUniform(Load) &&
+ if (Subtarget->getScalarizeGlobalBehavior() && !Op->isDivergent() &&
!Load->isVolatile() && isMemOpHasNoClobberedMemOperand(Load))
return SDValue();
// Non-uniform loads will be selected to MUBUF instructions, so they
Modified: llvm/trunk/lib/Target/AMDGPU/SMInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SMInstructions.td?rev=326703&r1=326702&r2=326703&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SMInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SMInstructions.td Mon Mar 5 07:12:21 2018
@@ -223,12 +223,9 @@ def S_MEMREALTIME : SM_Time_Pseudo <"s
def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{
auto Ld = cast<LoadSDNode>(N);
return Ld->getAlignment() >= 4 &&
- (((Ld->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
- Ld->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT) &&
- static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpUniform(N)) ||
+ ((((Ld->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS) || (Ld->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT)) && !N->isDivergent()) ||
(Subtarget->getScalarizeGlobalBehavior() && Ld->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS &&
- !Ld->isVolatile() &&
- static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpUniform(N) &&
+ !Ld->isVolatile() && !N->isDivergent() &&
static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpHasNoClobberedMemOperand(N)));
}]>;
Modified: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp?rev=326703&r1=326702&r2=326703&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp Mon Mar 5 07:12:21 2018
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUBaseInfo.h"
+#include "AMDGPUTargetTransformInfo.h"
#include "AMDGPU.h"
#include "SIDefines.h"
#include "llvm/ADT/StringRef.h"
@@ -938,5 +939,55 @@ AMDGPUAS getAMDGPUAS(const TargetMachine
AMDGPUAS getAMDGPUAS(const Module &M) {
return getAMDGPUAS(Triple(M.getTargetTriple()));
}
+
+bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
+ switch (IntrID) {
+ case Intrinsic::amdgcn_workitem_id_x:
+ case Intrinsic::amdgcn_workitem_id_y:
+ case Intrinsic::amdgcn_workitem_id_z:
+ case Intrinsic::amdgcn_interp_mov:
+ case Intrinsic::amdgcn_interp_p1:
+ case Intrinsic::amdgcn_interp_p2:
+ case Intrinsic::amdgcn_mbcnt_hi:
+ case Intrinsic::amdgcn_mbcnt_lo:
+ case Intrinsic::r600_read_tidig_x:
+ case Intrinsic::r600_read_tidig_y:
+ case Intrinsic::r600_read_tidig_z:
+ case Intrinsic::amdgcn_atomic_inc:
+ case Intrinsic::amdgcn_atomic_dec:
+ case Intrinsic::amdgcn_ds_fadd:
+ case Intrinsic::amdgcn_ds_fmin:
+ case Intrinsic::amdgcn_ds_fmax:
+ case Intrinsic::amdgcn_image_atomic_swap:
+ case Intrinsic::amdgcn_image_atomic_add:
+ case Intrinsic::amdgcn_image_atomic_sub:
+ case Intrinsic::amdgcn_image_atomic_smin:
+ case Intrinsic::amdgcn_image_atomic_umin:
+ case Intrinsic::amdgcn_image_atomic_smax:
+ case Intrinsic::amdgcn_image_atomic_umax:
+ case Intrinsic::amdgcn_image_atomic_and:
+ case Intrinsic::amdgcn_image_atomic_or:
+ case Intrinsic::amdgcn_image_atomic_xor:
+ case Intrinsic::amdgcn_image_atomic_inc:
+ case Intrinsic::amdgcn_image_atomic_dec:
+ case Intrinsic::amdgcn_image_atomic_cmpswap:
+ case Intrinsic::amdgcn_buffer_atomic_swap:
+ case Intrinsic::amdgcn_buffer_atomic_add:
+ case Intrinsic::amdgcn_buffer_atomic_sub:
+ case Intrinsic::amdgcn_buffer_atomic_smin:
+ case Intrinsic::amdgcn_buffer_atomic_umin:
+ case Intrinsic::amdgcn_buffer_atomic_smax:
+ case Intrinsic::amdgcn_buffer_atomic_umax:
+ case Intrinsic::amdgcn_buffer_atomic_and:
+ case Intrinsic::amdgcn_buffer_atomic_or:
+ case Intrinsic::amdgcn_buffer_atomic_xor:
+ case Intrinsic::amdgcn_buffer_atomic_cmpswap:
+ case Intrinsic::amdgcn_ps_live:
+ case Intrinsic::amdgcn_ds_swizzle:
+ return true;
+ default:
+ return false;
+ }
+}
} // namespace AMDGPU
} // namespace llvm
Modified: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h?rev=326703&r1=326702&r2=326703&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h Mon Mar 5 07:12:21 2018
@@ -382,6 +382,9 @@ int64_t getSMRDEncodedOffset(const MCSub
/// not the encoded offset.
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
+/// \returns true if the intrinsic is divergent
+bool isIntrinsicSourceOfDivergence(unsigned IntrID);
+
} // end namespace AMDGPU
} // end namespace llvm
Modified: llvm/trunk/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll?rev=326703&r1=326702&r2=326703&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll Mon Mar 5 07:12:21 2018
@@ -2,7 +2,9 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
; GCN-LABEL: {{^}}use_dispatch_ptr:
-; GCN: s_load_dword s{{[0-9]+}}, s[6:7], 0x0
+; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s6
+; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s7
+; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @use_dispatch_ptr() #1 {
%dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
%header_ptr = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
@@ -19,7 +21,9 @@ define amdgpu_kernel void @kern_indirect
}
; GCN-LABEL: {{^}}use_queue_ptr:
-; GCN: s_load_dword s{{[0-9]+}}, s[6:7], 0x0
+; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s6
+; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s7
+; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @use_queue_ptr() #1 {
%queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
%header_ptr = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
@@ -37,11 +41,12 @@ define amdgpu_kernel void @kern_indirect
}
; GCN-LABEL: {{^}}use_queue_ptr_addrspacecast:
-; CIVI: s_load_dword [[APERTURE_LOAD:s[0-9]+]], s[6:7], 0x10
+; CIVI: flat_load_dword v[[HI:[0-9]+]], v[0:1]
; GFX9: s_getreg_b32 [[APERTURE_LOAD:s[0-9]+]]
-
-; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE_LOAD]]
-; GCN: {{flat|global}}_store_dword v{{\[[0-9]+}}:[[HI]]{{\]}}
+; CIVI: v_mov_b32_e32 v[[LO:[0-9]+]], 16
+; GFX9: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE_LOAD]]
+; GFX9: {{flat|global}}_store_dword v{{\[[0-9]+}}:[[HI]]{{\]}}
+; CIVI: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @use_queue_ptr_addrspacecast() #1 {
%asc = addrspacecast i32 addrspace(3)* inttoptr (i32 16 to i32 addrspace(3)*) to i32*
store volatile i32 0, i32* %asc
@@ -60,7 +65,9 @@ define amdgpu_kernel void @kern_indirect
}
; GCN-LABEL: {{^}}use_kernarg_segment_ptr:
-; GCN: s_load_dword s{{[0-9]+}}, s[6:7], 0x0
+; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s6
+; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s7
+; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @use_kernarg_segment_ptr() #1 {
%kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
%header_ptr = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
@@ -424,9 +431,15 @@ define amdgpu_kernel void @kern_indirect
; GCN-LABEL: {{^}}use_every_sgpr_input:
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s5 offset:4
-; GCN: s_load_dword s{{[0-9]+}}, s[6:7], 0x0
-; GCN: s_load_dword s{{[0-9]+}}, s[8:9], 0x0
-; GCN: s_load_dword s{{[0-9]+}}, s[10:11], 0x0
+; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s6
+; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s7
+; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s8
+; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s9
+; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s10
+; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s11
+; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: ; use s[12:13]
; GCN: ; use s14
; GCN: ; use s15
@@ -557,12 +570,23 @@ define void @func_use_every_sgpr_input_c
; GCN-DAG: s_mov_b32 s6, s14
; GCN-DAG: s_mov_b32 s7, s15
; GCN-DAG: s_mov_b32 s8, s16
+
+; GCN-DAG: s_mov_b64 s{{\[}}[[LO_X:[0-9]+]]{{\:}}[[HI_X:[0-9]+]]{{\]}}, s[6:7]
+; GCN-DAG: s_mov_b64 s{{\[}}[[LO_Y:[0-9]+]]{{\:}}[[HI_Y:[0-9]+]]{{\]}}, s[8:9]
+; GCN-DAG: s_mov_b64 s{{\[}}[[LO_Z:[0-9]+]]{{\:}}[[HI_Z:[0-9]+]]{{\]}}, s[10:11]
+
; GCN: s_swappc_b64
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s5 offset:4
-; GCN: s_load_dword s{{[0-9]+}},
-; GCN: s_load_dword s{{[0-9]+}},
-; GCN: s_load_dword s{{[0-9]+}},
+; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s[[LO_X]]
+; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s[[HI_X]]
+; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s[[LO_Y]]
+; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s[[HI_Y]]
+; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s[[LO_Z]]
+; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s[[HI_Z]]
+; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: ; use
; GCN: ; use [[SAVE_X]]
; GCN: ; use [[SAVE_Y]]
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll?rev=326703&r1=326702&r2=326703&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll Mon Mar 5 07:12:21 2018
@@ -34,7 +34,13 @@ define amdgpu_kernel void @kernel_implic
; GCN-LABEL: {{^}}func_implicitarg_ptr:
; GCN: s_waitcnt
-; GCN-NEXT: s_load_dword s{{[0-9]+}}, s[6:7], 0x0{{$}}
+; MESA: s_mov_b64 s[8:9], s[6:7]
+; MESA: s_mov_b32 s11, 0xf000
+; MESA: s_mov_b32 s10, -1
+; MESA: buffer_load_dword v0, off, s[8:11], 0
+; HSA: v_mov_b32_e32 v0, s6
+; HSA: v_mov_b32_e32 v1, s7
+; HSA: flat_load_dword v0, v[0:1]
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @func_implicitarg_ptr() #1 {
@@ -83,8 +89,21 @@ define void @func_call_implicitarg_ptr_f
; GCN-LABEL: {{^}}func_kernarg_implicitarg_ptr:
; GCN: s_waitcnt
-; GCN: s_load_dword s{{[0-9]+}}, s[6:7], 0x0{{$}}
-; GCN: s_load_dword s{{[0-9]+}}, s[8:9], 0x0{{$}}
+; MESA: s_mov_b64 s[12:13], s[6:7]
+; MESA: s_mov_b32 s15, 0xf000
+; MESA: s_mov_b32 s14, -1
+; MESA: buffer_load_dword v0, off, s[12:15], 0
+; HSA: v_mov_b32_e32 v0, s6
+; HSA: v_mov_b32_e32 v1, s7
+; HSA: flat_load_dword v0, v[0:1]
+; MESA: s_mov_b32 s10, s14
+; MESA: s_mov_b32 s11, s15
+; MESA: buffer_load_dword v0, off, s[8:11], 0
+; HSA: v_mov_b32_e32 v0, s8
+; HSA: v_mov_b32_e32 v1, s9
+; HSA: flat_load_dword v0, v[0:1]
+
+; GCN: s_waitcnt vmcnt(0)
define void @func_kernarg_implicitarg_ptr() #1 {
%kernarg.segment.ptr = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
%implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
More information about the llvm-commits mailing list