[llvm] r350069 - [NVPTX] Allow libcalls that are defined in the current module.
Justin Lebar via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 26 11:12:31 PST 2018
Author: jlebar
Date: Wed Dec 26 11:12:31 2018
New Revision: 350069
URL: http://llvm.org/viewvc/llvm-project?rev=350069&view=rev
Log:
[NVPTX] Allow libcalls that are defined in the current module.
The patch adds a possibility to make library calls on NVPTX.
An important thing about library functions: they must be defined within
the current module. This should guarantee that we produce valid PTX
assembly (without calls to undefined functions). Anyone who wants to use
the libcalls will probably have to link against compiler-rt or another
implementation.
Currently, it is impossible to make library calls at all because of the
error "LLVM ERROR: Cannot select: i32 = ExternalSymbol '...'". But we can
lower ExternalSymbol to TargetExternalSymbol and verify that the function
definition is available.
Also, there was an issue with a DAG during legalisation. When we expand
instruction into libcall, the inner call-chain isn't being "integrated"
into outer chain. Since the last "data-flow" (call retval load) node is
located in call-chain earlier than CALLSEQ_END node, the latter becomes
a leaf and therefore a dead node (and is being removed quite fast).
The solution proposed here relies on additional data-flow pseudo nodes
(ProxyReg) whose only purpose is to keep CALLSEQ_END alive during the
legalisation and instruction selection phases - we remove the pseudo
instructions before the register scheduling phase.
Patch by Denys Zariaiev!
Differential Revision: https://reviews.llvm.org/D34708
Added:
llvm/trunk/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp
llvm/trunk/test/CodeGen/NVPTX/calls-with-phi.ll
llvm/trunk/test/CodeGen/NVPTX/libcall-fulfilled.ll
llvm/trunk/test/CodeGen/NVPTX/libcall-intrinsic.ll
llvm/trunk/test/CodeGen/NVPTX/proxy-reg-erasure-mir.ll
llvm/trunk/test/CodeGen/NVPTX/proxy-reg-erasure-ptx.ll
Removed:
llvm/trunk/test/CodeGen/NVPTX/zero-cs.ll
Modified:
llvm/trunk/include/llvm/CodeGen/SelectionDAG.h
llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/trunk/lib/Target/NVPTX/CMakeLists.txt
llvm/trunk/lib/Target/NVPTX/NVPTX.h
llvm/trunk/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp
llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.h
llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.td
llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.cpp
llvm/trunk/test/CodeGen/NVPTX/libcall-instruction.ll
Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAG.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/SelectionDAG.h?rev=350069&r1=350068&r2=350069&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/SelectionDAG.h (original)
+++ llvm/trunk/include/llvm/CodeGen/SelectionDAG.h Wed Dec 26 11:12:31 2018
@@ -1128,6 +1128,13 @@ public:
/// Expand the specified \c ISD::VACOPY node as the Legalize pass would.
SDValue expandVACopy(SDNode *Node);
+ /// Returns a GlobalAddress of the function from the current module with
+ /// name matching the given ExternalSymbol. Additionally can provide the
+ /// matched function.
+ /// Panics if the function doesn't exist.
+ SDValue getSymbolFunctionGlobalAddress(SDValue Op,
+ Function **TargetFunction = nullptr);
+
/// *Mutate* the specified node in-place to have the
/// specified operands. If the resultant node already exists in the DAG,
/// this does not modify the specified node, instead it returns the node that
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=350069&r1=350068&r2=350069&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Wed Dec 26 11:12:31 2018
@@ -8464,6 +8464,32 @@ SDValue SelectionDAG::makeEquivalentMemo
return TokenFactor;
}
+/// Resolve the ExternalSymbol node \p Op to a function of the same name in
+/// the module that owns the current MachineFunction and return a
+/// GlobalAddress node for it.
+///
+/// \param Op          Must be an ExternalSymbolSDNode (asserted).
+/// \param OutFunction Optional out-parameter. When non-null it receives the
+///                    result of the module lookup; it is written before the
+///                    lookup result is checked.
+///
+/// Calls report_fatal_error when the module contains no function with the
+/// symbol's name.
+SDValue SelectionDAG::getSymbolFunctionGlobalAddress(SDValue Op,
+                                                     Function **OutFunction) {
+  assert(isa<ExternalSymbolSDNode>(Op) && "Node should be an ExternalSymbol");
+
+  // Look the symbol up by name in the module being compiled.
+  const char *SymName = cast<ExternalSymbolSDNode>(Op)->getSymbol();
+  auto *Callee = MF->getFunction().getParent()->getFunction(SymName);
+
+  if (OutFunction)
+    *OutFunction = Callee;
+
+  // Nothing in the module provides the symbol: report it and stop.
+  if (!Callee) {
+    std::string Message;
+    raw_string_ostream OS(Message);
+    OS << "Undefined external symbol " << '"' << SymName << '"';
+    OS.flush();
+    report_fatal_error(Message);
+  }
+
+  // The pointer type is taken from the function's own address space.
+  auto PtrTy = TLI->getPointerTy(getDataLayout(), Callee->getAddressSpace());
+  return getGlobalAddress(Callee, SDLoc(Op), PtrTy);
+}
+
//===----------------------------------------------------------------------===//
// SDNode Class
//===----------------------------------------------------------------------===//
Modified: llvm/trunk/lib/Target/NVPTX/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/CMakeLists.txt?rev=350069&r1=350068&r2=350069&view=diff
==============================================================================
--- llvm/trunk/lib/Target/NVPTX/CMakeLists.txt (original)
+++ llvm/trunk/lib/Target/NVPTX/CMakeLists.txt Wed Dec 26 11:12:31 2018
@@ -32,6 +32,7 @@ set(NVPTXCodeGen_sources
NVPTXUtilities.cpp
NVVMIntrRange.cpp
NVVMReflect.cpp
+ NVPTXProxyRegErasure.cpp
)
add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources})
Modified: llvm/trunk/lib/Target/NVPTX/NVPTX.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTX.h?rev=350069&r1=350068&r2=350069&view=diff
==============================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTX.h (original)
+++ llvm/trunk/lib/Target/NVPTX/NVPTX.h Wed Dec 26 11:12:31 2018
@@ -53,6 +53,7 @@ FunctionPass *createNVPTXImageOptimizerP
FunctionPass *createNVPTXLowerArgsPass(const NVPTXTargetMachine *TM);
BasicBlockPass *createNVPTXLowerAllocaPass();
MachineFunctionPass *createNVPTXPeephole();
+MachineFunctionPass *createNVPTXProxyRegErasurePass();
Target &getTheNVPTXTarget32();
Target &getTheNVPTXTarget64();
Modified: llvm/trunk/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXAsmPrinter.cpp?rev=350069&r1=350068&r2=350069&view=diff
==============================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTXAsmPrinter.cpp (original)
+++ llvm/trunk/lib/Target/NVPTX/NVPTXAsmPrinter.cpp Wed Dec 26 11:12:31 2018
@@ -730,6 +730,11 @@ void NVPTXAsmPrinter::emitDeclarations(c
for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
const Function *F = &*FI;
+ if (F->getAttributes().hasFnAttribute("nvptx-libcall-callee")) {
+ emitDeclaration(F, O);
+ continue;
+ }
+
if (F->isDeclaration()) {
if (F->use_empty())
continue;
Modified: llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp?rev=350069&r1=350068&r2=350069&view=diff
==============================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp Wed Dec 26 11:12:31 2018
@@ -663,6 +663,8 @@ const char *NVPTXTargetLowering::getTarg
return "NVPTXISD::CallSeqEnd";
case NVPTXISD::CallPrototype:
return "NVPTXISD::CallPrototype";
+ case NVPTXISD::ProxyReg:
+ return "NVPTXISD::ProxyReg";
case NVPTXISD::LoadV2:
return "NVPTXISD::LoadV2";
case NVPTXISD::LoadV4:
@@ -1666,6 +1668,18 @@ SDValue NVPTXTargetLowering::LowerCall(T
// indirect calls but is always null for libcalls.
bool isIndirectCall = !Func && CS;
+ if (isa<ExternalSymbolSDNode>(Callee)) {
+ Function* CalleeFunc = nullptr;
+
+ // Try to find the callee in the current module.
+ Callee = DAG.getSymbolFunctionGlobalAddress(Callee, &CalleeFunc);
+ assert(CalleeFunc != nullptr && "Libcall callee must be set.");
+
+ // Set the "libcall callee" attribute to indicate that the function
+ // must always have a declaration.
+ CalleeFunc->addFnAttr("nvptx-libcall-callee", "true");
+ }
+
if (isIndirectCall) {
// This is indirect function call case : PTX requires a prototype of the
// form
@@ -1738,6 +1752,9 @@ SDValue NVPTXTargetLowering::LowerCall(T
InFlag = Chain.getValue(1);
}
+ SmallVector<SDValue, 16> ProxyRegOps;
+ SmallVector<Optional<MVT>, 16> ProxyRegTruncates;
+
// Generate loads from param memory/moves from registers for result
if (Ins.size() > 0) {
SmallVector<EVT, 16> VTs;
@@ -1808,11 +1825,14 @@ SDValue NVPTXTargetLowering::LowerCall(T
MachineMemOperand::MOLoad);
for (unsigned j = 0; j < NumElts; ++j) {
- SDValue Ret = RetVal.getValue(j);
+ ProxyRegOps.push_back(RetVal.getValue(j));
+
if (needTruncate)
- Ret = DAG.getNode(ISD::TRUNCATE, dl, Ins[VecIdx + j].VT, Ret);
- InVals.push_back(Ret);
+ ProxyRegTruncates.push_back(Optional<MVT>(Ins[VecIdx + j].VT));
+ else
+ ProxyRegTruncates.push_back(Optional<MVT>());
}
+
Chain = RetVal.getValue(NumElts);
InFlag = RetVal.getValue(NumElts + 1);
@@ -1828,8 +1848,29 @@ SDValue NVPTXTargetLowering::LowerCall(T
DAG.getIntPtrConstant(uniqueCallSite + 1, dl,
true),
InFlag, dl);
+ InFlag = Chain.getValue(1);
uniqueCallSite++;
+ // Append ProxyReg instructions to the chain to make sure that `callseq_end`
+ // will not get lost. Otherwise, during libcalls expansion, the nodes can become
+ // dangling.
+ for (unsigned i = 0; i < ProxyRegOps.size(); ++i) {
+ SDValue Ret = DAG.getNode(
+ NVPTXISD::ProxyReg, dl,
+ DAG.getVTList(ProxyRegOps[i].getSimpleValueType(), MVT::Other, MVT::Glue),
+ { Chain, ProxyRegOps[i], InFlag }
+ );
+
+ Chain = Ret.getValue(1);
+ InFlag = Ret.getValue(2);
+
+ if (ProxyRegTruncates[i].hasValue()) {
+ Ret = DAG.getNode(ISD::TRUNCATE, dl, ProxyRegTruncates[i].getValue(), Ret);
+ }
+
+ InVals.push_back(Ret);
+ }
+
// set isTailCall to false for now, until we figure out how to express
// tail call optimization in PTX
isTailCall = false;
Modified: llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.h?rev=350069&r1=350068&r2=350069&view=diff
==============================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.h (original)
+++ llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.h Wed Dec 26 11:12:31 2018
@@ -51,6 +51,7 @@ enum NodeType : unsigned {
CallSeqBegin,
CallSeqEnd,
CallPrototype,
+ ProxyReg,
FUN_SHFL_CLAMP,
FUN_SHFR_CLAMP,
MUL_WIDE_SIGNED,
Modified: llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.td?rev=350069&r1=350068&r2=350069&view=diff
==============================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.td (original)
+++ llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.td Wed Dec 26 11:12:31 2018
@@ -1885,6 +1885,7 @@ def SDTStoreRetvalProfile : SDTypeProfil
def SDTStoreRetvalV2Profile : SDTypeProfile<0, 3, [SDTCisInt<0>]>;
def SDTStoreRetvalV4Profile : SDTypeProfile<0, 5, [SDTCisInt<0>]>;
def SDTPseudoUseParamProfile : SDTypeProfile<0, 1, []>;
+def SDTProxyRegProfile : SDTypeProfile<1, 1, []>;
def DeclareParam :
SDNode<"NVPTXISD::DeclareParam", SDTDeclareParamProfile,
@@ -1972,6 +1973,9 @@ def PseudoUseParam :
def RETURNNode :
SDNode<"NVPTXISD::RETURN", SDTCallArgMarkProfile,
[SDNPHasChain, SDNPSideEffect]>;
+def ProxyReg :
+ SDNode<"NVPTXISD::ProxyReg", SDTProxyRegProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
let mayLoad = 1 in {
class LoadParamMemInst<NVPTXRegClass regclass, string opstr> :
@@ -2249,6 +2253,21 @@ def PseudoUseParamI16 : PseudoUseParamIn
def PseudoUseParamF64 : PseudoUseParamInst<Float64Regs>;
def PseudoUseParamF32 : PseudoUseParamInst<Float32Regs>;
+class ProxyRegInst<string SzStr, NVPTXRegClass regclass> :
+ NVPTXInst<(outs regclass:$dst), (ins regclass:$src),
+ !strconcat("mov.", SzStr, " \t$dst, $src;"),
+ [(set regclass:$dst, (ProxyReg regclass:$src))]>;
+
+let isCodeGenOnly=1, isPseudo=1 in {
+ def ProxyRegI1 : ProxyRegInst<"pred", Int1Regs>;
+ def ProxyRegI16 : ProxyRegInst<"b16", Int16Regs>;
+ def ProxyRegI32 : ProxyRegInst<"b32", Int32Regs>;
+ def ProxyRegI64 : ProxyRegInst<"b64", Int64Regs>;
+ def ProxyRegF16 : ProxyRegInst<"b16", Float16Regs>;
+ def ProxyRegF32 : ProxyRegInst<"f32", Float32Regs>;
+ def ProxyRegF64 : ProxyRegInst<"f64", Float64Regs>;
+ def ProxyRegF16x2 : ProxyRegInst<"b32", Float16x2Regs>;
+}
//
// Load / Store Handling
@@ -2541,7 +2560,7 @@ let mayStore=1, hasSideEffects=0 in {
class F_BITCONVERT<string SzStr, NVPTXRegClass regclassIn,
NVPTXRegClass regclassOut> :
NVPTXInst<(outs regclassOut:$d), (ins regclassIn:$a),
- !strconcat("mov.b", !strconcat(SzStr, " \t$d, $a;")),
+ !strconcat("mov.b", SzStr, " \t$d, $a;"),
[(set regclassOut:$d, (bitconvert regclassIn:$a))]>;
def BITCONVERT_16_I2F : F_BITCONVERT<"16", Int16Regs, Float16Regs>;
Added: llvm/trunk/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp?rev=350069&view=auto
==============================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp (added)
+++ llvm/trunk/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp Wed Dec 26 11:12:31 2018
@@ -0,0 +1,122 @@
+//===- NVPTXProxyRegErasure.cpp - NVPTX Proxy Register Instruction Erasure -==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The pass is needed to remove ProxyReg instructions and restore related
+// registers. The instructions were needed at instruction selection stage to
+// make sure that callseq_end nodes won't be removed as "dead nodes". This can
+// happen when we expand instructions into libcalls and the call site doesn't
+// care about the libcall chain. Call site cares about data flow only, and the
+// latest data flow node happens to be before callseq_end. Therefore the node
+// becomes dangling and "dead". The ProxyReg acts like an additional data flow
+// node *after* the callseq_end in the chain and ensures that everything will be
+// preserved.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTX.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+namespace llvm {
+void initializeNVPTXProxyRegErasurePass(PassRegistry &);
+}
+
+namespace {
+
+/// Machine-function pass that rewrites the users of every ProxyReg
+/// instruction to read the ProxyReg's input register and then erases the
+/// ProxyReg instructions themselves.
+struct NVPTXProxyRegErasure : public MachineFunctionPass {
+  /// Pass identification; its address is handed to MachineFunctionPass.
+  static char ID;
+
+  NVPTXProxyRegErasure() : MachineFunctionPass(ID) {
+    PassRegistry &Registry = *PassRegistry::getPassRegistry();
+    initializeNVPTXProxyRegErasurePass(Registry);
+  }
+
+  StringRef getPassName() const override {
+    return "NVPTX Proxy Register Instruction Erasure";
+  }
+
+  /// Declares no requirements beyond those of MachineFunctionPass.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  /// Returns true when at least one ProxyReg instruction was removed.
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+private:
+  /// Redirects, across all of \p MF, uses of the register defined by \p MI
+  /// to the register \p MI reads.
+  void replaceMachineInstructionUsage(MachineFunction &MF, MachineInstr &MI);
+
+  /// Rewrites the use operands of \p Instr that reference the register of
+  /// \p From so that they reference the register of \p To.
+  void replaceRegisterUsage(MachineInstr &Instr, MachineOperand &From,
+                            MachineOperand &To);
+};
+
+} // namespace
+
+char NVPTXProxyRegErasure::ID = 0;
+
+INITIALIZE_PASS(NVPTXProxyRegErasure, "nvptx-proxyreg-erasure", "NVPTX ProxyReg Erasure", false, false)
+
+/// Walks every instruction of \p MF. For each ProxyReg pseudo instruction the
+/// users of its result are redirected to its input register, and the
+/// instruction is queued for removal. Removal happens after the walk so that
+/// nothing is erased while the blocks are being iterated.
+///
+/// \returns true if any ProxyReg instruction was erased.
+bool NVPTXProxyRegErasure::runOnMachineFunction(MachineFunction &MF) {
+  SmallVector<MachineInstr *, 16> Doomed;
+
+  for (auto &Block : MF) {
+    for (auto &Inst : Block) {
+      const unsigned Opc = Inst.getOpcode();
+      const bool IsProxyReg =
+          Opc == NVPTX::ProxyRegI1 || Opc == NVPTX::ProxyRegI16 ||
+          Opc == NVPTX::ProxyRegI32 || Opc == NVPTX::ProxyRegI64 ||
+          Opc == NVPTX::ProxyRegF16 || Opc == NVPTX::ProxyRegF16x2 ||
+          Opc == NVPTX::ProxyRegF32 || Opc == NVPTX::ProxyRegF64;
+      if (!IsProxyReg)
+        continue;
+
+      replaceMachineInstructionUsage(MF, Inst);
+      Doomed.push_back(&Inst);
+    }
+  }
+
+  for (MachineInstr *Inst : Doomed)
+    Inst->eraseFromParent();
+
+  return !Doomed.empty();
+}
+
+/// Takes the first use operand and the first def operand of the ProxyReg
+/// instruction \p MI and visits every instruction in \p MF, replacing uses
+/// of the defined register with the used one.
+void NVPTXProxyRegErasure::replaceMachineInstructionUsage(MachineFunction &MF,
+                                                          MachineInstr &MI) {
+  MachineOperand &Source = *MI.uses().begin();
+  MachineOperand &Result = *MI.defs().begin();
+
+  assert(Source.isReg() && "ProxyReg input operand should be a register.");
+  assert(Result.isReg() && "ProxyReg output operand should be a register.");
+
+  for (auto &Block : MF)
+    for (MachineInstr &Candidate : Block)
+      replaceRegisterUsage(Candidate, Result, Source);
+}
+
+/// For each use operand of \p Instr that is a register equal to the register
+/// of \p From, sets it to the register of \p To. Non-register operands and
+/// operands naming any other register are left alone.
+void NVPTXProxyRegErasure::replaceRegisterUsage(MachineInstr &Instr,
+                                                MachineOperand &From,
+                                                MachineOperand &To) {
+  for (MachineOperand &Operand : Instr.uses()) {
+    if (!Operand.isReg())
+      continue;
+    if (Operand.getReg() != From.getReg())
+      continue;
+    Operand.setReg(To.getReg());
+  }
+}
+
+/// Allocates a new NVPTXProxyRegErasure pass and returns it to the caller.
+MachineFunctionPass *llvm::createNVPTXProxyRegErasurePass() {
+  MachineFunctionPass *Pass = new NVPTXProxyRegErasure();
+  return Pass;
+}
Modified: llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.cpp?rev=350069&r1=350068&r2=350069&view=diff
==============================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.cpp Wed Dec 26 11:12:31 2018
@@ -68,6 +68,7 @@ void initializeNVPTXAssignValidGlobalNam
void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
void initializeNVPTXLowerArgsPass(PassRegistry &);
void initializeNVPTXLowerAllocaPass(PassRegistry &);
+void initializeNVPTXProxyRegErasurePass(PassRegistry &);
} // end namespace llvm
@@ -87,6 +88,7 @@ extern "C" void LLVMInitializeNVPTXTarge
initializeNVPTXLowerArgsPass(PR);
initializeNVPTXLowerAllocaPass(PR);
initializeNVPTXLowerAggrCopiesPass(PR);
+ initializeNVPTXProxyRegErasurePass(PR);
}
static std::string computeDataLayout(bool is64Bit, bool UseShortPointers) {
@@ -160,6 +162,7 @@ public:
void addIRPasses() override;
bool addInstSelector() override;
+ void addPreRegAlloc() override;
void addPostRegAlloc() override;
void addMachineSSAOptimization() override;
@@ -301,6 +304,11 @@ bool NVPTXPassConfig::addInstSelector()
return false;
}
+void NVPTXPassConfig::addPreRegAlloc() {
+ // Remove Proxy Register pseudo instructions used to keep `callseq_end` alive.
+ addPass(createNVPTXProxyRegErasurePass());
+}
+
void NVPTXPassConfig::addPostRegAlloc() {
addPass(createNVPTXPrologEpilogPass(), false);
if (getOptLevel() != CodeGenOpt::None) {
Added: llvm/trunk/test/CodeGen/NVPTX/calls-with-phi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/calls-with-phi.ll?rev=350069&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/NVPTX/calls-with-phi.ll (added)
+++ llvm/trunk/test/CodeGen/NVPTX/calls-with-phi.ll Wed Dec 26 11:12:31 2018
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=nvptx 2>&1 | FileCheck %s
+; Make sure the example doesn't crash with segfault
+
+; CHECK: .visible .func ({{.*}}) loop
+define i32 @loop(i32, i32) {
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ %0, %entry ], [ %res, %loop ]
+ %res = call i32 @div(i32 %i, i32 %1)
+
+ %exitcond = icmp eq i32 %res, %0
+ br i1 %exitcond, label %exit, label %loop
+
+exit:
+ ret i32 %res
+}
+
+define i32 @div(i32, i32) {
+ ret i32 0
+}
Added: llvm/trunk/test/CodeGen/NVPTX/libcall-fulfilled.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/libcall-fulfilled.ll?rev=350069&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/NVPTX/libcall-fulfilled.ll (added)
+++ llvm/trunk/test/CodeGen/NVPTX/libcall-fulfilled.ll Wed Dec 26 11:12:31 2018
@@ -0,0 +1,31 @@
+; RUN: llc < %s -march=nvptx 2>&1 | FileCheck %s
+; Allow to make libcalls that are defined in the current module
+
+; Underlying libcall declaration
+; CHECK: .visible .func (.param .align 16 .b8 func_retval0[16]) __umodti3
+
+define i128 @remainder(i128, i128) {
+bb0:
+ ; CHECK: { // callseq 0, 0
+ ; CHECK: call.uni (retval0),
+ ; CHECK-NEXT: __umodti3,
+ ; CHECK-NEXT: (
+ ; CHECK-NEXT: param0,
+ ; CHECK-NEXT: param1
+ ; CHECK-NEXT: );
+ ; CHECK-NEXT: ld.param.v2.b64 {%[[REG0:rd[0-9]+]], %[[REG1:rd[0-9]+]]}, [retval0+0];
+ ; CHECK-NEXT: } // callseq 0
+ %a = urem i128 %0, %1
+ br label %bb1
+
+bb1:
+ ; CHECK-NEXT: st.param.v2.b64 [func_retval0+0], {%[[REG0]], %[[REG1]]};
+ ; CHECK-NEXT: ret;
+ ret i128 %a
+}
+
+; Underlying libcall definition
+; CHECK: .visible .func (.param .align 16 .b8 func_retval0[16]) __umodti3(
+define i128 @__umodti3(i128, i128) {
+ ret i128 0
+}
Modified: llvm/trunk/test/CodeGen/NVPTX/libcall-instruction.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/libcall-instruction.ll?rev=350069&r1=350068&r2=350069&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/NVPTX/libcall-instruction.ll (original)
+++ llvm/trunk/test/CodeGen/NVPTX/libcall-instruction.ll Wed Dec 26 11:12:31 2018
@@ -1,7 +1,7 @@
; RUN: not llc < %s -march=nvptx 2>&1 | FileCheck %s
-; used to panic on failed assetion and now fails with a "Cannot select"
+; used to panic on failed assertion and now fails with an "Undefined external symbol"
-; CHECK: LLVM ERROR: Cannot select: {{t28|0x[0-9a-f]+}}: i32 = ExternalSymbol'__umodti3'
+; CHECK: LLVM ERROR: Undefined external symbol "__umodti3"
define hidden i128 @remainder(i128, i128) {
%3 = urem i128 %0, %1
ret i128 %3
Added: llvm/trunk/test/CodeGen/NVPTX/libcall-intrinsic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/libcall-intrinsic.ll?rev=350069&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/NVPTX/libcall-intrinsic.ll (added)
+++ llvm/trunk/test/CodeGen/NVPTX/libcall-intrinsic.ll Wed Dec 26 11:12:31 2018
@@ -0,0 +1,10 @@
+; RUN: not llc < %s -march=nvptx 2>&1 | FileCheck %s
+; used to segfault and now fails with an "Undefined external symbol"
+
+; CHECK: LLVM ERROR: Undefined external symbol "__powidf2"
+define double @powi(double, i32) {
+ %a = call double @llvm.powi.f64(double %0, i32 %1)
+ ret double %a
+}
+
+declare double @llvm.powi.f64(double, i32) nounwind readnone
Added: llvm/trunk/test/CodeGen/NVPTX/proxy-reg-erasure-mir.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/proxy-reg-erasure-mir.ll?rev=350069&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/NVPTX/proxy-reg-erasure-mir.ll (added)
+++ llvm/trunk/test/CodeGen/NVPTX/proxy-reg-erasure-mir.ll Wed Dec 26 11:12:31 2018
@@ -0,0 +1,25 @@
+; RUN: llc -march=nvptx64 -stop-before=nvptx-proxyreg-erasure < %s 2>&1 \
+; RUN: | FileCheck %s --check-prefix=MIR --check-prefix=MIR-BEFORE
+
+; RUN: llc -march=nvptx64 -stop-after=nvptx-proxyreg-erasure < %s 2>&1 \
+; RUN: | FileCheck %s --check-prefix=MIR --check-prefix=MIR-AFTER
+
+; Check ProxyRegErasure pass MIR manipulation.
+
+declare <4 x i32> @callee_vec_i32()
+define <4 x i32> @check_vec_i32() {
+ ; MIR: body:
+ ; MIR-DAG: Callseq_Start {{[0-9]+}}, {{[0-9]+}}
+ ; MIR-DAG: %0:int32regs, %1:int32regs, %2:int32regs, %3:int32regs = LoadParamMemV4I32 0
+ ; MIR-DAG: Callseq_End {{[0-9]+}}
+
+ ; MIR-BEFORE-DAG: %4:int32regs = ProxyRegI32 killed %0
+ ; MIR-BEFORE-DAG: %5:int32regs = ProxyRegI32 killed %1
+ ; MIR-BEFORE-DAG: %6:int32regs = ProxyRegI32 killed %2
+ ; MIR-BEFORE-DAG: %7:int32regs = ProxyRegI32 killed %3
+ ; MIR-BEFORE-DAG: StoreRetvalV4I32 killed %4, killed %5, killed %6, killed %7, 0
+ ; MIR-AFTER-DAG: StoreRetvalV4I32 killed %0, killed %1, killed %2, killed %3, 0
+
+ %ret = call <4 x i32> @callee_vec_i32()
+ ret <4 x i32> %ret
+}
Added: llvm/trunk/test/CodeGen/NVPTX/proxy-reg-erasure-ptx.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/proxy-reg-erasure-ptx.ll?rev=350069&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/NVPTX/proxy-reg-erasure-ptx.ll (added)
+++ llvm/trunk/test/CodeGen/NVPTX/proxy-reg-erasure-ptx.ll Wed Dec 26 11:12:31 2018
@@ -0,0 +1,183 @@
+; RUN: llc -march=nvptx64 -stop-before=nvptx-proxyreg-erasure < %s 2>&1 \
+; RUN: | llc -x mir -march=nvptx64 -start-before=nvptx-proxyreg-erasure 2>&1 \
+; RUN: | FileCheck %s --check-prefix=PTX --check-prefix=PTX-WITH
+
+; RUN: llc -march=nvptx64 -stop-before=nvptx-proxyreg-erasure < %s 2>&1 \
+; RUN: | llc -x mir -march=nvptx64 -start-after=nvptx-proxyreg-erasure 2>&1 \
+; RUN: | FileCheck %s --check-prefix=PTX --check-prefix=PTX-WITHOUT
+
+; Thorough testing of ProxyRegErasure: PTX assembly with and without the pass.
+
+declare i1 @callee_i1()
+define i1 @check_i1() {
+ ; PTX-LABEL: check_i1
+ ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}}
+ ; PTX-DAG: ld.param.b32 [[LD:%r[0-9]+]], [retval0+0];
+ ; PTX-DAG: } // callseq {{[0-9]+}}
+
+ ; PTX-WITHOUT-DAG: mov.b32 [[PROXY:%r[0-9]+]], [[LD]];
+ ; PTX-WITHOUT-DAG: and.b32 [[RES:%r[0-9]+]], [[PROXY]], 1;
+ ; PTX-WITH-DAG: and.b32 [[RES:%r[0-9]+]], [[LD]], 1;
+
+ ; PTX-DAG: st.param.b32 [func_retval0+0], [[RES]];
+
+ %ret = call i1 @callee_i1()
+ ret i1 %ret
+}
+
+declare i16 @callee_i16()
+define i16 @check_i16() {
+ ; PTX-LABEL: check_i16
+ ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}}
+ ; PTX-DAG: ld.param.b32 [[LD:%r[0-9]+]], [retval0+0];
+ ; PTX-DAG: } // callseq {{[0-9]+}}
+
+ ; PTX-WITHOUT-DAG: mov.b32 [[PROXY:%r[0-9]+]], [[LD]];
+ ; PTX-WITHOUT-DAG: and.b32 [[RES:%r[0-9]+]], [[PROXY]], 65535;
+ ; PTX-WITH-DAG: and.b32 [[RES:%r[0-9]+]], [[LD]], 65535;
+
+ ; PTX-DAG: st.param.b32 [func_retval0+0], [[RES]];
+
+ %ret = call i16 @callee_i16()
+ ret i16 %ret
+}
+
+declare i32 @callee_i32()
+define i32 @check_i32() {
+ ; PTX-LABEL: check_i32
+ ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}}
+ ; PTX-DAG: ld.param.b32 [[LD:%r[0-9]+]], [retval0+0];
+ ; PTX-DAG: } // callseq {{[0-9]+}}
+
+ ; PTX-WITHOUT-DAG: mov.b32 [[PROXY:%r[0-9]+]], [[LD]];
+ ; PTX-WITHOUT-DAG: st.param.b32 [func_retval0+0], [[PROXY]];
+ ; PTX-WITH-DAG: st.param.b32 [func_retval0+0], [[LD]];
+
+ %ret = call i32 @callee_i32()
+ ret i32 %ret
+}
+
+declare i64 @callee_i64()
+define i64 @check_i64() {
+ ; PTX-LABEL: check_i64
+ ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}}
+ ; PTX-DAG: ld.param.b64 [[LD:%rd[0-9]+]], [retval0+0];
+ ; PTX-DAG: } // callseq {{[0-9]+}}
+
+ ; PTX-WITHOUT-DAG: mov.b64 [[PROXY:%rd[0-9]+]], [[LD]];
+ ; PTX-WITHOUT-DAG: st.param.b64 [func_retval0+0], [[PROXY]];
+ ; PTX-WITH-DAG: st.param.b64 [func_retval0+0], [[LD]];
+
+ %ret = call i64 @callee_i64()
+ ret i64 %ret
+}
+
+declare i128 @callee_i128()
+define i128 @check_i128() {
+ ; PTX-LABEL: check_i128
+ ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}}
+ ; PTX-DAG: ld.param.v2.b64 {[[LD0:%rd[0-9]+]], [[LD1:%rd[0-9]+]]}, [retval0+0];
+ ; PTX-DAG: } // callseq {{[0-9]+}}
+
+ ; PTX-WITHOUT-DAG: mov.b64 [[PROXY0:%rd[0-9]+]], [[LD0]];
+ ; PTX-WITHOUT-DAG: mov.b64 [[PROXY1:%rd[0-9]+]], [[LD1]];
+ ; PTX-WITHOUT-DAG: st.param.v2.b64 [func_retval0+0], {[[PROXY0]], [[PROXY1]]};
+ ; PTX-WITH-DAG: st.param.v2.b64 [func_retval0+0], {[[LD0]], [[LD1]]};
+
+ %ret = call i128 @callee_i128()
+ ret i128 %ret
+}
+
+declare half @callee_f16()
+define half @check_f16() {
+ ; PTX-LABEL: check_f16
+ ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}}
+ ; PTX-DAG: ld.param.b16 [[LD:%h[0-9]+]], [retval0+0];
+ ; PTX-DAG: } // callseq {{[0-9]+}}
+
+ ; PTX-WITHOUT-DAG: mov.b16 [[PROXY:%h[0-9]+]], [[LD]];
+ ; PTX-WITHOUT-DAG: st.param.b16 [func_retval0+0], [[PROXY]];
+ ; PTX-WITH-DAG: st.param.b16 [func_retval0+0], [[LD]];
+
+ %ret = call half @callee_f16()
+ ret half %ret
+}
+
+declare float @callee_f32()
+define float @check_f32() {
+ ; PTX-LABEL: check_f32
+ ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}}
+ ; PTX-DAG: ld.param.f32 [[LD:%f[0-9]+]], [retval0+0];
+ ; PTX-DAG: } // callseq {{[0-9]+}}
+
+ ; PTX-WITHOUT-DAG: mov.f32 [[PROXY:%f[0-9]+]], [[LD]];
+ ; PTX-WITHOUT-DAG: st.param.f32 [func_retval0+0], [[PROXY]];
+ ; PTX-WITH-DAG: st.param.f32 [func_retval0+0], [[LD]];
+
+ %ret = call float @callee_f32()
+ ret float %ret
+}
+
+declare double @callee_f64()
+define double @check_f64() {
+ ; PTX-LABEL: check_f64
+ ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}}
+ ; PTX-DAG: ld.param.f64 [[LD:%fd[0-9]+]], [retval0+0];
+ ; PTX-DAG: } // callseq {{[0-9]+}}
+
+ ; PTX-WITHOUT-DAG: mov.f64 [[PROXY:%fd[0-9]+]], [[LD]];
+ ; PTX-WITHOUT-DAG: st.param.f64 [func_retval0+0], [[PROXY]];
+ ; PTX-WITH-DAG: st.param.f64 [func_retval0+0], [[LD]];
+
+ %ret = call double @callee_f64()
+ ret double %ret
+}
+
+declare <4 x i32> @callee_vec_i32()
+define <4 x i32> @check_vec_i32() {
+ ; PTX-LABEL: check_vec_i32
+ ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}}
+ ; PTX-DAG: ld.param.v4.b32 {[[LD0:%r[0-9]+]], [[LD1:%r[0-9]+]], [[LD2:%r[0-9]+]], [[LD3:%r[0-9]+]]}, [retval0+0];
+ ; PTX-DAG: } // callseq {{[0-9]+}}
+
+ ; PTX-WITHOUT-DAG: mov.b32 [[PROXY0:%r[0-9]+]], [[LD0]];
+ ; PTX-WITHOUT-DAG: mov.b32 [[PROXY1:%r[0-9]+]], [[LD1]];
+ ; PTX-WITHOUT-DAG: mov.b32 [[PROXY2:%r[0-9]+]], [[LD2]];
+ ; PTX-WITHOUT-DAG: mov.b32 [[PROXY3:%r[0-9]+]], [[LD3]];
+ ; PTX-WITHOUT-DAG: st.param.v4.b32 [func_retval0+0], {[[PROXY0]], [[PROXY1]], [[PROXY2]], [[PROXY3]]};
+ ; PTX-WITH-DAG: st.param.v4.b32 [func_retval0+0], {[[LD0]], [[LD1]], [[LD2]], [[LD3]]};
+
+ %ret = call <4 x i32> @callee_vec_i32()
+ ret <4 x i32> %ret
+}
+
+declare <2 x half> @callee_vec_f16()
+define <2 x half> @check_vec_f16() {
+ ; PTX-LABEL: check_vec_f16
+ ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}}
+ ; PTX-DAG: ld.param.b32 [[LD:%hh[0-9]+]], [retval0+0];
+ ; PTX-DAG: } // callseq {{[0-9]+}}
+
+ ; PTX-WITHOUT-DAG: mov.b32 [[PROXY:%hh[0-9]+]], [[LD]];
+ ; PTX-WITHOUT-DAG: st.param.b32 [func_retval0+0], [[PROXY]];
+ ; PTX-WITH-DAG: st.param.b32 [func_retval0+0], [[LD]];
+
+ %ret = call <2 x half> @callee_vec_f16()
+ ret <2 x half> %ret
+}
+
+declare <2 x double> @callee_vec_f64()
+define <2 x double> @check_vec_f64() {
+ ; PTX-LABEL: check_vec_f64
+ ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}}
+ ; PTX-DAG: ld.param.v2.f64 {[[LD0:%fd[0-9]+]], [[LD1:%fd[0-9]+]]}, [retval0+0];
+ ; PTX-DAG: } // callseq {{[0-9]+}}
+
+ ; PTX-WITHOUT-DAG: mov.f64 [[PROXY0:%fd[0-9]+]], [[LD0]];
+ ; PTX-WITHOUT-DAG: mov.f64 [[PROXY1:%fd[0-9]+]], [[LD1]];
+ ; PTX-WITHOUT-DAG: st.param.v2.f64 [func_retval0+0], {[[PROXY0]], [[PROXY1]]};
+ ; PTX-WITH-DAG: st.param.v2.f64 [func_retval0+0], {[[LD0]], [[LD1]]};
+
+ %ret = call <2 x double> @callee_vec_f64()
+ ret <2 x double> %ret
+}
Removed: llvm/trunk/test/CodeGen/NVPTX/zero-cs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/zero-cs.ll?rev=350068&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/NVPTX/zero-cs.ll (original)
+++ llvm/trunk/test/CodeGen/NVPTX/zero-cs.ll (removed)
@@ -1,10 +0,0 @@
-; RUN: not llc < %s -march=nvptx 2>&1 | FileCheck %s
-; used to seqfault and now fails with a "Cannot select"
-
-; CHECK: LLVM ERROR: Cannot select: {{t7|0x[0-9a-f]+}}: i32 = ExternalSymbol'__powidf2'
-define double @powi() {
- %1 = call double @llvm.powi.f64(double 1.000000e+00, i32 undef)
- ret double %1
-}
-
-declare double @llvm.powi.f64(double, i32) nounwind readnone
More information about the llvm-commits
mailing list