[llvm-commits] [llvm] r140309 - in /llvm/trunk/lib/Target/PTX: PTXAsmPrinter.cpp PTXISelDAGToDAG.cpp PTXISelLowering.cpp PTXISelLowering.h PTXInstrInfo.cpp PTXInstrInfo.td PTXMFInfoExtract.cpp PTXMachineFunctionInfo.h
Justin Holewinski
justin.holewinski at gmail.com
Thu Sep 22 09:45:46 PDT 2011
Author: jholewinski
Date: Thu Sep 22 11:45:46 2011
New Revision: 140309
URL: http://llvm.org/viewvc/llvm-project?rev=140309&view=rev
Log:
PTX: Use .param space for device function return values on SM 2.0+, and attempt
to fix up parameter passing on SM < 2.0
Modified:
llvm/trunk/lib/Target/PTX/PTXAsmPrinter.cpp
llvm/trunk/lib/Target/PTX/PTXISelDAGToDAG.cpp
llvm/trunk/lib/Target/PTX/PTXISelLowering.cpp
llvm/trunk/lib/Target/PTX/PTXISelLowering.h
llvm/trunk/lib/Target/PTX/PTXInstrInfo.cpp
llvm/trunk/lib/Target/PTX/PTXInstrInfo.td
llvm/trunk/lib/Target/PTX/PTXMFInfoExtract.cpp
llvm/trunk/lib/Target/PTX/PTXMachineFunctionInfo.h
Modified: llvm/trunk/lib/Target/PTX/PTXAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PTX/PTXAsmPrinter.cpp?rev=140309&r1=140308&r2=140309&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PTX/PTXAsmPrinter.cpp (original)
+++ llvm/trunk/lib/Target/PTX/PTXAsmPrinter.cpp Thu Sep 22 11:45:46 2011
@@ -91,9 +91,13 @@
static const char PARAM_PREFIX[] = "__param_";
static const char RETURN_PREFIX[] = "__ret_";
-static const char *getRegisterTypeName(unsigned RegNo) {
-#define TEST_REGCLS(cls, clsstr) \
- if (PTX::cls ## RegisterClass->contains(RegNo)) return # clsstr;
+static const char *getRegisterTypeName(unsigned RegNo,
+ const MachineRegisterInfo& MRI) {
+ const TargetRegisterClass *TRC = MRI.getRegClass(RegNo);
+
+#define TEST_REGCLS(cls, clsstr) \
+ if (PTX::cls ## RegisterClass == TRC) return # clsstr;
+
TEST_REGCLS(RegPred, pred);
TEST_REGCLS(RegI16, b16);
TEST_REGCLS(RegI32, b32);
@@ -288,18 +292,18 @@
}
}
- unsigned Index = 1;
+ //unsigned Index = 1;
// Print parameter passing params
- for (PTXMachineFunctionInfo::param_iterator
- i = MFI->paramBegin(), e = MFI->paramEnd(); i != e; ++i) {
- std::string def = "\t.param .b";
- def += utostr(*i);
- def += " __ret_";
- def += utostr(Index);
- Index++;
- def += ";";
- OutStreamer.EmitRawText(Twine(def));
- }
+ //for (PTXMachineFunctionInfo::param_iterator
+ // i = MFI->paramBegin(), e = MFI->paramEnd(); i != e; ++i) {
+ // std::string def = "\t.param .b";
+ // def += utostr(*i);
+ // def += " __ret_";
+ // def += utostr(Index);
+ // Index++;
+ // def += ";";
+ // OutStreamer.EmitRawText(Twine(def));
+ //}
}
void PTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
@@ -436,7 +440,8 @@
void PTXAsmPrinter::printReturnOperand(const MachineInstr *MI, int opNum,
raw_ostream &OS, const char *Modifier) {
- OS << RETURN_PREFIX << (int) MI->getOperand(opNum).getImm() + 1;
+ //OS << RETURN_PREFIX << (int) MI->getOperand(opNum).getImm() + 1;
+ OS << "__ret";
}
void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
@@ -559,6 +564,7 @@
const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>();
const bool isKernel = MFI->isKernel();
const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>();
+ const MachineRegisterInfo& MRI = MF->getRegInfo();
std::string decl = isKernel ? ".entry" : ".func";
@@ -566,16 +572,22 @@
if (!isKernel) {
decl += " (";
- for (PTXMachineFunctionInfo::ret_iterator
- i = MFI->retRegBegin(), e = MFI->retRegEnd(), b = i;
- i != e; ++i) {
- if (i != b) {
- decl += ", ";
+ if (ST.useParamSpaceForDeviceArgs() && MFI->getRetParamSize() != 0) {
+ decl += ".param .b";
+ decl += utostr(MFI->getRetParamSize());
+ decl += " __ret";
+ } else {
+ for (PTXMachineFunctionInfo::ret_iterator
+ i = MFI->retRegBegin(), e = MFI->retRegEnd(), b = i;
+ i != e; ++i) {
+ if (i != b) {
+ decl += ", ";
+ }
+ decl += ".reg .";
+ decl += getRegisterTypeName(*i, MRI);
+ decl += " ";
+ decl += MFI->getRegisterName(*i);
}
- decl += ".reg .";
- decl += getRegisterTypeName(*i);
- decl += " ";
- decl += getRegisterName(*i);
}
decl += ")";
}
@@ -589,23 +601,32 @@
cnt = 0;
// Print parameters
- for (PTXMachineFunctionInfo::reg_iterator
- i = MFI->argRegBegin(), e = MFI->argRegEnd(), b = i;
- i != e; ++i) {
- if (i != b) {
- decl += ", ";
- }
- if (isKernel || ST.useParamSpaceForDeviceArgs()) {
+ if (isKernel || ST.useParamSpaceForDeviceArgs()) {
+ for (PTXMachineFunctionInfo::argparam_iterator
+ i = MFI->argParamBegin(), e = MFI->argParamEnd(), b = i;
+ i != e; ++i) {
+ if (i != b) {
+ decl += ", ";
+ }
+
decl += ".param .b";
decl += utostr(*i);
decl += " ";
decl += PARAM_PREFIX;
decl += utostr(++cnt);
- } else {
+ }
+ } else {
+ for (PTXMachineFunctionInfo::reg_iterator
+ i = MFI->argRegBegin(), e = MFI->argRegEnd(), b = i;
+ i != e; ++i) {
+ if (i != b) {
+ decl += ", ";
+ }
+
decl += ".reg .";
- decl += getRegisterTypeName(*i);
+ decl += getRegisterTypeName(*i, MRI);
decl += " ";
- decl += getRegisterName(*i);
+ decl += MFI->getRegisterName(*i);
}
}
decl += ")";
Modified: llvm/trunk/lib/Target/PTX/PTXISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PTX/PTXISelDAGToDAG.cpp?rev=140309&r1=140308&r2=140309&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PTX/PTXISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/PTX/PTXISelDAGToDAG.cpp Thu Sep 22 11:45:46 2011
@@ -46,6 +46,9 @@
// pattern (PTXbrcond bb:$d, ...) in PTXInstrInfo.td
SDNode *SelectBRCOND(SDNode *Node);
+ SDNode *SelectREADPARAM(SDNode *Node);
+ SDNode *SelectWRITEPARAM(SDNode *Node);
+
bool isImm(const SDValue &operand);
bool SelectImm(const SDValue &operand, SDValue &imm);
@@ -68,6 +71,10 @@
switch (Node->getOpcode()) {
case ISD::BRCOND:
return SelectBRCOND(Node);
+ case PTXISD::READ_PARAM:
+ return SelectREADPARAM(Node);
+ case PTXISD::WRITE_PARAM:
+ return SelectWRITEPARAM(Node);
default:
return SelectCode(Node);
}
@@ -90,6 +97,82 @@
return CurDAG->getMachineNode(PTX::BRAdp, dl, MVT::Other, Ops, 4);
}
+SDNode *PTXDAGToDAGISel::SelectREADPARAM(SDNode *Node) {
+ SDValue Chain = Node->getOperand(0);
+ SDValue Index = Node->getOperand(1);
+
+ int OpCode;
+
+ // Get the type of parameter we are reading
+ EVT VT = Node->getValueType(0);
+ assert(VT.isSimple() && "READ_PARAM only implemented for MVT types");
+
+ MVT Type = VT.getSimpleVT();
+
+ if (Type == MVT::i1)
+ OpCode = PTX::READPARAMPRED;
+ else if (Type == MVT::i16)
+ OpCode = PTX::READPARAMI16;
+ else if (Type == MVT::i32)
+ OpCode = PTX::READPARAMI32;
+ else if (Type == MVT::i64)
+ OpCode = PTX::READPARAMI64;
+ else if (Type == MVT::f32)
+ OpCode = PTX::READPARAMF32;
+ else if (Type == MVT::f64)
+ OpCode = PTX::READPARAMF64;
+
+ SDValue Pred = CurDAG->getRegister(PTX::NoRegister, MVT::i1);
+ SDValue PredOp = CurDAG->getTargetConstant(PTX::PRED_NORMAL, MVT::i32);
+ DebugLoc dl = Node->getDebugLoc();
+
+ SDValue Ops[] = { Index, Pred, PredOp, Chain };
+ return CurDAG->getMachineNode(OpCode, dl, VT, Ops, 4);
+}
+
+SDNode *PTXDAGToDAGISel::SelectWRITEPARAM(SDNode *Node) {
+
+ SDValue Chain = Node->getOperand(0);
+ SDValue Value = Node->getOperand(1);
+
+ int OpCode;
+
+ //Node->dumpr(CurDAG);
+
+ // Get the type of parameter we are writing
+ EVT VT = Value->getValueType(0);
+ assert(VT.isSimple() && "WRITE_PARAM only implemented for MVT types");
+
+ MVT Type = VT.getSimpleVT();
+
+ if (Type == MVT::i1)
+ OpCode = PTX::WRITEPARAMPRED;
+ else if (Type == MVT::i16)
+ OpCode = PTX::WRITEPARAMI16;
+ else if (Type == MVT::i32)
+ OpCode = PTX::WRITEPARAMI32;
+ else if (Type == MVT::i64)
+ OpCode = PTX::WRITEPARAMI64;
+ else if (Type == MVT::f32)
+ OpCode = PTX::WRITEPARAMF32;
+ else if (Type == MVT::f64)
+ OpCode = PTX::WRITEPARAMF64;
+ else
+ llvm_unreachable("Invalid type in SelectWRITEPARAM");
+
+ SDValue Pred = CurDAG->getRegister(PTX::NoRegister, MVT::i1);
+ SDValue PredOp = CurDAG->getTargetConstant(PTX::PRED_NORMAL, MVT::i32);
+ DebugLoc dl = Node->getDebugLoc();
+
+ SDValue Ops[] = { Value, Pred, PredOp, Chain };
+ SDNode* Ret = CurDAG->getMachineNode(OpCode, dl, MVT::Other, Ops, 4);
+
+ //dbgs() << "SelectWRITEPARAM produced:\n\t";
+ //Ret->dumpr(CurDAG);
+
+ return Ret;
+}
+
// Match memory operand of the form [reg+reg]
bool PTXDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2) {
if (Addr.getOpcode() != ISD::ADD || Addr.getNumOperands() < 2 ||
Modified: llvm/trunk/lib/Target/PTX/PTXISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PTX/PTXISelLowering.cpp?rev=140309&r1=140308&r2=140309&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PTX/PTXISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/PTX/PTXISelLowering.cpp Thu Sep 22 11:45:46 2011
@@ -132,6 +132,10 @@
return "PTXISD::LOAD_PARAM";
case PTXISD::STORE_PARAM:
return "PTXISD::STORE_PARAM";
+ case PTXISD::READ_PARAM:
+ return "PTXISD::READ_PARAM";
+ case PTXISD::WRITE_PARAM:
+ return "PTXISD::WRITE_PARAM";
case PTXISD::EXIT:
return "PTXISD::EXIT";
case PTXISD::RET:
@@ -220,7 +224,6 @@
if (MFI->isKernel() || ST.useParamSpaceForDeviceArgs()) {
// We just need to emit the proper LOAD_PARAM ISDs
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
-
assert((!MFI->isKernel() || Ins[i].VT != MVT::i1) &&
"Kernels cannot take pred operands");
@@ -231,57 +234,71 @@
// Instead of storing a physical register in our argument list, we just
// store the total size of the parameter, in bits. The ASM printer
// knows how to process this.
- MFI->addArgReg(Ins[i].VT.getStoreSizeInBits());
+ MFI->addArgParam(Ins[i].VT.getStoreSizeInBits());
}
}
else {
// For device functions, we use the PTX calling convention to do register
// assignments then create CopyFromReg ISDs for the allocated registers
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), ArgLocs,
- *DAG.getContext());
+ //SmallVector<CCValAssign, 16> ArgLocs;
+ //CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), ArgLocs,
+ // *DAG.getContext());
- CCInfo.AnalyzeFormalArguments(Ins, CC_PTX);
+ //CCInfo.AnalyzeFormalArguments(Ins, CC_PTX);
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ //for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
- CCValAssign& VA = ArgLocs[i];
- EVT RegVT = VA.getLocVT();
+ EVT RegVT = Ins[i].VT;
TargetRegisterClass* TRC = 0;
+ int OpCode;
- assert(VA.isRegLoc() && "CCValAssign must be RegLoc");
+ //assert(VA.isRegLoc() && "CCValAssign must be RegLoc");
// Determine which register class we need
if (RegVT == MVT::i1) {
TRC = PTX::RegPredRegisterClass;
+ OpCode = PTX::READPARAMPRED;
}
else if (RegVT == MVT::i16) {
TRC = PTX::RegI16RegisterClass;
+ OpCode = PTX::READPARAMI16;
}
else if (RegVT == MVT::i32) {
TRC = PTX::RegI32RegisterClass;
+ OpCode = PTX::READPARAMI32;
}
else if (RegVT == MVT::i64) {
TRC = PTX::RegI64RegisterClass;
+ OpCode = PTX::READPARAMI64;
}
else if (RegVT == MVT::f32) {
TRC = PTX::RegF32RegisterClass;
+ OpCode = PTX::READPARAMF32;
}
else if (RegVT == MVT::f64) {
TRC = PTX::RegF64RegisterClass;
+ OpCode = PTX::READPARAMF64;
}
else {
llvm_unreachable("Unknown parameter type");
}
+ // Use a unique index in the instruction to prevent instruction folding.
+ // Yes, this is a hack.
+ SDValue Index = DAG.getTargetConstant(i, MVT::i32);
unsigned Reg = MF.getRegInfo().createVirtualRegister(TRC);
- MF.getRegInfo().addLiveIn(VA.getLocReg(), Reg);
+ SDValue ArgValue = DAG.getNode(PTXISD::READ_PARAM, dl, RegVT, Chain,
+ Index);
+
+ SDValue Flag = ArgValue.getValue(1);
- SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
+ SDValue Copy = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
+ SDValue RegValue = DAG.getRegister(Reg, RegVT);
InVals.push_back(ArgValue);
- MFI->addArgReg(VA.getLocReg());
+ MFI->addArgReg(Reg);
}
}
@@ -305,7 +322,7 @@
assert(Outs.size() == 0 && "Kernel must return void.");
return DAG.getNode(PTXISD::EXIT, dl, MVT::Other, Chain);
case CallingConv::PTX_Device:
- //assert(Outs.size() <= 1 && "Can at most return one value.");
+ assert(Outs.size() <= 1 && "Can at most return one value.");
break;
}
@@ -318,28 +335,84 @@
// device functions if SM >= 2.0 and the number of return arguments is
// only 1, we just always use registers since this makes the codegen
// easier.
- SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
- CCInfo.AnalyzeReturn(Outs, RetCC_PTX);
+ const PTXSubtarget& ST = getTargetMachine().getSubtarget<PTXSubtarget>();
- for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
- CCValAssign& VA = RVLocs[i];
+ if (ST.useParamSpaceForDeviceArgs()) {
+ assert(Outs.size() < 2 && "Device functions can return at most one value");
- assert(VA.isRegLoc() && "CCValAssign must be RegLoc");
+ if (Outs.size() == 1) {
+ unsigned Size = OutVals[0].getValueType().getSizeInBits();
+ SDValue Index = DAG.getTargetConstant(MFI->getNextParam(Size), MVT::i32);
+ Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain,
+ Index, OutVals[0]);
- unsigned Reg = VA.getLocReg();
+ //Flag = Chain.getValue(1);
+ MFI->setRetParamSize(Outs[0].VT.getStoreSizeInBits());
+ }
+ } else {
+ //SmallVector<CCValAssign, 16> RVLocs;
+ //CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ //getTargetMachine(), RVLocs, *DAG.getContext());
- DAG.getMachineFunction().getRegInfo().addLiveOut(Reg);
+ //CCInfo.AnalyzeReturn(Outs, RetCC_PTX);
- Chain = DAG.getCopyToReg(Chain, dl, Reg, OutVals[i], Flag);
+ //for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
+ //CCValAssign& VA = RVLocs[i];
- // Guarantee that all emitted copies are stuck together,
- // avoiding something bad
- Flag = Chain.getValue(1);
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
- MFI->addRetReg(Reg);
+ //assert(VA.isRegLoc() && "CCValAssign must be RegLoc");
+
+ //unsigned Reg = VA.getLocReg();
+
+ EVT RegVT = Outs[i].VT;
+ TargetRegisterClass* TRC = 0;
+
+ // Determine which register class we need
+ if (RegVT == MVT::i1) {
+ TRC = PTX::RegPredRegisterClass;
+ }
+ else if (RegVT == MVT::i16) {
+ TRC = PTX::RegI16RegisterClass;
+ }
+ else if (RegVT == MVT::i32) {
+ TRC = PTX::RegI32RegisterClass;
+ }
+ else if (RegVT == MVT::i64) {
+ TRC = PTX::RegI64RegisterClass;
+ }
+ else if (RegVT == MVT::f32) {
+ TRC = PTX::RegF32RegisterClass;
+ }
+ else if (RegVT == MVT::f64) {
+ TRC = PTX::RegF64RegisterClass;
+ }
+ else {
+ llvm_unreachable("Unknown parameter type");
+ }
+
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(TRC);
+
+ //DAG.getMachineFunction().getRegInfo().addLiveOut(Reg);
+
+ //Chain = DAG.getCopyToReg(Chain, dl, Reg, OutVals[i], Flag);
+ //SDValue Copy = DAG.getCopyToReg(Chain, dl, Reg, OutVals[i]/*, Flag*/);
+
+ // Guarantee that all emitted copies are stuck together,
+ // avoiding something bad
+ //Flag = Chain.getValue(1);
+
+ SDValue Copy = DAG.getCopyToReg(Chain, dl, Reg, OutVals[i]/*, Flag*/);
+ SDValue OutReg = DAG.getRegister(Reg, RegVT);
+
+ Chain = DAG.getNode(PTXISD::WRITE_PARAM, dl, MVT::Other, Copy, OutReg);
+ //Flag = Chain.getValue(1);
+
+ MFI->addRetReg(Reg);
+
+ //MFI->addRetReg(Reg);
+ }
}
if (Flag.getNode() == 0) {
Modified: llvm/trunk/lib/Target/PTX/PTXISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PTX/PTXISelLowering.h?rev=140309&r1=140308&r2=140309&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PTX/PTXISelLowering.h (original)
+++ llvm/trunk/lib/Target/PTX/PTXISelLowering.h Thu Sep 22 11:45:46 2011
@@ -26,6 +26,8 @@
FIRST_NUMBER = ISD::BUILTIN_OP_END,
LOAD_PARAM,
STORE_PARAM,
+ READ_PARAM,
+ WRITE_PARAM,
EXIT,
RET,
COPY_ADDRESS,
Modified: llvm/trunk/lib/Target/PTX/PTXInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PTX/PTXInstrInfo.cpp?rev=140309&r1=140308&r2=140309&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PTX/PTXInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/PTX/PTXInstrInfo.cpp Thu Sep 22 11:45:46 2011
@@ -50,11 +50,11 @@
bool KillSrc) const {
const MachineRegisterInfo& MRI = MBB.getParent()->getRegInfo();
- assert(MRI.getRegClass(SrcReg) == MRI.getRegClass(DstReg) &&
- "Invalid register copy between two register classes");
+ //assert(MRI.getRegClass(SrcReg) == MRI.getRegClass(DstReg) &&
+ // "Invalid register copy between two register classes");
for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++i) {
- if (map[i].cls == MRI.getRegClass(SrcReg)) {
+ if (map[i].cls == MRI.getRegClass(DstReg)) {
const MCInstrDesc &MCID = get(map[i].opcode);
MachineInstr *MI = BuildMI(MBB, I, DL, MCID, DstReg).
addReg(SrcReg, getKillRegState(KillSrc));
Modified: llvm/trunk/lib/Target/PTX/PTXInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PTX/PTXInstrInfo.td?rev=140309&r1=140308&r2=140309&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PTX/PTXInstrInfo.td (original)
+++ llvm/trunk/lib/Target/PTX/PTXInstrInfo.td Thu Sep 22 11:45:46 2011
@@ -209,6 +209,13 @@
: SDNode<"PTXISD::STORE_PARAM", SDTypeProfile<0, 2, [SDTCisVT<0, i32>]>,
[SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
+def PTXreadparam
+ : SDNode<"PTXISD::READ_PARAM", SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
+def PTXwriteparam
+ : SDNode<"PTXISD::WRITE_PARAM", SDTypeProfile<0, 1, []>,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
+
//===----------------------------------------------------------------------===//
// Instruction Class Templates
//===----------------------------------------------------------------------===//
@@ -617,7 +624,7 @@
// SM_13+ defaults to .rn for f32 and f64,
// SM10 must *not* provide a rounding
-// TODO:
+// TODO:
// - Allow user selection of rounding modes for fdiv
// - Add support for -prec-div=false (.approx)
@@ -1045,7 +1052,7 @@
// Conversion to f64
def CVT_f64_pred
- : InstPTX<(outs RegF64:$d), (ins RegPred:$a),
+ : InstPTX<(outs RegF64:$d), (ins RegPred:$a),
"selp.f64\t$d, 0D3F80000000000000, 0D0000000000000000, $a", // 1.0
[(set RegF64:$d, (uint_to_fp RegPred:$a))]>;
@@ -1114,6 +1121,27 @@
def STACKLOADF64 : InstPTX<(outs), (ins RegF64:$d, i32imm:$a),
"mov.f64\t$d, s$a", []>;
+///===- Parameter Passing Pseudo-Instructions -----------------------------===//
+
+def READPARAMPRED : InstPTX<(outs RegPred:$a), (ins i32imm:$b),
+ "mov.pred\t$a, %param$b", []>;
+def READPARAMI16 : InstPTX<(outs RegI16:$a), (ins i32imm:$b),
+ "mov.b16\t$a, %param$b", []>;
+def READPARAMI32 : InstPTX<(outs RegI32:$a), (ins i32imm:$b),
+ "mov.b32\t$a, %param$b", []>;
+def READPARAMI64 : InstPTX<(outs RegI64:$a), (ins i32imm:$b),
+ "mov.b64\t$a, %param$b", []>;
+def READPARAMF32 : InstPTX<(outs RegF32:$a), (ins i32imm:$b),
+ "mov.f32\t$a, %param$b", []>;
+def READPARAMF64 : InstPTX<(outs RegF64:$a), (ins i32imm:$b),
+ "mov.f64\t$a, %param$b", []>;
+
+def WRITEPARAMPRED : InstPTX<(outs), (ins RegPred:$a), "//w", []>;
+def WRITEPARAMI16 : InstPTX<(outs), (ins RegI16:$a), "//w", []>;
+def WRITEPARAMI32 : InstPTX<(outs), (ins RegI32:$a), "//w", []>;
+def WRITEPARAMI64 : InstPTX<(outs), (ins RegI64:$a), "//w", []>;
+def WRITEPARAMF32 : InstPTX<(outs), (ins RegF32:$a), "//w", []>;
+def WRITEPARAMF64 : InstPTX<(outs), (ins RegF64:$a), "//w", []>;
// Call handling
// def ADJCALLSTACKUP :
Modified: llvm/trunk/lib/Target/PTX/PTXMFInfoExtract.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PTX/PTXMFInfoExtract.cpp?rev=140309&r1=140308&r2=140309&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PTX/PTXMFInfoExtract.cpp (original)
+++ llvm/trunk/lib/Target/PTX/PTXMFInfoExtract.cpp Thu Sep 22 11:45:46 2011
@@ -66,7 +66,7 @@
// FIXME: This is a slow linear scanning
for (unsigned reg = PTX::NoRegister + 1; reg < PTX::NUM_TARGET_REGS; ++reg)
if (MRI.isPhysRegUsed(reg) &&
- !MFI->isRetReg(reg) &&
+ //!MFI->isRetReg(reg) &&
(MFI->isKernel() || !MFI->isArgReg(reg)))
MFI->addLocalVarReg(reg);
Modified: llvm/trunk/lib/Target/PTX/PTXMachineFunctionInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PTX/PTXMachineFunctionInfo.h?rev=140309&r1=140308&r2=140309&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PTX/PTXMachineFunctionInfo.h (original)
+++ llvm/trunk/lib/Target/PTX/PTXMachineFunctionInfo.h Thu Sep 22 11:45:46 2011
@@ -20,16 +20,20 @@
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
namespace llvm {
+
/// PTXMachineFunctionInfo - This class is derived from MachineFunction and
/// contains private PTX target-specific information for each MachineFunction.
///
class PTXMachineFunctionInfo : public MachineFunctionInfo {
private:
bool is_kernel;
- std::vector<unsigned> reg_arg, reg_local_var;
- std::vector<unsigned> reg_ret;
+ DenseSet<unsigned> reg_local_var;
+ DenseSet<unsigned> reg_arg;
+ DenseSet<unsigned> reg_ret;
std::vector<unsigned> call_params;
bool _isDoneAddArg;
@@ -40,29 +44,28 @@
RegisterMap usedRegs;
RegisterNameMap regNames;
+ SmallVector<unsigned, 8> argParams;
+
+ unsigned retParamSize;
+
public:
PTXMachineFunctionInfo(MachineFunction &MF)
: is_kernel(false), reg_ret(PTX::NoRegister), _isDoneAddArg(false) {
- reg_arg.reserve(8);
- reg_local_var.reserve(32);
-
usedRegs[PTX::RegPredRegisterClass] = RegisterList();
usedRegs[PTX::RegI16RegisterClass] = RegisterList();
usedRegs[PTX::RegI32RegisterClass] = RegisterList();
usedRegs[PTX::RegI64RegisterClass] = RegisterList();
usedRegs[PTX::RegF32RegisterClass] = RegisterList();
usedRegs[PTX::RegF64RegisterClass] = RegisterList();
+
+ retParamSize = 0;
}
void setKernel(bool _is_kernel=true) { is_kernel = _is_kernel; }
- void addArgReg(unsigned reg) { reg_arg.push_back(reg); }
- void addLocalVarReg(unsigned reg) { reg_local_var.push_back(reg); }
- void addRetReg(unsigned reg) {
- if (!isRetReg(reg)) {
- reg_ret.push_back(reg);
- }
- }
+
+ void addLocalVarReg(unsigned reg) { reg_local_var.insert(reg); }
+
void doneAddArg(void) {
_isDoneAddArg = true;
@@ -71,17 +74,20 @@
bool isKernel() const { return is_kernel; }
- typedef std::vector<unsigned>::const_iterator reg_iterator;
- typedef std::vector<unsigned>::const_reverse_iterator reg_reverse_iterator;
- typedef std::vector<unsigned>::const_iterator ret_iterator;
+ typedef DenseSet<unsigned>::const_iterator reg_iterator;
+ //typedef DenseSet<unsigned>::const_reverse_iterator reg_reverse_iterator;
+ typedef DenseSet<unsigned>::const_iterator ret_iterator;
typedef std::vector<unsigned>::const_iterator param_iterator;
+ typedef SmallVector<unsigned, 8>::const_iterator argparam_iterator;
bool argRegEmpty() const { return reg_arg.empty(); }
int getNumArg() const { return reg_arg.size(); }
reg_iterator argRegBegin() const { return reg_arg.begin(); }
reg_iterator argRegEnd() const { return reg_arg.end(); }
- reg_reverse_iterator argRegReverseBegin() const { return reg_arg.rbegin(); }
- reg_reverse_iterator argRegReverseEnd() const { return reg_arg.rend(); }
+ argparam_iterator argParamBegin() const { return argParams.begin(); }
+ argparam_iterator argParamEnd() const { return argParams.end(); }
+ //reg_reverse_iterator argRegReverseBegin() const { return reg_arg.rbegin(); }
+ //reg_reverse_iterator argRegReverseEnd() const { return reg_arg.rend(); }
bool localVarRegEmpty() const { return reg_local_var.empty(); }
reg_iterator localVarRegBegin() const { return reg_local_var.begin(); }
@@ -103,42 +109,75 @@
return std::find(reg_arg.begin(), reg_arg.end(), reg) != reg_arg.end();
}
- bool isRetReg(unsigned reg) const {
+ /*bool isRetReg(unsigned reg) const {
return std::find(reg_ret.begin(), reg_ret.end(), reg) != reg_ret.end();
- }
+ }*/
bool isLocalVarReg(unsigned reg) const {
return std::find(reg_local_var.begin(), reg_local_var.end(), reg)
!= reg_local_var.end();
}
- void addVirtualRegister(const TargetRegisterClass *TRC, unsigned Reg) {
- usedRegs[TRC].push_back(Reg);
+ void addRetReg(unsigned Reg) {
+ if (!reg_ret.count(Reg)) {
+ reg_ret.insert(Reg);
+ std::string name;
+ name = "%ret";
+ name += utostr(reg_ret.size() - 1);
+ regNames[Reg] = name;
+ }
+ }
- std::string name;
+ void setRetParamSize(unsigned SizeInBits) {
+ retParamSize = SizeInBits;
+ }
- if (TRC == PTX::RegPredRegisterClass)
- name = "%p";
- else if (TRC == PTX::RegI16RegisterClass)
- name = "%rh";
- else if (TRC == PTX::RegI32RegisterClass)
- name = "%r";
- else if (TRC == PTX::RegI64RegisterClass)
- name = "%rd";
- else if (TRC == PTX::RegF32RegisterClass)
- name = "%f";
- else if (TRC == PTX::RegF64RegisterClass)
- name = "%fd";
- else
- llvm_unreachable("Invalid register class");
+ unsigned getRetParamSize() const {
+ return retParamSize;
+ }
- name += utostr(usedRegs[TRC].size() - 1);
+ void addArgReg(unsigned Reg) {
+ reg_arg.insert(Reg);
+ std::string name;
+ name = "%param";
+ name += utostr(reg_arg.size() - 1);
regNames[Reg] = name;
}
+ void addArgParam(unsigned SizeInBits) {
+ argParams.push_back(SizeInBits);
+ }
+
+ void addVirtualRegister(const TargetRegisterClass *TRC, unsigned Reg) {
+ std::string name;
+
+ if (!reg_ret.count(Reg) && !reg_arg.count(Reg)) {
+ usedRegs[TRC].push_back(Reg);
+ if (TRC == PTX::RegPredRegisterClass)
+ name = "%p";
+ else if (TRC == PTX::RegI16RegisterClass)
+ name = "%rh";
+ else if (TRC == PTX::RegI32RegisterClass)
+ name = "%r";
+ else if (TRC == PTX::RegI64RegisterClass)
+ name = "%rd";
+ else if (TRC == PTX::RegF32RegisterClass)
+ name = "%f";
+ else if (TRC == PTX::RegF64RegisterClass)
+ name = "%fd";
+ else
+ llvm_unreachable("Invalid register class");
+
+ name += utostr(usedRegs[TRC].size() - 1);
+ regNames[Reg] = name;
+ }
+ }
+
std::string getRegisterName(unsigned Reg) const {
if (regNames.count(Reg))
return regNames.lookup(Reg);
+ else if (Reg == PTX::NoRegister)
+ return "%noreg";
else
llvm_unreachable("Register not in register name map");
}
More information about the llvm-commits
mailing list