[llvm] r299998 - AMDGPU: Refactor argument lowering
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 11 15:29:25 PDT 2017
Author: arsenm
Date: Tue Apr 11 17:29:24 2017
New Revision: 299998
URL: http://llvm.org/viewvc/llvm-project?rev=299998&view=rev
Log:
AMDGPU: Refactor argument lowering
Split into smaller functions and prepare for handling
non-entry functions.
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h
llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h
llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h
llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
llvm/trunk/test/CodeGen/AMDGPU/amdgpu-shader-calling-convention.ll
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp?rev=299998&r1=299997&r2=299998&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp Tue Apr 11 17:29:24 2017
@@ -713,7 +713,7 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI
OutStreamer->EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4);
OutStreamer->EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(KernelInfo.LDSBlocks), 4);
OutStreamer->EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);
- OutStreamer->EmitIntValue(MFI->PSInputEna, 4);
+ OutStreamer->EmitIntValue(MFI->getPSInputEnable(), 4);
OutStreamer->EmitIntValue(R_0286D0_SPI_PS_INPUT_ADDR, 4);
OutStreamer->EmitIntValue(MFI->getPSInputAddr(), 4);
}
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp?rev=299998&r1=299997&r2=299998&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp Tue Apr 11 17:29:24 2017
@@ -867,11 +867,6 @@ void AMDGPUTargetLowering::analyzeFormal
}
}
-void AMDGPUTargetLowering::AnalyzeFormalArguments(CCState &State,
- const SmallVectorImpl<ISD::InputArg> &Ins) const {
- State.AnalyzeFormalArguments(Ins, CC_AMDGPU);
-}
-
void AMDGPUTargetLowering::AnalyzeReturn(CCState &State,
const SmallVectorImpl<ISD::OutputArg> &Outs) const {
@@ -891,6 +886,24 @@ AMDGPUTargetLowering::LowerReturn(SDValu
// Target specific lowering
//===---------------------------------------------------------------------===//
+/// Selects the correct CCAssignFn for a given CallingConvention value.
+CCAssignFn *AMDGPUTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
+ bool IsVarArg) {
+ switch (CC) {
+ case CallingConv::C:
+ case CallingConv::AMDGPU_KERNEL:
+ case CallingConv::SPIR_KERNEL:
+ return CC_AMDGPU_Kernel;
+ case CallingConv::AMDGPU_VS:
+ case CallingConv::AMDGPU_GS:
+ case CallingConv::AMDGPU_PS:
+ case CallingConv::AMDGPU_CS:
+ return CC_AMDGPU;
+ default:
+ report_fatal_error("Unsupported calling convention.");
+ }
+}
+
SDValue AMDGPUTargetLowering::LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
SDValue Callee = CLI.Callee;
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h?rev=299998&r1=299997&r2=299998&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h Tue Apr 11 17:29:24 2017
@@ -17,6 +17,7 @@
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUISELLOWERING_H
#include "AMDGPU.h"
+#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/Target/TargetLowering.h"
namespace llvm {
@@ -114,8 +115,6 @@ protected:
SmallVectorImpl<SDValue> &Results) const;
void analyzeFormalArgumentsCompute(CCState &State,
const SmallVectorImpl<ISD::InputArg> &Ins) const;
- void AnalyzeFormalArguments(CCState &State,
- const SmallVectorImpl<ISD::InputArg> &Ins) const;
void AnalyzeReturn(CCState &State,
const SmallVectorImpl<ISD::OutputArg> &Outs) const;
@@ -161,6 +160,7 @@ public:
bool isCheapToSpeculateCttz() const override;
bool isCheapToSpeculateCtlz() const override;
+ static CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg);
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
Modified: llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp?rev=299998&r1=299997&r2=299998&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp Tue Apr 11 17:29:24 2017
@@ -1547,7 +1547,7 @@ SDValue R600TargetLowering::LowerFormalA
SmallVector<ISD::InputArg, 8> LocalIns;
if (AMDGPU::isShader(CallConv)) {
- AnalyzeFormalArguments(CCInfo, Ins);
+ CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
} else {
analyzeFormalArgumentsCompute(CCInfo, Ins);
}
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=299998&r1=299997&r2=299998&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Tue Apr 11 17:29:24 2017
@@ -845,13 +845,15 @@ bool SITargetLowering::isTypeDesirableFo
return TargetLowering::isTypeDesirableForOp(Op, VT);
}
-SDValue SITargetLowering::LowerParameterPtr(SelectionDAG &DAG,
- const SDLoc &SL, SDValue Chain,
- unsigned Offset) const {
+SDValue SITargetLowering::lowerKernArgParameterPtr(SelectionDAG &DAG,
+ const SDLoc &SL,
+ SDValue Chain,
+ uint64_t Offset) const {
const DataLayout &DL = DAG.getDataLayout();
MachineFunction &MF = DAG.getMachineFunction();
const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo();
- unsigned InputPtrReg = TRI->getPreloadedValue(MF, SIRegisterInfo::KERNARG_SEGMENT_PTR);
+ unsigned InputPtrReg = TRI->getPreloadedValue(MF,
+ SIRegisterInfo::KERNARG_SEGMENT_PTR);
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
MVT PtrVT = getPointerTy(DL, AMDGPUASI.CONSTANT_ADDRESS);
@@ -861,24 +863,10 @@ SDValue SITargetLowering::LowerParameter
DAG.getConstant(Offset, SL, PtrVT));
}
-SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
- const SDLoc &SL, SDValue Chain,
- unsigned Offset, bool Signed,
+SDValue SITargetLowering::convertArgType(SelectionDAG &DAG, EVT VT, EVT MemVT,
+ const SDLoc &SL, SDValue Val,
+ bool Signed,
const ISD::InputArg *Arg) const {
- const DataLayout &DL = DAG.getDataLayout();
- Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
- PointerType *PtrTy = PointerType::get(Ty, AMDGPUASI.CONSTANT_ADDRESS);
- MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
-
- unsigned Align = DL.getABITypeAlignment(Ty);
-
- SDValue Ptr = LowerParameterPtr(DAG, SL, Chain, Offset);
- SDValue Load = DAG.getLoad(MemVT, SL, Chain, Ptr, PtrInfo, Align,
- MachineMemOperand::MONonTemporal |
- MachineMemOperand::MODereferenceable |
- MachineMemOperand::MOInvariant);
-
- SDValue Val = Load;
if (Arg && (Arg->Flags.isSExt() || Arg->Flags.isZExt()) &&
VT.bitsLT(MemVT)) {
unsigned Opc = Arg->Flags.isZExt() ? ISD::AssertZext : ISD::AssertSext;
@@ -892,360 +880,434 @@ SDValue SITargetLowering::LowerParameter
else
Val = DAG.getZExtOrTrunc(Val, SL, VT);
- return DAG.getMergeValues({ Val, Load.getValue(1) }, SL);
+ return Val;
}
-SDValue SITargetLowering::LowerFormalArguments(
- SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
- SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
- const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo();
-
- MachineFunction &MF = DAG.getMachineFunction();
- FunctionType *FType = MF.getFunction()->getFunctionType();
- SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
- const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+SDValue SITargetLowering::lowerKernargMemParameter(
+ SelectionDAG &DAG, EVT VT, EVT MemVT,
+ const SDLoc &SL, SDValue Chain,
+ uint64_t Offset, bool Signed,
+ const ISD::InputArg *Arg) const {
+ const DataLayout &DL = DAG.getDataLayout();
+ Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
+ PointerType *PtrTy = PointerType::get(Ty, AMDGPUASI.CONSTANT_ADDRESS);
+ MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
- if (Subtarget->isAmdHsaOS() && AMDGPU::isShader(CallConv)) {
- const Function *Fn = MF.getFunction();
- DiagnosticInfoUnsupported NoGraphicsHSA(
- *Fn, "unsupported non-compute shaders with HSA", DL.getDebugLoc());
- DAG.getContext()->diagnose(NoGraphicsHSA);
- return DAG.getEntryNode();
- }
+ unsigned Align = DL.getABITypeAlignment(Ty);
- // Create stack objects that are used for emitting debugger prologue if
- // "amdgpu-debugger-emit-prologue" attribute was specified.
- if (ST.debuggerEmitPrologue())
- createDebuggerPrologueStackObjects(MF);
+ SDValue Ptr = lowerKernArgParameterPtr(DAG, SL, Chain, Offset);
+ SDValue Load = DAG.getLoad(MemVT, SL, Chain, Ptr, PtrInfo, Align,
+ MachineMemOperand::MONonTemporal |
+ MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant);
- SmallVector<ISD::InputArg, 16> Splits;
- BitVector Skipped(Ins.size());
+ SDValue Val = convertArgType(DAG, VT, MemVT, SL, Load, Signed, Arg);
+ return DAG.getMergeValues({ Val, Load.getValue(1) }, SL);
+}
- for (unsigned i = 0, e = Ins.size(), PSInputNum = 0; i != e; ++i) {
- const ISD::InputArg &Arg = Ins[i];
+static void processShaderInputArgs(SmallVectorImpl<ISD::InputArg> &Splits,
+ CallingConv::ID CallConv,
+ ArrayRef<ISD::InputArg> Ins,
+ BitVector &Skipped,
+ FunctionType *FType,
+ SIMachineFunctionInfo *Info) {
+ for (unsigned I = 0, E = Ins.size(), PSInputNum = 0; I != E; ++I) {
+ const ISD::InputArg &Arg = Ins[I];
- // First check if it's a PS input addr
+ // First check if it's a PS input addr.
if (CallConv == CallingConv::AMDGPU_PS && !Arg.Flags.isInReg() &&
!Arg.Flags.isByVal() && PSInputNum <= 15) {
if (!Arg.Used && !Info->isPSInputAllocated(PSInputNum)) {
- // We can safely skip PS inputs
- Skipped.set(i);
+ // We can safely skip PS inputs.
+ Skipped.set(I);
++PSInputNum;
continue;
}
Info->markPSInputAllocated(PSInputNum);
if (Arg.Used)
- Info->PSInputEna |= 1 << PSInputNum;
+ Info->markPSInputEnabled(PSInputNum);
++PSInputNum;
}
- if (AMDGPU::isShader(CallConv)) {
- // Second split vertices into their elements
- if (Arg.VT.isVector()) {
- ISD::InputArg NewArg = Arg;
- NewArg.Flags.setSplit();
- NewArg.VT = Arg.VT.getVectorElementType();
-
- // We REALLY want the ORIGINAL number of vertex elements here, e.g. a
- // three or five element vertex only needs three or five registers,
- // NOT four or eight.
- Type *ParamType = FType->getParamType(Arg.getOrigArgIndex());
- unsigned NumElements = ParamType->getVectorNumElements();
-
- for (unsigned j = 0; j != NumElements; ++j) {
- Splits.push_back(NewArg);
- NewArg.PartOffset += NewArg.VT.getStoreSize();
- }
- } else {
- Splits.push_back(Arg);
+ // Second split vertices into their elements.
+ if (Arg.VT.isVector()) {
+ ISD::InputArg NewArg = Arg;
+ NewArg.Flags.setSplit();
+ NewArg.VT = Arg.VT.getVectorElementType();
+
+ // We REALLY want the ORIGINAL number of vertex elements here, e.g. a
+ // three or five element vertex only needs three or five registers,
+ // NOT four or eight.
+ Type *ParamType = FType->getParamType(Arg.getOrigArgIndex());
+ unsigned NumElements = ParamType->getVectorNumElements();
+
+ for (unsigned J = 0; J != NumElements; ++J) {
+ Splits.push_back(NewArg);
+ NewArg.PartOffset += NewArg.VT.getStoreSize();
}
+ } else {
+ Splits.push_back(Arg);
}
}
+}
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
- *DAG.getContext());
+// Allocate special inputs passed in VGPRs.
+static void allocateSpecialInputVGPRs(CCState &CCInfo,
+ MachineFunction &MF,
+ const SIRegisterInfo &TRI,
+ SIMachineFunctionInfo &Info) {
+ if (Info.hasWorkItemIDX()) {
+ unsigned Reg = TRI.getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_X);
+ MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass);
+ CCInfo.AllocateReg(Reg);
+ }
- // At least one interpolation mode must be enabled or else the GPU will hang.
- //
- // Check PSInputAddr instead of PSInputEna. The idea is that if the user set
- // PSInputAddr, the user wants to enable some bits after the compilation
- // based on run-time states. Since we can't know what the final PSInputEna
- // will look like, so we shouldn't do anything here and the user should take
- // responsibility for the correct programming.
- //
- // Otherwise, the following restrictions apply:
- // - At least one of PERSP_* (0xF) or LINEAR_* (0x70) must be enabled.
- // - If POS_W_FLOAT (11) is enabled, at least one of PERSP_* must be
- // enabled too.
- if (CallConv == CallingConv::AMDGPU_PS &&
- ((Info->getPSInputAddr() & 0x7F) == 0 ||
- ((Info->getPSInputAddr() & 0xF) == 0 && Info->isPSInputAllocated(11)))) {
- CCInfo.AllocateReg(AMDGPU::VGPR0);
- CCInfo.AllocateReg(AMDGPU::VGPR1);
- Info->markPSInputAllocated(0);
- Info->PSInputEna |= 1;
+ if (Info.hasWorkItemIDY()) {
+ unsigned Reg = TRI.getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_Y);
+ MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass);
+ CCInfo.AllocateReg(Reg);
}
- if (!AMDGPU::isShader(CallConv)) {
- assert(Info->hasWorkGroupIDX() && Info->hasWorkItemIDX());
- } else {
- assert(!Info->hasDispatchPtr() &&
- !Info->hasKernargSegmentPtr() && !Info->hasFlatScratchInit() &&
- !Info->hasWorkGroupIDX() && !Info->hasWorkGroupIDY() &&
- !Info->hasWorkGroupIDZ() && !Info->hasWorkGroupInfo() &&
- !Info->hasWorkItemIDX() && !Info->hasWorkItemIDY() &&
- !Info->hasWorkItemIDZ());
+ if (Info.hasWorkItemIDZ()) {
+ unsigned Reg = TRI.getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_Z);
+ MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass);
+ CCInfo.AllocateReg(Reg);
}
+}
- if (Info->hasPrivateMemoryInputPtr()) {
- unsigned PrivateMemoryPtrReg = Info->addPrivateMemoryPtr(*TRI);
- MF.addLiveIn(PrivateMemoryPtrReg, &AMDGPU::SReg_64RegClass);
+// Allocate special inputs passed in user SGPRs.
+static void allocateHSAUserSGPRs(CCState &CCInfo,
+ MachineFunction &MF,
+ const SIRegisterInfo &TRI,
+ SIMachineFunctionInfo &Info) {
+ if (Info.hasPrivateMemoryInputPtr()) {
+ unsigned PrivateMemoryPtrReg = Info.addPrivateMemoryPtr(TRI);
+ MF.addLiveIn(PrivateMemoryPtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(PrivateMemoryPtrReg);
}
// FIXME: How should these inputs interact with inreg / custom SGPR inputs?
- if (Info->hasPrivateSegmentBuffer()) {
- unsigned PrivateSegmentBufferReg = Info->addPrivateSegmentBuffer(*TRI);
- MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SReg_128RegClass);
+ if (Info.hasPrivateSegmentBuffer()) {
+ unsigned PrivateSegmentBufferReg = Info.addPrivateSegmentBuffer(TRI);
+ MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass);
CCInfo.AllocateReg(PrivateSegmentBufferReg);
}
- if (Info->hasDispatchPtr()) {
- unsigned DispatchPtrReg = Info->addDispatchPtr(*TRI);
+ if (Info.hasDispatchPtr()) {
+ unsigned DispatchPtrReg = Info.addDispatchPtr(TRI);
MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(DispatchPtrReg);
}
- if (Info->hasQueuePtr()) {
- unsigned QueuePtrReg = Info->addQueuePtr(*TRI);
+ if (Info.hasQueuePtr()) {
+ unsigned QueuePtrReg = Info.addQueuePtr(TRI);
MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(QueuePtrReg);
}
- if (Info->hasKernargSegmentPtr()) {
- unsigned InputPtrReg = Info->addKernargSegmentPtr(*TRI);
+ if (Info.hasKernargSegmentPtr()) {
+ unsigned InputPtrReg = Info.addKernargSegmentPtr(TRI);
MF.addLiveIn(InputPtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(InputPtrReg);
}
- if (Info->hasDispatchID()) {
- unsigned DispatchIDReg = Info->addDispatchID(*TRI);
+ if (Info.hasDispatchID()) {
+ unsigned DispatchIDReg = Info.addDispatchID(TRI);
MF.addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(DispatchIDReg);
}
- if (Info->hasFlatScratchInit()) {
- unsigned FlatScratchInitReg = Info->addFlatScratchInit(*TRI);
+ if (Info.hasFlatScratchInit()) {
+ unsigned FlatScratchInitReg = Info.addFlatScratchInit(TRI);
MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(FlatScratchInitReg);
}
- if (!AMDGPU::isShader(CallConv))
- analyzeFormalArgumentsCompute(CCInfo, Ins);
- else
- AnalyzeFormalArguments(CCInfo, Splits);
-
- SmallVector<SDValue, 16> Chains;
-
- for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {
- const ISD::InputArg &Arg = Ins[i];
- if (Skipped[i]) {
- InVals.push_back(DAG.getUNDEF(Arg.VT));
- continue;
- }
-
- CCValAssign &VA = ArgLocs[ArgIdx++];
- MVT VT = VA.getLocVT();
-
- if (VA.isMemLoc()) {
- VT = Ins[i].VT;
- EVT MemVT = VA.getLocVT();
- const unsigned Offset = Subtarget->getExplicitKernelArgOffset(MF) +
- VA.getLocMemOffset();
- // The first 36 bytes of the input buffer contains information about
- // thread group and global sizes.
- SDValue Arg = LowerParameter(DAG, VT, MemVT, DL, Chain,
- Offset, Ins[i].Flags.isSExt(),
- &Ins[i]);
- Chains.push_back(Arg.getValue(1));
-
- auto *ParamTy =
- dyn_cast<PointerType>(FType->getParamType(Ins[i].getOrigArgIndex()));
- if (Subtarget->getGeneration() == SISubtarget::SOUTHERN_ISLANDS &&
- ParamTy && ParamTy->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS) {
- // On SI local pointers are just offsets into LDS, so they are always
- // less than 16-bits. On CI and newer they could potentially be
- // real pointers, so we can't guarantee their size.
- Arg = DAG.getNode(ISD::AssertZext, DL, Arg.getValueType(), Arg,
- DAG.getValueType(MVT::i16));
- }
-
- InVals.push_back(Arg);
- Info->setABIArgOffset(Offset + MemVT.getStoreSize());
- continue;
- }
- assert(VA.isRegLoc() && "Parameter must be in a register!");
-
- unsigned Reg = VA.getLocReg();
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
-
- Reg = MF.addLiveIn(Reg, RC);
- SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, VT);
-
- if (Arg.VT.isVector()) {
- // Build a vector from the registers
- Type *ParamType = FType->getParamType(Arg.getOrigArgIndex());
- unsigned NumElements = ParamType->getVectorNumElements();
-
- SmallVector<SDValue, 4> Regs;
- Regs.push_back(Val);
- for (unsigned j = 1; j != NumElements; ++j) {
- Reg = ArgLocs[ArgIdx++].getLocReg();
- Reg = MF.addLiveIn(Reg, RC);
-
- SDValue Copy = DAG.getCopyFromReg(Chain, DL, Reg, VT);
- Regs.push_back(Copy);
- }
-
- // Fill up the missing vector elements
- NumElements = Arg.VT.getVectorNumElements() - NumElements;
- Regs.append(NumElements, DAG.getUNDEF(VT));
-
- InVals.push_back(DAG.getBuildVector(Arg.VT, DL, Regs));
- continue;
- }
-
- InVals.push_back(Val);
- }
-
// TODO: Add GridWorkGroupCount user SGPRs when used. For now with HSA we read
// these from the dispatch pointer.
+}
- // Start adding system SGPRs.
- if (Info->hasWorkGroupIDX()) {
- unsigned Reg = Info->addWorkGroupIDX();
+// Allocate special input registers that are initialized per-wave.
+static void allocateSystemSGPRs(CCState &CCInfo,
+ MachineFunction &MF,
+ SIMachineFunctionInfo &Info,
+ bool IsShader) {
+ if (Info.hasWorkGroupIDX()) {
+ unsigned Reg = Info.addWorkGroupIDX();
MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass);
CCInfo.AllocateReg(Reg);
}
- if (Info->hasWorkGroupIDY()) {
- unsigned Reg = Info->addWorkGroupIDY();
+ if (Info.hasWorkGroupIDY()) {
+ unsigned Reg = Info.addWorkGroupIDY();
MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass);
CCInfo.AllocateReg(Reg);
}
- if (Info->hasWorkGroupIDZ()) {
- unsigned Reg = Info->addWorkGroupIDZ();
+ if (Info.hasWorkGroupIDZ()) {
+ unsigned Reg = Info.addWorkGroupIDZ();
MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass);
CCInfo.AllocateReg(Reg);
}
- if (Info->hasWorkGroupInfo()) {
- unsigned Reg = Info->addWorkGroupInfo();
+ if (Info.hasWorkGroupInfo()) {
+ unsigned Reg = Info.addWorkGroupInfo();
MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass);
CCInfo.AllocateReg(Reg);
}
- if (Info->hasPrivateSegmentWaveByteOffset()) {
+ if (Info.hasPrivateSegmentWaveByteOffset()) {
// Scratch wave offset passed in system SGPR.
unsigned PrivateSegmentWaveByteOffsetReg;
- if (AMDGPU::isShader(CallConv)) {
+ if (IsShader) {
PrivateSegmentWaveByteOffsetReg = findFirstFreeSGPR(CCInfo);
- Info->setPrivateSegmentWaveByteOffset(PrivateSegmentWaveByteOffsetReg);
+ Info.setPrivateSegmentWaveByteOffset(PrivateSegmentWaveByteOffsetReg);
} else
- PrivateSegmentWaveByteOffsetReg = Info->addPrivateSegmentWaveByteOffset();
+ PrivateSegmentWaveByteOffsetReg = Info.addPrivateSegmentWaveByteOffset();
MF.addLiveIn(PrivateSegmentWaveByteOffsetReg, &AMDGPU::SGPR_32RegClass);
CCInfo.AllocateReg(PrivateSegmentWaveByteOffsetReg);
}
+}
+static void reservePrivateMemoryRegs(const TargetMachine &TM,
+ MachineFunction &MF,
+ const SIRegisterInfo &TRI,
+ SIMachineFunctionInfo &Info) {
// Now that we've figured out where the scratch register inputs are, see if
// should reserve the arguments and use them directly.
bool HasStackObjects = MF.getFrameInfo().hasStackObjects();
+
// Record that we know we have non-spill stack objects so we don't need to
// check all stack objects later.
if (HasStackObjects)
- Info->setHasNonSpillStackObjects(true);
+ Info.setHasNonSpillStackObjects(true);
// Everything live out of a block is spilled with fast regalloc, so it's
// almost certain that spilling will be required.
- if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
+ if (TM.getOptLevel() == CodeGenOpt::None)
HasStackObjects = true;
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
if (ST.isAmdCodeObjectV2(MF)) {
if (HasStackObjects) {
// If we have stack objects, we unquestionably need the private buffer
// resource. For the Code Object V2 ABI, this will be the first 4 user
// SGPR inputs. We can reserve those and use them directly.
- unsigned PrivateSegmentBufferReg = TRI->getPreloadedValue(
+ unsigned PrivateSegmentBufferReg = TRI.getPreloadedValue(
MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER);
- Info->setScratchRSrcReg(PrivateSegmentBufferReg);
+ Info.setScratchRSrcReg(PrivateSegmentBufferReg);
- unsigned PrivateSegmentWaveByteOffsetReg = TRI->getPreloadedValue(
+ unsigned PrivateSegmentWaveByteOffsetReg = TRI.getPreloadedValue(
MF, SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
- Info->setScratchWaveOffsetReg(PrivateSegmentWaveByteOffsetReg);
+ Info.setScratchWaveOffsetReg(PrivateSegmentWaveByteOffsetReg);
} else {
unsigned ReservedBufferReg
- = TRI->reservedPrivateSegmentBufferReg(MF);
+ = TRI.reservedPrivateSegmentBufferReg(MF);
unsigned ReservedOffsetReg
- = TRI->reservedPrivateSegmentWaveByteOffsetReg(MF);
+ = TRI.reservedPrivateSegmentWaveByteOffsetReg(MF);
// We tentatively reserve the last registers (skipping the last two
// which may contain VCC). After register allocation, we'll replace
// these with the ones immediately after those which were really
// allocated. In the prologue copies will be inserted from the argument
// to these reserved registers.
- Info->setScratchRSrcReg(ReservedBufferReg);
- Info->setScratchWaveOffsetReg(ReservedOffsetReg);
+ Info.setScratchRSrcReg(ReservedBufferReg);
+ Info.setScratchWaveOffsetReg(ReservedOffsetReg);
}
} else {
- unsigned ReservedBufferReg = TRI->reservedPrivateSegmentBufferReg(MF);
+ unsigned ReservedBufferReg = TRI.reservedPrivateSegmentBufferReg(MF);
// Without HSA, relocations are used for the scratch pointer and the
// buffer resource setup is always inserted in the prologue. Scratch wave
// offset is still in an input SGPR.
- Info->setScratchRSrcReg(ReservedBufferReg);
+ Info.setScratchRSrcReg(ReservedBufferReg);
if (HasStackObjects) {
- unsigned ScratchWaveOffsetReg = TRI->getPreloadedValue(
+ unsigned ScratchWaveOffsetReg = TRI.getPreloadedValue(
MF, SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
- Info->setScratchWaveOffsetReg(ScratchWaveOffsetReg);
+ Info.setScratchWaveOffsetReg(ScratchWaveOffsetReg);
} else {
unsigned ReservedOffsetReg
- = TRI->reservedPrivateSegmentWaveByteOffsetReg(MF);
- Info->setScratchWaveOffsetReg(ReservedOffsetReg);
+ = TRI.reservedPrivateSegmentWaveByteOffsetReg(MF);
+ Info.setScratchWaveOffsetReg(ReservedOffsetReg);
}
}
+}
- if (Info->hasWorkItemIDX()) {
- unsigned Reg = TRI->getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_X);
- MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass);
- CCInfo.AllocateReg(Reg);
+SDValue SITargetLowering::LowerFormalArguments(
+ SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
+ SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
+ const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo();
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ FunctionType *FType = MF.getFunction()->getFunctionType();
+ SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+
+ if (Subtarget->isAmdHsaOS() && AMDGPU::isShader(CallConv)) {
+ const Function *Fn = MF.getFunction();
+ DiagnosticInfoUnsupported NoGraphicsHSA(
+ *Fn, "unsupported non-compute shaders with HSA", DL.getDebugLoc());
+ DAG.getContext()->diagnose(NoGraphicsHSA);
+ return DAG.getEntryNode();
}
- if (Info->hasWorkItemIDY()) {
- unsigned Reg = TRI->getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_Y);
- MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass);
- CCInfo.AllocateReg(Reg);
+ // Create stack objects that are used for emitting debugger prologue if
+ // "amdgpu-debugger-emit-prologue" attribute was specified.
+ if (ST.debuggerEmitPrologue())
+ createDebuggerPrologueStackObjects(MF);
+
+ SmallVector<ISD::InputArg, 16> Splits;
+ SmallVector<CCValAssign, 16> ArgLocs;
+ BitVector Skipped(Ins.size());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
+
+ bool IsShader = AMDGPU::isShader(CallConv);
+ bool IsKernel = !IsShader;
+ bool IsEntryFunc = AMDGPU::isEntryFunctionCC(CallConv);
+
+ if (IsShader) {
+ processShaderInputArgs(Splits, CallConv, Ins, Skipped, FType, Info);
+
+ // At least one interpolation mode must be enabled or else the GPU will
+ // hang.
+ //
+ // Check PSInputAddr instead of PSInputEnable. The idea is that if the user
+ // set PSInputAddr, the user wants to enable some bits after the compilation
+ // based on run-time states. Since we can't know what the final PSInputEna
+ // will look like, so we shouldn't do anything here and the user should take
+ // responsibility for the correct programming.
+ //
+ // Otherwise, the following restrictions apply:
+ // - At least one of PERSP_* (0xF) or LINEAR_* (0x70) must be enabled.
+ // - If POS_W_FLOAT (11) is enabled, at least one of PERSP_* must be
+ // enabled too.
+ if (CallConv == CallingConv::AMDGPU_PS &&
+ ((Info->getPSInputAddr() & 0x7F) == 0 ||
+ ((Info->getPSInputAddr() & 0xF) == 0 &&
+ Info->isPSInputAllocated(11)))) {
+ CCInfo.AllocateReg(AMDGPU::VGPR0);
+ CCInfo.AllocateReg(AMDGPU::VGPR1);
+ Info->markPSInputAllocated(0);
+ Info->markPSInputEnabled(0);
+ }
+
+ assert(!Info->hasDispatchPtr() &&
+ !Info->hasKernargSegmentPtr() && !Info->hasFlatScratchInit() &&
+ !Info->hasWorkGroupIDX() && !Info->hasWorkGroupIDY() &&
+ !Info->hasWorkGroupIDZ() && !Info->hasWorkGroupInfo() &&
+ !Info->hasWorkItemIDX() && !Info->hasWorkItemIDY() &&
+ !Info->hasWorkItemIDZ());
+ } else {
+ assert(!IsKernel || (Info->hasWorkGroupIDX() && Info->hasWorkItemIDX()));
}
- if (Info->hasWorkItemIDZ()) {
- unsigned Reg = TRI->getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_Z);
- MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass);
- CCInfo.AllocateReg(Reg);
+ if (IsEntryFunc) {
+ allocateSpecialInputVGPRs(CCInfo, MF, *TRI, *Info);
+ allocateHSAUserSGPRs(CCInfo, MF, *TRI, *Info);
+ }
+
+ if (IsKernel) {
+ analyzeFormalArgumentsCompute(CCInfo, Ins);
+ } else {
+ CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, isVarArg);
+ CCInfo.AnalyzeFormalArguments(Splits, AssignFn);
+ }
+
+ SmallVector<SDValue, 16> Chains;
+
+ for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {
+ const ISD::InputArg &Arg = Ins[i];
+ if (Skipped[i]) {
+ InVals.push_back(DAG.getUNDEF(Arg.VT));
+ continue;
+ }
+
+ CCValAssign &VA = ArgLocs[ArgIdx++];
+ MVT VT = VA.getLocVT();
+
+ if (IsEntryFunc && VA.isMemLoc()) {
+ VT = Ins[i].VT;
+ EVT MemVT = VA.getLocVT();
+
+ const uint64_t Offset = Subtarget->getExplicitKernelArgOffset(MF) +
+ VA.getLocMemOffset();
+ Info->setABIArgOffset(Offset + MemVT.getStoreSize());
+
+ // The first 36 bytes of the input buffer contains information about
+ // thread group and global sizes.
+ SDValue Arg = lowerKernargMemParameter(
+ DAG, VT, MemVT, DL, Chain, Offset, Ins[i].Flags.isSExt(), &Ins[i]);
+ Chains.push_back(Arg.getValue(1));
+
+ auto *ParamTy =
+ dyn_cast<PointerType>(FType->getParamType(Ins[i].getOrigArgIndex()));
+ if (Subtarget->getGeneration() == SISubtarget::SOUTHERN_ISLANDS &&
+ ParamTy && ParamTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
+ // On SI local pointers are just offsets into LDS, so they are always
+ // less than 16-bits. On CI and newer they could potentially be
+ // real pointers, so we can't guarantee their size.
+ Arg = DAG.getNode(ISD::AssertZext, DL, Arg.getValueType(), Arg,
+ DAG.getValueType(MVT::i16));
+ }
+
+ InVals.push_back(Arg);
+ continue;
+ }
+
+ if (VA.isMemLoc())
+ report_fatal_error("memloc not supported with calling convention");
+
+ assert(VA.isRegLoc() && "Parameter must be in a register!");
+
+ unsigned Reg = VA.getLocReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
+
+ Reg = MF.addLiveIn(Reg, RC);
+ SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, VT);
+
+ if (Arg.VT.isVector()) {
+ // Build a vector from the registers
+ Type *ParamType = FType->getParamType(Arg.getOrigArgIndex());
+ unsigned NumElements = ParamType->getVectorNumElements();
+
+ SmallVector<SDValue, 4> Regs;
+ Regs.push_back(Val);
+ for (unsigned j = 1; j != NumElements; ++j) {
+ Reg = ArgLocs[ArgIdx++].getLocReg();
+ Reg = MF.addLiveIn(Reg, RC);
+
+ SDValue Copy = DAG.getCopyFromReg(Chain, DL, Reg, VT);
+ Regs.push_back(Copy);
+ }
+
+ // Fill up the missing vector elements
+ NumElements = Arg.VT.getVectorNumElements() - NumElements;
+ Regs.append(NumElements, DAG.getUNDEF(VT));
+
+ InVals.push_back(DAG.getBuildVector(Arg.VT, DL, Regs));
+ continue;
+ }
+
+ InVals.push_back(Val);
}
- if (Chains.empty())
- return Chain;
+ // Start adding system SGPRs.
+ if (IsEntryFunc)
+ allocateSystemSGPRs(CCInfo, MF, *Info, IsShader);
+
+ reservePrivateMemoryRegs(getTargetMachine(), MF, *TRI, *Info);
- return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
+ return Chains.empty() ? Chain :
+ DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
}
SDValue
@@ -2624,8 +2686,8 @@ SDValue SITargetLowering::lowerImplicitZ
MVT VT,
unsigned Offset) const {
SDLoc SL(Op);
- SDValue Param = LowerParameter(DAG, MVT::i32, MVT::i32, SL,
- DAG.getEntryNode(), Offset, false);
+ SDValue Param = lowerKernargMemParameter(DAG, MVT::i32, MVT::i32, SL,
+ DAG.getEntryNode(), Offset, false);
// The local size values will have the hi 16-bits as zero.
return DAG.getNode(ISD::AssertZext, SL, MVT::i32, Param,
DAG.getValueType(VT));
@@ -2683,7 +2745,7 @@ SDValue SITargetLowering::LowerINTRINSIC
}
case Intrinsic::amdgcn_implicitarg_ptr: {
unsigned offset = getImplicitParameterOffset(MFI, FIRST_IMPLICIT);
- return LowerParameterPtr(DAG, DL, DAG.getEntryNode(), offset);
+ return lowerKernArgParameterPtr(DAG, DL, DAG.getEntryNode(), offset);
}
case Intrinsic::amdgcn_kernarg_segment_ptr: {
unsigned Reg
@@ -2725,38 +2787,38 @@ SDValue SITargetLowering::LowerINTRINSIC
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- SI::KernelInputOffsets::NGROUPS_X, false);
+ return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+ SI::KernelInputOffsets::NGROUPS_X, false);
case Intrinsic::r600_read_ngroups_y:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- SI::KernelInputOffsets::NGROUPS_Y, false);
+ return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+ SI::KernelInputOffsets::NGROUPS_Y, false);
case Intrinsic::r600_read_ngroups_z:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- SI::KernelInputOffsets::NGROUPS_Z, false);
+ return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+ SI::KernelInputOffsets::NGROUPS_Z, false);
case Intrinsic::r600_read_global_size_x:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- SI::KernelInputOffsets::GLOBAL_SIZE_X, false);
+ return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+ SI::KernelInputOffsets::GLOBAL_SIZE_X, false);
case Intrinsic::r600_read_global_size_y:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- SI::KernelInputOffsets::GLOBAL_SIZE_Y, false);
+ return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+ SI::KernelInputOffsets::GLOBAL_SIZE_Y, false);
case Intrinsic::r600_read_global_size_z:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- SI::KernelInputOffsets::GLOBAL_SIZE_Z, false);
+ return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+ SI::KernelInputOffsets::GLOBAL_SIZE_Z, false);
case Intrinsic::r600_read_local_size_x:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h?rev=299998&r1=299997&r2=299998&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h Tue Apr 11 17:29:24 2017
@@ -21,11 +21,13 @@
namespace llvm {
class SITargetLowering final : public AMDGPUTargetLowering {
- SDValue LowerParameterPtr(SelectionDAG &DAG, const SDLoc &SL, SDValue Chain,
- unsigned Offset) const;
- SDValue LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, const SDLoc &SL,
- SDValue Chain, unsigned Offset, bool Signed,
- const ISD::InputArg *Arg = nullptr) const;
+ SDValue lowerKernArgParameterPtr(SelectionDAG &DAG, const SDLoc &SL,
+ SDValue Chain, uint64_t Offset) const;
+ SDValue lowerKernargMemParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
+ const SDLoc &SL, SDValue Chain,
+ uint64_t Offset, bool Signed,
+ const ISD::InputArg *Arg = nullptr) const;
+
SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
SelectionDAG &DAG) const override;
SDValue lowerImplicitZextParam(SelectionDAG &DAG, SDValue Op,
@@ -55,6 +57,10 @@ class SITargetLowering final : public AM
const SDLoc &DL,
EVT VT) const;
+ SDValue convertArgType(
+ SelectionDAG &DAG, EVT VT, EVT MemVT, const SDLoc &SL, SDValue Val,
+ bool Signed, const ISD::InputArg *Arg = nullptr) const;
+
/// \brief Custom lowering for ISD::FP_ROUND for MVT::f16.
SDValue lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
Modified: llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp?rev=299998&r1=299997&r2=299998&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp Tue Apr 11 17:29:24 2017
@@ -41,13 +41,13 @@ SIMachineFunctionInfo::SIMachineFunction
WorkGroupInfoSystemSGPR(AMDGPU::NoRegister),
PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
PSInputAddr(0),
+ PSInputEnable(0),
ReturnsVoid(true),
FlatWorkGroupSizes(0, 0),
WavesPerEU(0, 0),
DebuggerWorkGroupIDStackObjectIndices({{0, 0, 0}}),
DebuggerWorkItemIDStackObjectIndices({{0, 0, 0}}),
LDSWaveSpillSize(0),
- PSInputEna(0),
NumUserSGPRs(0),
NumSystemSGPRs(0),
HasSpilledSGPRs(false),
Modified: llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h?rev=299998&r1=299997&r2=299998&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h Tue Apr 11 17:29:24 2017
@@ -113,6 +113,8 @@ class SIMachineFunctionInfo final : publ
// Graphics info.
unsigned PSInputAddr;
+ unsigned PSInputEnable;
+
bool ReturnsVoid;
// A pair of default/requested minimum/maximum flat work group sizes.
@@ -134,9 +136,6 @@ class SIMachineFunctionInfo final : publ
public:
// FIXME: Make private
unsigned LDSWaveSpillSize;
- unsigned PSInputEna;
-
-
unsigned ScratchOffsetReg;
unsigned NumUserSGPRs;
unsigned NumSystemSGPRs;
@@ -424,6 +423,10 @@ public:
return PSInputAddr;
}
+ unsigned getPSInputEnable() const {
+ return PSInputEnable;
+ }
+
bool isPSInputAllocated(unsigned Index) const {
return PSInputAddr & (1 << Index);
}
@@ -432,6 +435,10 @@ public:
PSInputAddr |= 1 << Index;
}
+ void markPSInputEnabled(unsigned Index) {
+ PSInputEnable |= 1 << Index;
+ }
+
bool returnsVoid() const {
return ReturnsVoid;
}
Modified: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp?rev=299998&r1=299997&r2=299998&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp Tue Apr 11 17:29:24 2017
@@ -510,6 +510,10 @@ bool isCompute(CallingConv::ID cc) {
return !isShader(cc) || cc == CallingConv::AMDGPU_CS;
}
+bool isEntryFunctionCC(CallingConv::ID CC) {
+ return true;
+}
+
bool isSI(const MCSubtargetInfo &STI) {
return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
}
Modified: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h?rev=299998&r1=299997&r2=299998&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h Tue Apr 11 17:29:24 2017
@@ -249,8 +249,15 @@ unsigned encodeWaitcnt(const IsaInfo::Is
unsigned getInitialPSInputAddr(const Function &F);
-bool isShader(CallingConv::ID cc);
-bool isCompute(CallingConv::ID cc);
+LLVM_READNONE
+bool isShader(CallingConv::ID CC);
+
+LLVM_READNONE
+bool isCompute(CallingConv::ID CC);
+
+LLVM_READNONE
+bool isEntryFunctionCC(CallingConv::ID CC);
+
bool isSI(const MCSubtargetInfo &STI);
bool isCI(const MCSubtargetInfo &STI);
Modified: llvm/trunk/test/CodeGen/AMDGPU/amdgpu-shader-calling-convention.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/amdgpu-shader-calling-convention.ll?rev=299998&r1=299997&r2=299998&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/amdgpu-shader-calling-convention.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/amdgpu-shader-calling-convention.ll Tue Apr 11 17:29:24 2017
@@ -13,9 +13,10 @@ define amdgpu_cs float @shader_cc(<4 x i
; GCN-LABEL: {{^}}kernel_cc:
; GCN: s_endpgm
-define float @kernel_cc(<4 x i32> inreg, <4 x i32> inreg, i32 inreg %w, float %v) {
+define amdgpu_kernel void @kernel_cc(<4 x i32> inreg, <4 x i32> inreg, i32 inreg %w, float %v) {
%vi = bitcast float %v to i32
%x = add i32 %vi, %w
%xf = bitcast i32 %x to float
- ret float %xf
+ store float %xf, float addrspace(1)* undef
+ ret void
}
More information about the llvm-commits
mailing list