[PATCH 1/1] R600: Add new intrinsic to read work dimensions
Jan Vesely
jan.vesely at rutgers.edu
Tue Oct 14 11:43:48 PDT 2014
On Tue, 2014-10-14 at 12:59 -0400, Tom Stellard wrote:
> On Sat, Oct 11, 2014 at 06:03:13PM -0400, Jan Vesely wrote:
> > v2: Add SI lowering
> > Add test
> >
> > v3: Place work dimensions after the kernel arguments.
> > v4: Calculate offset while lowering arguments
> > v5: rebase
> >
> > Signed-off-by: Jan Vesely <jan.vesely at rutgers.edu>
> > ---
> > include/llvm/IR/IntrinsicsR600.td | 2 ++
> > lib/Target/R600/AMDGPUMachineFunction.h | 3 +++
> > lib/Target/R600/R600ISelLowering.cpp | 11 ++++++++---
> > lib/Target/R600/SIISelLowering.cpp | 10 ++++++++--
> > test/CodeGen/R600/work-item-intrinsics.ll | 16 ++++++++++++++++
> > 5 files changed, 37 insertions(+), 5 deletions(-)
> >
> > diff --git a/include/llvm/IR/IntrinsicsR600.td b/include/llvm/IR/IntrinsicsR600.td
> > index 2e711a9..8dafc80 100644
> > --- a/include/llvm/IR/IntrinsicsR600.td
> > +++ b/include/llvm/IR/IntrinsicsR600.td
> > @@ -33,6 +33,8 @@ defm int_r600_read_tgid : R600ReadPreloadRegisterIntrinsic_xyz <
> > "__builtin_r600_read_tgid">;
> > defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz <
> > "__builtin_r600_read_tidig">;
> > +def int_r600_read_workdim : R600ReadPreloadRegisterIntrinsic <
> > + "__builtin_r600_read_workdim">;
>
> This patch LGTM, except that I would prefer this intrinsic be moved into the
> TargetPrefix = "AMDGPU" block and renamed to int_AMDGPU_read_workdim.
I also added class AMDGPUReadPreloadRegisterIntrinsic to the target
prefix AMDGPU since I assume more stuff is going to be moved there.
jan
>
> No need to submit another patch, just make this change before you
> commit.
>
> -Tom
>
>
>
> >
> > } // End TargetPrefix = "r600"
> >
> > diff --git a/lib/Target/R600/AMDGPUMachineFunction.h b/lib/Target/R600/AMDGPUMachineFunction.h
> > index 886fb1b..f5e4694 100644
> > --- a/lib/Target/R600/AMDGPUMachineFunction.h
> > +++ b/lib/Target/R600/AMDGPUMachineFunction.h
> > @@ -30,6 +30,9 @@ public:
> > /// Number of bytes in the LDS that are being used.
> > unsigned LDSSize;
> >
> > + /// Start of implicit kernel args
> > + unsigned ABIArgOffset;
> > +
> > unsigned getShaderType() const {
> > return ShaderType;
> > }
> > diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
> > index 87610e9..7c60bbe 100644
> > --- a/lib/Target/R600/R600ISelLowering.cpp
> > +++ b/lib/Target/R600/R600ISelLowering.cpp
> > @@ -808,6 +808,9 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
> > return LowerImplicitParameter(DAG, VT, DL, 7);
> > case Intrinsic::r600_read_local_size_z:
> > return LowerImplicitParameter(DAG, VT, DL, 8);
> > + case Intrinsic::r600_read_workdim: {
> > + return LowerImplicitParameter(DAG, VT, DL, MFI->ABIArgOffset / 4);
> > + }
> >
> > case Intrinsic::r600_read_tgid_x:
> > return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
> > @@ -1698,7 +1701,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
> > CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
> > *DAG.getContext());
> > MachineFunction &MF = DAG.getMachineFunction();
> > - unsigned ShaderType = MF.getInfo<R600MachineFunctionInfo>()->getShaderType();
> > + R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
> >
> > SmallVector<ISD::InputArg, 8> LocalIns;
> >
> > @@ -1716,7 +1719,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
> > MemVT = MemVT.getVectorElementType();
> > }
> >
> > - if (ShaderType != ShaderType::COMPUTE) {
> > + if (MFI->getShaderType() != ShaderType::COMPUTE) {
> > unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
> > SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
> > InVals.push_back(Register);
> > @@ -1748,16 +1751,18 @@ SDValue R600TargetLowering::LowerFormalArguments(
> >
> > unsigned ValBase = ArgLocs[In.OrigArgIndex].getLocMemOffset();
> > unsigned PartOffset = VA.getLocMemOffset();
> > + unsigned Offset = 36 + VA.getLocMemOffset();
> >
> > MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
> > SDValue Arg = DAG.getLoad(ISD::UNINDEXED, Ext, VT, DL, Chain,
> > - DAG.getConstant(36 + PartOffset, MVT::i32),
> > + DAG.getConstant(Offset, MVT::i32),
> > DAG.getUNDEF(MVT::i32),
> > PtrInfo,
> > MemVT, false, true, true, 4);
> >
> > // 4 is the preferred alignment for the CONSTANT memory space.
> > InVals.push_back(Arg);
> > + MFI->ABIArgOffset = Offset + MemVT.getStoreSize();
> > }
> > return Chain;
> > }
> > diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
> > index 49ac269..ed1e746 100644
> > --- a/lib/Target/R600/SIISelLowering.cpp
> > +++ b/lib/Target/R600/SIISelLowering.cpp
> > @@ -519,11 +519,11 @@ SDValue SITargetLowering::LowerFormalArguments(
> > if (VA.isMemLoc()) {
> > VT = Ins[i].VT;
> > EVT MemVT = Splits[i].VT;
> > + const unsigned Offset = 36 + VA.getLocMemOffset();
> > // The first 36 bytes of the input buffer contains information about
> > // thread group and global sizes.
> > SDValue Arg = LowerParameter(DAG, VT, MemVT, DL, DAG.getRoot(),
> > - 36 + VA.getLocMemOffset(),
> > - Ins[i].Flags.isSExt());
> > + Offset, Ins[i].Flags.isSExt());
> >
> > const PointerType *ParamTy =
> > dyn_cast<PointerType>(FType->getParamType(Ins[i].OrigArgIndex));
> > @@ -537,6 +537,7 @@ SDValue SITargetLowering::LowerFormalArguments(
> > }
> >
> > InVals.push_back(Arg);
> > + Info->ABIArgOffset = Offset + MemVT.getStoreSize();
> > continue;
> > }
> > assert(VA.isRegLoc() && "Parameter must be in a register!");
> > @@ -927,6 +928,11 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
> > case Intrinsic::r600_read_local_size_z:
> > return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
> > SI::KernelInputOffsets::LOCAL_SIZE_Z, false);
> > + case Intrinsic::r600_read_workdim: {
> > + return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
> > + MF.getInfo<SIMachineFunctionInfo>()->ABIArgOffset,
> > + false);
> > + }
> > case Intrinsic::r600_read_tgid_x:
> > return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
> > TRI->getPreloadedValue(MF, SIRegisterInfo::TGID_X), VT);
> > diff --git a/test/CodeGen/R600/work-item-intrinsics.ll b/test/CodeGen/R600/work-item-intrinsics.ll
> > index a1337ae..d0ca92b 100644
> > --- a/test/CodeGen/R600/work-item-intrinsics.ll
> > +++ b/test/CodeGen/R600/work-item-intrinsics.ll
> > @@ -128,6 +128,20 @@ entry:
> > ret void
> > }
> >
> > +; FUNC-LABEL: @get_work_dim
> > +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
> > +; EG: MOV [[VAL]], KC0[2].Z
> > +
> > +; SI: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0xb
> > +; SI: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
> > +; SI: BUFFER_STORE_DWORD [[VVAL]]
> > +define void @get_work_dim (i32 addrspace(1)* %out) {
> > +entry:
> > + %0 = call i32 @llvm.r600.read.workdim() #0
> > + store i32 %0, i32 addrspace(1)* %out
> > + ret void
> > +}
> > +
> > ; The tgid values are stored in sgprs offset by the number of user sgprs.
> > ; Currently we always use exactly 2 user sgprs for the pointer to the
> > ; kernel arguments, but this may change in the future.
> > @@ -209,4 +223,6 @@ declare i32 @llvm.r600.read.tidig.x() #0
> > declare i32 @llvm.r600.read.tidig.y() #0
> > declare i32 @llvm.r600.read.tidig.z() #0
> >
> > +declare i32 @llvm.r600.read.workdim() #0
> > +
> > attributes #0 = { readnone }
> > --
> > 1.9.3
> >
--
Jan Vesely <jan.vesely at rutgers.edu>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: This is a digitally signed message part
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20141014/6ae1fc43/attachment.sig>
More information about the llvm-commits
mailing list