[PATCH 1/1] Reapply "R600: Add new intrinsic to read work dimensions"
Tom Stellard
tom at stellard.net
Tue Oct 14 12:55:14 PDT 2014
On Tue, Oct 14, 2014 at 03:53:28PM -0400, Jan Vesely wrote:
> This effectively reverts revert 219707, after fixing the test to work with
> the new function name format and the renamed intrinsic.
>
LGTM.
> Signed-off-by: Jan Vesely <jan.vesely at rutgers.edu>
> ---
>
> sorry about that
>
> include/llvm/IR/IntrinsicsR600.td | 9 ++++++++-
> lib/Target/R600/AMDGPUMachineFunction.h | 3 +++
> lib/Target/R600/R600ISelLowering.cpp | 11 ++++++++---
> lib/Target/R600/SIISelLowering.cpp | 11 +++++++++--
> test/CodeGen/R600/work-item-intrinsics.ll | 16 ++++++++++++++++
> 5 files changed, 44 insertions(+), 6 deletions(-)
>
> diff --git a/include/llvm/IR/IntrinsicsR600.td b/include/llvm/IR/IntrinsicsR600.td
> index 2e711a9..098ad53 100644
> --- a/include/llvm/IR/IntrinsicsR600.td
> +++ b/include/llvm/IR/IntrinsicsR600.td
> @@ -33,10 +33,14 @@ defm int_r600_read_tgid : R600ReadPreloadRegisterIntrinsic_xyz <
> "__builtin_r600_read_tgid">;
> defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz <
> "__builtin_r600_read_tidig">;
> -
> } // End TargetPrefix = "r600"
>
> let TargetPrefix = "AMDGPU" in {
> +
> +class AMDGPUReadPreloadRegisterIntrinsic<string name>
> + : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
> + GCCBuiltin<name>;
> +
> def int_AMDGPU_div_scale : GCCBuiltin<"__builtin_amdgpu_div_scale">,
> // 1st parameter: Numerator
> // 2nd parameter: Denominator
> @@ -72,4 +76,7 @@ def int_AMDGPU_rsq_clamped : GCCBuiltin<"__builtin_amdgpu_rsq_clamped">,
> def int_AMDGPU_ldexp : GCCBuiltin<"__builtin_amdgpu_ldexp">,
> Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]>;
>
> +def int_AMDGPU_read_workdim : AMDGPUReadPreloadRegisterIntrinsic <
> + "__builtin_amdgpu_read_workdim">;
> +
> } // End TargetPrefix = "AMDGPU"
> diff --git a/lib/Target/R600/AMDGPUMachineFunction.h b/lib/Target/R600/AMDGPUMachineFunction.h
> index 886fb1b..f5e4694 100644
> --- a/lib/Target/R600/AMDGPUMachineFunction.h
> +++ b/lib/Target/R600/AMDGPUMachineFunction.h
> @@ -30,6 +30,9 @@ public:
> /// Number of bytes in the LDS that are being used.
> unsigned LDSSize;
>
> + /// Start of implicit kernel args
> + unsigned ABIArgOffset;
> +
> unsigned getShaderType() const {
> return ShaderType;
> }
> diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
> index e0ac13f..2e91270 100644
> --- a/lib/Target/R600/R600ISelLowering.cpp
> +++ b/lib/Target/R600/R600ISelLowering.cpp
> @@ -818,6 +818,9 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
> case Intrinsic::r600_read_local_size_z:
> return LowerImplicitParameter(DAG, VT, DL, 8);
>
> + case Intrinsic::AMDGPU_read_workdim:
> + return LowerImplicitParameter(DAG, VT, DL, MFI->ABIArgOffset / 4);
> +
> case Intrinsic::r600_read_tgid_x:
> return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
> AMDGPU::T1_X, VT);
> @@ -1725,7 +1728,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
> CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
> *DAG.getContext());
> MachineFunction &MF = DAG.getMachineFunction();
> - unsigned ShaderType = MF.getInfo<R600MachineFunctionInfo>()->getShaderType();
> + R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
>
> SmallVector<ISD::InputArg, 8> LocalIns;
>
> @@ -1743,7 +1746,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
> MemVT = MemVT.getVectorElementType();
> }
>
> - if (ShaderType != ShaderType::COMPUTE) {
> + if (MFI->getShaderType() != ShaderType::COMPUTE) {
> unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
> SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
> InVals.push_back(Register);
> @@ -1775,16 +1778,18 @@ SDValue R600TargetLowering::LowerFormalArguments(
>
> unsigned ValBase = ArgLocs[In.OrigArgIndex].getLocMemOffset();
> unsigned PartOffset = VA.getLocMemOffset();
> + unsigned Offset = 36 + VA.getLocMemOffset();
>
> MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
> SDValue Arg = DAG.getLoad(ISD::UNINDEXED, Ext, VT, DL, Chain,
> - DAG.getConstant(36 + PartOffset, MVT::i32),
> + DAG.getConstant(Offset, MVT::i32),
> DAG.getUNDEF(MVT::i32),
> PtrInfo,
> MemVT, false, true, true, 4);
>
> // 4 is the preferred alignment for the CONSTANT memory space.
> InVals.push_back(Arg);
> + MFI->ABIArgOffset = Offset + MemVT.getStoreSize();
> }
> return Chain;
> }
> diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
> index 49ac269..2888195 100644
> --- a/lib/Target/R600/SIISelLowering.cpp
> +++ b/lib/Target/R600/SIISelLowering.cpp
> @@ -519,11 +519,11 @@ SDValue SITargetLowering::LowerFormalArguments(
> if (VA.isMemLoc()) {
> VT = Ins[i].VT;
> EVT MemVT = Splits[i].VT;
> + const unsigned Offset = 36 + VA.getLocMemOffset();
> // The first 36 bytes of the input buffer contains information about
> // thread group and global sizes.
> SDValue Arg = LowerParameter(DAG, VT, MemVT, DL, DAG.getRoot(),
> - 36 + VA.getLocMemOffset(),
> - Ins[i].Flags.isSExt());
> + Offset, Ins[i].Flags.isSExt());
>
> const PointerType *ParamTy =
> dyn_cast<PointerType>(FType->getParamType(Ins[i].OrigArgIndex));
> @@ -537,6 +537,7 @@ SDValue SITargetLowering::LowerFormalArguments(
> }
>
> InVals.push_back(Arg);
> + Info->ABIArgOffset = Offset + MemVT.getStoreSize();
> continue;
> }
> assert(VA.isRegLoc() && "Parameter must be in a register!");
> @@ -927,6 +928,12 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
> case Intrinsic::r600_read_local_size_z:
> return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
> SI::KernelInputOffsets::LOCAL_SIZE_Z, false);
> +
> + case Intrinsic::AMDGPU_read_workdim:
> + return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
> + MF.getInfo<SIMachineFunctionInfo>()->ABIArgOffset,
> + false);
> +
> case Intrinsic::r600_read_tgid_x:
> return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
> TRI->getPreloadedValue(MF, SIRegisterInfo::TGID_X), VT);
> diff --git a/test/CodeGen/R600/work-item-intrinsics.ll b/test/CodeGen/R600/work-item-intrinsics.ll
> index a1337ae..cbefe25 100644
> --- a/test/CodeGen/R600/work-item-intrinsics.ll
> +++ b/test/CodeGen/R600/work-item-intrinsics.ll
> @@ -128,6 +128,20 @@ entry:
> ret void
> }
>
> +; FUNC-LABEL: {{^}}get_work_dim:
> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
> +; EG: MOV [[VAL]], KC0[2].Z
> +
> +; SI: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0xb
> +; SI: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
> +; SI: BUFFER_STORE_DWORD [[VVAL]]
> +define void @get_work_dim (i32 addrspace(1)* %out) {
> +entry:
> + %0 = call i32 @llvm.AMDGPU.read.workdim() #0
> + store i32 %0, i32 addrspace(1)* %out
> + ret void
> +}
> +
> ; The tgid values are stored in sgprs offset by the number of user sgprs.
> ; Currently we always use exactly 2 user sgprs for the pointer to the
> ; kernel arguments, but this may change in the future.
> @@ -209,4 +223,6 @@ declare i32 @llvm.r600.read.tidig.x() #0
> declare i32 @llvm.r600.read.tidig.y() #0
> declare i32 @llvm.r600.read.tidig.z() #0
>
> +declare i32 @llvm.AMDGPU.read.workdim() #0
> +
> attributes #0 = { readnone }
> --
> 1.9.3
>
More information about the llvm-commits mailing list