[PATCH] D20297: AMDGPU/SI: Make kernarg.segment.ptr point to implicit arguments for non HSA
Jan Vesely via llvm-commits
llvm-commits at lists.llvm.org
Fri May 20 09:58:46 PDT 2016
jvesely retitled this revision from "AMDGPU/SI: Add amdgcn versions of remaining builtins" to "AMDGPU/SI: Make kernarg.segment.ptr point to implicit arguments for non HSA".
jvesely updated the summary for this revision.
jvesely updated this revision to Diff 57944.
jvesely added a comment.
Drop the new amdgcn intrinsics. Make the kernarg segment Mesa-compatible instead.
Repository:
rL LLVM
http://reviews.llvm.org/D20297
Files:
lib/Target/AMDGPU/SIISelLowering.cpp
lib/Target/AMDGPU/SIISelLowering.h
test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
Index: test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
+++ test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
@@ -5,7 +5,8 @@
; HSA: enable_sgpr_kernarg_segment_ptr = 1
; HSA: s_load_dword s{{[0-9]+}}, s[4:5], 0xa
-; MESA: s_load_dword s{{[0-9]+}}, s[0:1], 0xa
+; Offset in dwords: 10 + 9 (36 prepended implicit bytes) + 2 (out pointer) = 21 = 0x15
+; MESA: s_load_dword s{{[0-9]+}}, s[0:1], 0x15
define void @test(i32 addrspace(1)* %out) #1 {
%kernarg.segment.ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
%header.ptr = bitcast i8 addrspace(2)* %kernarg.segment.ptr to i32 addrspace(2)*
Index: lib/Target/AMDGPU/SIISelLowering.h
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.h
+++ lib/Target/AMDGPU/SIISelLowering.h
@@ -21,6 +21,8 @@
namespace llvm {
class SITargetLowering final : public AMDGPUTargetLowering {
+ SDValue LowerParameterPtr(SelectionDAG &DAG, SDLoc SL, SDValue Chain,
+ unsigned Offset) const;
SDValue LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, SDLoc DL,
SDValue Chain, unsigned Offset, bool Signed) const;
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -622,24 +622,29 @@
return TargetLowering::isTypeDesirableForOp(Op, VT);
}
-SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
- SDLoc SL, SDValue Chain,
- unsigned Offset, bool Signed) const {
+SDValue SITargetLowering::LowerParameterPtr(SelectionDAG &DAG,
+ SDLoc SL, SDValue Chain,
+ unsigned Offset) const {
const DataLayout &DL = DAG.getDataLayout();
MachineFunction &MF = DAG.getMachineFunction();
const SIRegisterInfo *TRI =
static_cast<const SIRegisterInfo*>(Subtarget->getRegisterInfo());
unsigned InputPtrReg = TRI->getPreloadedValue(MF, SIRegisterInfo::KERNARG_SEGMENT_PTR);
- Type *Ty = VT.getTypeForEVT(*DAG.getContext());
-
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
MVT PtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
- PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
SDValue BasePtr = DAG.getCopyFromReg(Chain, SL,
MRI.getLiveInVirtReg(InputPtrReg), PtrVT);
- SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
- DAG.getConstant(Offset, SL, PtrVT));
+ return DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
+ DAG.getConstant(Offset, SL, PtrVT));
+}
+SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
+ SDLoc SL, SDValue Chain,
+ unsigned Offset, bool Signed) const {
+ const DataLayout &DL = DAG.getDataLayout();
+ Type *Ty = VT.getTypeForEVT(*DAG.getContext());
+ MVT PtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
+ PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
SDValue PtrOffset = DAG.getUNDEF(PtrVT);
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
@@ -649,6 +654,7 @@
if (MemVT.isFloatingPoint())
ExtTy = ISD::EXTLOAD;
+ SDValue Ptr = LowerParameterPtr(DAG, SL, Chain, Offset);
return DAG.getLoad(ISD::UNINDEXED, ExtTy,
VT, SL, Chain, Ptr, PtrOffset, PtrInfo, MemVT,
false, // isVolatile
@@ -1565,6 +1571,12 @@
TRI->getPreloadedValue(MF, Reg), VT);
}
case Intrinsic::amdgcn_kernarg_segment_ptr: {
+ if (!Subtarget->isAmdHsaOS()) {
+ unsigned offset = getImplicitParameterOffset(MFI, GRID_DIM);
+ llvm::dbgs() << "FOUND offset of the first implicit arg " << offset << "\n";
+ return LowerParameterPtr(DAG, DL, DAG.getEntryNode(),
+ getImplicitParameterOffset(MFI, GRID_DIM));
+ }
unsigned Reg
= TRI->getPreloadedValue(MF, SIRegisterInfo::KERNARG_SEGMENT_PTR);
return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass, Reg, VT);
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D20297.57944.patch
Type: text/x-patch
Size: 4450 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160520/880b2080/attachment.bin>
More information about the llvm-commits
mailing list