[PATCH v4 1/1] R600: Add new intrinsic to read work dimensions

Jan Vesely jan.vesely at rutgers.edu
Tue Aug 12 09:01:05 PDT 2014


v2: Add SI lowering
    Add test

v3: Place work dimensions after the kernel arguments.
v4: Calculate offset in lower formal arguments per Tom's suggestion

Signed-off-by: Jan Vesely <jan.vesely at rutgers.edu>
---

Needs patches to mesa and libclc to be useful

 include/llvm/IR/IntrinsicsR600.td         |  2 ++
 lib/Target/R600/AMDGPUMachineFunction.h   |  3 +++
 lib/Target/R600/R600ISelLowering.cpp      | 11 ++++++++---
 lib/Target/R600/SIISelLowering.cpp        |  9 +++++++--
 test/CodeGen/R600/work-item-intrinsics.ll | 16 ++++++++++++++++
 5 files changed, 36 insertions(+), 5 deletions(-)

diff --git a/include/llvm/IR/IntrinsicsR600.td b/include/llvm/IR/IntrinsicsR600.td
index ba69eaa..37a9771 100644
--- a/include/llvm/IR/IntrinsicsR600.td
+++ b/include/llvm/IR/IntrinsicsR600.td
@@ -33,6 +33,8 @@ defm int_r600_read_tgid : R600ReadPreloadRegisterIntrinsic_xyz <
                                        "__builtin_r600_read_tgid">;
 defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz <
                                        "__builtin_r600_read_tidig">;
+def int_r600_read_workdim : R600ReadPreloadRegisterIntrinsic <
+                                       "__builtin_r600_read_workdim">;
 
 } // End TargetPrefix = "r600"
 
diff --git a/lib/Target/R600/AMDGPUMachineFunction.h b/lib/Target/R600/AMDGPUMachineFunction.h
index 0854d58..1596b3c 100644
--- a/lib/Target/R600/AMDGPUMachineFunction.h
+++ b/lib/Target/R600/AMDGPUMachineFunction.h
@@ -30,6 +30,9 @@ public:
   /// Number of bytes in the LDS that are being used.
   unsigned LDSSize;
 
+  /// Start of implicit kernel args
+  unsigned ABIArgOffset;
+
   unsigned getShaderType() const {
     return ShaderType;
   }
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
index 8877cc8..58346ec 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -805,6 +805,9 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
       return LowerImplicitParameter(DAG, VT, DL, 7);
     case Intrinsic::r600_read_local_size_z:
       return LowerImplicitParameter(DAG, VT, DL, 8);
+    case Intrinsic::r600_read_workdim: {
+      return LowerImplicitParameter(DAG, VT, DL, MFI->ABIArgOffset / 4);
+    }
 
     case Intrinsic::r600_read_tgid_x:
       return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
@@ -1695,7 +1698,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                  *DAG.getContext());
   MachineFunction &MF = DAG.getMachineFunction();
-  unsigned ShaderType = MF.getInfo<R600MachineFunctionInfo>()->getShaderType();
+  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
 
   SmallVector<ISD::InputArg, 8> LocalIns;
 
@@ -1708,7 +1711,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
     EVT VT = Ins[i].VT;
     EVT MemVT = LocalIns[i].VT;
 
-    if (ShaderType != ShaderType::COMPUTE) {
+    if (MFI->getShaderType() != ShaderType::COMPUTE) {
       unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
       SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
       InVals.push_back(Register);
@@ -1730,13 +1733,15 @@ SDValue R600TargetLowering::LowerFormalArguments(
     // extload vecto parameters seems to be broken.
     //ISD::LoadExtType Ext = Ins[i].Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
     ISD::LoadExtType Ext = ISD::SEXTLOAD;
+    const unsigned Offset = 36 + VA.getLocMemOffset();
     SDValue Arg = DAG.getExtLoad(Ext, DL, VT, Chain,
-                                 DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
+                                 DAG.getConstant(Offset, MVT::i32),
                                  MachinePointerInfo(UndefValue::get(PtrTy)),
                                  MemVT, false, false, false, 4);
 
     // 4 is the preferred alignment for the CONSTANT memory space.
     InVals.push_back(Arg);
+    MFI->ABIArgOffset = Offset + MemVT.getStoreSize();
   }
   return Chain;
 }
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index f9be144..1c6bfad 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -441,12 +441,13 @@ SDValue SITargetLowering::LowerFormalArguments(
     if (VA.isMemLoc()) {
       VT = Ins[i].VT;
       EVT MemVT = Splits[i].VT;
+      const unsigned Offset = 36 + VA.getLocMemOffset();
       // The first 36 bytes of the input buffer contains information about
       // thread group and global sizes.
       SDValue Arg = LowerParameter(DAG, VT, MemVT,  DL, DAG.getRoot(),
-                                   36 + VA.getLocMemOffset(),
-                                   Ins[i].Flags.isSExt());
+                                   Offset, Ins[i].Flags.isSExt());
       InVals.push_back(Arg);
+      Info->ABIArgOffset = Offset + MemVT.getStoreSize();
       continue;
     }
     assert(VA.isRegLoc() && "Parameter must be in a register!");
@@ -866,6 +867,10 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 28, false);
   case Intrinsic::r600_read_local_size_z:
     return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 32, false);
+  case Intrinsic::r600_read_workdim: {
+    return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+                          MFI->ABIArgOffset, false);
+  }
   case Intrinsic::r600_read_tgid_x:
     return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
       AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 0), VT);
diff --git a/test/CodeGen/R600/work-item-intrinsics.ll b/test/CodeGen/R600/work-item-intrinsics.ll
index 1dbd9b8..ef1be21 100644
--- a/test/CodeGen/R600/work-item-intrinsics.ll
+++ b/test/CodeGen/R600/work-item-intrinsics.ll
@@ -128,6 +128,20 @@ entry:
   ret void
 }
 
+; FUNC-LABEL: @get_work_dim
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[2].Z
+
+; SI: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0xb
+; SI: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; SI: BUFFER_STORE_DWORD [[VVAL]]
+define void @get_work_dim (i32 addrspace(1)* %out) {
+entry:
+  %0 = call i32 @llvm.r600.read.workdim() #0
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
 ; The tgid values are stored in sgprs offset by the number of user sgprs.
 ; Currently we always use exactly 2 user sgprs for the pointer to the
 ; kernel arguments, but this may change in the future.
@@ -209,4 +223,6 @@ declare i32 @llvm.r600.read.tidig.x() #0
 declare i32 @llvm.r600.read.tidig.y() #0
 declare i32 @llvm.r600.read.tidig.z() #0
 
+declare i32 @llvm.r600.read.workdim() #0
+
 attributes #0 = { readnone }
-- 
1.9.3




More information about the llvm-commits mailing list