[llvm] r275024 - AMDGPU/R600: Add implicitarg.ptr intrinsic
Jan Vesely via llvm-commits
llvm-commits at lists.llvm.org
Sun Jul 10 14:20:29 PDT 2016
Author: jvesely
Date: Sun Jul 10 16:20:29 2016
New Revision: 275024
URL: http://llvm.org/viewvc/llvm-project?rev=275024&view=rev
Log:
AMDGPU/R600: Add implicitarg.ptr intrinsic
Differential Revision: http://reviews.llvm.org/D21622
Added:
llvm/trunk/test/CodeGen/AMDGPU/amdgcn.work-item-intrinsics.ll
llvm/trunk/test/CodeGen/AMDGPU/amdgpu.work-item-intrinsics.deprecated.ll
- copied, changed from r275022, llvm/trunk/test/CodeGen/AMDGPU/work-item-intrinsics.ll
llvm/trunk/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll
Removed:
llvm/trunk/test/CodeGen/AMDGPU/work-item-intrinsics.ll
Modified:
llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td
llvm/trunk/lib/Target/AMDGPU/EvergreenInstructions.td
llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/R600Instructions.td
Modified: llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td?rev=275024&r1=275023&r2=275024&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td Sun Jul 10 16:20:29 2016
@@ -43,6 +43,12 @@ defm int_r600_read_tidig : AMDGPUReadPre
def int_r600_read_workdim : AMDGPUReadPreloadRegisterIntrinsic;
+
+// AS 7 is PARAM_I_ADDRESS, used for kernel arguments
+def int_r600_implicitarg_ptr :
+ GCCBuiltin<"__builtin_r600_implicitarg_ptr">,
+ Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 7>], [], [IntrNoMem]>;
+
def int_r600_rat_store_typed :
// 1st parameter: Data
// 2nd parameter: Index
Modified: llvm/trunk/lib/Target/AMDGPU/EvergreenInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/EvergreenInstructions.td?rev=275024&r1=275023&r2=275024&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/EvergreenInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/EvergreenInstructions.td Sun Jul 10 16:20:29 2016
@@ -210,23 +210,23 @@ class VTX_READ_128_eg <bits<8> buffer_id
// VTX Read from parameter memory space
//===----------------------------------------------------------------------===//
-def VTX_READ_PARAM_8_eg : VTX_READ_8_eg <0,
+def VTX_READ_PARAM_8_eg : VTX_READ_8_eg <3,
[(set i32:$dst_gpr, (load_param_exti8 ADDRVTX_READ:$src_gpr))]
>;
-def VTX_READ_PARAM_16_eg : VTX_READ_16_eg <0,
+def VTX_READ_PARAM_16_eg : VTX_READ_16_eg <3,
[(set i32:$dst_gpr, (load_param_exti16 ADDRVTX_READ:$src_gpr))]
>;
-def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <0,
+def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <3,
[(set i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
>;
-def VTX_READ_PARAM_64_eg : VTX_READ_64_eg <0,
+def VTX_READ_PARAM_64_eg : VTX_READ_64_eg <3,
[(set v2i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
>;
-def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0,
+def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <3,
[(set v4i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
>;
Modified: llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp?rev=275024&r1=275023&r2=275024&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp Sun Jul 10 16:20:29 2016
@@ -782,6 +782,11 @@ SDValue R600TargetLowering::LowerOperati
return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
}
+ case Intrinsic::r600_implicitarg_ptr: {
+ MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUAS::PARAM_I_ADDRESS);
+ uint32_t ByteOffset = getImplicitParameterOffset(MFI, FIRST_IMPLICIT);
+ return DAG.getConstant(ByteOffset, DL, PtrVT);
+ }
case Intrinsic::r600_read_ngroups_x:
return LowerImplicitParameter(DAG, VT, DL, 0);
case Intrinsic::r600_read_ngroups_y:
Modified: llvm/trunk/lib/Target/AMDGPU/R600Instructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/R600Instructions.td?rev=275024&r1=275023&r2=275024&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/R600Instructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/R600Instructions.td Sun Jul 10 16:20:29 2016
@@ -329,7 +329,8 @@ class VTX_READ <string name, bits<8> buf
class LoadParamFrag <PatFrag load_type> : PatFrag <
(ops node:$ptr), (load_type node:$ptr),
- [{ return isConstantLoad(dyn_cast<LoadSDNode>(N), 0); }]
+ [{ return isConstantLoad(cast<LoadSDNode>(N), 0) ||
+ (cast<LoadSDNode>(N)->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS); }]
>;
def load_param : LoadParamFrag<load>;
Added: llvm/trunk/test/CodeGen/AMDGPU/amdgcn.work-item-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/amdgcn.work-item-intrinsics.ll?rev=275024&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/amdgcn.work-item-intrinsics.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/amdgcn.work-item-intrinsics.ll Sun Jul 10 16:20:29 2016
@@ -0,0 +1,114 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+
+
+; FUNC-LABEL: {{^}}workdim:
+
+; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb
+; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2c
+; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN-NOHSA: buffer_store_dword [[VVAL]]
+
+define void @workdim (i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.amdgcn.read.workdim() #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; The workgroup.id values are stored in sgprs offset by the number of user
+; sgprs.
+
+; FUNC-LABEL: {{^}}workgroup_id_x:
+; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s2{{$}}
+; GCN-NOHSA: buffer_store_dword [[VVAL]]
+
+; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
+; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
+; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
+; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
+; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
+define void @workgroup_id_x(i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.amdgcn.workgroup.id.x() #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}workgroup_id_y:
+; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3
+; GCN-NOHSA: buffer_store_dword [[VVAL]]
+
+; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
+define void @workgroup_id_y(i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.amdgcn.workgroup.id.y() #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}workgroup_id_z:
+; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3{{$}}
+; GCN-NOHSA: buffer_store_dword [[VVAL]]
+
+; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
+; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
+; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
+; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 1
+; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
+define void @workgroup_id_z(i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.amdgcn.workgroup.id.z() #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-NOHSA: .section .AMDGPU.config
+; GCN-NOHSA: .long 47180
+; GCN-NOHSA-NEXT: .long 132{{$}}
+
+; FUNC-LABEL: {{^}}workitem_id_x:
+; GCN-NOHSA: buffer_store_dword v0
+define void @workitem_id_x(i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.amdgcn.workitem.id.x() #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-NOHSA: .section .AMDGPU.config
+; GCN-NOHSA: .long 47180
+; GCN-NOHSA-NEXT: .long 2180{{$}}
+
+; FUNC-LABEL: {{^}}workitem_id_y:
+
+; GCN-NOHSA: buffer_store_dword v1
+define void @workitem_id_y(i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.amdgcn.workitem.id.y() #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-NOHSA: .section .AMDGPU.config
+; GCN-NOHSA: .long 47180
+; GCN-NOHSA-NEXT: .long 4228{{$}}
+
+; FUNC-LABEL: {{^}}workitem_id_z:
+; GCN-NOHSA: buffer_store_dword v2
+define void @workitem_id_z(i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.amdgcn.workitem.id.z() #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+declare i32 @llvm.amdgcn.workgroup.id.x() #0
+declare i32 @llvm.amdgcn.workgroup.id.y() #0
+declare i32 @llvm.amdgcn.workgroup.id.z() #0
+
+declare i32 @llvm.amdgcn.workitem.id.x() #0
+declare i32 @llvm.amdgcn.workitem.id.y() #0
+declare i32 @llvm.amdgcn.workitem.id.z() #0
+
+declare i32 @llvm.amdgcn.read.workdim() #0
Copied: llvm/trunk/test/CodeGen/AMDGPU/amdgpu.work-item-intrinsics.deprecated.ll (from r275022, llvm/trunk/test/CodeGen/AMDGPU/work-item-intrinsics.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/amdgpu.work-item-intrinsics.deprecated.ll?p2=llvm/trunk/test/CodeGen/AMDGPU/amdgpu.work-item-intrinsics.deprecated.ll&p1=llvm/trunk/test/CodeGen/AMDGPU/work-item-intrinsics.ll&r1=275022&r2=275024&rev=275024&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/work-item-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/amdgpu.work-item-intrinsics.deprecated.ll Sun Jul 10 16:20:29 2016
@@ -2,15 +2,31 @@
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; Legacy intrinsics that just read implicit parameters
+
+; FUNC-LABEL: {{^}}workdim_legacy:
+; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb
+; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2c
+; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN-NOHSA: buffer_store_dword [[VVAL]]
-; FUNC-LABEL: {{^}}ngroups_x:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
-; EG: MOV {{\*? *}}[[VAL]], KC0[0].X
+; EG: MOV {{\*? *}}[[VAL]], KC0[2].Z
+define void @workdim_legacy (i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.AMDGPU.read.workdim() #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
-; GCN-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0
+; FUNC-LABEL: {{^}}ngroups_x:
+; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x0
+; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x0
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV {{\*? *}}[[VAL]], KC0[0].X
define void @ngroups_x (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.ngroups.x() #0
@@ -19,13 +35,13 @@ entry:
}
; FUNC-LABEL: {{^}}ngroups_y:
-; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
-; EG: MOV {{\*? *}}[[VAL]], KC0[0].Y
-
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV {{\*? *}}[[VAL]], KC0[0].Y
define void @ngroups_y (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.ngroups.y() #0
@@ -34,13 +50,13 @@ entry:
}
; FUNC-LABEL: {{^}}ngroups_z:
-; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
-; EG: MOV {{\*? *}}[[VAL]], KC0[0].Z
-
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV {{\*? *}}[[VAL]], KC0[0].Z
define void @ngroups_z (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.ngroups.z() #0
@@ -49,13 +65,13 @@ entry:
}
; FUNC-LABEL: {{^}}global_size_x:
-; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
-; EG: MOV {{\*? *}}[[VAL]], KC0[0].W
-
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x3
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xc
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV {{\*? *}}[[VAL]], KC0[0].W
define void @global_size_x (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.global.size.x() #0
@@ -64,13 +80,13 @@ entry:
}
; FUNC-LABEL: {{^}}global_size_y:
-; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
-; EG: MOV {{\*? *}}[[VAL]], KC0[1].X
-
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x10
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV {{\*? *}}[[VAL]], KC0[1].X
define void @global_size_y (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.global.size.y() #0
@@ -79,13 +95,13 @@ entry:
}
; FUNC-LABEL: {{^}}global_size_z:
-; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
-; EG: MOV {{\*? *}}[[VAL]], KC0[1].Y
-
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x5
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x14
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV {{\*? *}}[[VAL]], KC0[1].Y
define void @global_size_z (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.global.size.z() #0
@@ -93,10 +109,57 @@ entry:
ret void
}
+; FUNC-LABEL: {{^}}local_size_x:
+; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6
+; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x18
+; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN-NOHSA: buffer_store_dword [[VVAL]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV {{\*? *}}[[VAL]], KC0[1].Z
+define void @local_size_x (i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.r600.read.local.size.x() #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_size_y:
+; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7
+; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c
+; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN-NOHSA: buffer_store_dword [[VVAL]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV {{\*? *}}[[VAL]], KC0[1].W
+define void @local_size_y (i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.r600.read.local.size.y() #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_size_z:
+; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
+; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20
+; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN-NOHSA: buffer_store_dword [[VVAL]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV {{\*? *}}[[VAL]], KC0[2].X
+define void @local_size_z (i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.r600.read.local.size.z() #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; Legacy use of r600 intrinsics by GCN
+
; The tgid values are stored in sgprs offset by the number of user
; sgprs.
-; FUNC-LABEL: {{^}}tgid_x:
+; FUNC-LABEL: {{^}}tgid_x_legacy:
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s2{{$}}
; GCN-NOHSA: buffer_store_dword [[VVAL]]
@@ -105,26 +168,26 @@ entry:
; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
-define void @tgid_x(i32 addrspace(1)* %out) {
+define void @tgid_x_legacy(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tgid.x() #0
store i32 %0, i32 addrspace(1)* %out
ret void
}
-; FUNC-LABEL: {{^}}tgid_y:
+; FUNC-LABEL: {{^}}tgid_y_legacy:
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3
; GCN-NOHSA: buffer_store_dword [[VVAL]]
; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
-define void @tgid_y(i32 addrspace(1)* %out) {
+define void @tgid_y_legacy(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tgid.y() #0
store i32 %0, i32 addrspace(1)* %out
ret void
}
-; FUNC-LABEL: {{^}}tgid_z:
+; FUNC-LABEL: {{^}}tgid_z_legacy:
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3{{$}}
; GCN-NOHSA: buffer_store_dword [[VVAL]]
@@ -133,7 +196,7 @@ entry:
; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 1
; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
-define void @tgid_z(i32 addrspace(1)* %out) {
+define void @tgid_z_legacy(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tgid.z() #0
store i32 %0, i32 addrspace(1)* %out
@@ -144,9 +207,9 @@ entry:
; GCN-NOHSA: .long 47180
; GCN-NOHSA-NEXT: .long 132{{$}}
-; FUNC-LABEL: {{^}}tidig_x:
+; FUNC-LABEL: {{^}}tidig_x_legacy:
; GCN-NOHSA: buffer_store_dword v0
-define void @tidig_x(i32 addrspace(1)* %out) {
+define void @tidig_x_legacy(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tidig.x() #0
store i32 %0, i32 addrspace(1)* %out
@@ -157,10 +220,10 @@ entry:
; GCN-NOHSA: .long 47180
; GCN-NOHSA-NEXT: .long 2180{{$}}
-; FUNC-LABEL: {{^}}tidig_y:
+; FUNC-LABEL: {{^}}tidig_y_legacy:
; GCN-NOHSA: buffer_store_dword v1
-define void @tidig_y(i32 addrspace(1)* %out) {
+define void @tidig_y_legacy(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tidig.y() #0
store i32 %0, i32 addrspace(1)* %out
@@ -171,9 +234,9 @@ entry:
; GCN-NOHSA: .long 47180
; GCN-NOHSA-NEXT: .long 4228{{$}}
-; FUNC-LABEL: {{^}}tidig_z:
+; FUNC-LABEL: {{^}}tidig_z_legacy:
; GCN-NOHSA: buffer_store_dword v2
-define void @tidig_z(i32 addrspace(1)* %out) {
+define void @tidig_z_legacy(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tidig.z() #0
store i32 %0, i32 addrspace(1)* %out
@@ -188,6 +251,10 @@ declare i32 @llvm.r600.read.global.size.
declare i32 @llvm.r600.read.global.size.y() #0
declare i32 @llvm.r600.read.global.size.z() #0
+declare i32 @llvm.r600.read.local.size.x() #0
+declare i32 @llvm.r600.read.local.size.y() #0
+declare i32 @llvm.r600.read.local.size.z() #0
+
declare i32 @llvm.r600.read.tgid.x() #0
declare i32 @llvm.r600.read.tgid.y() #0
declare i32 @llvm.r600.read.tgid.z() #0
Added: llvm/trunk/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll?rev=275024&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll Sun Jul 10 16:20:29 2016
@@ -0,0 +1,107 @@
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}tgid_x:
+; EG: MEM_RAT_CACHELESS STORE_RAW T1.X
+define void @tgid_x(i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.r600.read.tgid.x() #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}tgid_y:
+; EG: MEM_RAT_CACHELESS STORE_RAW T1.Y
+define void @tgid_y(i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.r600.read.tgid.y() #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}tgid_z:
+; EG: MEM_RAT_CACHELESS STORE_RAW T1.Z
+define void @tgid_z(i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.r600.read.tgid.z() #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}tidig_x:
+; EG: MEM_RAT_CACHELESS STORE_RAW T0.X
+define void @tidig_x(i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.r600.read.tidig.x() #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}tidig_y:
+; EG: MEM_RAT_CACHELESS STORE_RAW T0.Y
+define void @tidig_y(i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.r600.read.tidig.y() #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}tidig_z:
+; EG: MEM_RAT_CACHELESS STORE_RAW T0.Z
+define void @tidig_z(i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.r600.read.tidig.z() #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_implicit:
+; 36 prepended implicit bytes + 4(out pointer) + 4*4 = 56
+; EG: VTX_READ_32 {{T[0-9]+\.[XYZW]}}, {{T[0-9]+\.[XYZW]}}, 56
+define void @test_implicit(i32 addrspace(1)* %out) #1 {
+ %implicitarg.ptr = call noalias i8 addrspace(7)* @llvm.r600.implicitarg.ptr()
+ %header.ptr = bitcast i8 addrspace(7)* %implicitarg.ptr to i32 addrspace(7)*
+ %gep = getelementptr i32, i32 addrspace(7)* %header.ptr, i32 4
+ %value = load i32, i32 addrspace(7)* %gep
+ store i32 %value, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_implicit_dyn:
+; 36 prepended implicit bytes + 8(out pointer + in) = 44
+; EG: VTX_READ_32 {{T[0-9]+\.[XYZW]}}, {{T[0-9]+\.[XYZW]}}, 44
+define void @test_implicit_dyn(i32 addrspace(1)* %out, i32 %in) #1 {
+ %implicitarg.ptr = call noalias i8 addrspace(7)* @llvm.r600.implicitarg.ptr()
+ %header.ptr = bitcast i8 addrspace(7)* %implicitarg.ptr to i32 addrspace(7)*
+ %gep = getelementptr i32, i32 addrspace(7)* %header.ptr, i32 %in
+ %value = load i32, i32 addrspace(7)* %gep
+ store i32 %value, i32 addrspace(1)* %out
+ ret void
+}
+
+
+
+; DEPRECATED but R600 only
+
+; FUNC-LABEL: {{^}}workdim:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV {{\*? *}}[[VAL]], KC0[2].Z
+define void @workdim (i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.r600.read.workdim() #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+declare i32 @llvm.r600.read.workdim() #0
+
+declare i8 addrspace(7)* @llvm.r600.implicitarg.ptr() #0
+
+declare i32 @llvm.r600.read.tgid.x() #0
+declare i32 @llvm.r600.read.tgid.y() #0
+declare i32 @llvm.r600.read.tgid.z() #0
+
+declare i32 @llvm.r600.read.tidig.x() #0
+declare i32 @llvm.r600.read.tidig.y() #0
+declare i32 @llvm.r600.read.tidig.z() #0
+
+attributes #0 = { readnone }
Removed: llvm/trunk/test/CodeGen/AMDGPU/work-item-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/work-item-intrinsics.ll?rev=275023&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/work-item-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/work-item-intrinsics.ll (removed)
@@ -1,201 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-
-
-; FUNC-LABEL: {{^}}ngroups_x:
-; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
-; EG: MOV {{\*? *}}[[VAL]], KC0[0].X
-
-; GCN-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0
-; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; GCN-NOHSA: buffer_store_dword [[VVAL]]
-
-define void @ngroups_x (i32 addrspace(1)* %out) {
-entry:
- %0 = call i32 @llvm.r600.read.ngroups.x() #0
- store i32 %0, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}ngroups_y:
-; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
-; EG: MOV {{\*? *}}[[VAL]], KC0[0].Y
-
-; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1
-; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
-; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; GCN-NOHSA: buffer_store_dword [[VVAL]]
-define void @ngroups_y (i32 addrspace(1)* %out) {
-entry:
- %0 = call i32 @llvm.r600.read.ngroups.y() #0
- store i32 %0, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}ngroups_z:
-; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
-; EG: MOV {{\*? *}}[[VAL]], KC0[0].Z
-
-; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2
-; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
-; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; GCN-NOHSA: buffer_store_dword [[VVAL]]
-define void @ngroups_z (i32 addrspace(1)* %out) {
-entry:
- %0 = call i32 @llvm.r600.read.ngroups.z() #0
- store i32 %0, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}global_size_x:
-; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
-; EG: MOV {{\*? *}}[[VAL]], KC0[0].W
-
-; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x3
-; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xc
-; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; GCN-NOHSA: buffer_store_dword [[VVAL]]
-define void @global_size_x (i32 addrspace(1)* %out) {
-entry:
- %0 = call i32 @llvm.r600.read.global.size.x() #0
- store i32 %0, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}global_size_y:
-; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
-; EG: MOV {{\*? *}}[[VAL]], KC0[1].X
-
-; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
-; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x10
-; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; GCN-NOHSA: buffer_store_dword [[VVAL]]
-define void @global_size_y (i32 addrspace(1)* %out) {
-entry:
- %0 = call i32 @llvm.r600.read.global.size.y() #0
- store i32 %0, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}global_size_z:
-; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
-; EG: MOV {{\*? *}}[[VAL]], KC0[1].Y
-
-; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x5
-; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x14
-; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; GCN-NOHSA: buffer_store_dword [[VVAL]]
-define void @global_size_z (i32 addrspace(1)* %out) {
-entry:
- %0 = call i32 @llvm.r600.read.global.size.z() #0
- store i32 %0, i32 addrspace(1)* %out
- ret void
-}
-
-; The tgid values are stored in sgprs offset by the number of user
-; sgprs.
-
-; FUNC-LABEL: {{^}}tgid_x:
-; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s2{{$}}
-; GCN-NOHSA: buffer_store_dword [[VVAL]]
-
-; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
-; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
-; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
-; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
-; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
-define void @tgid_x(i32 addrspace(1)* %out) {
-entry:
- %0 = call i32 @llvm.r600.read.tgid.x() #0
- store i32 %0, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}tgid_y:
-; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3
-; GCN-NOHSA: buffer_store_dword [[VVAL]]
-
-; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
-define void @tgid_y(i32 addrspace(1)* %out) {
-entry:
- %0 = call i32 @llvm.r600.read.tgid.y() #0
- store i32 %0, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}tgid_z:
-; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3{{$}}
-; GCN-NOHSA: buffer_store_dword [[VVAL]]
-
-; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
-; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
-; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
-; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 1
-; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
-define void @tgid_z(i32 addrspace(1)* %out) {
-entry:
- %0 = call i32 @llvm.r600.read.tgid.z() #0
- store i32 %0, i32 addrspace(1)* %out
- ret void
-}
-
-; GCN-NOHSA: .section .AMDGPU.config
-; GCN-NOHSA: .long 47180
-; GCN-NOHSA-NEXT: .long 132{{$}}
-
-; FUNC-LABEL: {{^}}tidig_x:
-; GCN-NOHSA: buffer_store_dword v0
-define void @tidig_x(i32 addrspace(1)* %out) {
-entry:
- %0 = call i32 @llvm.r600.read.tidig.x() #0
- store i32 %0, i32 addrspace(1)* %out
- ret void
-}
-
-; GCN-NOHSA: .section .AMDGPU.config
-; GCN-NOHSA: .long 47180
-; GCN-NOHSA-NEXT: .long 2180{{$}}
-
-; FUNC-LABEL: {{^}}tidig_y:
-
-; GCN-NOHSA: buffer_store_dword v1
-define void @tidig_y(i32 addrspace(1)* %out) {
-entry:
- %0 = call i32 @llvm.r600.read.tidig.y() #0
- store i32 %0, i32 addrspace(1)* %out
- ret void
-}
-
-; GCN-NOHSA: .section .AMDGPU.config
-; GCN-NOHSA: .long 47180
-; GCN-NOHSA-NEXT: .long 4228{{$}}
-
-; FUNC-LABEL: {{^}}tidig_z:
-; GCN-NOHSA: buffer_store_dword v2
-define void @tidig_z(i32 addrspace(1)* %out) {
-entry:
- %0 = call i32 @llvm.r600.read.tidig.z() #0
- store i32 %0, i32 addrspace(1)* %out
- ret void
-}
-
-declare i32 @llvm.r600.read.ngroups.x() #0
-declare i32 @llvm.r600.read.ngroups.y() #0
-declare i32 @llvm.r600.read.ngroups.z() #0
-
-declare i32 @llvm.r600.read.global.size.x() #0
-declare i32 @llvm.r600.read.global.size.y() #0
-declare i32 @llvm.r600.read.global.size.z() #0
-
-declare i32 @llvm.r600.read.tgid.x() #0
-declare i32 @llvm.r600.read.tgid.y() #0
-declare i32 @llvm.r600.read.tgid.z() #0
-
-declare i32 @llvm.r600.read.tidig.x() #0
-declare i32 @llvm.r600.read.tidig.y() #0
-declare i32 @llvm.r600.read.tidig.z() #0
-
-declare i32 @llvm.AMDGPU.read.workdim() #0
-
-attributes #0 = { readnone }
More information about the llvm-commits mailing list