[llvm] r307978 - AMDGPU: Detect kernarg segment pointer
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 13 17:11:13 PDT 2017
Author: arsenm
Date: Thu Jul 13 17:11:13 2017
New Revision: 307978
URL: http://llvm.org/viewvc/llvm-project?rev=307978&view=rev
Log:
AMDGPU: Detect kernarg segment pointer
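This is necessary to pass the kernarg segment pointer
to callee functions. Also, stop enabling it unconditionally
for kernels: it is now enabled only when the kernel has
arguments or the function carries the
"amdgpu-kernarg-segment-ptr" attribute.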
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
llvm/trunk/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
llvm/trunk/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
llvm/trunk/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll
llvm/trunk/test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll
llvm/trunk/test/CodeGen/AMDGPU/hsa.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
llvm/trunk/test/CodeGen/AMDGPU/mubuf-offset-private.ll
llvm/trunk/test/CodeGen/AMDGPU/private-access-no-objects.ll
llvm/trunk/test/CodeGen/AMDGPU/trap.ll
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp?rev=307978&r1=307977&r2=307978&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp Thu Jul 13 17:11:13 2017
@@ -150,6 +150,9 @@ static StringRef intrinsicToAttrName(Int
return "amdgpu-dispatch-ptr";
case Intrinsic::amdgcn_dispatch_id:
return "amdgpu-dispatch-id";
+ case Intrinsic::amdgcn_kernarg_segment_ptr:
+ case Intrinsic::amdgcn_implicitarg_ptr:
+ return "amdgpu-kernarg-segment-ptr";
case Intrinsic::amdgcn_queue_ptr:
case Intrinsic::trap:
case Intrinsic::debugtrap:
@@ -181,7 +184,8 @@ static void copyFeaturesToFunction(Funct
{ "amdgpu-work-group-id-y" },
{ "amdgpu-work-group-id-z" },
{ "amdgpu-dispatch-ptr" },
- { "amdgpu-dispatch-id" }
+ { "amdgpu-dispatch-id" },
+ { "amdgpu-kernarg-segment-ptr" }
};
if (handleAttr(Parent, Callee, "amdgpu-queue-ptr"))
Modified: llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp?rev=307978&r1=307977&r2=307978&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp Thu Jul 13 17:11:13 2017
@@ -92,7 +92,7 @@ SIMachineFunctionInfo::SIMachineFunction
CallingConv::ID CC = F->getCallingConv();
if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
- KernargSegmentPtr = true;
+ KernargSegmentPtr = !F->arg_empty();
WorkGroupIDX = true;
WorkItemIDX = true;
} else if (CC == CallingConv::AMDGPU_PS) {
@@ -154,6 +154,9 @@ SIMachineFunctionInfo::SIMachineFunction
ImplicitBufferPtr = true;
}
+ if (F->hasFnAttribute("amdgpu-kernarg-segment-ptr"))
+ KernargSegmentPtr = true;
+
// We don't need to worry about accessing spills with flat instructions.
// TODO: On VI where we must use flat for global, we should be able to omit
// this if it is never used for generic access.
Modified: llvm/trunk/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll?rev=307978&r1=307977&r2=307978&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll Thu Jul 13 17:11:13 2017
@@ -8,6 +8,8 @@ declare i32 @llvm.amdgcn.workitem.id.z()
declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
declare i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #0
+declare i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() #0
+declare i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr() #0
declare i64 @llvm.amdgcn.dispatch.id() #0
; HSA: define void @use_workitem_id_y() #1 {
@@ -182,6 +184,32 @@ define void @indirect_use_group_to_flat_
ret void
}
+; HSA: define void @use_kernarg_segment_ptr() #12 {
+define void @use_kernarg_segment_ptr() #1 {
+ %kernarg.segment.ptr = call i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
+ store volatile i8 addrspace(2)* %kernarg.segment.ptr, i8 addrspace(2)* addrspace(1)* undef
+ ret void
+}
+
+; HSA: define void @func_indirect_use_kernarg_segment_ptr() #12 {
+define void @func_indirect_use_kernarg_segment_ptr() #1 {
+ call void @use_kernarg_segment_ptr()
+ ret void
+}
+
+; HSA: define void @use_implicitarg_ptr() #12 {
+define void @use_implicitarg_ptr() #1 {
+ %implicitarg.ptr = call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
+ store volatile i8 addrspace(2)* %implicitarg.ptr, i8 addrspace(2)* addrspace(1)* undef
+ ret void
+}
+
+; HSA: define void @func_indirect_use_implicitarg_ptr() #12 {
+define void @func_indirect_use_implicitarg_ptr() #1 {
+ call void @use_implicitarg_ptr()
+ ret void
+}
+
attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind "target-cpu"="fiji" }
attributes #2 = { nounwind "target-cpu"="gfx900" }
@@ -198,3 +226,4 @@ attributes #2 = { nounwind "target-cpu"=
; HSA: attributes #9 = { nounwind "target-cpu"="fiji" }
; HSA: attributes #10 = { nounwind "target-cpu"="gfx900" }
; HSA: attributes #11 = { nounwind "amdgpu-queue-ptr" "target-cpu"="gfx900" }
+; HSA: attributes #12 = { nounwind "amdgpu-kernarg-segment-ptr" "target-cpu"="fiji" }
Modified: llvm/trunk/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll?rev=307978&r1=307977&r2=307978&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll Thu Jul 13 17:11:13 2017
@@ -10,6 +10,7 @@ declare i32 @llvm.amdgcn.workitem.id.z()
declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
declare i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #0
+declare i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() #0
; HSA: define amdgpu_kernel void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
@@ -164,6 +165,15 @@ define amdgpu_kernel void @use_queue_ptr
ret void
}
+; HSA: define amdgpu_kernel void @use_kernarg_segment_ptr(i32 addrspace(1)* %ptr) #12 {
+define amdgpu_kernel void @use_kernarg_segment_ptr(i32 addrspace(1)* %ptr) #1 {
+ %dispatch.ptr = call i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
+ %bc = bitcast i8 addrspace(2)* %dispatch.ptr to i32 addrspace(2)*
+ %val = load i32, i32 addrspace(2)* %bc
+ store i32 %val, i32 addrspace(1)* %ptr
+ ret void
+}
+
; HSA: define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #11 {
define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #1 {
%stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
@@ -236,3 +246,4 @@ attributes #1 = { nounwind }
; HSA: attributes #9 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
; HSA: attributes #10 = { nounwind "amdgpu-dispatch-ptr" }
; HSA: attributes #11 = { nounwind "amdgpu-queue-ptr" }
+; HSA: attributes #12 = { nounwind "amdgpu-kernarg-segment-ptr" }
Modified: llvm/trunk/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll?rev=307978&r1=307977&r2=307978&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll Thu Jul 13 17:11:13 2017
@@ -36,7 +36,7 @@ attributes #2 = {"amdgpu-flat-work-group
; CHECK-LABEL: {{^}}min_1024_max_2048
; CHECK: SGPRBlocks: 1
; CHECK: VGPRBlocks: 7
-; CHECK: NumSGPRsForWavesPerEU: 13
+; CHECK: NumSGPRsForWavesPerEU: 12
; CHECK: NumVGPRsForWavesPerEU: 32
@var = addrspace(1) global float 0.0
define amdgpu_kernel void @min_1024_max_2048() #3 {
Modified: llvm/trunk/test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll?rev=307978&r1=307977&r2=307978&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll Thu Jul 13 17:11:13 2017
@@ -118,7 +118,7 @@ attributes #8 = {"amdgpu-waves-per-eu"="
; CHECK-LABEL: {{^}}exactly_10:
; CHECK: SGPRBlocks: 1
; CHECK: VGPRBlocks: 5
-; CHECK: NumSGPRsForWavesPerEU: 13
+; CHECK: NumSGPRsForWavesPerEU: 12
; CHECK: NumVGPRsForWavesPerEU: 24
define amdgpu_kernel void @exactly_10() #9 {
%val0 = load volatile float, float addrspace(1)* @var
Modified: llvm/trunk/test/CodeGen/AMDGPU/hsa.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/hsa.ll?rev=307978&r1=307977&r2=307978&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/hsa.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/hsa.ll Thu Jul 13 17:11:13 2017
@@ -40,7 +40,7 @@
; HSA-CI: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
; HSA-VI: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU"
-; HSA: .amdgpu_hsa_kernel simple
+; HSA-LABEL: .amdgpu_hsa_kernel simple
; HSA: {{^}}simple:
; HSA: .amd_kernel_code_t
; HSA: enable_sgpr_private_segment_buffer = 1
@@ -65,3 +65,11 @@ entry:
store i32 0, i32 addrspace(1)* %out
ret void
}
+
+; HSA-LABEL: .amdgpu_hsa_kernel simple_no_kernargs
+; HSA: enable_sgpr_kernarg_segment_ptr = 0
+define amdgpu_kernel void @simple_no_kernargs() {
+entry:
+ store volatile i32 0, i32 addrspace(1)* undef
+ ret void
+}
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll?rev=307978&r1=307977&r2=307978&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll Thu Jul 13 17:11:13 2017
@@ -49,6 +49,18 @@ define amdgpu_kernel void @test_implicit
ret void
}
+; ALL-LABEL: {{^}}test_no_kernargs:
+; HSA: enable_sgpr_kernarg_segment_ptr = 1
+; HSA: s_load_dword s{{[0-9]+}}, s[4:5]
+define amdgpu_kernel void @test_no_kernargs() #1 {
+ %kernarg.segment.ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
+ %header.ptr = bitcast i8 addrspace(2)* %kernarg.segment.ptr to i32 addrspace(2)*
+ %gep = getelementptr i32, i32 addrspace(2)* %header.ptr, i64 10
+ %value = load i32, i32 addrspace(2)* %gep
+ store volatile i32 %value, i32 addrspace(1)* undef
+ ret void
+}
+
declare i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() #0
declare i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr() #0
Modified: llvm/trunk/test/CodeGen/AMDGPU/mubuf-offset-private.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/mubuf-offset-private.ll?rev=307978&r1=307977&r2=307978&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/mubuf-offset-private.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/mubuf-offset-private.ll Thu Jul 13 17:11:13 2017
@@ -5,42 +5,42 @@
; Test addressing modes when the scratch base is not a frame index.
; GCN-LABEL: {{^}}store_private_offset_i8:
-; GCN: buffer_store_byte v{{[0-9]+}}, off, s[4:7], s8 offset:8
+; GCN: buffer_store_byte v{{[0-9]+}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @store_private_offset_i8() #0 {
store volatile i8 5, i8* inttoptr (i32 8 to i8*)
ret void
}
; GCN-LABEL: {{^}}store_private_offset_i16:
-; GCN: buffer_store_short v{{[0-9]+}}, off, s[4:7], s8 offset:8
+; GCN: buffer_store_short v{{[0-9]+}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @store_private_offset_i16() #0 {
store volatile i16 5, i16* inttoptr (i32 8 to i16*)
ret void
}
; GCN-LABEL: {{^}}store_private_offset_i32:
-; GCN: buffer_store_dword v{{[0-9]+}}, off, s[4:7], s8 offset:8
+; GCN: buffer_store_dword v{{[0-9]+}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @store_private_offset_i32() #0 {
store volatile i32 5, i32* inttoptr (i32 8 to i32*)
ret void
}
; GCN-LABEL: {{^}}store_private_offset_v2i32:
-; GCN: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s8 offset:8
+; GCN: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @store_private_offset_v2i32() #0 {
store volatile <2 x i32> <i32 5, i32 10>, <2 x i32>* inttoptr (i32 8 to <2 x i32>*)
ret void
}
; GCN-LABEL: {{^}}store_private_offset_v4i32:
-; GCN: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s8 offset:8
+; GCN: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @store_private_offset_v4i32() #0 {
store volatile <4 x i32> <i32 5, i32 10, i32 15, i32 0>, <4 x i32>* inttoptr (i32 8 to <4 x i32>*)
ret void
}
; GCN-LABEL: {{^}}load_private_offset_i8:
-; GCN: buffer_load_ubyte v{{[0-9]+}}, off, s[4:7], s8 offset:8
+; GCN: buffer_load_ubyte v{{[0-9]+}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @load_private_offset_i8() #0 {
%load = load volatile i8, i8* inttoptr (i32 8 to i8*)
ret void
@@ -65,7 +65,7 @@ define amdgpu_kernel void @zextload_priv
}
; GCN-LABEL: {{^}}load_private_offset_i16:
-; GCN: buffer_load_ushort v{{[0-9]+}}, off, s[4:7], s8 offset:8
+; GCN: buffer_load_ushort v{{[0-9]+}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @load_private_offset_i16() #0 {
%load = load volatile i16, i16* inttoptr (i32 8 to i16*)
ret void
@@ -90,28 +90,28 @@ define amdgpu_kernel void @zextload_priv
}
; GCN-LABEL: {{^}}load_private_offset_i32:
-; GCN: buffer_load_dword v{{[0-9]+}}, off, s[4:7], s8 offset:8
+; GCN: buffer_load_dword v{{[0-9]+}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @load_private_offset_i32() #0 {
%load = load volatile i32, i32* inttoptr (i32 8 to i32*)
ret void
}
; GCN-LABEL: {{^}}load_private_offset_v2i32:
-; GCN: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s8 offset:8
+; GCN: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @load_private_offset_v2i32() #0 {
%load = load volatile <2 x i32>, <2 x i32>* inttoptr (i32 8 to <2 x i32>*)
ret void
}
; GCN-LABEL: {{^}}load_private_offset_v4i32:
-; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s8 offset:8
+; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @load_private_offset_v4i32() #0 {
%load = load volatile <4 x i32>, <4 x i32>* inttoptr (i32 8 to <4 x i32>*)
ret void
}
; GCN-LABEL: {{^}}store_private_offset_i8_max_offset:
-; GCN: buffer_store_byte v{{[0-9]+}}, off, s[4:7], s8 offset:4095
+; GCN: buffer_store_byte v{{[0-9]+}}, off, s[4:7], s2 offset:4095
define amdgpu_kernel void @store_private_offset_i8_max_offset() #0 {
store volatile i8 5, i8* inttoptr (i32 4095 to i8*)
ret void
@@ -119,7 +119,7 @@ define amdgpu_kernel void @store_private
; GCN-LABEL: {{^}}store_private_offset_i8_max_offset_plus1:
; GCN: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x1000
-; GCN: buffer_store_byte v{{[0-9]+}}, [[OFFSET]], s[4:7], s8 offen{{$}}
+; GCN: buffer_store_byte v{{[0-9]+}}, [[OFFSET]], s[4:7], s2 offen{{$}}
define amdgpu_kernel void @store_private_offset_i8_max_offset_plus1() #0 {
store volatile i8 5, i8* inttoptr (i32 4096 to i8*)
ret void
@@ -127,7 +127,7 @@ define amdgpu_kernel void @store_private
; GCN-LABEL: {{^}}store_private_offset_i8_max_offset_plus2:
; GCN: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x1000
-; GCN: buffer_store_byte v{{[0-9]+}}, [[OFFSET]], s[4:7], s8 offen offset:1{{$}}
+; GCN: buffer_store_byte v{{[0-9]+}}, [[OFFSET]], s[4:7], s2 offen offset:1{{$}}
define amdgpu_kernel void @store_private_offset_i8_max_offset_plus2() #0 {
store volatile i8 5, i8* inttoptr (i32 4097 to i8*)
ret void
Modified: llvm/trunk/test/CodeGen/AMDGPU/private-access-no-objects.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/private-access-no-objects.ll?rev=307978&r1=307977&r2=307978&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/private-access-no-objects.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/private-access-no-objects.ll Thu Jul 13 17:11:13 2017
@@ -10,14 +10,14 @@
; GCN-LABEL: {{^}}store_to_undef:
; OPT-DAG: s_mov_b64 s{{\[}}[[RSRC_LO:[0-9]+]]:{{[0-9]+\]}}, s[0:1]
; OPT-DAG: s_mov_b64 s{{\[[0-9]+}}:[[RSRC_HI:[0-9]+]]{{\]}}, s[2:3]
-; OPT-DAG: s_mov_b32 [[SOFFSET:s[0-9]+]], s7{{$}}
+; OPT-DAG: s_mov_b32 [[SOFFSET:s[0-9]+]], s5{{$}}
; OPT: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[}}[[RSRC_LO]]:[[RSRC_HI]]{{\]}}, [[SOFFSET]] offen{{$}}
; -O0 should assume spilling, so the input scratch resource descriptor
; -should be used directly without any copies.
; OPTNONE-NOT: s_mov_b32
-; OPTNONE: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], s7 offen{{$}}
+; OPTNONE: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], s5 offen{{$}}
define amdgpu_kernel void @store_to_undef() #0 {
store volatile i32 0, i32* undef
ret void
@@ -26,7 +26,7 @@ define amdgpu_kernel void @store_to_unde
; GCN-LABEL: {{^}}store_to_inttoptr:
; OPT-DAG: s_mov_b64 s{{\[}}[[RSRC_LO:[0-9]+]]:{{[0-9]+\]}}, s[0:1]
; OPT-DAG: s_mov_b64 s{{\[[0-9]+}}:[[RSRC_HI:[0-9]+]]{{\]}}, s[2:3]
-; OPT-DAG: s_mov_b32 [[SOFFSET:s[0-9]+]], s7{{$}}
+; OPT-DAG: s_mov_b32 [[SOFFSET:s[0-9]+]], s5{{$}}
; OPT: buffer_store_dword v{{[0-9]+}}, off, s{{\[}}[[RSRC_LO]]:[[RSRC_HI]]{{\]}}, [[SOFFSET]] offset:124{{$}}
define amdgpu_kernel void @store_to_inttoptr() #0 {
store volatile i32 0, i32* inttoptr (i32 124 to i32*)
@@ -36,7 +36,7 @@ define amdgpu_kernel void @store_to_intt
; GCN-LABEL: {{^}}load_from_undef:
; OPT-DAG: s_mov_b64 s{{\[}}[[RSRC_LO:[0-9]+]]:{{[0-9]+\]}}, s[0:1]
; OPT-DAG: s_mov_b64 s{{\[[0-9]+}}:[[RSRC_HI:[0-9]+]]{{\]}}, s[2:3]
-; OPT-DAG: s_mov_b32 [[SOFFSET:s[0-9]+]], s7{{$}}
+; OPT-DAG: s_mov_b32 [[SOFFSET:s[0-9]+]], s5{{$}}
; OPT: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[}}[[RSRC_LO]]:[[RSRC_HI]]{{\]}}, [[SOFFSET]] offen{{$}}
define amdgpu_kernel void @load_from_undef() #0 {
%ld = load volatile i32, i32* undef
@@ -46,7 +46,7 @@ define amdgpu_kernel void @load_from_und
; GCN-LABEL: {{^}}load_from_inttoptr:
; OPT-DAG: s_mov_b64 s{{\[}}[[RSRC_LO:[0-9]+]]:{{[0-9]+\]}}, s[0:1]
; OPT-DAG: s_mov_b64 s{{\[[0-9]+}}:[[RSRC_HI:[0-9]+]]{{\]}}, s[2:3]
-; OPT-DAG: s_mov_b32 [[SOFFSET:s[0-9]+]], s7{{$}}
+; OPT-DAG: s_mov_b32 [[SOFFSET:s[0-9]+]], s5{{$}}
; OPT: buffer_load_dword v{{[0-9]+}}, off, s{{\[}}[[RSRC_LO]]:[[RSRC_HI]]{{\]}}, [[SOFFSET]] offset:124{{$}}
define amdgpu_kernel void @load_from_inttoptr() #0 {
%ld = load volatile i32, i32* inttoptr (i32 124 to i32*)
Modified: llvm/trunk/test/CodeGen/AMDGPU/trap.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/trap.ll?rev=307978&r1=307977&r2=307978&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/trap.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/trap.ll Thu Jul 13 17:11:13 2017
@@ -19,11 +19,11 @@ declare void @llvm.debugtrap() #0
; MESA-TRAP: .section .AMDGPU.config
; MESA-TRAP: .long 47180
-; MESA-TRAP-NEXT: .long 208
+; MESA-TRAP-NEXT: .long 204
; NOMESA-TRAP: .section .AMDGPU.config
; NOMESA-TRAP: .long 47180
-; NOMESA-TRAP-NEXT: .long 144
+; NOMESA-TRAP-NEXT: .long 140
; GCN-LABEL: {{^}}hsa_trap:
; HSA-TRAP: enable_trap_handler = 1
@@ -45,11 +45,11 @@ define amdgpu_kernel void @hsa_trap() {
; MESA-TRAP: .section .AMDGPU.config
; MESA-TRAP: .long 47180
-; MESA-TRAP-NEXT: .long 208
+; MESA-TRAP-NEXT: .long 204
; NOMESA-TRAP: .section .AMDGPU.config
; NOMESA-TRAP: .long 47180
-; NOMESA-TRAP-NEXT: .long 144
+; NOMESA-TRAP-NEXT: .long 140
; GCN-WARNING: warning: <unknown>:0:0: in function hsa_debugtrap void (): debugtrap handler not supported
; GCN-LABEL: {{^}}hsa_debugtrap:
More information about the llvm-commits mailing list.